Lumi/plugins/lumi_ai_web_search/backend/search_tool.js
2026-06-13 21:32:36 +02:00

185 lines
6.8 KiB
JavaScript

const fs = require("fs");
const path = require("path");
const { SearchProvider } = require("./provider_adapter");
const { formatResults, normalizeOrigin } = require("./result_formatter");
const { readSettings } = require("./settings");
const { evaluateUrl } = require("./url_policy");
/**
 * Policy-gated web search tool.
 *
 * Each call to `run` reads the current settings, applies the enable /
 * origin / query / rate-limit gates, serves a cached answer when one is
 * still valid, otherwise queries the provider, filters every hit through
 * `evaluateUrl`, formats the survivors and writes one audit record.
 */
class WebSearchTool {
  /**
   * @param {object} [options]
   * @param {string} [options.dataDir] Directory handed to `readSettings` and
   *   used as the location of `audit.jsonl`.
   * @param {object} [options.provider] Search backend exposing `search`, and
   *   optionally `fetchPage` / `resolveHost`. Defaults to
   *   `new SearchProvider(options)`.
   * @param {() => number} [options.now] Clock returning milliseconds;
   *   defaults to `Date.now`.
   */
  constructor(options = {}) {
    this.dataDir = options.dataDir;
    this.provider = options.provider || new SearchProvider(options);
    this.now = options.now || Date.now;
    // cacheKey -> { expiresAt, value }
    this.cache = new Map();
    // rate-limit key -> array of request timestamps (ms)
    this.rateLimits = new Map();
  }

  /**
   * Executes one search request.
   *
   * @param {object} [input]
   * @param {string} [input.query] Search text; trimmed and capped at 500 chars.
   * @param {string} [input.reason] Lookup reason; defaults to "general_lookup".
   * @param {object} [input.ctx] Call context (`origin`, `platform`,
   *   `server_id`, `channel_id` are read).
   * @param {string} [input.origin] Fallback origin when `ctx` carries none.
   * @param {object} [input.user] Caller identity (`id` or `username` is read).
   * @param {string} [input.freshness] Forwarded to the provider.
   * @param {string} [input.requested_depth] "full_page" requests page
   *   excerpts when settings allow it.
   * @returns {Promise<object>} Result object with `status` of "ok",
   *   "no_results", "blocked" or "unavailable".
   */
  async run(input = {}) {
    const started = this.now();
    const settings = readSettings(this.dataDir);
    const query = String(input.query || "").trim().slice(0, 500);
    const reason = String(input.reason || "general_lookup");
    const origin = trustedOrigin(input.ctx, input.origin);
    const actor = String(input.user?.id || input.user?.username || "unknown").slice(0, 120);
    const server = String(input.ctx?.server_id || input.ctx?.channel_id || "direct").slice(0, 120);
    const auditBase = { query, reason, origin, actor, server };
    const block = (blockedReason) =>
      this.finish(blockedResult(query, reason, blockedReason, settings, started, this.now()), auditBase);

    if (!settings.enabled) return block("tool_disabled");
    if (!settings.allowed_origins.includes(origin)) return block("origin_not_allowed");
    if (!query) return block("query_required");
    if (!this.consumeRateLimit(`${origin}:${server}:${actor}`, settings.requests_per_minute)) {
      return block("rate_limited");
    }

    // Every setting that can change the produced result is part of the key.
    const cacheKey = JSON.stringify([
      query.toLowerCase(), reason, input.freshness || "", settings.provider_endpoint,
      settings.policy_mode, settings.url_rules, settings.safe_search, settings.max_results,
      origin, settings[`${origin}_output_chars`], settings.show_source_links,
      input.requested_depth || "search", settings.allow_full_page_fetch
    ]);
    const cached = this.cache.get(cacheKey);
    if (cached) {
      if (cached.expiresAt > this.now()) {
        return this.finish({ ...cached.value, cache_hit: true, timing_ms: this.now() - started }, auditBase);
      }
      // Stale entry: remove it so it cannot linger if the refresh below is not cached.
      this.cache.delete(cacheKey);
    }

    try {
      const discovered = await this.provider.search(query, {
        ...settings,
        freshness: input.freshness
      });
      // Bind so a method-style resolveHost keeps the provider as `this`.
      const providerResolveHost = this.provider.resolveHost;
      const resolveHost = typeof providerResolveHost === "function"
        ? providerResolveHost.bind(this.provider)
        : providerResolveHost;
      const wantsFullPage = input.requested_depth === "full_page" && settings.allow_full_page_fetch;

      const allowed = [];
      for (const row of discovered) {
        if (allowed.length >= settings.max_results) break;
        const policy = await evaluateUrl(row.url, {
          mode: settings.policy_mode,
          rules: settings.url_rules,
          resolveHost
        });
        if (!policy.allowed) continue;
        const normalized = { ...row, url: policy.url };
        if (wantsFullPage) {
          try {
            const page = await this.provider.fetchPage(policy.url, settings);
            normalized.url = page.url;
            normalized.page_excerpt = page.text;
          } catch (error) {
            // A blocked page drops the hit entirely. Any other fetch failure is
            // deliberately ignored: the hit is kept without a page excerpt.
            if (error.code === "URL_BLOCKED") continue;
          }
        }
        allowed.push(normalized);
      }

      const formatted = formatResults(allowed, { reason, origin, settings });
      const value = {
        query,
        reason: formatted.reason,
        status: allowed.length ? "ok" : "no_results",
        blocked_reason: null,
        result_count: formatted.results.length,
        results: formatted.results,
        condensed_text: formatted.condensed_text,
        output_budget_chars: formatted.output_budget_chars,
        truncated: formatted.truncated,
        timing_ms: this.now() - started,
        cache_hit: false,
        policy_mode: settings.policy_mode
      };
      // Only non-empty answers are cached.
      if (settings.cache_ttl_seconds > 0 && allowed.length) {
        const storedAt = this.now();
        this.pruneExpiredCache(storedAt);
        this.cache.set(cacheKey, {
          expiresAt: storedAt + settings.cache_ttl_seconds * 1000,
          value
        });
      }
      return this.finish(value, auditBase);
    } catch (error) {
      return this.finish({
        query,
        reason,
        status: error.code === "URL_BLOCKED" ? "blocked" : "unavailable",
        blocked_reason: error.blockedReason || null,
        error: cleanError(error),
        result_count: 0,
        results: [],
        condensed_text: "",
        timing_ms: this.now() - started,
        cache_hit: false,
        policy_mode: settings.policy_mode
      }, auditBase);
    }
  }

  /**
   * Removes every cache entry whose expiry is at or before `now`, so expired
   * answers do not accumulate for the lifetime of the tool.
   *
   * @param {number} [now] Reference time in ms; defaults to the tool clock.
   */
  pruneExpiredCache(now = this.now()) {
    for (const [key, entry] of this.cache) {
      if (entry.expiresAt <= now) this.cache.delete(key);
    }
  }

  /**
   * Records one request for `key` unless `maximum` requests were already
   * recorded within the last 60 000 ms.
   *
   * @param {string} key Rate-limit bucket identifier.
   * @param {number} maximum Requests permitted per 60 000 ms.
   * @returns {boolean} true when the request was recorded, false when denied.
   */
  consumeRateLimit(key, maximum) {
    const cutoff = this.now() - 60000;
    const recent = (this.rateLimits.get(key) || []).filter((timestamp) => timestamp > cutoff);
    if (recent.length >= maximum) {
      // Store the trimmed list even on denial so old timestamps are dropped.
      this.rateLimits.set(key, recent);
      return false;
    }
    recent.push(this.now());
    this.rateLimits.set(key, recent);
    return true;
  }

  /**
   * Fills in `user_message` when it is missing, writes the audit record and
   * returns the same result object.
   *
   * @param {object} result Result produced by `run`; mutated in place.
   * @param {object} base Audit fields shared by every outcome of the request.
   * @returns {object} `result`.
   */
  finish(result, base) {
    result.user_message ||= userMessage(result);
    this.audit({
      ...base,
      status: result.status,
      // "no_results" counts as allowed: the request itself was permitted.
      allowed: result.status === "ok" || result.status === "no_results",
      blocked_reason: result.blocked_reason || null,
      result_count: result.result_count,
      timing_ms: result.timing_ms,
      cache_hit: result.cache_hit
    });
    return result;
  }

  /**
   * Synchronously appends `entry`, prefixed with an ISO timestamp, as one
   * JSON line to `audit.jsonl` inside `dataDir`, creating the directory first
   * if necessary.
   *
   * @param {object} entry Fields to record.
   */
  audit(entry) {
    fs.mkdirSync(this.dataDir, { recursive: true });
    fs.appendFileSync(path.join(this.dataDir, "audit.jsonl"), `${JSON.stringify({
      timestamp: new Date().toISOString(),
      ...entry
    })}\n`);
  }
}
/**
 * Picks the origin label for a request and normalizes it.
 *
 * Preference order: `ctx.origin`, then `ctx.platform`, then `fallback`,
 * and finally the literal "other" when none of them is truthy.
 *
 * @param {object} [ctx] Call context; may be null or undefined.
 * @param {string} [fallback] Origin to use when the context supplies none.
 * @returns {*} Whatever `normalizeOrigin` returns for the chosen label.
 */
function trustedOrigin(ctx, fallback) {
  let candidate = ctx?.origin;
  if (!candidate) candidate = ctx?.platform;
  if (!candidate) candidate = fallback;
  if (!candidate) candidate = "other";
  return normalizeOrigin(candidate);
}
/**
 * Builds the result object for a request that was refused before any search.
 *
 * @param {string} query Query text as received.
 * @param {string} reason Lookup reason.
 * @param {string} blockedReason Code stored in `blocked_reason`.
 * @param {object} settings Active settings; only `policy_mode` is read.
 * @param {number} started Request start time in ms.
 * @param {number} [now] Current time in ms; defaults to `Date.now()`.
 * @returns {object} Result with status "blocked", no results and a
 *   non-negative `timing_ms`.
 */
function blockedResult(query, reason, blockedReason, settings, started, now = Date.now()) {
  const elapsed = now - started;
  const outcome = {
    query,
    reason,
    status: "blocked",
    blocked_reason: blockedReason,
    result_count: 0,
    results: [],
    condensed_text: ""
  };
  // Clamp so a clock that moved backwards never reports negative time.
  outcome.timing_ms = Math.max(0, elapsed);
  outcome.cache_hit = false;
  outcome.policy_mode = settings.policy_mode;
  return outcome;
}
/**
 * Maps a provider failure to a fixed, user-safe message so that no detail of
 * the underlying error is exposed.
 *
 * @param {*} error Caught value; may be null or undefined.
 * @returns {string} Timeout message for an "AbortError", otherwise the
 *   generic unavailable message.
 */
function cleanError(error) {
  const timedOut = error?.name === "AbortError";
  return timedOut ? "Search provider timed out." : "Search provider is unavailable.";
}
/**
 * Chooses the user-facing text for a search result.
 *
 * The status is examined first; only when it is none of "ok", "no_results"
 * or "unavailable" does `blocked_reason` select the message.
 *
 * @param {object} result Result carrying `status`, and optionally
 *   `condensed_text` and `blocked_reason`.
 * @returns {string} Message to show to the user.
 */
function userMessage(result) {
  switch (result.status) {
    case "ok":
      return result.condensed_text || "Web search completed without a usable summary.";
    case "no_results":
      return "No permitted web results were found.";
    case "unavailable":
      return "Web search is currently unavailable.";
    default:
      break;
  }
  switch (result.blocked_reason) {
    case "rate_limited":
      return "Web search is temporarily rate-limited.";
    case "origin_not_allowed":
      return "Web search is not enabled for this platform.";
    case "tool_disabled":
      return "Web search is disabled.";
    default:
      return "Web search was blocked by the configured safety policy.";
  }
}
// Public surface of this module: the tool class plus the helper functions
// defined alongside it in this file.
module.exports = {
WebSearchTool,
blockedResult,
cleanError,
trustedOrigin,
userMessage
};