const fs = require("fs");
const path = require("path");

const { SearchProvider } = require("./provider_adapter");
const { formatResults, normalizeOrigin } = require("./result_formatter");
const { readSettings } = require("./settings");
const { evaluateUrl } = require("./url_policy");

/** Length, in milliseconds, of the sliding window used by the rate limiter. */
const RATE_LIMIT_WINDOW_MS = 60000;

/**
 * Web search tool: gates a query behind settings, origin and rate-limit
 * checks, runs it through the search provider, filters the hits through the
 * URL policy, formats them, caches successful answers in memory and appends
 * one audit record per call.
 */
class WebSearchTool {
  /**
   * @param {object} [options]
   * @param {string} [options.dataDir] - Directory handed to `readSettings` and
   *   used as the location of `audit.jsonl`.
   * @param {object} [options.provider] - Search provider; when omitted a
   *   `SearchProvider` is constructed from `options`.
   * @param {() => number} [options.now] - Clock returning milliseconds;
   *   defaults to `Date.now`.
   */
  constructor(options = {}) {
    this.dataDir = options.dataDir;
    this.provider = options.provider || new SearchProvider(options);
    this.now = options.now || Date.now;
    this.cache = new Map();
    this.rateLimits = new Map();
  }

  /**
   * Executes one search request and returns its result record.
   *
   * Fields read from `input`: `query`, `reason`, `origin`, `freshness`,
   * `requested_depth`, `user.id` / `user.username`, and `ctx.origin`,
   * `ctx.platform`, `ctx.server_id`, `ctx.channel_id`.
   *
   * @param {object} [input] - Request description; `null` is treated like an
   *   empty request.
   * @returns {Promise<object>} Result with `status` of `"ok"`, `"no_results"`,
   *   `"blocked"` or `"unavailable"`, plus a `user_message`.
   */
  async run(input = {}) {
    // The default parameter only covers `undefined`; guard `null` as well.
    const request = input ?? {};
    const started = this.now();
    const settings = readSettings(this.dataDir);

    const query = String(request.query || "").trim().slice(0, 500);
    const reason = String(request.reason || "general_lookup");
    const origin = trustedOrigin(request.ctx, request.origin);
    const actor = String(request.user?.id || request.user?.username || "unknown").slice(0, 120);
    const server = String(request.ctx?.server_id || request.ctx?.channel_id || "direct").slice(0, 120);
    const auditBase = { query, reason, origin, actor, server };

    const block = (blockedReason) =>
      this.finish(blockedResult(query, reason, blockedReason, settings, started, this.now()), auditBase);

    if (!settings.enabled) return block("tool_disabled");
    if (!settings.allowed_origins.includes(origin)) return block("origin_not_allowed");
    if (!query) return block("query_required");
    if (!this.consumeRateLimit(`${origin}:${server}:${actor}`, settings.requests_per_minute)) {
      return block("rate_limited");
    }

    const cacheKey = buildCacheKey(query, reason, request, settings, origin);
    const cached = this.cache.get(cacheKey);
    if (cached) {
      if (cached.expiresAt > this.now()) {
        return this.finish({ ...cached.value, cache_hit: true, timing_ms: this.now() - started }, auditBase);
      }
      // Drop the stale entry so expired answers do not pile up in memory.
      this.cache.delete(cacheKey);
    }

    let outcome;
    try {
      const discovered = await this.provider.search(query, { ...settings, freshness: request.freshness });
      const wantsFullPage = request.requested_depth === "full_page" && settings.allow_full_page_fetch;
      const allowed = await collectAllowedResults(this.provider, discovered, settings, wantsFullPage);
      const formatted = formatResults(allowed, { reason, origin, settings });

      outcome = {
        query,
        reason: formatted.reason,
        status: allowed.length ? "ok" : "no_results",
        blocked_reason: null,
        result_count: formatted.results.length,
        results: formatted.results,
        condensed_text: formatted.condensed_text,
        output_budget_chars: formatted.output_budget_chars,
        truncated: formatted.truncated,
        timing_ms: this.now() - started,
        cache_hit: false,
        policy_mode: settings.policy_mode
      };

      // Only non-empty answers are cached.
      if (settings.cache_ttl_seconds > 0 && allowed.length) {
        this.cache.set(cacheKey, { expiresAt: this.now() + settings.cache_ttl_seconds * 1000, value: outcome });
      }
    } catch (error) {
      // Failures of the search, URL policy or formatting steps end up here.
      // `error` may be any thrown value, so it is read defensively.
      outcome = {
        query,
        reason,
        status: error?.code === "URL_BLOCKED" ? "blocked" : "unavailable",
        blocked_reason: error?.blockedReason || null,
        error: cleanError(error),
        result_count: 0,
        results: [],
        condensed_text: "",
        timing_ms: this.now() - started,
        cache_hit: false,
        policy_mode: settings.policy_mode
      };
    }

    // Finishing happens outside the try block so that an audit-log write
    // failure is not re-reported as a search-provider outage.
    return this.finish(outcome, auditBase);
  }

  /**
   * Records one request against `key` unless the key already has `maximum`
   * or more requests inside the last `RATE_LIMIT_WINDOW_MS` milliseconds.
   *
   * @param {string} key - Rate-limit bucket identifier.
   * @param {number} maximum - Requests allowed per window.
   * @returns {boolean} `true` when the request was recorded, `false` when it
   *   was refused.
   */
  consumeRateLimit(key, maximum) {
    const windowStart = this.now() - RATE_LIMIT_WINDOW_MS;
    const previous = this.rateLimits.get(key) || [];
    const recent = previous.filter((timestamp) => timestamp > windowStart);
    const permitted = !(recent.length >= maximum);
    if (permitted) {
      recent.push(this.now());
    }
    this.rateLimits.set(key, recent);
    return permitted;
  }

  /**
   * Fills in `result.user_message` when it is missing or empty, writes the
   * audit record and returns the same `result` object.
   *
   * @param {object} result - Result record; mutated in place.
   * @param {object} base - Audit fields shared by every outcome of the call.
   * @returns {object} The `result` argument.
   */
  finish(result, base) {
    result.user_message ||= userMessage(result);
    const { status } = result;
    this.audit({
      ...base,
      status,
      allowed: status === "ok" || status === "no_results",
      blocked_reason: result.blocked_reason || null,
      result_count: result.result_count,
      timing_ms: result.timing_ms,
      cache_hit: result.cache_hit
    });
    return result;
  }

  /**
   * Appends `entry` as one JSON line to `audit.jsonl` inside `dataDir`,
   * creating the directory first when needed. A `timestamp` key present in
   * `entry` overrides the generated one.
   *
   * @param {object} entry - Fields to record.
   */
  audit(entry) {
    fs.mkdirSync(this.dataDir, { recursive: true });
    const record = { timestamp: new Date().toISOString(), ...entry };
    fs.appendFileSync(path.join(this.dataDir, "audit.jsonl"), `${JSON.stringify(record)}\n`);
  }
}

/**
 * Builds the cache key from every request and settings value that the cached
 * answer depends on.
 */
function buildCacheKey(query, reason, request, settings, origin) {
  return JSON.stringify([
    query.toLowerCase(),
    reason,
    request.freshness || "",
    settings.provider_endpoint,
    settings.policy_mode,
    settings.url_rules,
    settings.safe_search,
    settings.max_results,
    origin,
    settings[`${origin}_output_chars`],
    settings.show_source_links,
    request.requested_depth || "search",
    settings.allow_full_page_fetch
  ]);
}

/**
 * Walks the provider hits in order, keeps those the URL policy allows (up to
 * `settings.max_results`) and, when `wantsFullPage` is truthy, attaches the
 * fetched page text as `page_excerpt`.
 */
async function collectAllowedResults(provider, discovered, settings, wantsFullPage) {
  // Bind the resolver so a provider method keeps its receiver when the URL
  // policy invokes it as a plain function.
  const resolveHost =
    typeof provider.resolveHost === "function" ? provider.resolveHost.bind(provider) : provider.resolveHost;

  const allowed = [];
  for (const row of discovered) {
    if (allowed.length >= settings.max_results) break;

    const policy = await evaluateUrl(row.url, {
      mode: settings.policy_mode,
      rules: settings.url_rules,
      resolveHost
    });
    if (!policy.allowed) continue;

    const normalized = { ...row, url: policy.url };
    if (wantsFullPage) {
      try {
        const page = await provider.fetchPage(policy.url, settings);
        normalized.url = page.url;
        normalized.page_excerpt = page.text;
      } catch (error) {
        // A blocked page drops the hit entirely; any other fetch failure
        // keeps the hit without an excerpt.
        if (error?.code === "URL_BLOCKED") continue;
      }
    }
    allowed.push(normalized);
  }
  return allowed;
}

/**
 * Picks the origin for a request: `ctx.origin`, then `ctx.platform`, then the
 * supplied fallback, then `"other"`, passed through `normalizeOrigin`.
 *
 * @param {object} [ctx] - Request context.
 * @param {string} [fallback] - Value used when the context names no origin.
 * @returns {*} Whatever `normalizeOrigin` returns for the chosen value.
 */
function trustedOrigin(ctx, fallback) {
  const candidate = ctx?.origin || ctx?.platform || fallback || "other";
  return normalizeOrigin(candidate);
}

/**
 * Builds the result record for a request refused before any search ran.
 *
 * @param {string} query
 * @param {string} reason
 * @param {string} blockedReason - Stored as `blocked_reason`.
 * @param {object} settings - Only `policy_mode` is read.
 * @param {number} started - Clock reading taken when the request began.
 * @param {number} [now] - Current clock reading; defaults to `Date.now()`.
 * @returns {object} Result with `status: "blocked"` and no results.
 */
function blockedResult(query, reason, blockedReason, settings, started, now = Date.now()) {
  const elapsed = Math.max(0, now - started);
  return {
    query,
    reason,
    status: "blocked",
    blocked_reason: blockedReason,
    result_count: 0,
    results: [],
    condensed_text: "",
    timing_ms: elapsed,
    cache_hit: false,
    policy_mode: settings.policy_mode
  };
}

/**
 * Maps a thrown value to one of two fixed messages, so no detail of the
 * original error appears in the result.
 *
 * @param {*} error - Any thrown value.
 * @returns {string}
 */
function cleanError(error) {
  const timedOut = error?.name === "AbortError";
  return timedOut ? "Search provider timed out." : "Search provider is unavailable.";
}

/**
 * Chooses the user-facing message for a result from its `status` and, for
 * any status not handled explicitly, its `blocked_reason`.
 *
 * @param {object} result
 * @returns {string}
 */
function userMessage(result) {
  switch (result.status) {
    case "ok":
      return result.condensed_text || "Web search completed without a usable summary.";
    case "no_results":
      return "No permitted web results were found.";
    case "unavailable":
      return "Web search is currently unavailable.";
    default:
      break;
  }

  switch (result.blocked_reason) {
    case "rate_limited":
      return "Web search is temporarily rate-limited.";
    case "origin_not_allowed":
      return "Web search is not enabled for this platform.";
    case "tool_disabled":
      return "Web search is disabled.";
    default:
      return "Web search was blocked by the configured safety policy.";
  }
}

module.exports = { WebSearchTool, blockedResult, cleanError, trustedOrigin, userMessage };