185 lines
6.8 KiB
JavaScript
185 lines
6.8 KiB
JavaScript
const fs = require("fs");
|
|
const path = require("path");
|
|
const { SearchProvider } = require("./provider_adapter");
|
|
const { formatResults, normalizeOrigin } = require("./result_formatter");
|
|
const { readSettings } = require("./settings");
|
|
const { evaluateUrl } = require("./url_policy");
|
|
|
|
/**
 * Web search tool that gates each request on the stored settings, the
 * caller's origin and a per-caller rate limit, filters provider results
 * through the URL policy, caches successful responses in memory, and appends
 * an audit record for every outcome.
 */
class WebSearchTool {
  /**
   * @param {object} [options]
   * @param {string} [options.dataDir] Directory handed to `readSettings` and
   *   used as the location of `audit.jsonl`.
   * @param {object} [options.provider] Search backend; when omitted a
   *   `SearchProvider` is constructed from `options`.
   * @param {() => number} [options.now] Millisecond clock used for timing,
   *   cache expiry and rate limiting; defaults to `Date.now`.
   */
  constructor(options = {}) {
    this.dataDir = options.dataDir;
    this.provider = options.provider || new SearchProvider(options);
    this.now = options.now || Date.now;
    // cacheKey -> { expiresAt, value }
    this.cache = new Map();
    // rate-limit key -> array of request timestamps (ms)
    this.rateLimits = new Map();
  }

  /**
   * Runs one search request.
   *
   * Gate order: tool enabled, origin allowed, non-empty query, rate limit.
   * A request that passes the gates is served from the in-memory cache when a
   * live entry exists; otherwise the provider is queried, every hit is checked
   * with `evaluateUrl`, and at most `settings.max_results` permitted rows are
   * formatted. Every exit path goes through `finish`, so every call is audited.
   *
   * @param {object} [input]
   * @param {string} [input.query] Search text; trimmed and capped at 500 characters.
   * @param {string} [input.reason] Purpose label; defaults to "general_lookup".
   * @param {string} [input.origin] Fallback origin handed to `trustedOrigin`.
   * @param {object} [input.ctx] Request context (`origin`, `platform`,
   *   `server_id`, `channel_id` are read).
   * @param {object} [input.user] Caller identity (`id` or `username` is read).
   * @param {string} [input.freshness] Forwarded to the provider and part of the cache key.
   * @param {string} [input.requested_depth] "full_page" requests page fetches
   *   when `settings.allow_full_page_fetch` is set.
   * @returns {Promise<object>} Result with `status` of "ok", "no_results",
   *   "blocked" or "unavailable". Errors thrown while searching are converted
   *   into a result; errors thrown while writing the audit record propagate.
   */
  async run(input = {}) {
    const started = this.now();
    const settings = readSettings(this.dataDir);
    const query = String(input.query || "").trim().slice(0, 500);
    const reason = String(input.reason || "general_lookup");
    const origin = trustedOrigin(input.ctx, input.origin);
    const actor = String(input.user?.id || input.user?.username || "unknown").slice(0, 120);
    const server = String(input.ctx?.server_id || input.ctx?.channel_id || "direct").slice(0, 120);
    const auditBase = { query, reason, origin, actor, server };

    const block = (blockedReason) =>
      this.finish(blockedResult(query, reason, blockedReason, settings, started, this.now()), auditBase);

    if (!settings.enabled) return block("tool_disabled");
    if (!settings.allowed_origins.includes(origin)) return block("origin_not_allowed");
    if (!query) return block("query_required");
    if (!this.consumeRateLimit(`${origin}:${server}:${actor}`, settings.requests_per_minute)) {
      return block("rate_limited");
    }

    // Everything that can change the formatted output is part of the key, so
    // a settings change never serves a response built under older settings.
    const cacheKey = JSON.stringify([
      query.toLowerCase(), reason, input.freshness || "", settings.provider_endpoint,
      settings.policy_mode, settings.url_rules, settings.safe_search, settings.max_results,
      origin, settings[`${origin}_output_chars`], settings.show_source_links,
      input.requested_depth || "search", settings.allow_full_page_fetch
    ]);

    const cached = this.cache.get(cacheKey);
    if (cached) {
      if (cached.expiresAt > this.now()) {
        // The key is case-insensitive, so echo back this caller's own query
        // rather than the casing of whichever request filled the entry.
        return this.finish(
          { ...cached.value, query, cache_hit: true, timing_ms: this.now() - started },
          auditBase
        );
      }
      // Drop the stale entry so expired responses do not accumulate.
      this.cache.delete(cacheKey);
    }

    try {
      const discovered = await this.provider.search(query, {
        ...settings,
        freshness: input.freshness
      });

      // Hand the resolver over bound to its provider; a bare method reference
      // would lose `this` once evaluateUrl invokes it.
      const { provider } = this;
      const resolveHost = typeof provider.resolveHost === "function"
        ? provider.resolveHost.bind(provider)
        : provider.resolveHost;
      const wantsFullPage = input.requested_depth === "full_page" && settings.allow_full_page_fetch;

      const allowed = [];
      for (const row of discovered) {
        if (allowed.length >= settings.max_results) break;

        const policy = await evaluateUrl(row.url, {
          mode: settings.policy_mode,
          rules: settings.url_rules,
          resolveHost
        });
        if (!policy.allowed) continue;

        const normalized = { ...row, url: policy.url };
        if (wantsFullPage) {
          try {
            const page = await this.provider.fetchPage(policy.url, settings);
            // NOTE(review): page.url is not passed through evaluateUrl again
            // here; presumably fetchPage enforces the policy itself (it can
            // raise URL_BLOCKED) — confirm.
            normalized.url = page.url;
            normalized.page_excerpt = page.text;
          } catch (error) {
            // A policy block removes the row. Any other fetch failure is
            // deliberately best-effort: the row is kept without an excerpt.
            if (error?.code === "URL_BLOCKED") continue;
          }
        }
        allowed.push(normalized);
      }

      const formatted = formatResults(allowed, { reason, origin, settings });
      const value = {
        query,
        reason: formatted.reason,
        status: allowed.length ? "ok" : "no_results",
        blocked_reason: null,
        result_count: formatted.results.length,
        results: formatted.results,
        condensed_text: formatted.condensed_text,
        output_budget_chars: formatted.output_budget_chars,
        truncated: formatted.truncated,
        timing_ms: this.now() - started,
        cache_hit: false,
        policy_mode: settings.policy_mode
      };

      // Only non-empty responses are cached, and only when caching is enabled.
      if (settings.cache_ttl_seconds > 0 && allowed.length) {
        this.cache.set(cacheKey, {
          expiresAt: this.now() + settings.cache_ttl_seconds * 1000,
          value
        });
      }
      return this.finish(value, auditBase);
    } catch (error) {
      return this.finish({
        query,
        reason,
        status: error?.code === "URL_BLOCKED" ? "blocked" : "unavailable",
        blocked_reason: error?.blockedReason || null,
        error: cleanError(error),
        result_count: 0,
        results: [],
        condensed_text: "",
        timing_ms: this.now() - started,
        cache_hit: false,
        policy_mode: settings.policy_mode
      }, auditBase);
    }
  }

  /**
   * Sliding 60-second window rate limiter.
   *
   * @param {string} key Bucket identifier.
   * @param {number} maximum Requests permitted per window.
   * @returns {boolean} `true` when the request was recorded, `false` when the
   *   bucket already holds `maximum` requests from the last 60 seconds.
   */
  consumeRateLimit(key, maximum) {
    const cutoff = this.now() - 60000;
    const recent = (this.rateLimits.get(key) || []).filter((timestamp) => timestamp > cutoff);
    const permitted = !(recent.length >= maximum);
    if (permitted) recent.push(this.now());
    // Store the pruned list in both cases so old timestamps are dropped.
    this.rateLimits.set(key, recent);
    return permitted;
  }

  /**
   * Fills in `user_message` when it is missing, writes the audit record and
   * returns the same result object.
   *
   * @param {object} result Result about to be returned to the caller (mutated).
   * @param {object} base Audit fields shared by every outcome of the request.
   * @returns {object} `result`.
   */
  finish(result, base) {
    result.user_message ||= userMessage(result);
    this.audit({
      ...base,
      status: result.status,
      allowed: result.status === "ok" || result.status === "no_results",
      blocked_reason: result.blocked_reason || null,
      result_count: result.result_count,
      timing_ms: result.timing_ms,
      cache_hit: result.cache_hit
    });
    return result;
  }

  /**
   * Appends one JSON line, stamped with the current wall-clock time, to
   * `audit.jsonl` inside `dataDir`, creating the directory when needed.
   *
   * @param {object} entry Fields to record.
   */
  audit(entry) {
    fs.mkdirSync(this.dataDir, { recursive: true });
    const line = JSON.stringify({
      timestamp: new Date().toISOString(),
      ...entry
    });
    fs.appendFileSync(path.join(this.dataDir, "audit.jsonl"), `${line}\n`);
  }
}
|
|
|
|
/**
 * Picks the origin label for a request and normalizes it.
 *
 * Preference order: `ctx.origin`, then `ctx.platform`, then `fallback`, then
 * the literal "other". Falsy candidates are skipped.
 *
 * @param {object} [ctx] Request context; may be null or undefined.
 * @param {string} [fallback] Origin to use when the context supplies none.
 * @returns {*} Whatever `normalizeOrigin` returns for the chosen candidate.
 */
function trustedOrigin(ctx, fallback) {
  let candidate = ctx?.origin;
  if (!candidate) candidate = ctx?.platform;
  if (!candidate) candidate = fallback;
  if (!candidate) candidate = "other";
  return normalizeOrigin(candidate);
}
|
|
|
|
/**
 * Builds the result object for a request that was refused before searching.
 *
 * @param {string} query Sanitized query text.
 * @param {string} reason Purpose label of the request.
 * @param {string} blockedReason Code stored in `blocked_reason`.
 * @param {object} settings Settings object; only `policy_mode` is read.
 * @param {number} started Millisecond timestamp at which the request began.
 * @param {number} [now] Current millisecond timestamp; defaults to `Date.now()`.
 * @returns {object} Result with status "blocked", no results, and a
 *   `timing_ms` that is never below zero.
 */
function blockedResult(query, reason, blockedReason, settings, started, now = Date.now()) {
  // Clamp so a clock that runs backwards cannot report negative timing.
  const elapsed = Math.max(0, now - started);
  const outcome = {
    query,
    reason,
    status: "blocked",
    blocked_reason: blockedReason,
    result_count: 0,
    results: [],
    condensed_text: "",
    timing_ms: elapsed,
    cache_hit: false,
    policy_mode: settings.policy_mode
  };
  return outcome;
}
|
|
|
|
/**
 * Maps a thrown value to a fixed, user-safe description so that raw error
 * text is never returned.
 *
 * @param {*} error Caught value; may be null or undefined.
 * @returns {string} A timeout message when `error.name` is "AbortError",
 *   otherwise a generic unavailability message.
 */
function cleanError(error) {
  const timedOut = error?.name === "AbortError";
  return timedOut ? "Search provider timed out." : "Search provider is unavailable.";
}
|
|
|
|
/**
 * Produces the user-facing sentence for a result object.
 *
 * `status` is consulted first ("ok", "no_results", "unavailable"); for any
 * other status the message is chosen from `blocked_reason`. An empty query
 * ("query_required") gets its own message instead of being reported as a
 * safety-policy block, which it is not.
 *
 * @param {object} result Result carrying `status`, `blocked_reason` and,
 *   for "ok", `condensed_text`.
 * @returns {string} Message to show to the user.
 */
function userMessage(result) {
  switch (result.status) {
    case "ok":
      return result.condensed_text || "Web search completed without a usable summary.";
    case "no_results":
      return "No permitted web results were found.";
    case "unavailable":
      return "Web search is currently unavailable.";
    default:
      break;
  }

  switch (result.blocked_reason) {
    case "rate_limited":
      return "Web search is temporarily rate-limited.";
    case "origin_not_allowed":
      return "Web search is not enabled for this platform.";
    case "tool_disabled":
      return "Web search is disabled.";
    case "query_required":
      return "A search query is required.";
    default:
      return "Web search was blocked by the configured safety policy.";
  }
}
|
|
|
|
// Public surface of this module: the tool class and its helper functions.
module.exports = { WebSearchTool, blockedResult, cleanError, trustedOrigin, userMessage };
|