const { sanitizeText } = require("./html_extractor"); const { decodeBody } = require("./page_fetcher"); const { evaluateNetworkTarget } = require("./url_policy"); class ExternalSearchProvider { constructor(options = {}) { this.fetcher = options.fetcher; } async search(query, options = {}) { const settings = options.settings; if (!settings.external_provider_endpoint) { throw new Error("The optional external search endpoint is not configured."); } const endpoint = buildEndpoint(query, settings, options.freshness); const policy = await evaluateNetworkTarget(endpoint, { resolveHost: this.fetcher.resolveHost }); if (!policy.allowed) throw new Error(`External provider blocked: ${policy.reason}.`); const headers = {}; if (settings.external_provider_api_key) { headers[settings.external_provider_api_key_header] = [ settings.external_provider_api_key_prefix, settings.external_provider_api_key ].filter(Boolean).join(" "); } const response = await this.fetcher.request(policy.url, settings, { networkOnly: true, timeoutMs: settings.search_timeout_ms, maxBytes: Math.min(settings.max_fetch_bytes, 2 * 1024 * 1024), accept: "application/json", headers }); if (!response.ok) throw new Error(`External search provider failed (${response.status}).`); const body = decodeBody( response.body, response.headers.get("content-encoding"), Math.min(settings.max_fetch_bytes, 2 * 1024 * 1024) ); const payload = JSON.parse(body.toString("utf8")); return { provider: "external_json", results: normalizeProviderResults(payload, settings.external_provider_adapter), warnings: [], adapter_errors: [] }; } } function buildEndpoint(query, settings, freshness = "") { const configured = settings.external_provider_endpoint; const endpoint = configured.includes("{query}") ? configured.replaceAll("{query}", encodeURIComponent(query)) : configured; const url = new URL(endpoint); if (!configured.includes("{query}")) { url.searchParams.set(settings.external_provider_query_parameter || "q", query); } url.searchParams.set("format", "json"); url.searchParams.set("safesearch", safeSearchValue(settings.safe_search_level)); url.searchParams.set("count", String(settings.max_results)); if (freshness) url.searchParams.set("time_range", String(freshness).slice(0, 32)); return url.href; } function normalizeProviderResults(payload, adapter) { const rows = adapter === "searxng_json" ? payload?.results : payload?.results || payload?.items || payload?.web?.results?.value; if (!Array.isArray(rows)) throw new Error("External provider returned an unsupported result list."); return rows.map((row, index) => { const url = String(row.url || row.link || ""); let domain = ""; try { domain = new URL(url).hostname; } catch {} return { title: sanitizeText(row.title || row.name || "Untitled result", 240), url, domain, snippet: sanitizeText(row.content || row.snippet || row.description || "", 900), date: normalizeDate(row.publishedDate || row.published_date || row.date), rank: index + 1, source: sanitizeText(row.engine || row.source || "external_json", 80), raw_source_id: sanitizeText(row.id || "", 120) || null, relevance_score: finiteScore(row.score, index) }; }).filter((row) => /^https?:\/\//i.test(row.url)); } function safeSearchValue(level) { if (level === "off") return "0"; if (level === "strict") return "2"; return "1"; } function normalizeDate(value) { if (!value) return null; const date = new Date(value); return Number.isNaN(date.getTime()) ? null : date.toISOString(); } function finiteScore(value, index) { const number = Number(value); return Number.isFinite(number) ? number : Math.max(0.1, 1 - index * 0.1); } module.exports = { ExternalSearchProvider, SearchProvider: ExternalSearchProvider, buildEndpoint, normalizeProviderResults, safeSearchValue };