// NOTE(review): stray listing metadata, apparently from a file viewer: 111 lines, 4.0 KiB, JavaScript.
const { sanitizeText } = require("./html_extractor");
const { decodeBody } = require("./page_fetcher");
const { evaluateNetworkTarget } = require("./url_policy");
/**
 * Search provider that queries an operator-configured external JSON endpoint.
 *
 * `search()` builds the request URL from the settings, lets
 * `evaluateNetworkTarget` vet it, performs the request through the injected
 * fetcher, and hands the parsed JSON payload to `normalizeProviderResults`.
 */
class ExternalSearchProvider {
  /**
   * @param {object} [options]
   * @param {object} [options.fetcher] - Transport used by `search()`. It must
   *   expose `request(url, settings, requestOptions)`; its `resolveHost`
   *   member, if any, is handed to the network policy check.
   */
  constructor(options = {}) {
    this.fetcher = options.fetcher;
  }

  /**
   * Runs one query against the configured external endpoint.
   *
   * @param {string} query - Search terms.
   * @param {object} [options]
   * @param {object} [options.settings] - Reads `external_provider_endpoint`,
   *   `external_provider_api_key`, `external_provider_api_key_header`,
   *   `external_provider_api_key_prefix`, `external_provider_adapter`,
   *   `search_timeout_ms` and `max_fetch_bytes`; the whole object is also
   *   forwarded to `buildEndpoint` and to the fetcher.
   * @param {string} [options.freshness] - Forwarded to `buildEndpoint`.
   * @returns {Promise<{provider: string, results: object[], warnings: Array, adapter_errors: Array}>}
   * @throws {Error} When no endpoint is configured, when the policy check
   *   rejects the URL, or when the provider response is not OK. Errors raised
   *   by the fetcher, `decodeBody`, `JSON.parse` or the normalizer propagate
   *   unchanged.
   */
  async search(query, options = {}) {
    // Missing settings count as "endpoint not configured", so callers get the
    // descriptive error rather than a TypeError from a property access.
    const settings = options?.settings ?? {};
    if (!settings.external_provider_endpoint) {
      throw new Error("The optional external search endpoint is not configured.");
    }

    const endpoint = buildEndpoint(query, settings, options?.freshness);

    // Bind the resolver so an implementation that relies on `this` keeps
    // working once it is invoked as a bare callback by the policy check.
    const { fetcher } = this;
    const resolveHost = typeof fetcher.resolveHost === "function"
      ? fetcher.resolveHost.bind(fetcher)
      : fetcher.resolveHost;
    const policy = await evaluateNetworkTarget(endpoint, { resolveHost });
    if (!policy.allowed) throw new Error(`External provider blocked: ${policy.reason}.`);

    const headers = {};
    if (settings.external_provider_api_key) {
      // NOTE(review): assumes `external_provider_api_key_header` is populated
      // whenever an API key is configured - confirm against settings defaults.
      headers[settings.external_provider_api_key_header] = [
        settings.external_provider_api_key_prefix,
        settings.external_provider_api_key
      ].filter(Boolean).join(" ");
    }

    // Provider responses are capped at 2 MiB. An unset or non-numeric
    // `max_fetch_bytes` falls back to that cap instead of becoming NaN.
    const hardCapBytes = 2 * 1024 * 1024;
    const configuredBytes = Number(settings.max_fetch_bytes);
    const maxBytes = Number.isFinite(configuredBytes)
      ? Math.min(configuredBytes, hardCapBytes)
      : hardCapBytes;

    const response = await fetcher.request(policy.url, settings, {
      networkOnly: true,
      timeoutMs: settings.search_timeout_ms,
      maxBytes,
      accept: "application/json",
      headers
    });
    if (!response.ok) throw new Error(`External search provider failed (${response.status}).`);

    const body = decodeBody(
      response.body,
      response.headers.get("content-encoding"),
      maxBytes
    );
    const payload = JSON.parse(body.toString("utf8"));

    return {
      provider: "external_json",
      results: normalizeProviderResults(payload, settings.external_provider_adapter),
      warnings: [],
      adapter_errors: []
    };
  }
}
/**
 * Builds the absolute request URL for the configured external provider.
 *
 * The search terms go into every `{query}` placeholder of the configured
 * endpoint (URI-component encoded) or, when the endpoint has no placeholder,
 * into a query-string parameter. `format`, `safesearch`, and optionally
 * `count` and `time_range` are then set on the query string, replacing any
 * same-named parameters already present in the configured endpoint.
 *
 * @param {string} query - Search terms.
 * @param {object} settings - Reads `external_provider_endpoint`,
 *   `external_provider_query_parameter` (falls back to "q"),
 *   `safe_search_level` and `max_results`.
 * @param {string} [freshness=""] - When truthy, sent as `time_range`,
 *   truncated to 32 characters.
 * @returns {string} The serialized URL.
 * @throws {TypeError} From the `URL` constructor when the endpoint is not a
 *   valid absolute URL.
 */
function buildEndpoint(query, settings, freshness = "") {
  const template = settings.external_provider_endpoint;
  const hasPlaceholder = template.includes("{query}");
  const url = new URL(
    hasPlaceholder
      ? template.replaceAll("{query}", encodeURIComponent(query))
      : template
  );
  const { searchParams } = url;

  if (!hasPlaceholder) {
    searchParams.set(settings.external_provider_query_parameter || "q", query);
  }
  searchParams.set("format", "json");
  searchParams.set("safesearch", safeSearchValue(settings.safe_search_level));

  // Only send a result count that is actually configured; an unset or
  // non-numeric value used to be serialized as "undefined" / "NaN".
  const maxResults = settings.max_results;
  if (maxResults != null && Number.isFinite(Number(maxResults))) {
    searchParams.set("count", String(maxResults));
  }

  if (freshness) searchParams.set("time_range", String(freshness).slice(0, 32));
  return url.href;
}
/**
 * Converts a provider payload into the normalized result rows.
 *
 * For the "searxng_json" adapter only `payload.results` is accepted; for any
 * other adapter the first truthy value of `payload.results`, `payload.items`
 * and `payload.web.results.value` is used.
 * NOTE(review): confirm that `web.results.value` matches the schema of the
 * provider this fallback is meant for.
 *
 * Rows whose URL (`url` or `link`) does not start with http:// or https://
 * are dropped, as are `null`/`undefined` entries, which previously made the
 * whole call throw. `rank` keeps the row's 1-based position in the provider
 * list, so ranks may have gaps after dropping.
 *
 * @param {object} payload - Parsed provider response.
 * @param {string} [adapter] - Adapter name from the settings.
 * @returns {object[]} Normalized rows with `title`, `url`, `domain`,
 *   `snippet`, `date`, `rank`, `source`, `raw_source_id`, `relevance_score`.
 * @throws {Error} When no result array can be located in the payload.
 */
function normalizeProviderResults(payload, adapter) {
  const rows = adapter === "searxng_json"
    ? payload?.results
    : payload?.results || payload?.items || payload?.web?.results?.value;
  if (!Array.isArray(rows)) throw new Error("External provider returned an unsupported result list.");

  const httpUrl = /^https?:\/\//i;
  const normalized = [];

  rows.forEach((entry, index) => {
    // A null entry has no URL and is skipped like any other unusable row.
    const row = entry ?? {};
    const url = String(row.url || row.link || "");
    // Check the URL first so text sanitizing is not spent on dropped rows.
    if (!httpUrl.test(url)) return;

    let domain = "";
    try {
      domain = new URL(url).hostname;
    } catch {
      // Best effort: a URL that passes the prefix check but cannot be parsed
      // keeps an empty domain.
    }

    normalized.push({
      title: sanitizeText(row.title || row.name || "Untitled result", 240),
      url,
      domain,
      snippet: sanitizeText(row.content || row.snippet || row.description || "", 900),
      date: normalizeDate(row.publishedDate || row.published_date || row.date),
      rank: index + 1,
      source: sanitizeText(row.engine || row.source || "external_json", 80),
      raw_source_id: sanitizeText(row.id || "", 120) || null,
      relevance_score: finiteScore(row.score, index)
    });
  });

  return normalized;
}
/**
 * Maps a safe-search level name onto the numeric string used as the
 * `safesearch` request value.
 *
 * @param {string} level - "off" yields "0" and "strict" yields "2".
 * @returns {string} "0", "2", or "1" for every other input.
 */
function safeSearchValue(level) {
  switch (level) {
    case "off":
      return "0";
    case "strict":
      return "2";
    default:
      return "1";
  }
}
/**
 * Converts a provider-supplied date into an ISO 8601 string.
 *
 * @param {*} value - Anything the `Date` constructor accepts.
 * @returns {string|null} The `toISOString()` form, or `null` when the value
 *   is falsy or does not produce a valid date.
 */
function normalizeDate(value) {
  if (value) {
    const parsed = new Date(value);
    const isValid = !Number.isNaN(parsed.getTime());
    if (isValid) {
      return parsed.toISOString();
    }
  }
  return null;
}
/**
 * Returns the provider's relevance score, or a rank-based fallback when the
 * provider did not supply a usable one.
 *
 * Only numbers and non-blank strings are treated as candidate scores. This
 * keeps `null`, `""` and booleans from being coerced to 0 or 1 by `Number()`
 * and mistaken for a real score.
 *
 * @param {*} value - Raw score taken from the provider row.
 * @param {number} index - Zero-based position of the row in the result list.
 * @returns {number} The finite score, otherwise `1 - index * 0.1` with a
 *   floor of 0.1.
 */
function finiteScore(value, index) {
  const isCandidate =
    typeof value === "number" ||
    (typeof value === "string" && value.trim() !== "");
  const score = isCandidate ? Number(value) : Number.NaN;
  return Number.isFinite(score) ? score : Math.max(0.1, 1 - index * 0.1);
}
module.exports = {
|
|
ExternalSearchProvider,
|
|
SearchProvider: ExternalSearchProvider,
|
|
buildEndpoint,
|
|
normalizeProviderResults,
|
|
safeSearchValue
|
|
};