Lumi/plugins/lumi_ai_web_search/tests/verify.js
2026-06-17 06:21:20 +02:00

559 lines
21 KiB
JavaScript

const assert = require("assert");
const fs = require("fs");
const os = require("os");
const path = require("path");
const zlib = require("zlib");
const { parseBingRss } = require("../backend/adapters/bing_rss_search");
const { parseDuckDuckGoHtml } = require("../backend/adapters/generic_html_search");
const { ToolCache } = require("../backend/cache");
const { extractReadableHtml } = require("../backend/html_extractor");
const { LumiSearchBroker, normalizeBrokerResults } = require("../backend/lumi_search_broker");
const { PageFetcher } = require("../backend/page_fetcher");
const { ToolConcurrency } = require("../backend/rate_limits");
const { formatResults } = require("../backend/result_formatter");
const { WebSearchTool } = require("../backend/search_tool");
const { defaults, readSettings, writeSettings } = require("../backend/settings");
const { evaluateUrl, matchesRule } = require("../backend/url_policy");
const { ToolRegistry } = require("../../lumi_ai/backend/tool_router");
const { ToolInstaller } = require("../../lumi_ai/backend/tool_installer");
const { ToolLoader } = require("../../lumi_ai/backend/tool_loader");
const { ToolManager } = require("../../lumi_ai/backend/tool_manager");
const { ToolSettings } = require("../../lumi_ai/backend/tool_settings");
const { buildPrompt } = require("../../lumi_ai/backend/prompt_builder");
const plugin = require("../index");
// Stub host resolver handed to the code under test as `resolveHost`.
// It ignores its arguments and always resolves to the single fixed,
// non-private address 93.184.216.34, so this function itself never
// performs a real DNS lookup.
const PUBLIC_DNS = async () => ["93.184.216.34"];
/**
 * Runs every verification step in a fixed order and prints a success line
 * once all of them have finished without throwing.
 *
 * Steps that are declared `async` are awaited one after another; the
 * remaining steps are plain synchronous calls. Because this function is
 * itself `async`, any assertion thrown by a step rejects the returned promise.
 *
 * @returns {Promise<void>}
 */
async function run() {
verifyDefaultsAndMigration();
await verifyPolicy();
verifyExtractionAndAdapters();
await verifyFetcher();
verifyFormatting();
await verifySearchAndCache();
await verifyProviderFailure();
await verifyFetchCapabilities();
await verifyRateLimit();
await verifyConcurrencyLimit();
await verifyLoaderLifecycle();
verifyRegistrationAndPrompt();
verifyStaticFiles();
// Only reached when none of the steps above threw.
console.log("Lumi AI Web Search verification passed.");
}
/**
 * Checks the settings returned for an empty data directory and for a
 * directory holding a settings file written with the `enabled` /
 * `provider_endpoint` keys (presumably an older layout, given the
 * "migration" naming — confirm against `readSettings`).
 *
 * The scratch directory is removed in a `finally` block so that a failing
 * assertion does not leave it behind in the OS temp folder.
 */
function verifyDefaultsAndMigration() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-defaults-"));
  try {
    // No settings file exists yet, so these are the values readSettings falls back to.
    const fresh = readSettings(root);
    assert.equal(fresh.provider, "lumi_search_broker");
    assert.equal(fresh.enable_search, true);
    assert.equal(fresh.policy_mode, "blacklist");
    assert.deepEqual(fresh.allowed_origins, ["webui", "discord", "twitch"]);

    fs.writeFileSync(path.join(root, "settings.json"), JSON.stringify({
      enabled: false,
      policy_mode: "whitelist",
      url_rules: ["docs.example.com"],
      provider_endpoint: ""
    }));

    // `enabled: false` is expected to come back as `enable_search: false`.
    const migrated = readSettings(root);
    assert.equal(migrated.provider, "lumi_search_broker");
    assert.equal(migrated.enable_search, false);
    assert.equal(migrated.policy_mode, "whitelist");
    assert.deepEqual(migrated.url_rules, ["docs.example.com"]);
  } finally {
    // Always clean up, even when an assertion above throws.
    fs.rmSync(root, { recursive: true, force: true });
  }
}
/**
 * Exercises `evaluateUrl` in whitelist and blacklist mode, checks that a set
 * of local / non-HTTP targets is never allowed, that a host resolving to a
 * private address is rejected, and that `matchesRule` accepts a host+path rule.
 *
 * @returns {Promise<void>}
 */
async function verifyPolicy() {
  // Thin wrapper so each case reads as (url, mode, rules[, resolver]).
  const evaluate = (url, mode, rules, resolveHost = PUBLIC_DNS) =>
    evaluateUrl(url, { mode, rules, resolveHost });

  const exampleWildcard = ["*.example.com/*"];

  const whitelisted = await evaluate("https://docs.example.com/guide", "whitelist", exampleWildcard);
  assert.equal(whitelisted.allowed, true);

  const outsideWhitelist = await evaluate("https://unrelated.test/guide", "whitelist", exampleWildcard);
  assert.equal(outsideWhitelist.reason, "not_whitelisted");

  const blacklisted = await evaluate(
    "https://ads.example.com/tracker",
    "blacklist",
    ["*.example.com/tracker*"]
  );
  assert.equal(blacklisted.reason, "blacklisted");

  // None of these may be allowed, even with an empty blacklist.
  const forbiddenTargets = [
    "http://127.0.0.1/",
    "http://10.1.2.3/",
    "http://169.254.169.254/latest/meta-data/",
    "http://localhost/",
    "file:///etc/passwd",
    "javascript:alert(1)",
    "data:text/plain,no"
  ];
  for (const target of forbiddenTargets) {
    const verdict = await evaluate(target, "blacklist", []);
    assert.equal(verdict.allowed, false, target);
  }

  // A public-looking hostname whose resolver answers with a private address.
  const rebound = await evaluate(
    "https://dns-rebind.example/",
    "blacklist",
    [],
    async () => ["10.0.0.8"]
  );
  assert.equal(rebound.reason, "private_network");

  const guideUrl = new URL("https://docs.example.com/guide/start");
  assert.equal(matchesRule(guideUrl, "example.com/guide"), true);
}
/**
 * Feeds fixed fixtures through the HTML extractor, the DuckDuckGo HTML
 * parser, the Bing RSS parser and the broker result normalizer, and checks
 * the fields each of them is expected to produce.
 *
 * The fixture template literals are intentionally flush-left: their text is
 * runtime input and is kept exactly as is.
 */
function verifyExtractionAndAdapters() {
  // --- readable-HTML extraction ---
  const samplePage = `
<html><head><title> Example &amp; Test </title>
<meta name="description" content="Useful description">
<meta property="article:published_time" content="2026-06-10T12:00:00Z"></head>
<body><nav>Noise</nav><main><h1>Heading</h1><p>Readable body text.</p>
<script>secret()</script></main><footer>Noise</footer></body></html>`;
  const readable = extractReadableHtml(samplePage, { maxChars: 500 });
  assert.equal(readable.title, "Example & Test");
  assert.match(readable.extracted_text, /Readable body text/);
  // Script content and the nav/footer text must not leak into the extracted text.
  assert.doesNotMatch(readable.extracted_text, /secret|Noise/);
  assert.equal(readable.published_at, "2026-06-10T12:00:00.000Z");

  // --- DuckDuckGo HTML adapter ---
  const ddgMarkup = `
<a class="result__a" href="/l/?uddg=https%3A%2F%2Fdocs.example.com%2Fguide">Official docs</a>
<a class="result__snippet">Current documentation result.</a>`;
  const ddgHits = parseDuckDuckGoHtml(ddgMarkup);
  const firstDdgHit = ddgHits[0];
  assert.equal(firstDdgHit.url, "https://docs.example.com/guide");
  assert.equal(firstDdgHit.snippet, "Current documentation result.");

  // --- Bing RSS adapter ---
  const bingFeed = `
<rss><channel><item><title>Release</title><link>https://example.com/release</link>
<description>Latest release details.</description><pubDate>Wed, 10 Jun 2026 12:00:00 GMT</pubDate></item></channel></rss>`;
  const bingHits = parseBingRss(bingFeed);
  const firstBingHit = bingHits[0];
  assert.equal(firstBingHit.domain, undefined);
  assert.equal(firstBingHit.url, "https://example.com/release");

  // --- broker normalization: two rows differing only by a utm_source parameter ---
  const duplicateRows = [
    { title: "Docs", url: "https://example.com/a?utm_source=x", snippet: "A", source: "one" },
    { title: "Docs", url: "https://example.com/a", snippet: "B", source: "two" }
  ];
  const merged = normalizeBrokerResults(duplicateRows);
  assert.equal(merged.length, 1);
  assert.equal(merged[0].url, "https://example.com/a");
}
/**
 * Drives `PageFetcher` with stubbed `fetch` implementations to check three
 * cases: a redirect followed by a gzip-encoded HTML page, a redirect to a
 * loopback address, and a body larger than `max_fetch_bytes`.
 *
 * @returns {Promise<void>}
 */
async function verifyFetcher() {
  const markup = "<html><head><title>Page title</title></head><body><main>Readable page.</main></body></html>";
  const gzipped = zlib.gzipSync(Buffer.from(markup));

  // Case 1: the first request answers with a 302, every later one with the gzipped page.
  let requestCount = 0;
  const redirectThenPage = async () => {
    requestCount += 1;
    if (requestCount === 1) {
      return response({ status: 302, headers: { location: "https://docs.example.com/final" } });
    }
    return response({
      headers: { "content-type": "text/html; charset=utf-8", "content-encoding": "gzip" },
      body: gzipped
    });
  };
  const fetcher = new PageFetcher({ resolveHost: PUBLIC_DNS, fetch: redirectThenPage });
  const whitelistSettings = testSettings({
    policy_mode: "whitelist",
    url_rules: ["*.example.com/*"]
  });
  const page = await fetcher.fetchPage("https://docs.example.com/start", whitelistSettings);
  assert.equal(page.final_url, "https://docs.example.com/final");
  assert.equal(page.title, "Page title");
  assert.match(page.extracted_text, /Readable page/);

  // Case 2: a redirect pointing at a loopback address must be refused.
  const redirectToLoopback = async () =>
    response({ status: 302, headers: { location: "http://127.0.0.1/private" } });
  const blockedRedirect = new PageFetcher({ resolveHost: PUBLIC_DNS, fetch: redirectToLoopback });
  await assert.rejects(
    () => blockedRedirect.fetchPage("https://docs.example.com/start", testSettings()),
    /blocked by policy/i
  );

  // Case 3: a 70000-byte body against a 65536-byte limit.
  const hugeBody = async () =>
    response({
      headers: { "content-type": "text/html" },
      body: Buffer.alloc(70000, "a")
    });
  const oversized = new PageFetcher({ resolveHost: PUBLIC_DNS, fetch: hugeBody });
  await assert.rejects(
    () => oversized.fetchPage("https://example.com/large", testSettings({ max_fetch_bytes: 65536 })),
    /size limit/i
  );
}
/**
 * Formats the same three results for the twitch, discord and webui origins
 * and checks the per-origin output length limits, that the twitch output is
 * reduced to a single result without a URL, and which result the webui
 * output lists first.
 */
function verifyFormatting() {
  const candidates = [
    result("Community", "https://community.example.com/post", "Community context.", "web"),
    result("Official docs", "https://docs.example.com/guide", "Official answer.", "documentation"),
    result("Recent", "https://news.example.com/update", "Recent update.", "news", "2026-06-12")
  ];
  const settings = testSettings();
  const formatFor = (origin, reason) => formatResults(candidates, { reason, origin, settings });

  const forTwitch = formatFor("twitch", "fact_lookup");
  const forDiscord = formatFor("discord", "resource_lookup");
  const forWebui = formatFor("webui", "documentation_lookup");

  // Twitch: bounded text, one result, URL withheld.
  assert(forTwitch.condensed_text.length <= settings.twitch_output_chars);
  assert.equal(forTwitch.results.length, 1);
  assert.equal(forTwitch.results[0].url, null);

  assert(forDiscord.condensed_text.length <= settings.discord_output_chars);

  assert(forWebui.condensed_text.length <= settings.webui_output_chars);
  const [topWebuiResult] = forWebui.results;
  assert.equal(topWebuiResult.title, "Official docs");
}
/**
 * Runs a search through `WebSearchTool` with a stub broker and checks that:
 *  - the loopback result returned by the broker is dropped,
 *  - the markup in the remaining title is stripped,
 *  - an identical second search is served from cache (broker called once),
 *  - an audit entry is written that does not contain the result snippet.
 *
 * The scratch data directory is removed in a `finally` block so that a
 * failing assertion does not leave it behind.
 *
 * @returns {Promise<void>}
 */
async function verifySearchAndCache() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-search-"));
  try {
    writeSettings(root, testSettings({
      policy_mode: "whitelist",
      url_rules: ["*.example.com/*"],
      cache_ttl_seconds: 60
    }));

    // Counts broker invocations so the cache hit can be proven below.
    let calls = 0;
    const broker = {
      async search() {
        calls += 1;
        return {
          provider: "lumi_search_broker",
          results: [
            result("<b>Verified fact</b>", "https://docs.example.com/fact", "The answer is current.", "documentation"),
            result("Blocked local", "http://127.0.0.1/private", "Never return.", "web")
          ],
          warnings: [],
          adapter_errors: []
        };
      }
    };
    const tool = new WebSearchTool({
      dataDir: root,
      broker,
      fetcher: new PageFetcher({ resolveHost: PUBLIC_DNS })
    });
    const input = {
      query: "current fact",
      reason: "fact_lookup",
      user: { id: "user-1", username: "alice" },
      ctx: { origin: "webui", server_id: "server-1" }
    };

    const first = await tool.search(input);
    assert.equal(first.status, "ok");
    assert.equal(first.provider, "lumi_search_broker");
    assert.equal(first.result_count, 1);
    assert.equal(first.results[0].title, "Verified fact");
    assert.equal(first.results.some((entry) => entry.url?.includes("127.0.0.1")), false);

    const cached = await tool.search(input);
    assert.equal(cached.cache_hit, true);
    assert.equal(calls, 1);

    // Read the audit log once and reuse the text for both checks.
    const auditText = fs.readFileSync(path.join(root, "audit.jsonl"), "utf8");
    // Wrap JSON.parse so map's (index, array) arguments are not passed as its reviver.
    const audit = auditText.trim().split(/\r?\n/).map((line) => JSON.parse(line));
    assert(audit.some((entry) =>
      entry.actor === "user-1" &&
      entry.origin === "webui" &&
      entry.provider === "lumi_search_broker" &&
      entry.query_hash &&
      typeof entry.timing_ms === "number"
    ));
    // The result snippet must not be persisted in the audit log.
    assert.equal(auditText.includes("The answer is current"), false);
  } finally {
    // Always clean up, even when an assertion above throws.
    fs.rmSync(root, { recursive: true, force: true });
  }
}
/**
 * Checks `WebSearchTool.fetchUrl` and `WebSearchTool.summarizeUrl` against a
 * stub fetcher that always returns the same canned page object.
 *
 * The scratch data directory is removed in a `finally` block so that a
 * failing assertion does not leave it behind.
 *
 * @returns {Promise<void>}
 */
async function verifyFetchCapabilities() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-fetch-"));
  try {
    writeSettings(root, testSettings());

    // Canned page handed back by the stub fetcher for every request.
    const page = {
      url: "https://docs.example.com/page",
      final_url: "https://docs.example.com/page",
      title: "Documentation",
      description: "Description",
      headings: ["Documentation"],
      extracted_text: "Readable documentation text with the current answer.",
      content_type: "text/html",
      fetched_at: new Date().toISOString(),
      extraction_status: "ok",
      published_at: null,
      updated_at: null,
      timing_ms: 10,
      truncated: false
    };
    const tool = new WebSearchTool({
      dataDir: root,
      fetcher: {
        resolveHost: PUBLIC_DNS,
        async fetchPage() { return page; }
      }
    });

    const fetched = await tool.fetchUrl({
      url: page.url,
      user: { id: "u" },
      ctx: { origin: "discord" }
    });
    const summarized = await tool.summarizeUrl({
      url: page.url,
      user: { id: "u2" },
      ctx: { origin: "webui" }
    });

    assert.equal(fetched.status, "ok");
    assert.match(fetched.fetched_pages[0].extracted_text, /current answer/);
    assert.equal(summarized.status, "ok");
    assert.match(summarized.condensed_text, /Readable documentation/);
  } finally {
    // Always clean up, even when an assertion above throws.
    fs.rmSync(root, { recursive: true, force: true });
  }
}
/**
 * Checks that a broker response with no results and an adapter error is
 * reported by `WebSearchTool.search` with status "unavailable" and a user
 * message mentioning that.
 *
 * The scratch data directory is removed in a `finally` block so that a
 * failing assertion does not leave it behind.
 *
 * @returns {Promise<void>}
 */
async function verifyProviderFailure() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-failure-"));
  try {
    writeSettings(root, testSettings());
    const tool = new WebSearchTool({
      dataDir: root,
      broker: {
        // Stub broker: empty result list plus one timed-out adapter.
        async search() {
          return {
            provider: "lumi_search_broker",
            results: [],
            warnings: ["duckduckgo_html: timed out"],
            adapter_errors: [{ adapter: "duckduckgo_html", reason: "timed out" }]
          };
        }
      },
      fetcher: new PageFetcher({ resolveHost: PUBLIC_DNS })
    });

    // Named `outcome` rather than `result` to avoid shadowing the module-level result() helper.
    const outcome = await tool.search({
      query: "current unavailable fact",
      reason: "fact_lookup",
      user: { id: "failure" },
      ctx: { origin: "webui" }
    });
    assert.equal(outcome.status, "unavailable");
    assert.match(outcome.user_message, /unavailable/i);
  } finally {
    // Always clean up, even when an assertion above throws.
    fs.rmSync(root, { recursive: true, force: true });
  }
}
/**
 * With `per_user_per_minute` set to 1, checks that a second search by the
 * same user is rejected as rate limited and carries a positive retry delay
 * and a "Retry in" user message.
 *
 * The scratch data directory is removed in a `finally` block so that a
 * failing assertion does not leave it behind.
 *
 * @returns {Promise<void>}
 */
async function verifyRateLimit() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-rate-"));
  try {
    writeSettings(root, testSettings({ per_user_per_minute: 1 }));
    const tool = new WebSearchTool({
      dataDir: root,
      broker: {
        async search() {
          return { provider: "lumi_search_broker", results: [], warnings: [], adapter_errors: [] };
        }
      },
      fetcher: new PageFetcher({ resolveHost: PUBLIC_DNS })
    });
    const input = {
      query: "one",
      reason: "general_lookup",
      user: { id: "limited" },
      ctx: { origin: "webui", server_id: "server" }
    };

    // The first call uses up the single allowed request for this user.
    await tool.search(input);
    // A different query is used for the second call, so it is not the same request repeated.
    const limited = await tool.search({ ...input, query: "two" });
    assert.equal(limited.blocked_reason, "rate_limited");
    assert(limited.retry_after_seconds > 0);
    assert.match(limited.user_message, /Retry in/);
  } finally {
    // Always clean up, even when an assertion above throws.
    fs.rmSync(root, { recursive: true, force: true });
  }
}
/**
 * Submits six short tasks to a `ToolConcurrency(2, 4)` limiter and checks
 * that the highest number of tasks observed running at once is exactly 2.
 *
 * @returns {Promise<void>}
 */
async function verifyConcurrencyLimit() {
  const limiter = new ToolConcurrency(2, 4);
  const counters = { running: 0, peak: 0 };
  const pending = [];

  for (let index = 0; index < 6; index += 1) {
    // A fresh closure per submission, each holding its slot for about 5 ms.
    const submitted = limiter.run(async () => {
      counters.running += 1;
      counters.peak = Math.max(counters.peak, counters.running);
      await new Promise((done) => setTimeout(done, 5));
      counters.running -= 1;
    });
    pending.push(submitted);
  }

  await Promise.all(pending);
  assert.equal(counters.peak, 2);
}
/**
 * Copies the plugin into a scratch plugins directory, loads it through
 * `ToolLoader`, and checks registration of the three tools, the manager's
 * diagnostics and settings description, and that disabling the plugin
 * removes the search tool from the registry again.
 *
 * The scratch directory is removed in a `finally` block so that a failing
 * assertion does not leave it behind.
 *
 * @returns {Promise<void>}
 */
async function verifyLoaderLifecycle() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-loader-"));
  try {
    const pluginsDir = path.join(root, "plugins");
    const toolDir = path.join(pluginsDir, "lumi_ai_web_search");
    // Copy the plugin without its runtime artefacts, then drop any copied settings
    // so the existence check after loadEnabled() proves the file was created anew.
    copyDirectory(path.resolve(__dirname, ".."), toolDir, new Set(["audit.jsonl", "status.json"]));
    fs.rmSync(path.join(toolDir, "data", "settings.json"), { force: true });

    const installer = new ToolInstaller({
      pluginsDir,
      stagingRoot: path.join(root, "staging"),
      repoClient: {}
    });
    const registry = new ToolRegistry(() => {});
    const loader = new ToolLoader({
      registry,
      installer,
      // Settings stub: every lookup yields the caller-supplied fallback.
      settings: { getSetting: (_key, fallback) => fallback },
      stateFile: path.join(root, "enabled.json"),
      lumiAiVersion: "0.8.1",
      lumiVersion: "0.1.0"
    });

    await loader.loadEnabled();
    assert.equal(fs.existsSync(path.join(toolDir, "data", "settings.json")), true);
    assert.equal(loader.isEnabled("lumi_ai_web_search"), true);
    assert.equal(registry.has("web_search.search"), true);
    assert.equal(registry.has("web_search.fetch_url"), true);
    assert.equal(registry.has("web_search.summarize_url"), true);

    const manager = new ToolManager({
      loader,
      installer,
      settings: new ToolSettings({ installer }),
      repoClient: {
        // Stub repository client reporting no remote tools.
        async discover() {
          return {
            repository: "local",
            branch: "main",
            checked_at: new Date().toISOString(),
            cached: false,
            stale: false,
            tools: []
          };
        }
      }
    });

    const diagnostics = await manager.diagnostics({
      role: "admin",
      user: { id: "admin", isAdmin: true },
      context: { origin: "webui", permission_context: { webui_actions_allowed: true } }
    });
    const pluginDiagnostics = diagnostics.plugins.find((entry) => entry.tool_id === "lumi_ai_web_search");
    // Fail with a readable assertion instead of a TypeError when the entry is missing.
    assert(pluginDiagnostics, "diagnostics should contain an entry for lumi_ai_web_search");
    assert.equal(pluginDiagnostics.runtime_details.provider, "lumi_search_broker");
    assert.equal(pluginDiagnostics.decisions.length, 3);
    assert(pluginDiagnostics.decisions.every((decision) => decision.exposed));
    assert(diagnostics.considered_tools.includes("web_search.search"));
    assert(diagnostics.exposed_tools.includes("web_search.search"));

    const described = manager.settingsFor("lumi_ai_web_search");
    assert.equal(described.values.provider, "lumi_search_broker");
    assert(described.ui.html.includes("data-web-search-settings"));
    assert(described.ui.scripts.some((value) => value.endsWith("/settings-modal.js")));
    assert(described.ui.styles.some((value) => value.endsWith("/settings-modal.css")));

    await loader.disable("lumi_ai_web_search");
    assert.equal(registry.has("web_search.search"), false);
  } finally {
    // Always clean up, even when an assertion above throws.
    fs.rmSync(root, { recursive: true, force: true });
  }
}
/**
 * Checks which tool definitions the plugin registers under three settings
 * variants (defaults, search disabled, external provider without endpoint)
 * and that the prompt built from the exposed tools mentions the search tool,
 * the web-search decision rules and the tool-call JSON shape.
 *
 * The scratch data directory is removed in a `finally` block so that a
 * failing assertion does not leave it behind.
 */
function verifyRegistrationAndPrompt() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-register-"));
  try {
    // --- default settings: all three tools are registered ---
    writeSettings(root, testSettings());
    assert.equal(plugin.checkAvailability({ paths: { data: root } }).available, true);
    const definitions = [];
    plugin.register({
      paths: { data: root },
      registerTool: (definition) => definitions.push(definition)
    });
    assert.deepEqual(definitions.map((definition) => definition.tool_id), [
      "web_search.search",
      "web_search.fetch_url",
      "web_search.summarize_url"
    ]);
    assert(definitions.every((definition) => definition.read_only === true));
    // The search tool's origin check passes for discord even without WebUI actions.
    assert(definitions[0].origin_check({
      context: { origin: "discord", permission_context: { webui_actions_allowed: false } }
    }));

    // --- prompt built from the tools exposed to a WebUI admin ---
    const registry = new ToolRegistry(() => {});
    const metadata = require("../tool_info.json");
    const { registerManagedTool } = require("../../lumi_ai/backend/tool_registry");
    for (const definition of definitions) registerManagedTool(registry, metadata, definition);
    const exposed = registry.inspect({
      role: "admin",
      user: { id: "admin", isAdmin: true },
      context: { origin: "webui", permission_context: { webui_actions_allowed: true } }
    }).exposed;
    const prompt = buildPrompt({
      config: { support_scope: {}, instructions: { roleplay_intensity: 0 } },
      role: "admin",
      message: "Find the latest release",
      tools: exposed,
      originContext: { origin: "webui" }
    });
    assert(prompt.includes('"tool_id":"web_search.search"'));
    assert(prompt.includes("WEB SEARCH DECISION RULES"));
    assert(prompt.includes('{"type":"tool_call","tool":"tool_id","arguments":{}}'));

    // --- search disabled: fetch_url stays, search is not registered ---
    writeSettings(root, testSettings({ enable_search: false }));
    const fetchOnly = [];
    plugin.register({
      paths: { data: root },
      registerTool: (definition) => fetchOnly.push(definition)
    });
    assert.equal(fetchOnly.some((definition) => definition.tool_id === "web_search.search"), false);
    assert.equal(fetchOnly.some((definition) => definition.tool_id === "web_search.fetch_url"), true);

    // --- external provider with an empty endpoint: same outcome as above ---
    writeSettings(root, testSettings({
      provider: "external_json",
      external_provider_endpoint: ""
    }));
    const externalWithoutEndpoint = [];
    plugin.register({
      paths: { data: root },
      registerTool: (definition) => externalWithoutEndpoint.push(definition)
    });
    assert.equal(externalWithoutEndpoint.some((definition) => definition.tool_id === "web_search.search"), false);
    assert.equal(externalWithoutEndpoint.some((definition) => definition.tool_id === "web_search.fetch_url"), true);
  } finally {
    // Always clean up, even when an assertion above throws.
    fs.rmSync(root, { recursive: true, force: true });
  }
}
/**
 * Checks a few fields of `tool_info.json`, that selected backend and public
 * files exist in the plugin directory, and that the readme contains the
 * "without requiring an API key" wording.
 */
function verifyStaticFiles() {
  const pluginRoot = path.resolve(__dirname, "..");
  const metadata = require("../tool_info.json");
  const schema = metadata.settings_schema;

  assert.equal(metadata.version, "0.1.0");
  assert.equal(schema.provider.default, "lumi_search_broker");
  assert.equal(schema.policy_mode.default, "blacklist");
  assert.equal(metadata.default_enabled, true);

  // Files that must exist, as path segments below the plugin root.
  const requiredFiles = [
    ["backend", "lumi_search_broker.js"],
    ["backend", "page_fetcher.js"],
    ["public", "settings-modal.css"]
  ];
  for (const segments of requiredFiles) {
    assert(fs.existsSync(path.join(pluginRoot, ...segments)));
  }

  const readmeText = fs.readFileSync(path.join(pluginRoot, "readme.md"), "utf8");
  assert(readmeText.includes("without requiring an API key"));
}
/**
 * Builds a settings object for the tests: the module defaults, overlaid with
 * `overrides`, with a handful of keys pinned to known values whenever the
 * override does not supply a usable one.
 *
 * @param {object} [overrides] - Settings to layer on top of `defaults()`.
 * @returns {object} A new settings object.
 */
function testSettings(overrides = {}) {
  const layered = { ...defaults(), ...overrides };

  // Falsy strings/arrays fall back via `||`; booleans only fall back when nullish.
  const pinned = {
    provider: overrides.provider || "lumi_search_broker",
    enable_search: overrides.enable_search ?? true,
    enable_fetch_url: overrides.enable_fetch_url ?? true,
    enable_summarize_url: overrides.enable_summarize_url ?? true,
    policy_mode: overrides.policy_mode || "blacklist",
    allowed_origins: overrides.allowed_origins || ["webui", "discord", "twitch"]
  };

  return { ...layered, ...pinned };
}
/**
 * Builds a search-result row for the tests.
 *
 * @param {string} title - Result title.
 * @param {string} url - Absolute result URL; its hostname becomes `domain`.
 * @param {string} snippet - Result snippet text.
 * @param {string} source - Source label.
 * @param {string|null} [date=null] - Optional date value.
 * @returns {object} Row with fixed `rank` 1, `raw_source_id` "test" and
 *   `relevance_score` 0.9.
 * @throws {TypeError} When `url` cannot be parsed by `URL`.
 */
function result(title, url, snippet, source, date = null) {
  const { hostname } = new URL(url);
  const row = {
    title,
    url,
    domain: hostname,
    snippet,
    date,
    rank: 1,
    source,
    raw_source_id: "test",
    relevance_score: 0.9
  };
  return row;
}
/**
 * Builds a minimal fetch-response stand-in for the tests.
 *
 * @param {object} options
 * @param {number} [options.status=200] - HTTP status; `ok` is true for 200-299.
 * @param {object} [options.headers={}] - Header map; names are matched case-insensitively.
 * @param {Buffer|string} [options.body] - Body, passed through `Buffer.from`.
 * @returns {object} Object exposing `ok`, `status`, `headers.get(name)` and
 *   an async `arrayBuffer()` that resolves to the body buffer.
 */
function response({ status = 200, headers = {}, body = Buffer.from("") }) {
  // Lower-case every header name once so lookups can be case-insensitive.
  const lowerCasedPairs = Object.entries(headers).map((pair) => {
    const [name, value] = pair;
    return [name.toLowerCase(), value];
  });
  const headerTable = Object.fromEntries(lowerCasedPairs);
  const payload = Buffer.from(body);
  const isSuccess = status >= 200 && status < 300;

  const headerAccess = {
    get(name) {
      const value = headerTable[String(name).toLowerCase()];
      // Missing or otherwise falsy header values are reported as null.
      return value ? value : null;
    }
  };

  return {
    ok: isSuccess,
    status,
    headers: headerAccess,
    async arrayBuffer() {
      return payload;
    }
  };
}
/**
 * Recursively copies the regular files and sub-directories of `source` into
 * `destination`, creating `destination` if needed.
 *
 * Entries whose name is in `ignored` are skipped at every level; entries
 * that are neither a directory nor a regular file are not copied.
 *
 * @param {string} source - Directory to copy from.
 * @param {string} destination - Directory to copy into.
 * @param {Set<string>} [ignored] - Entry names to leave out.
 */
function copyDirectory(source, destination, ignored = new Set()) {
  fs.mkdirSync(destination, { recursive: true });

  const children = fs.readdirSync(source, { withFileTypes: true });
  children
    .filter((child) => !ignored.has(child.name))
    .forEach((child) => {
      const sourcePath = path.join(source, child.name);
      const targetPath = path.join(destination, child.name);

      if (child.isDirectory()) {
        copyDirectory(sourcePath, targetPath, ignored);
        return;
      }
      if (child.isFile()) {
        fs.copyFileSync(sourcePath, targetPath);
      }
    });
}
/**
 * Prints the failure and marks the process as failed by setting a non-zero
 * exit code, without terminating the process itself.
 */
function reportFailure(failure) {
  console.error(failure);
  process.exitCode = 1;
}

// Script entry point: start the verification run and route any rejection to the reporter.
run().catch(reportFailure);