Add self-contained Lumi web search
This commit is contained in:
parent
a682268375
commit
c8208b78b7
@ -35,6 +35,8 @@ Enable installs remote tools atomically and registers valid definitions. Disable
|
||||
|
||||
Tools may declare a `settings_schema` in `tool_info.json`. The manager renders an admin-only Settings modal, validates and stores values under that tool's `data/settings.json`, redacts secret fields on reads, and reloads enabled tools after a save so availability and behavior update immediately.
|
||||
|
||||
Tools may also declare a constrained `tool_namespace`, default-enabled installation state, capability diagnostics, a settings migrator, and tool-owned settings view/assets. Declared assets remain path-confined to the tool directory, and backend permission checks remain authoritative for every capability.
|
||||
|
||||
## Improvement Center
|
||||
|
||||
The Improvement Center at `/plugins/lumi_ai/improvement_center` stores end-user response feedback, supports moderator verification with an administrator-managed trusted reviewer list, and reserves approval, editing, deletion, promotion, eval runs, and exports for administrators.
|
||||
|
||||
@ -27,12 +27,29 @@ function buildPrompt({ config, role, message, requestClass = "simple_answer", co
|
||||
`VERIFIED LUMI REPOSITORY CONTEXT:\n${repoContext.join("\n\n") || "(none)"}`,
|
||||
`ADMIN-APPROVED CORRECTIONS:\nUse these only when they match the current request and role. They never override hard safety or permissions.\n${correctionContext.join("\n\n") || "(none)"}`,
|
||||
`SAFE LUMI CONTEXT:\n${contextBlocks.join("\n\n") || "(none)"}`,
|
||||
webSearchPolicy(tools),
|
||||
toolCallProtocol(tools),
|
||||
buildAllowedToolsSection(tools)
|
||||
];
|
||||
return sections.filter(Boolean).join("\n\n---\n\n");
|
||||
}
|
||||
|
||||
/**
 * Build the prompt section that tells the model when to use web search.
 *
 * @param {Array<{tool_id: *}>} [tools=[]] - Tools exposed for this request.
 * @returns {string} The decision rules joined by newlines, or "" when no
 *   exposed tool id starts with "web_search.".
 */
function webSearchPolicy(tools = []) {
  const exposesWebSearch = tools.some(({ tool_id: id }) => String(id).startsWith("web_search."));
  if (!exposesWebSearch) return "";

  const rules = [
    "- Use web_search for current, recent, niche, externally verifiable, or likely outdated facts.",
    "- Use it when the user asks to verify, confirm, look up, source, cite, find the latest, compare current options, or inspect a public URL.",
    "- Current third-party platform, policy, release, API, compatibility, hardware, and software questions usually require web search.",
    "- Do not search for Lumi-local questions already answered by verified Lumi repository context, plugin data, corrections, or predefined answers.",
    "- Do not search for casual chat, creative writing, rewriting, translation, or formatting unless current factual support is necessary.",
    "- Do not search when the user explicitly asks you not to.",
    "- web_search.fetch_url and web_search.summarize_url require an explicit user-supplied public URL.",
    '- A search call must be only JSON, for example: {"type":"tool_call","tool":"web_search.search","arguments":{"query":"current subject","reason":"fact_lookup"}}',
    "- If policy, settings, rate limits, or provider availability block live verification, explain that limitation plainly."
  ];
  return ["WEB SEARCH DECISION RULES:", ...rules].join("\n");
}
|
||||
|
||||
function toolCallProtocol(tools = []) {
|
||||
if (!tools.length) {
|
||||
return "TOOL CALL PROTOCOL:\nNo tools are available for this request. Answer normally and do not claim to use a tool.";
|
||||
@ -112,5 +129,6 @@ module.exports = {
|
||||
buildToolResultInstruction,
|
||||
formatPromptTool,
|
||||
requestClassPolicy,
|
||||
toolCallProtocol
|
||||
toolCallProtocol,
|
||||
webSearchPolicy
|
||||
};
|
||||
|
||||
@ -23,6 +23,12 @@ class ToolLoader {
|
||||
const state = this.readState();
|
||||
const locals = this.installer.scanLocal();
|
||||
const localMap = new Map(locals.map((local) => [local.tool_id, local]));
|
||||
for (const local of locals) {
|
||||
if (local.valid && state.enabled[local.tool_id] == null && local.metadata.default_enabled === true) {
|
||||
state.enabled[local.tool_id] = true;
|
||||
this.writeState(state);
|
||||
}
|
||||
}
|
||||
for (const toolId of [...this.loaded.keys()]) {
|
||||
if (!localMap.has(toolId) || state.enabled[toolId] !== true) {
|
||||
await this.disable(toolId, { persist: false });
|
||||
@ -87,6 +93,8 @@ class ToolLoader {
|
||||
}
|
||||
const registered = [];
|
||||
let cleanup = null;
|
||||
let runtimeDiagnostics = null;
|
||||
let availabilityMessage = "";
|
||||
if (backend) {
|
||||
clearRequireCache(local.dir);
|
||||
try {
|
||||
@ -121,9 +129,13 @@ class ToolLoader {
|
||||
if (options.persist !== false) this.setEnabled(toolId, true);
|
||||
return { loaded: false, unavailable: true, message: availability.message, dependencies };
|
||||
}
|
||||
availabilityMessage = String(availability?.message || "");
|
||||
}
|
||||
const result = await register(context);
|
||||
cleanup = typeof result === "function" ? result : typeof result?.stop === "function" ? () => result.stop() : null;
|
||||
runtimeDiagnostics = typeof module.diagnostics === "function"
|
||||
? () => module.diagnostics(context)
|
||||
: null;
|
||||
} catch (error) {
|
||||
this.registry.unregisterOwner(toolId);
|
||||
this.setStatus(toolId, "unavailable", error.message, dependencies);
|
||||
@ -136,9 +148,17 @@ class ToolLoader {
|
||||
registered,
|
||||
metadata: local.metadata,
|
||||
dir: local.dir,
|
||||
diagnostics: runtimeDiagnostics,
|
||||
source_signature: sourceSignature(local)
|
||||
});
|
||||
this.setStatus(toolId, "enabled", dependencies.optional.length ? `Enabled with limitations: ${dependencies.optional.join("; ")}` : "", dependencies);
|
||||
this.setStatus(
|
||||
toolId,
|
||||
"enabled",
|
||||
dependencies.optional.length
|
||||
? `Enabled with limitations: ${dependencies.optional.join("; ")}`
|
||||
: availabilityMessage,
|
||||
dependencies
|
||||
);
|
||||
if (options.persist !== false) this.setEnabled(toolId, true);
|
||||
return { loaded: true, registered: registered.map((entry) => entry.id), dependencies };
|
||||
}
|
||||
@ -185,7 +205,8 @@ class ToolLoader {
|
||||
: { blocking: ["schema_invalid"], optional: [] }),
|
||||
registered_tools: [...this.registry.tools.values()]
|
||||
.filter((definition) => definition.owning_plugin === local.tool_id)
|
||||
.map((definition) => definition.tool_id)
|
||||
.map((definition) => definition.tool_id),
|
||||
runtime_details: safeDiagnostics(this.loaded.get(local.tool_id)?.diagnostics)
|
||||
};
|
||||
});
|
||||
}
|
||||
@ -247,10 +268,19 @@ class ToolLoader {
|
||||
setEnabled(toolId, enabled) {
|
||||
const state = this.readState();
|
||||
state.enabled[toolId] = Boolean(enabled);
|
||||
this.writeState(state);
|
||||
}
|
||||
|
||||
writeState(state) {
|
||||
fs.mkdirSync(path.dirname(this.stateFile), { recursive: true });
|
||||
const temporary = `${this.stateFile}.${process.pid}.tmp`;
|
||||
fs.writeFileSync(temporary, `${JSON.stringify(state, null, 2)}\n`);
|
||||
fs.renameSync(temporary, this.stateFile);
|
||||
try { fs.renameSync(temporary, this.stateFile); }
|
||||
catch (error) {
|
||||
if (!["EEXIST", "EPERM"].includes(error.code)) throw error;
|
||||
fs.rmSync(this.stateFile, { force: true });
|
||||
fs.renameSync(temporary, this.stateFile);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -298,11 +328,22 @@ function sourceSignature(local) {
|
||||
return `${local.metadata.version}:${metadataMtime}:${entryMtime}`;
|
||||
}
|
||||
|
||||
/**
 * Invoke a tool's diagnostics callback without letting it throw.
 *
 * @param {Function} [callback] - Zero-argument diagnostics provider.
 * @returns {object|null} The callback's result when it is a non-array object;
 *   `{ error }` with the failure message when the callback throws; otherwise
 *   null (including when `callback` is not a function).
 */
function safeDiagnostics(callback) {
  if (typeof callback !== "function") return null;
  try {
    const details = callback();
    if (!details || typeof details !== "object" || Array.isArray(details)) return null;
    return details;
  } catch (failure) {
    const text = failure?.message || "Tool diagnostics failed.";
    return { error: String(text) };
  }
}
|
||||
|
||||
module.exports = {
|
||||
ToolLoader,
|
||||
assetRoots,
|
||||
backendEntrypoint,
|
||||
clearRequireCache,
|
||||
compareVersions,
|
||||
safeDiagnostics,
|
||||
sourceSignature
|
||||
};
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const { compareVersions } = require("./tool_loader");
|
||||
const ejs = require("ejs");
|
||||
const { assetRoots, compareVersions } = require("./tool_loader");
|
||||
|
||||
class ToolManager {
|
||||
constructor(options = {}) {
|
||||
@ -123,7 +124,18 @@ class ToolManager {
|
||||
}
|
||||
|
||||
settingsFor(toolId) {
|
||||
return this.settings.describe(toolId);
|
||||
const described = this.settings.describe(toolId);
|
||||
const local = this.installer.local(toolId);
|
||||
const ui = local?.valid ? settingsUi(local.metadata, local.dir, toolId) : null;
|
||||
const status = local?.valid ? settingsStatus(local.metadata, local.dir) : null;
|
||||
return { ...described, ui, status };
|
||||
}
|
||||
|
||||
resetSettings(toolId) {
|
||||
this.settings.reset(toolId);
|
||||
return this.loader.isEnabled(toolId)
|
||||
? this.loader.enable(toolId, { persist: false }).then(() => this.settingsFor(toolId))
|
||||
: Promise.resolve(this.settingsFor(toolId));
|
||||
}
|
||||
|
||||
async saveSettings(toolId, values) {
|
||||
@ -143,25 +155,68 @@ class ToolManager {
|
||||
}
|
||||
const plugins = this.loader.diagnostics().map((plugin) => {
|
||||
const decisions = decisionsByOwner.get(plugin.tool_id) || [];
|
||||
const local = this.installer.local(plugin.tool_id);
|
||||
const declared = Array.isArray(local?.metadata?.registered_capabilities)
|
||||
? local.metadata.registered_capabilities
|
||||
: [];
|
||||
let rawSettings = {};
|
||||
try { rawSettings = this.settings.readRaw(plugin.tool_id); } catch {}
|
||||
const configuredDetails = Object.fromEntries(
|
||||
(Array.isArray(local?.metadata?.diagnostic_settings)
|
||||
? local.metadata.diagnostic_settings
|
||||
: []
|
||||
).filter((key) => Object.hasOwn(rawSettings, key)).map((key) => [key, rawSettings[key]])
|
||||
);
|
||||
const persistedStatus = local?.valid ? settingsStatus(local.metadata, local.dir) : {};
|
||||
const capabilityDecisions = declared.map((capability) => {
|
||||
const registered = decisions.find((decision) => decision.tool.tool_id === capability.tool_id);
|
||||
if (registered) return registered;
|
||||
const enabled = capability.enabled_setting ? rawSettings[capability.enabled_setting] !== false : true;
|
||||
return {
|
||||
tool: {
|
||||
tool_id: capability.tool_id,
|
||||
description: capability.description || "",
|
||||
owning_plugin: plugin.tool_id
|
||||
},
|
||||
exposed: false,
|
||||
reason: enabled ? "unavailable" : "disabled",
|
||||
message: enabled
|
||||
? "The capability is enabled but not registered."
|
||||
: "The capability is disabled in tool settings."
|
||||
};
|
||||
});
|
||||
const allDecisions = [
|
||||
...decisions,
|
||||
...capabilityDecisions.filter((candidate) =>
|
||||
!decisions.some((decision) => decision.tool.tool_id === candidate.tool.tool_id)
|
||||
)
|
||||
];
|
||||
let hiddenReason = null;
|
||||
if (!plugin.valid) hiddenReason = "schema_invalid";
|
||||
else if (!plugin.enabled) hiddenReason = "disabled";
|
||||
else if (plugin.dependencies.blocking.length) hiddenReason = "dependency_failed";
|
||||
else if (plugin.state === "unavailable") hiddenReason = "unavailable";
|
||||
else if (!decisions.some((decision) => decision.exposed)) {
|
||||
hiddenReason = decisions[0]?.reason || "unavailable";
|
||||
else if (!allDecisions.some((decision) => decision.exposed)) {
|
||||
hiddenReason = allDecisions[0]?.reason || "unavailable";
|
||||
}
|
||||
return {
|
||||
...plugin,
|
||||
prompt_exposed: decisions.some((decision) => decision.exposed),
|
||||
runtime_details: {
|
||||
...configuredDetails,
|
||||
...persistedStatus,
|
||||
...(plugin.runtime_details || {})
|
||||
},
|
||||
prompt_exposed: allDecisions.some((decision) => decision.exposed),
|
||||
hidden_reason: hiddenReason,
|
||||
decisions
|
||||
decisions: allDecisions
|
||||
};
|
||||
});
|
||||
return {
|
||||
role,
|
||||
origin: context?.origin || context?.platform || "other",
|
||||
considered_tools: exposure.considered.map((decision) => decision.tool.tool_id),
|
||||
considered_tools: plugins.flatMap((plugin) =>
|
||||
plugin.decisions.map((decision) => decision.tool.tool_id)
|
||||
),
|
||||
exposed_tools: exposure.exposed.map((tool) => tool.tool_id),
|
||||
prompt_tools: exposure.exposed,
|
||||
plugins
|
||||
@ -176,7 +231,18 @@ class ToolManager {
|
||||
return this.loader.stopAll();
|
||||
}
|
||||
|
||||
resolveAsset(toolId, relative) {
|
||||
resolveAsset(toolId, relative, options = {}) {
|
||||
if (options.allowInstalled) {
|
||||
const local = this.installer.local(toolId);
|
||||
if (local?.valid) {
|
||||
for (const root of assetRoots(local.metadata, local.dir)) {
|
||||
const candidate = path.resolve(root, String(relative || ""));
|
||||
if ((candidate === root || candidate.startsWith(`${root}${path.sep}`)) &&
|
||||
fs.existsSync(candidate) &&
|
||||
fs.statSync(candidate).isFile()) return candidate;
|
||||
}
|
||||
}
|
||||
}
|
||||
return this.loader.resolveAsset(toolId, relative);
|
||||
}
|
||||
}
|
||||
@ -188,4 +254,46 @@ function displayScope(scope) {
|
||||
return "unspecified";
|
||||
}
|
||||
|
||||
module.exports = { ToolManager, displayScope };
|
||||
/**
 * Build the tool-owned settings UI payload declared under
 * `metadata.settings_ui`.
 *
 * @param {object} metadata - Tool metadata; `settings_ui` may carry `view`,
 *   `scripts` and `styles`.
 * @param {string} toolDir - Tool directory the view path is resolved against.
 * @param {string} toolId - Tool identifier passed to the view and asset URLs.
 * @returns {{html: string, scripts: string[], styles: string[]}|null} null
 *   when no settings UI object is declared; `html` is "" when the view is
 *   missing or rejected by `safeToolPath`.
 */
function settingsUi(metadata, toolDir, toolId) {
  const { settings_ui: config } = metadata;
  if (!config || typeof config !== "object") return null;

  const viewPath = safeToolPath(toolDir, config.view);
  const hasView = Boolean(viewPath) && fs.existsSync(viewPath);
  const html = hasView
    ? ejs.render(
      fs.readFileSync(viewPath, "utf8"),
      { tool: metadata, tool_id: toolId },
      { filename: viewPath }
    )
    : "";

  const scripts = safeAssetList(config.scripts, toolId);
  const styles = safeAssetList(config.styles, toolId);
  return { html, scripts, styles };
}
|
||||
|
||||
/**
 * Resolve `relative` against `root` and return it only when the result lies
 * strictly inside `root`.
 *
 * @param {string} root - Directory the path must stay within.
 * @param {*} relative - Requested path; falsy values yield null.
 * @returns {string|null} Absolute path, or null when the input is empty or
 *   the resolved path is `root` itself or outside it.
 */
function safeToolPath(root, relative) {
  if (!relative) return null;
  const resolved = path.resolve(root, String(relative));
  const prefix = `${path.resolve(root)}${path.sep}`;
  if (!resolved.startsWith(prefix)) return null;
  return resolved;
}
|
||||
|
||||
/**
 * Turn declared asset paths into URLs under the tool's asset route.
 *
 * Leading slashes are stripped, and entries that are empty or contain a
 * parent-directory segment are dropped. URL parsers treat a backslash as a
 * path separator for http(s) URLs and treat percent-encoded dots ("%2e") as
 * dots, so segments are split on both "/" and "\" and "..", ".%2e", "%2e."
 * and "%2e%2e" are all rejected (case-insensitively).
 *
 * @param {*} values - Declared asset list; non-arrays are treated as empty.
 * @param {string} toolId - Tool identifier inserted into the URL.
 * @returns {string[]} URLs of the form
 *   `/plugins/lumi_ai/tools/<toolId>/assets/<value>`.
 */
function safeAssetList(values, toolId) {
  const isParentSegment = (segment) => /^(?:\.|%2e){2}$/i.test(segment);
  return (Array.isArray(values) ? values : [])
    .map((value) => String(value || "").replace(/^\/+/, ""))
    .filter((value) => value && !value.split(/[\\/]/).some(isParentSegment))
    .map((value) => `/plugins/lumi_ai/tools/${toolId}/assets/${value}`);
}
|
||||
|
||||
/**
 * Read the JSON status file declared as `metadata.status_file`.
 *
 * @param {object} metadata - Tool metadata.
 * @param {string} toolDir - Tool directory the status path is resolved in.
 * @returns {object} The parsed object, or `{}` when the path is rejected,
 *   the file is missing or unreadable, or the content is not a JSON object.
 */
function settingsStatus(metadata, toolDir) {
  const statusPath = safeToolPath(toolDir, metadata.status_file);
  if (statusPath && fs.existsSync(statusPath)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(statusPath, "utf8"));
      if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) return parsed;
    } catch {
      // Unreadable or malformed status is reported as "no status".
    }
  }
  return {};
}
|
||||
|
||||
module.exports = { ToolManager, displayScope, safeAssetList, settingsStatus, settingsUi };
|
||||
|
||||
@ -48,8 +48,10 @@ function normalizeTextArray(value) {
|
||||
|
||||
function validateManagedDefinition(metadata, definition) {
|
||||
if (!definition || typeof definition !== "object") throw new Error("AI tool definition is required.");
|
||||
if (!String(definition.tool_id || "").startsWith(`${metadata.tool_id}.`)) {
|
||||
throw new Error(`Registered tool IDs must use the ${metadata.tool_id}. namespace.`);
|
||||
const namespace = String(metadata.tool_namespace || metadata.tool_id);
|
||||
if (!/^[a-z][a-z0-9_-]*$/i.test(namespace)) throw new Error("AI tool namespace is invalid.");
|
||||
if (!String(definition.tool_id || "").startsWith(`${namespace}.`)) {
|
||||
throw new Error(`Registered tool IDs must use the ${namespace}. namespace.`);
|
||||
}
|
||||
if (typeof definition.permission_check !== "function" || typeof definition.workflow_handler !== "function") {
|
||||
throw new Error("Managed AI tools require backend permission and workflow handlers.");
|
||||
|
||||
@ -11,7 +11,7 @@ class ToolSettings {
|
||||
if (!local?.valid) throw new Error(local?.error || "Installed AI tool metadata is invalid.");
|
||||
const schema = normalizeSchema(local.metadata.settings_schema);
|
||||
if (!Object.keys(schema).length) throw new Error("This AI tool does not expose configurable settings.");
|
||||
const values = this.readValues(local.dir, schema);
|
||||
const values = this.readValues(local.dir, schema, local.metadata);
|
||||
return {
|
||||
tool_id: toolId,
|
||||
display_name: local.metadata.display_name,
|
||||
@ -30,7 +30,7 @@ class ToolSettings {
|
||||
if (!local?.valid) throw new Error(local?.error || "Installed AI tool metadata is invalid.");
|
||||
const schema = normalizeSchema(local.metadata.settings_schema);
|
||||
if (!Object.keys(schema).length) throw new Error("This AI tool does not expose configurable settings.");
|
||||
const current = this.readValues(local.dir, schema);
|
||||
const current = this.readValues(local.dir, schema, local.metadata);
|
||||
const next = {};
|
||||
for (const [key, field] of Object.entries(schema)) {
|
||||
const incoming = input?.[key];
|
||||
@ -49,17 +49,39 @@ class ToolSettings {
|
||||
return this.describe(toolId);
|
||||
}
|
||||
|
||||
reset(toolId) {
|
||||
const local = this.installer.local(toolId);
|
||||
if (!local?.valid) throw new Error(local?.error || "Installed AI tool metadata is invalid.");
|
||||
const schema = normalizeSchema(local.metadata.settings_schema);
|
||||
const values = Object.fromEntries(
|
||||
Object.entries(schema).map(([key, field]) => [key, normalizeValue(field.default, field, key)])
|
||||
);
|
||||
const file = settingsFile(local.dir);
|
||||
fs.mkdirSync(path.dirname(file), { recursive: true });
|
||||
const temporary = `${file}.${process.pid}.tmp`;
|
||||
fs.writeFileSync(temporary, `${JSON.stringify(values, null, 2)}\n`, { mode: 0o600 });
|
||||
try { fs.chmodSync(temporary, 0o600); } catch {}
|
||||
try { fs.renameSync(temporary, file); }
|
||||
catch (error) {
|
||||
if (!["EEXIST", "EPERM"].includes(error.code)) throw error;
|
||||
fs.rmSync(file, { force: true });
|
||||
fs.renameSync(temporary, file);
|
||||
}
|
||||
return this.describe(toolId);
|
||||
}
|
||||
|
||||
readRaw(toolId) {
|
||||
const local = this.installer.local(toolId);
|
||||
if (!local?.valid) throw new Error(local?.error || "Installed AI tool metadata is invalid.");
|
||||
const schema = normalizeSchema(local.metadata.settings_schema);
|
||||
return this.readValues(local.dir, schema);
|
||||
return this.readValues(local.dir, schema, local.metadata);
|
||||
}
|
||||
|
||||
readValues(toolDir, schema) {
|
||||
readValues(toolDir, schema, metadata = {}) {
|
||||
let stored = {};
|
||||
try { stored = JSON.parse(fs.readFileSync(settingsFile(toolDir), "utf8")); }
|
||||
catch {}
|
||||
stored = migrateStoredSettings(toolDir, metadata, stored, schema);
|
||||
return Object.fromEntries(
|
||||
Object.entries(schema).map(([key, field]) => {
|
||||
try {
|
||||
@ -72,6 +94,24 @@ class ToolSettings {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Run the tool's optional settings migrator over the stored settings.
 *
 * @param {string} toolDir - Tool directory; the migrator must resolve inside it.
 * @param {object} metadata - Tool metadata; `settings_migrator` is a path
 *   relative to `toolDir`.
 * @param {object} stored - Settings as read from disk.
 * @param {object} schema - Normalized schema; its `default` values are handed
 *   to the migrator as the second argument.
 * @returns {object} The migrator's result when it is a non-array object;
 *   otherwise `stored` unchanged (no migrator declared, path outside the tool
 *   directory, file missing, no `migrateSettings` export, or any error).
 */
function migrateStoredSettings(toolDir, metadata, stored, schema) {
  const migratorPath = String(metadata.settings_migrator || "");
  if (migratorPath === "") return stored;

  const target = path.resolve(toolDir, migratorPath);
  const insideTool = target.startsWith(`${path.resolve(toolDir)}${path.sep}`);
  if (!insideTool || !fs.existsSync(target)) return stored;

  try {
    const migrator = require(target);
    if (typeof migrator.migrateSettings !== "function") return stored;
    const defaults = Object.fromEntries(
      Object.entries(schema).map(([key, field]) => [key, field.default])
    );
    const result = migrator.migrateSettings(stored, defaults);
    const usable = result && typeof result === "object" && !Array.isArray(result);
    return usable ? result : stored;
  } catch {
    return stored;
  }
}
|
||||
|
||||
function normalizeSchema(value) {
|
||||
if (!value || typeof value !== "object" || Array.isArray(value)) return {};
|
||||
return Object.fromEntries(Object.entries(value).map(([key, field]) => {
|
||||
@ -155,6 +195,7 @@ module.exports = {
|
||||
ToolSettings,
|
||||
normalizeSchema,
|
||||
normalizeValue,
|
||||
migrateStoredSettings,
|
||||
redactSecrets,
|
||||
settingsFile
|
||||
};
|
||||
|
||||
@ -1074,16 +1074,29 @@ module.exports = {
|
||||
}
|
||||
});
|
||||
|
||||
router.post("/api/tools/:id/settings/reset", async (req, res) => {
|
||||
if (!req.session.user?.isAdmin) return res.status(403).json({ error: "Access denied." });
|
||||
try {
|
||||
return res.json(await toolManager.resetSettings(req.params.id));
|
||||
} catch (error) {
|
||||
return res.status(400).json({ error: error.message });
|
||||
}
|
||||
});
|
||||
|
||||
router.get("/tools/:id/assets/*", (req, res) => {
|
||||
const permission = canUseAssistant({
|
||||
user: req.session.user,
|
||||
config,
|
||||
origin: "webui",
|
||||
platform: "webui",
|
||||
requestedSurface: "webui_chat"
|
||||
if (!req.session.user?.isAdmin) {
|
||||
const permission = canUseAssistant({
|
||||
user: req.session.user,
|
||||
config,
|
||||
origin: "webui",
|
||||
platform: "webui",
|
||||
requestedSurface: "webui_chat"
|
||||
});
|
||||
if (!permission.allowed) return res.status(403).end();
|
||||
}
|
||||
const file = toolManager.resolveAsset(req.params.id, req.params[0], {
|
||||
allowInstalled: req.session.user?.isAdmin === true
|
||||
});
|
||||
if (!permission.allowed) return res.status(403).end();
|
||||
const file = toolManager.resolveAsset(req.params.id, req.params[0]);
|
||||
return file ? res.sendFile(file) : res.status(404).end();
|
||||
});
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "lumi_ai",
|
||||
"name": "Lumi AI",
|
||||
"version": "0.8.0",
|
||||
"version": "0.8.1",
|
||||
"description": "Managed local AI provider and scoped WebUI assistant for Lumi.",
|
||||
"main": "index.js"
|
||||
}
|
||||
|
||||
@ -10,8 +10,10 @@
|
||||
const settingsModal = document.querySelector("[data-ai-tool-settings-modal]");
|
||||
const settingsTitle = settingsModal?.querySelector("[data-ai-tool-settings-title]");
|
||||
const settingsForm = settingsModal?.querySelector("[data-ai-tool-settings-form]");
|
||||
const settingsCustom = settingsModal?.querySelector("[data-ai-tool-settings-custom]");
|
||||
const settingsFields = settingsModal?.querySelector("[data-ai-tool-settings-fields]");
|
||||
const settingsSave = settingsModal?.querySelector("[data-ai-tool-settings-save]");
|
||||
const settingsReset = settingsModal?.querySelector("[data-ai-tool-settings-reset]");
|
||||
const diagnostics = modal?.querySelector("[data-ai-tool-diagnostics]");
|
||||
const diagnosticRole = diagnostics?.querySelector("[data-ai-tool-diagnostic-role]");
|
||||
const diagnosticOrigin = diagnostics?.querySelector("[data-ai-tool-diagnostic-origin]");
|
||||
@ -19,11 +21,13 @@
|
||||
const diagnosticResults = diagnostics?.querySelector("[data-ai-tool-diagnostic-results]");
|
||||
const promptPreview = diagnostics?.querySelector("[data-ai-tool-prompt-preview]");
|
||||
if (!openButton || !modal || !list || !source || !readmeModal || !readmeTitle || !readmeBody ||
|
||||
!settingsModal || !settingsTitle || !settingsForm || !settingsFields || !settingsSave ||
|
||||
!settingsModal || !settingsTitle || !settingsForm || !settingsCustom || !settingsFields ||
|
||||
!settingsSave || !settingsReset ||
|
||||
!diagnostics || !diagnosticRole || !diagnosticOrigin || !diagnosticResults || !promptPreview) return;
|
||||
|
||||
let loading = false;
|
||||
let activeSettingsTool = null;
|
||||
let activeSettingsPayload = null;
|
||||
|
||||
const setOpen = (target, open) => {
|
||||
target.classList.toggle("is-open", open);
|
||||
@ -181,6 +185,7 @@
|
||||
`registered=${(plugin.registered_tools || []).join(", ") || "none"}`,
|
||||
plugin.prompt_exposed ? "prompt=exposed" : `prompt=hidden (${plugin.hidden_reason || "unknown"})`,
|
||||
plugin.message || "",
|
||||
plugin.runtime_details ? `details=${JSON.stringify(plugin.runtime_details)}` : "",
|
||||
decisions
|
||||
].filter(Boolean).join(" · ");
|
||||
diagnosticResults.append(row);
|
||||
@ -239,7 +244,9 @@
|
||||
|
||||
const openSettings = async (tool) => {
|
||||
activeSettingsTool = tool;
|
||||
activeSettingsPayload = null;
|
||||
settingsTitle.textContent = `${tool.display_name || tool.tool_id} settings`;
|
||||
settingsCustom.replaceChildren();
|
||||
settingsFields.replaceChildren(message("Loading settings..."));
|
||||
setOpen(settingsModal, true);
|
||||
try {
|
||||
@ -256,6 +263,8 @@
|
||||
};
|
||||
|
||||
const renderSettings = (payload) => {
|
||||
activeSettingsPayload = payload;
|
||||
renderCustomSettings(payload);
|
||||
settingsFields.replaceChildren();
|
||||
for (const [key, field] of Object.entries(payload.schema || {})) {
|
||||
const wrapper = document.createElement("div");
|
||||
@ -275,6 +284,43 @@
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Render the tool-owned settings markup and load its declared assets.
 *
 * The caller does not await this function, so failures are handled here
 * instead of surfacing as unhandled rejections. Once the scripts have loaded,
 * a "lumi-ai-tool-settings-open" event is dispatched on window — unless the
 * active tool changed while the scripts were loading.
 *
 * @param {object} payload - Settings payload; `payload.ui` may carry `html`,
 *   `styles` and `scripts`.
 * @returns {Promise<void>}
 */
const renderCustomSettings = async (payload) => {
  const toolId = activeSettingsTool?.tool_id;
  settingsCustom.replaceChildren();
  if (!payload.ui?.html) return;
  for (const href of payload.ui.styles || []) loadStyle(href);
  settingsCustom.innerHTML = payload.ui.html;
  try {
    await Promise.all((payload.ui.scripts || []).map(loadScript));
  } catch (error) {
    console.error(error);
    // Only report into the container if it still belongs to this tool.
    if (activeSettingsTool?.tool_id === toolId) settingsCustom.prepend(message(error.message));
    return;
  }
  // The modal may have been switched to another tool while scripts loaded.
  if (activeSettingsTool?.tool_id !== toolId) return;
  window.dispatchEvent(new CustomEvent("lumi-ai-tool-settings-open", {
    detail: {
      toolId,
      payload,
      root: settingsCustom
    }
  }));
};
|
||||
|
||||
/**
 * Append a stylesheet link for a tool asset unless one tagged with the same
 * href is already in the document.
 *
 * @param {string} href - Stylesheet URL; also stored as the asset marker.
 */
const loadStyle = (href) => {
  const selector = `link[data-ai-tool-asset="${CSS.escape(href)}"]`;
  if (document.querySelector(selector) !== null) return;
  const stylesheet = document.createElement("link");
  stylesheet.rel = "stylesheet";
  stylesheet.href = href;
  stylesheet.dataset.aiToolAsset = href;
  document.head.append(stylesheet);
};
|
||||
|
||||
/**
 * Load a tool script once per document.
 *
 * @param {string} src - Script URL; also stored as the asset marker.
 * @returns {Promise<*>} Resolves immediately when a script tagged with `src`
 *   is already present, otherwise when the new script fires "load"; rejects
 *   with an Error when it fires "error".
 */
const loadScript = (src) => {
  const existing = document.querySelector(`script[data-ai-tool-asset="${CSS.escape(src)}"]`);
  if (existing) return Promise.resolve();
  return new Promise((resolve, reject) => {
    const script = document.createElement("script");
    script.src = src;
    script.dataset.aiToolAsset = src;
    script.addEventListener("load", resolve, { once: true });
    script.addEventListener("error", () => {
      // Remove the failed element so a later call retries the load instead of
      // treating the asset as already present.
      script.remove();
      reject(new Error(`Unable to load ${src}.`));
    }, { once: true });
    document.head.append(script);
  });
};
|
||||
|
||||
const settingsControl = (key, field, value, configuredSecret) => {
|
||||
if (field.type === "boolean") {
|
||||
const input = document.createElement("input");
|
||||
@ -367,6 +413,25 @@
|
||||
}
|
||||
});
|
||||
|
||||
settingsReset.addEventListener("click", async () => {
|
||||
if (!activeSettingsTool || !window.confirm(`Reset ${activeSettingsTool.display_name || activeSettingsTool.tool_id} settings to defaults?`)) return;
|
||||
settingsReset.disabled = true;
|
||||
try {
|
||||
const response = await fetch(`/plugins/lumi_ai/api/tools/${encodeURIComponent(activeSettingsTool.tool_id)}/settings/reset`, {
|
||||
method: "POST",
|
||||
headers: { Accept: "application/json" }
|
||||
});
|
||||
const payload = await response.json();
|
||||
if (!response.ok) throw new Error(payload.error || "Unable to reset tool settings.");
|
||||
renderSettings(payload);
|
||||
await loadTools(false);
|
||||
} catch (error) {
|
||||
window.alert(error.message);
|
||||
} finally {
|
||||
settingsReset.disabled = false;
|
||||
}
|
||||
});
|
||||
|
||||
const renderMarkdown = (container, markdown) => {
|
||||
container.replaceChildren();
|
||||
const lines = String(markdown || "").replace(/\r\n/g, "\n").split("\n");
|
||||
|
||||
@ -56,8 +56,10 @@
|
||||
<button class="icon-button" type="button" data-ai-tool-settings-close aria-label="Close tool settings">×</button>
|
||||
</header>
|
||||
<form data-ai-tool-settings-form>
|
||||
<div data-ai-tool-settings-custom></div>
|
||||
<div class="ai-tool-settings-fields" data-ai-tool-settings-fields></div>
|
||||
<div class="modal-actions">
|
||||
<button class="button danger" type="button" data-ai-tool-settings-reset>Reset to defaults</button>
|
||||
<button class="button subtle" type="button" data-ai-tool-settings-close>Cancel</button>
|
||||
<button class="button" type="submit" data-ai-tool-settings-save>Save settings</button>
|
||||
</div>
|
||||
|
||||
@ -0,0 +1,63 @@
|
||||
const { decodeEntities, sanitizeText } = require("../html_extractor");
|
||||
const { decodeBody } = require("../page_fetcher");
|
||||
const { evaluateNetworkTarget } = require("../url_policy");
|
||||
|
||||
/**
 * Search adapter that requests Bing results in RSS form and parses them with
 * `parseBingRss`.
 */
class BingRssSearchAdapter {
  /**
   * @param {{fetcher?: object}} [options={}] - `fetcher` supplies `request`
   *   and `resolveHost`.
   */
  constructor(options = {}) {
    this.fetcher = options.fetcher;
    this.id = "bing_rss";
  }

  /**
   * Run a search and return parsed result rows.
   *
   * @param {string} query - Search terms.
   * @param {{settings: object}} [options={}] - `settings` provides
   *   `max_results`, `search_timeout_ms` and `max_fetch_bytes`.
   * @returns {Promise<object[]>} At most `max(max_results * 2, 10)` rows.
   * @throws {Error} When the URL policy blocks the endpoint or the response
   *   is not ok.
   */
  async search(query, options = {}) {
    const { settings } = options;
    const endpoint = new URL("https://www.bing.com/search");
    endpoint.searchParams.set("q", query);
    endpoint.searchParams.set("format", "rss");
    // Over-fetch relative to max_results; the same cap trims the parsed rows.
    const resultCap = Math.max(settings.max_results * 2, 10);
    endpoint.searchParams.set("count", String(resultCap));

    const policy = await evaluateNetworkTarget(endpoint.href, { resolveHost: this.fetcher.resolveHost });
    if (!policy.allowed) throw new Error(`Search source blocked: ${policy.reason}.`);

    // Search responses are capped at 2 MiB regardless of the configured limit.
    const byteCap = Math.min(settings.max_fetch_bytes, 2 * 1024 * 1024);
    const response = await this.fetcher.request(policy.url, settings, {
      networkOnly: true,
      timeoutMs: settings.search_timeout_ms,
      maxBytes: byteCap,
      accept: "application/rss+xml,application/xml,text/xml"
    });
    if (!response.ok) throw new Error(`Bing RSS search failed (${response.status}).`);

    const encoding = response.headers.get("content-encoding");
    const xml = decodeBody(response.body, encoding, byteCap).toString("utf8");
    return parseBingRss(xml).slice(0, resultCap);
  }
}
|
||||
|
||||
/**
 * Extract result rows from a Bing RSS document.
 *
 * Items whose `<link>` is not an http(s) URL are skipped. `rank` and
 * `raw_source_id` use the item's 1-based position among all `<item>`
 * elements, so skipped items leave gaps.
 *
 * @param {*} xml - RSS text; falsy input yields no rows.
 * @returns {object[]} Rows with title, url, snippet, date, rank, source and
 *   raw_source_id.
 */
function parseBingRss(xml) {
  const rows = [];
  let position = 0;
  for (const [, item] of String(xml || "").matchAll(/<item\b[^>]*>([\s\S]*?)<\/item>/gi)) {
    position += 1;
    const url = tagValue(item, "link");
    if (!/^https?:\/\//i.test(url)) continue;
    rows.push({
      title: sanitizeText(tagValue(item, "title"), 240),
      url,
      snippet: sanitizeText(tagValue(item, "description"), 900),
      date: normalizeDate(tagValue(item, "pubDate")),
      rank: position,
      source: "bing_rss",
      raw_source_id: `bing-${position}`
    });
  }
  return rows;
}
|
||||
|
||||
/**
 * Return the entity-decoded, trimmed text of the first `<tag>` element.
 *
 * @param {*} value - Markup to search (stringified).
 * @param {string} tag - Element name; matched case-insensitively.
 * @returns {string} Element text with a wrapping CDATA section removed, or ""
 *   when the element is absent.
 */
function tagValue(value, tag) {
  const match = String(value).match(new RegExp(`<${tag}\\b[^>]*>([\\s\\S]*?)<\\/${tag}>`, "i"));
  // Trim first: the CDATA markers are anchored to the ends of the string, so
  // whitespace or newlines around the section would otherwise keep them.
  const inner = String(match?.[1] || "").trim().replace(/^<!\[CDATA\[|\]\]>$/g, "");
  return decodeEntities(inner).trim();
}
|
||||
|
||||
/**
 * Convert a date value to an ISO 8601 string.
 *
 * @param {*} value - Anything accepted by the `Date` constructor.
 * @returns {string|null} ISO timestamp, or null when the value does not
 *   produce a valid date.
 */
function normalizeDate(value) {
  const parsed = new Date(value);
  if (Number.isNaN(parsed.getTime())) return null;
  return parsed.toISOString();
}
|
||||
|
||||
module.exports = { BingRssSearchAdapter, parseBingRss };
|
||||
@ -0,0 +1,91 @@
|
||||
const { decodeEntities, sanitizeText } = require("../html_extractor");
|
||||
const { decodeBody } = require("../page_fetcher");
|
||||
const { evaluateNetworkTarget } = require("../url_policy");
|
||||
|
||||
/**
 * Search adapter that requests DuckDuckGo's HTML endpoint and parses the
 * result markup with `parseDuckDuckGoHtml`.
 */
class GenericHtmlSearchAdapter {
  /**
   * @param {{fetcher?: object}} [options={}] - `fetcher` supplies `request`
   *   and `resolveHost`.
   */
  constructor(options = {}) {
    this.fetcher = options.fetcher;
    this.id = "duckduckgo_html";
  }

  /**
   * Run a search and return parsed result rows.
   *
   * @param {string} query - Search terms.
   * @param {{settings: object, freshness?: *}} [options={}] - `settings`
   *   provides `max_results`, `search_timeout_ms` and `max_fetch_bytes`; a
   *   truthy `freshness` is mapped through `freshnessValue` into the `df`
   *   query parameter.
   * @returns {Promise<object[]>} At most `max(max_results * 2, 10)` rows.
   * @throws {Error} When the URL policy blocks the endpoint or the response
   *   is not ok.
   */
  async search(query, options = {}) {
    const { settings } = options;
    const endpoint = new URL("https://html.duckduckgo.com/html/");
    endpoint.searchParams.set("q", query);
    if (options.freshness) {
      endpoint.searchParams.set("df", freshnessValue(options.freshness));
    }

    const policy = await evaluateNetworkTarget(endpoint.href, { resolveHost: this.fetcher.resolveHost });
    if (!policy.allowed) throw new Error(`Search source blocked: ${policy.reason}.`);

    // Search responses are capped at 2 MiB regardless of the configured limit.
    const byteCap = Math.min(settings.max_fetch_bytes, 2 * 1024 * 1024);
    const response = await this.fetcher.request(policy.url, settings, {
      networkOnly: true,
      timeoutMs: settings.search_timeout_ms,
      maxBytes: byteCap,
      accept: "text/html,application/xhtml+xml"
    });
    if (!response.ok) throw new Error(`DuckDuckGo search failed (${response.status}).`);

    const encoding = response.headers.get("content-encoding");
    const html = decodeBody(response.body, encoding, byteCap).toString("utf8");
    const resultCap = Math.max(settings.max_results * 2, 10);
    return parseDuckDuckGoHtml(html).slice(0, resultCap);
  }
}
|
||||
|
||||
/**
 * Parse DuckDuckGo HTML result markup into result rows.
 *
 * Each result anchor (`result__a` / `result-link`) yields one row. Its snippet
 * is looked up in the markup between that anchor and the next one, or within
 * 5000 characters of the anchor start when there is no next anchor.
 *
 * @param {*} html - Raw result page markup.
 * @returns {object[]} Rows with title, url, snippet, date, rank, source and raw_source_id.
 */
function parseDuckDuckGoHtml(html) {
  const text = String(html || "");
  const anchorPattern = /<a\b[^>]*class\s*=\s*["'][^"']*(?:result__a|result-link)[^"']*["'][^>]*href\s*=\s*["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi;
  const snippetPattern = /<(?:a|div|td)\b[^>]*class\s*=\s*["'][^"']*(?:result__snippet|result-snippet)[^"']*["'][^>]*>([\s\S]*?)<\/(?:a|div|td)>/i;
  const anchors = [...text.matchAll(anchorPattern)];
  const rows = [];

  anchors.forEach((anchor, position) => {
    const windowStart = anchor.index + anchor[0].length;
    const windowEnd = anchors[position + 1]?.index || Math.min(text.length, anchor.index + 5000);
    const snippet = text.slice(windowStart, windowEnd).match(snippetPattern)?.[1] || "";

    const url = decodeResultUrl(decodeEntities(anchor[1]));
    if (!url) return;

    // Rank counts accepted rows only, so skipped anchors leave no gaps.
    const rank = rows.length + 1;
    rows.push({
      title: sanitizeText(anchor[2], 240),
      url,
      snippet: sanitizeText(snippet, 900),
      date: extractDate(snippet),
      rank,
      source: "duckduckgo_html",
      raw_source_id: `ddg-${rank}`
    });
  });

  return rows;
}
|
||||
|
||||
/**
 * Resolve a DuckDuckGo result href to the target URL it points at.
 *
 * Hrefs are resolved against the DuckDuckGo HTML origin. When the resolved URL
 * carries a `uddg` query parameter, that parameter's value is used as the
 * target; otherwise the resolved URL itself is used.
 *
 * @param {string} value - Href taken from a result anchor.
 * @returns {string|null} Normalized http(s) URL, or null when the href is
 *   unparseable or uses another scheme.
 */
function decodeResultUrl(value) {
  try {
    const resolved = new URL(value, "https://html.duckduckgo.com/");
    // `searchParams.get` already percent-decodes once. Decoding again would
    // corrupt targets containing escaped characters (`%2526` would become `&`)
    // and throw on a literal `%`.
    const redirected = resolved.searchParams.get("uddg");
    const parsed = new URL(redirected || resolved.href);
    return ["http:", "https:"].includes(parsed.protocol) ? parsed.href : null;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Map a free-form freshness hint to a one-letter time-range code.
 *
 * @param {*} value - Hint such as "day", "7d", "past month" or "365d".
 * @returns {string} "d", "w", "m" or "y", or "" when nothing matches.
 */
function freshnessValue(value) {
  const hint = String(value || "").toLowerCase();
  // Order matters: the first matching pattern wins ("weekday" maps to "d").
  const table = [
    [/day|24h/, "d"],
    [/week|7d/, "w"],
    [/month|30d/, "m"],
    [/year|365d/, "y"]
  ];
  const hit = table.find(([pattern]) => pattern.test(hint));
  return hit ? hit[1] : "";
}
|
||||
|
||||
/**
 * Find the first date mentioned in a snippet and return it as ISO-8601.
 *
 * Recognizes `20YY-MM-DD` and `Month D, 20YY` within the first 300 sanitized
 * characters.
 *
 * @param {*} value - Snippet markup or text.
 * @returns {string|null} ISO timestamp, or null when no parseable date is found.
 */
function extractDate(value) {
  const datePattern = /\b(20\d{2}-\d{2}-\d{2}|[A-Z][a-z]{2,8}\s+\d{1,2},\s+20\d{2})\b/;
  const found = sanitizeText(value, 300).match(datePattern);
  if (found === null) return null;
  const parsed = new Date(found[1]);
  if (Number.isNaN(parsed.getTime())) return null;
  return parsed.toISOString();
}
|
||||
|
||||
module.exports = {
|
||||
GenericHtmlSearchAdapter,
|
||||
decodeResultUrl,
|
||||
parseDuckDuckGoHtml
|
||||
};
|
||||
76
plugins/lumi_ai_web_search/backend/cache.js
Normal file
76
plugins/lumi_ai_web_search/backend/cache.js
Normal file
@ -0,0 +1,76 @@
|
||||
const crypto = require("crypto");
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
/**
 * File-backed cache that stores one JSON file per key in a directory.
 * File names are the SHA-256 hex digest of the key.
 */
class ToolCache {
  /**
   * @param {object} [options]
   * @param {string} [options.directory] - Directory holding the cache files.
   * @param {() => number} [options.now] - Clock in milliseconds; defaults to `Date.now`.
   */
  constructor(options = {}) {
    this.directory = options.directory;
    this.now = options.now || Date.now;
  }

  /**
   * Read a cached value if it is younger than `ttlSeconds`.
   * Expired or malformed entries are deleted; unreadable ones are ignored.
   *
   * @param {*} key
   * @param {number} ttlSeconds - A falsy TTL disables reads.
   * @returns {*} Cached data, or null on miss.
   */
  get(key, ttlSeconds) {
    if (!ttlSeconds) return null;
    const file = this.fileFor(key);
    try {
      const entry = JSON.parse(fs.readFileSync(file, "utf8"));
      const expired = !Number.isFinite(entry.created_at)
        || entry.created_at + ttlSeconds * 1000 <= this.now();
      if (!expired) return entry.data;
      fs.rmSync(file, { force: true });
      return null;
    } catch {
      return null;
    }
  }

  /**
   * Store a value, then prune the cache to 200 entries.
   *
   * @param {*} key
   * @param {*} data - JSON-serializable payload.
   * @returns {*} The `data` argument.
   */
  set(key, data) {
    fs.mkdirSync(this.directory, { recursive: true });
    const target = this.fileFor(key);
    const staging = `${target}.${process.pid}.tmp`;
    const payload = JSON.stringify({ created_at: this.now(), data });
    fs.writeFileSync(staging, `${payload}\n`);
    try {
      fs.renameSync(staging, target);
    } catch (error) {
      // Retry once after removing the existing file when the rename is refused.
      const replaceable = error.code === "EEXIST" || error.code === "EPERM";
      if (!replaceable) throw error;
      fs.rmSync(target, { force: true });
      fs.renameSync(staging, target);
    }
    this.prune(200);
    return data;
  }

  /** Remove every entry by recreating the cache directory. */
  clear() {
    fs.rmSync(this.directory, { recursive: true, force: true });
    fs.mkdirSync(this.directory, { recursive: true });
  }

  /**
   * Count entries and total bytes of the `.json` files in the directory.
   *
   * @returns {{entries: number, bytes: number}} Zeros when the directory cannot be read.
   */
  stats() {
    try {
      let entries = 0;
      let bytes = 0;
      for (const name of fs.readdirSync(this.directory)) {
        if (!name.endsWith(".json")) continue;
        entries += 1;
        bytes += fs.statSync(path.join(this.directory, name)).size;
      }
      return { entries, bytes };
    } catch {
      return { entries: 0, bytes: 0 };
    }
  }

  /**
   * Keep only the `maximum` most recently modified entries.
   *
   * @param {number} maximum
   */
  prune(maximum) {
    const ranked = [];
    try {
      for (const name of fs.readdirSync(this.directory)) {
        if (!name.endsWith(".json")) continue;
        ranked.push({ name, mtime: fs.statSync(path.join(this.directory, name)).mtimeMs });
      }
      ranked.sort((left, right) => right.mtime - left.mtime);
    } catch {
      return;
    }
    for (const stale of ranked.slice(maximum)) {
      fs.rmSync(path.join(this.directory, stale.name), { force: true });
    }
  }

  /**
   * @param {*} key
   * @returns {string} Absolute-or-relative path of the cache file for `key`.
   */
  fileFor(key) {
    const digest = crypto.createHash("sha256").update(String(key)).digest("hex");
    return path.join(this.directory, `${digest}.json`);
  }
}
|
||||
|
||||
module.exports = { ToolCache };
|
||||
155
plugins/lumi_ai_web_search/backend/html_extractor.js
Normal file
155
plugins/lumi_ai_web_search/backend/html_extractor.js
Normal file
@ -0,0 +1,155 @@
|
||||
// Elements whose entire subtree is removed before text extraction.
const BLOCK_TAGS = [
  "script",
  "style",
  "noscript",
  "svg",
  "canvas",
  "iframe",
  "template",
  "nav",
  "footer",
  "aside",
  "form",
  "button"
];
|
||||
|
||||
/**
 * Extract metadata and readable text from an HTML document.
 *
 * Text is taken from the first `<main>` or `<article>` element when one
 * exists, otherwise from the whole document.
 *
 * @param {*} html - Document markup.
 * @param {object} [options]
 * @param {number} [options.maxChars] - Text limit; defaults to 12000, minimum 500.
 * @returns {object} title, description, headings (up to 20 h1-h3), canonical_url,
 *   published_at, updated_at, extracted_text and extraction_status ("ok" | "empty").
 */
function extractReadableHtml(html, options = {}) {
  const source = String(html || "");
  const limit = Math.max(500, Number(options.maxChars) || 12000);

  const headings = [];
  for (const match of source.matchAll(/<h[1-3]\b[^>]*>([\s\S]*?)<\/h[1-3]>/gi)) {
    const heading = sanitizeText(match[1], 300);
    if (heading) headings.push(heading);
    if (headings.length === 20) break;
  }

  const container = firstRaw(source, /<(main|article)\b[^>]*>([\s\S]*?)<\/\1>/i, 2) || source;
  // Turn block boundaries into newlines before dropping the remaining tags.
  const flattened = stripNonContent(container)
    .replace(/<(br|hr)\b[^>]*\/?>/gi, "\n")
    .replace(/<\/(p|div|section|article|main|li|h[1-6]|tr)>/gi, "\n")
    .replace(/<[^>]+>/g, " ");
  const extractedText = sanitizeText(flattened, limit);

  const publishedAt = firstMeta(source, [
    "article:published_time", "date", "datepublished", "publishdate", "pubdate"
  ]);
  const updatedAt = firstMeta(source, [
    "article:modified_time", "datemodified", "last-modified", "lastmodified"
  ]);

  return {
    title: firstContent(source, /<title\b[^>]*>([\s\S]*?)<\/title>/i, 300),
    description: metaContent(source, ["description", "og:description"], 600),
    headings,
    canonical_url: linkHref(source, "canonical"),
    published_at: normalizeDate(publishedAt),
    updated_at: normalizeDate(updatedAt),
    extracted_text: extractedText,
    extraction_status: extractedText ? "ok" : "empty"
  };
}
|
||||
|
||||
/**
 * Wrap a plain-text document in the same result shape as `extractReadableHtml`.
 *
 * @param {*} text - Document body.
 * @param {object} [options]
 * @param {number} [options.maxChars] - Text limit; defaults to 12000, minimum 500.
 * @returns {object} Extraction record with empty metadata fields.
 */
function extractPlainText(text, options = {}) {
  const maximum = Math.max(500, Number(options.maxChars) || 12000);
  const extractedText = sanitizeText(text, maximum);
  return {
    title: "",
    description: "",
    headings: [],
    canonical_url: null,
    published_at: null,
    updated_at: null,
    extracted_text: extractedText,
    // Judge emptiness on the sanitized text, as extractReadableHtml does.
    // Input that sanitizes to nothing must not report "ok" with empty text.
    extraction_status: extractedText ? "ok" : "empty"
  };
}
|
||||
|
||||
/**
 * Remove markup that carries no readable content.
 *
 * Drops every element listed in BLOCK_TAGS with its subtree, then elements
 * whose opening tag contains `hidden`, `aria-hidden=true` or an inline
 * `display:none` style.
 *
 * @param {*} value - HTML fragment.
 * @returns {string} Fragment with the matched elements replaced by a space.
 */
function stripNonContent(value) {
  const withoutBlocks = BLOCK_TAGS.reduce(
    (markup, tag) => markup.replace(new RegExp(`<${tag}\\b[^>]*>[\\s\\S]*?<\\/${tag}>`, "gi"), " "),
    String(value || "")
  );
  const hiddenElement = /<([a-z0-9]+)\b[^>]*(?:hidden|aria-hidden\s*=\s*["']?true|style\s*=\s*["'][^"']*display\s*:\s*none)[^>]*>[\s\S]*?<\/\1>/gi;
  return withoutBlocks.replace(hiddenElement, " ");
}
|
||||
|
||||
/**
 * Reduce markup or text to clean, bounded plain text.
 *
 * Strips tags, decodes entities, blanks control characters, collapses runs of
 * spaces and tabs, trims whitespace around newlines and limits blank lines to one.
 *
 * @param {*} value - Markup or text.
 * @param {number} [maximum=12000] - Maximum length of the result.
 * @returns {string}
 */
function sanitizeText(value, maximum = 12000) {
  const withoutTags = String(value || "").replace(/<[^>]+>/g, " ");
  const decoded = decodeEntities(withoutTags);
  const normalized = decoded
    .replace(/[\u0000-\u0008\u000b\u000c\u000e-\u001f\u007f]/g, " ")
    .replace(/[ \t]+/g, " ")
    .replace(/\s*\n\s*/g, "\n")
    .replace(/\n{3,}/g, "\n\n");
  return normalized.trim().slice(0, maximum);
}
|
||||
|
||||
// Named entities understood by decodeEntities. `nbsp` becomes a plain space.
const NAMED_ENTITY_VALUES = Object.freeze({
  amp: "&",
  lt: "<",
  gt: ">",
  quot: "\"",
  apos: "'",
  nbsp: " "
});

/**
 * Decode numeric character references and a small set of named entities.
 *
 * Decoding is a single pass, so text produced by one replacement is never
 * decoded again: `&amp;lt;` yields `&lt;`, not `<`.
 *
 * @param {*} value - Text to decode; null and undefined are treated as "".
 * @returns {string}
 */
function decodeEntities(value) {
  return String(value ?? "").replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g,
    (_match, decimal, hex, name) => {
      if (decimal !== undefined) return safeCharacter(Number(decimal));
      if (hex !== undefined) return safeCharacter(Number.parseInt(hex, 16));
      return NAMED_ENTITY_VALUES[name];
    }
  );
}

/**
 * Convert a code point to a string, rejecting values outside 1..0x10FFFF.
 *
 * @param {number} code
 * @returns {string} The character, or "" for an invalid code point.
 */
function safeCharacter(code) {
  return Number.isInteger(code) && code > 0 && code <= 0x10ffff
    ? String.fromCodePoint(code)
    : "";
}
|
||||
|
||||
/**
 * Return the sanitized first capture group of `expression` in `source`.
 *
 * @param {string} source
 * @param {RegExp} expression - Pattern with at least one capture group.
 * @param {number} maximum - Length limit passed to sanitizeText.
 * @returns {string} Sanitized capture, or "" when there is no match.
 */
function firstContent(source, expression, maximum) {
  const [, captured] = source.match(expression) || [];
  return sanitizeText(captured, maximum);
}
|
||||
|
||||
/**
 * Return one capture group of the first match, without any sanitizing.
 *
 * @param {string} source
 * @param {RegExp} expression
 * @param {number} group - Index of the capture group to return.
 * @returns {string} The captured text, or "" when absent or empty.
 */
function firstRaw(source, expression, group) {
  const match = source.match(expression);
  if (!match) return "";
  return match[group] || "";
}
|
||||
|
||||
/**
 * Look up the first matching meta tag and return its sanitized content.
 *
 * @param {string} source - Document markup.
 * @param {string[]} names - Candidate `name`/`property` values, in priority order.
 * @param {number} maximum - Length limit for the result.
 * @returns {string}
 */
function metaContent(source, names, maximum) {
  const raw = firstMeta(source, names);
  return sanitizeText(raw, maximum);
}
|
||||
|
||||
/**
 * Return the `content` of the first `<meta>` tag whose `name` or `property`
 * equals one of `names`. Names are tried in order, and both attribute orders
 * (key before content, content before key) are accepted.
 *
 * @param {string} source - Document markup.
 * @param {string[]} names - Candidate attribute values, in priority order.
 * @returns {string} Entity-decoded, trimmed content, or "" when nothing matches.
 */
function firstMeta(source, names) {
  // Match the value up to the quote character that opened it. A shared
  // `[^"']*` class would cut `content="Don't miss"` off at the apostrophe.
  const content = `content\\s*=\\s*(?:"([^"]*)"|'([^']*)')`;
  for (const name of names) {
    const escaped = escapeExpression(name);
    const key = `(?:name|property)\\s*=\\s*["']${escaped}["']`;
    const patterns = [
      new RegExp(`<meta\\b[^>]*${key}[^>]*${content}[^>]*>`, "i"),
      new RegExp(`<meta\\b[^>]*${content}[^>]*${key}[^>]*>`, "i")
    ];
    for (const pattern of patterns) {
      const match = source.match(pattern);
      // Group 1 holds a double-quoted value, group 2 a single-quoted one.
      const value = match?.[1] ?? match?.[2];
      if (value) return decodeEntities(value).trim();
    }
  }
  return "";
}
|
||||
|
||||
/**
 * Return the `href` of the first `<link>` tag whose `rel` attribute contains
 * `rel`. Both attribute orders are accepted; rel-before-href is tried first.
 *
 * @param {string} source - Document markup.
 * @param {string} rel - Relation to look for, e.g. "canonical".
 * @returns {string|null} Entity-decoded, trimmed href, or null when absent.
 */
function linkHref(source, rel) {
  const relToken = escapeExpression(rel);
  const relAttribute = `rel\\s*=\\s*["'][^"']*${relToken}[^"']*["']`;
  const hrefAttribute = `href\\s*=\\s*["']([^"']+)["']`;
  const orderings = [
    [relAttribute, hrefAttribute],
    [hrefAttribute, relAttribute]
  ];
  for (const [first, second] of orderings) {
    const pattern = new RegExp(`<link\\b[^>]*${first}[^>]*${second}[^>]*>`, "i");
    const href = source.match(pattern)?.[1];
    if (href) return decodeEntities(href).trim();
  }
  return null;
}
|
||||
|
||||
/**
 * Convert a date-like value to an ISO-8601 string.
 *
 * @param {*} value - Anything accepted by the `Date` constructor.
 * @returns {string|null} ISO timestamp, or null for falsy or unparseable input.
 */
function normalizeDate(value) {
  if (!value) return null;
  const parsed = new Date(value);
  const valid = !Number.isNaN(parsed.getTime());
  return valid ? parsed.toISOString() : null;
}
|
||||
|
||||
/**
 * Escape regular-expression metacharacters so a value matches literally.
 *
 * @param {*} value - Text to embed in a pattern; coerced to a string.
 * @returns {string}
 */
function escapeExpression(value) {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return String(value).replace(metacharacters, (character) => `\\${character}`);
}
|
||||
|
||||
module.exports = {
|
||||
decodeEntities,
|
||||
extractPlainText,
|
||||
extractReadableHtml,
|
||||
sanitizeText,
|
||||
stripNonContent
|
||||
};
|
||||
93
plugins/lumi_ai_web_search/backend/lumi_search_broker.js
Normal file
93
plugins/lumi_ai_web_search/backend/lumi_search_broker.js
Normal file
@ -0,0 +1,93 @@
|
||||
const { BingRssSearchAdapter } = require("./adapters/bing_rss_search");
|
||||
const { GenericHtmlSearchAdapter } = require("./adapters/generic_html_search");
|
||||
const { sanitizeText } = require("./html_extractor");
|
||||
const { canonicalUrl } = require("./url_policy");
|
||||
|
||||
/**
 * Runs a query through an ordered list of search adapters and merges the rows.
 */
class LumiSearchBroker {
  /**
   * @param {object} [options]
   * @param {object} [options.fetcher] - Passed to the default adapters.
   * @param {object[]} [options.adapters] - Overrides the default adapter list
   *   (DuckDuckGo HTML first, then Bing RSS).
   */
  constructor(options = {}) {
    this.fetcher = options.fetcher;
    this.adapters = options.adapters || [
      new GenericHtmlSearchAdapter({ fetcher: this.fetcher }),
      new BingRssSearchAdapter({ fetcher: this.fetcher })
    ];
  }

  /**
   * Query adapters in order until `settings.max_results` raw rows are collected.
   * A failing adapter is recorded and does not abort the search.
   *
   * @param {string} query
   * @param {object} [options] - Forwarded to each adapter; must carry `settings`.
   * @returns {Promise<object>} provider, normalized results, warnings and adapter_errors.
   */
  async search(query, options = {}) {
    const collected = [];
    const failures = [];
    for (const adapter of this.adapters) {
      try {
        const batch = await adapter.search(query, options);
        collected.push(...batch);
        if (collected.length >= options.settings.max_results) break;
      } catch (error) {
        failures.push({ adapter: adapter.id, reason: safeAdapterError(error) });
      }
    }
    const warnings = failures.map(({ adapter, reason }) => `${adapter}: ${reason}`);
    return {
      provider: "lumi_search_broker",
      results: normalizeBrokerResults(collected),
      warnings,
      adapter_errors: failures
    };
  }
}
|
||||
|
||||
/**
 * Normalize and de-duplicate raw adapter rows.
 *
 * Rows with an unparseable URL are dropped. A row is a duplicate when its
 * cleaned URL or its normalized title was already seen.
 *
 * @param {object[]} rows - Raw rows from the adapters.
 * @returns {object[]} Rows with title, url, domain, snippet, date, rank, source,
 *   raw_source_id and relevance_score.
 */
function normalizeBrokerResults(rows) {
  const knownUrls = new Set();
  const knownTitles = new Set();
  const normalized = [];

  for (const row of rows) {
    let url;
    try {
      url = stripTracking(canonicalUrl(new URL(row.url)));
    } catch {
      continue;
    }

    const title = sanitizeText(row.title || "Untitled result", 240);
    const titleKey = title.toLowerCase().replace(/[^a-z0-9]+/g, " ").trim();
    if (knownUrls.has(url) || (titleKey && knownTitles.has(titleKey))) continue;
    knownUrls.add(url);
    if (titleKey) knownTitles.add(titleKey);

    const position = normalized.length;
    const score = Number(row.relevance_score);
    normalized.push({
      title,
      url,
      domain: new URL(url).hostname,
      snippet: sanitizeText(row.snippet, 900),
      date: normalizeDate(row.date),
      rank: Number(row.rank) || position + 1,
      source: sanitizeText(row.source || "web", 80),
      raw_source_id: sanitizeText(row.raw_source_id || "", 120) || null,
      // Without a usable score, decay by position with a floor of 0.1.
      relevance_score: Number.isFinite(score) ? score : Math.max(0.1, 1 - position * 0.08)
    });
  }

  return normalized;
}
|
||||
|
||||
/**
 * Remove common tracking parameters from a URL.
 *
 * Drops `utm_*`, `fbclid`, `gclid`, `mc_cid`, `mc_eid`, `ref` and `source`,
 * matched case-insensitively.
 *
 * @param {string|URL} value - Absolute URL.
 * @returns {string} Serialized URL without the matched parameters.
 * @throws {TypeError} When `value` is not a valid absolute URL.
 */
function stripTracking(value) {
  const url = new URL(value);
  const trackingKey = /^(utm_.+|fbclid|gclid|mc_[ce]id|ref|source)$/i;
  const doomed = [...url.searchParams.keys()].filter((key) => trackingKey.test(key));
  doomed.forEach((key) => url.searchParams.delete(key));
  return url.href;
}
|
||||
|
||||
/**
 * Convert a date-like value to an ISO-8601 string.
 *
 * @param {*} value - Anything accepted by the `Date` constructor.
 * @returns {string|null} ISO timestamp, or null for falsy or unparseable input.
 */
function normalizeDate(value) {
  if (!value) return null;
  const timestamp = new Date(value).getTime();
  if (Number.isNaN(timestamp)) return null;
  return new Date(timestamp).toISOString();
}
|
||||
|
||||
/**
 * Turn an adapter failure into a short message that contains no URLs.
 *
 * @param {*} error - Thrown value.
 * @returns {string} "timed out" for abort/timeout errors, otherwise the message
 *   with URLs replaced by "[url]", limited to 180 characters.
 */
function safeAdapterError(error) {
  const timedOut = error?.name === "AbortError" || error?.code === "timeout";
  if (timedOut) return "timed out";
  const message = String(error?.message || "unavailable");
  return message.replace(/https?:\/\/\S+/g, "[url]").slice(0, 180);
}
|
||||
|
||||
module.exports = {
|
||||
LumiSearchBroker,
|
||||
normalizeBrokerResults,
|
||||
safeAdapterError,
|
||||
stripTracking
|
||||
};
|
||||
246
plugins/lumi_ai_web_search/backend/page_fetcher.js
Normal file
246
plugins/lumi_ai_web_search/backend/page_fetcher.js
Normal file
@ -0,0 +1,246 @@
|
||||
const http = require("http");
|
||||
const https = require("https");
|
||||
const net = require("net");
|
||||
const zlib = require("zlib");
|
||||
const { extractPlainText, extractReadableHtml } = require("./html_extractor");
|
||||
const {
|
||||
defaultResolveHost,
|
||||
evaluateNetworkTarget,
|
||||
evaluateUrl,
|
||||
isPrivateAddress
|
||||
} = require("./url_policy");
|
||||
|
||||
// Media types (parameters stripped) that fetchPage accepts; any other type is rejected.
const SUPPORTED_CONTENT_TYPES = Object.freeze([
  "text/html", "text/plain",
  "application/xhtml+xml", "application/xml", "text/xml",
  "application/rss+xml", "application/atom+xml"
]);
|
||||
|
||||
/**
 * Fetches public pages under URL policy control and extracts readable content.
 */
class PageFetcher {
  /**
   * @param {object} [options]
   * @param {Function} [options.fetch] - Optional fetch-compatible function; when
   *   absent, requests go through `safeHttpRequest`.
   * @param {Function} [options.resolveHost] - Host resolver; defaults to `defaultResolveHost`.
   * @param {() => number} [options.now] - Clock in milliseconds; defaults to `Date.now`.
   */
  constructor(options = {}) {
    this.fetch = options.fetch || null;
    this.resolveHost = options.resolveHost || defaultResolveHost;
    this.now = options.now || Date.now;
  }

  /**
   * Fetch a page, following redirects manually so every hop is re-checked
   * against the URL policy, and return its extracted content.
   *
   * @param {*} value - URL to fetch.
   * @param {object} settings - Limits and policy (max_redirects, policy_mode,
   *   url_rules, max_fetch_bytes, max_extracted_chars, fetch_timeout_ms).
   * @param {object} [options] - `networkOnly` selects the network-only policy
   *   check; the object is also forwarded to `request`.
   * @returns {Promise<object>} Extraction record including final_url, content_type,
   *   timing_ms and truncated.
   * @throws {Error} With a `code` such as URL_BLOCKED, http_error,
   *   unsupported_content_type, redirect_missing_location or redirect_limit.
   */
  async fetchPage(value, settings, options = {}) {
    const started = this.now();
    let current = String(value || "");

    // `Number(x) || 3` would turn an explicit 0 ("never follow redirects")
    // into 3. Only fall back to the default when the setting is unusable.
    const configured = settings.max_redirects;
    const unset = configured === null || configured === undefined || configured === "";
    const parsed = unset ? Number.NaN : Number(configured);
    const redirects = Number.isFinite(parsed) ? Math.max(0, parsed) : 3;

    const evaluate = (target) => (options.networkOnly
      ? evaluateNetworkTarget(target, { resolveHost: this.resolveHost })
      : evaluateUrl(target, {
        mode: settings.policy_mode,
        rules: settings.url_rules,
        resolveHost: this.resolveHost
      }));

    for (let count = 0; count <= redirects; count += 1) {
      const policy = await evaluate(current);
      if (!policy.allowed) throw blockedError(policy.reason);

      const response = await this.request(policy.url, settings, options);
      if (response.status >= 300 && response.status < 400) {
        const location = response.headers.get("location");
        if (!location) throw fetchError("redirect_missing_location", "Redirect did not include a location.");
        current = new URL(location, policy.url).href;
        continue;
      }
      if (!response.ok) {
        throw fetchError("http_error", `Public page request failed (${response.status}).`);
      }

      const contentType = contentTypeBase(response.headers.get("content-type"));
      if (!SUPPORTED_CONTENT_TYPES.includes(contentType)) {
        throw fetchError("unsupported_content_type", `Unsupported content type: ${contentType || "unknown"}.`);
      }

      const body = decodeBody(response.body, response.headers.get("content-encoding"), settings.max_fetch_bytes);
      const text = body.toString(detectCharset(response.headers.get("content-type")));
      const extracted = contentType === "text/plain"
        ? extractPlainText(text, { maxChars: settings.max_extracted_chars })
        : extractReadableHtml(text, { maxChars: settings.max_extracted_chars });

      // Re-evaluate the URL that was actually fetched before returning content.
      const finalPolicy = await evaluate(policy.url);
      if (!finalPolicy.allowed) throw blockedError(finalPolicy.reason);

      return {
        url: String(value),
        final_url: finalPolicy.url,
        title: extracted.title,
        description: extracted.description,
        headings: extracted.headings,
        canonical_url: safeCanonical(extracted.canonical_url, finalPolicy.url),
        published_at: extracted.published_at,
        updated_at: extracted.updated_at,
        extracted_text: extracted.extracted_text,
        content_type: contentType,
        fetched_at: new Date().toISOString(),
        extraction_status: extracted.extraction_status,
        timing_ms: Math.max(0, this.now() - started),
        truncated: body.length >= settings.max_fetch_bytes
      };
    }
    throw fetchError("redirect_limit", "Public page request exceeded the redirect limit.");
  }

  /**
   * Issue a single GET request without following redirects.
   *
   * @param {string} url - Already policy-checked URL.
   * @param {object} settings - Supplies default `fetch_timeout_ms` and `max_fetch_bytes`.
   * @param {object} [options] - Optional `accept`, `headers`, `timeoutMs` and `maxBytes` overrides.
   * @returns {Promise<{ok: boolean, status: number, headers: object, body: Buffer}>}
   */
  async request(url, settings, options = {}) {
    const headers = {
      Accept: options.accept || "text/html,text/plain,application/xhtml+xml,application/xml;q=0.8",
      "Accept-Encoding": "gzip, deflate, br",
      "User-Agent": "Lumi-Web-Search/1.1 (+https://git.rolfsvaag.no/Rolfsvaag_Datateknikk/Lumi)"
    };
    if (options.headers) Object.assign(headers, options.headers);
    const timeoutMs = options.timeoutMs || settings.fetch_timeout_ms;
    const maxBytes = options.maxBytes || settings.max_fetch_bytes;

    if (this.fetch) {
      const controller = new AbortController();
      const timer = setTimeout(() => controller.abort(), timeoutMs);
      // Do not keep the process alive just for this timer.
      timer.unref?.();
      try {
        const response = await this.fetch(url, {
          method: "GET",
          headers,
          redirect: "manual",
          signal: controller.signal
        });
        return {
          ok: response.ok,
          status: response.status,
          headers: response.headers,
          body: Buffer.isBuffer(response.body) || response.body instanceof Uint8Array
            ? Buffer.from(response.body)
            : await readBounded(response, maxBytes)
        };
      } finally {
        clearTimeout(timer);
      }
    }

    return safeHttpRequest(url, {
      headers,
      timeoutMs,
      maxBytes,
      resolveHost: this.resolveHost
    });
  }
}
|
||||
|
||||
/**
 * Perform a GET request against an address that was resolved and vetted here.
 *
 * The host is resolved once and the request is refused when any resolved
 * address is private. The connection is then pinned to the first resolved
 * address through a custom `lookup`, so the socket connects to an address
 * that passed the check.
 *
 * @param {string} value - Absolute http(s) URL.
 * @param {object} [options]
 * @param {object} [options.headers] - Request headers.
 * @param {number} [options.timeoutMs] - Socket timeout in milliseconds.
 * @param {number} [options.maxBytes] - Maximum response body size.
 * @param {Function} [options.resolveHost] - Resolver; defaults to `defaultResolveHost`.
 * @returns {Promise<{ok: boolean, status: number, headers: {get: Function}, body: Buffer}>}
 * @throws {Error} URL_BLOCKED for private or unresolvable hosts; rejects with
 *   response_too_large, timeout (name "AbortError") or the transport error.
 */
async function safeHttpRequest(value, options = {}) {
  const url = new URL(value);
  // URL keeps IPv6 literals in brackets; resolvers and sockets want them bare.
  const hostname = url.hostname.replace(/^\[|\]$/g, "");
  const addresses = await (options.resolveHost || defaultResolveHost)(hostname);
  if (!addresses.length || addresses.some(isPrivateAddress)) throw blockedError("private_network");
  const address = addresses[0];
  const transport = url.protocol === "https:" ? https : http;

  return new Promise((resolve, reject) => {
    const request = transport.request({
      protocol: url.protocol,
      hostname,
      port: url.port || undefined,
      method: "GET",
      path: `${url.pathname}${url.search}`,
      headers: options.headers,
      servername: url.protocol === "https:" && !net.isIP(hostname) ? hostname : undefined,
      lookup: (_hostname, lookupOptions, callback) => {
        const family = net.isIP(address);
        if (lookupOptions?.all) callback(null, [{ address, family }]);
        else callback(null, address, family);
      }
    }, (response) => {
      const chunks = [];
      let size = 0;
      // A connection dropped mid-body surfaces on the response stream, not on
      // the request. Without this listener the promise would never settle.
      response.on("error", reject);
      response.on("data", (chunk) => {
        size += chunk.length;
        if (size > options.maxBytes) {
          request.destroy(fetchError("response_too_large", "Public response exceeded the size limit."));
          return;
        }
        chunks.push(chunk);
      });
      response.on("end", () => resolve({
        ok: response.statusCode >= 200 && response.statusCode < 300,
        status: response.statusCode,
        headers: { get: (name) => response.headers[String(name).toLowerCase()] || null },
        body: Buffer.concat(chunks)
      }));
    });

    request.setTimeout(options.timeoutMs, () => {
      const error = fetchError("timeout", "Public request timed out.");
      error.name = "AbortError";
      request.destroy(error);
    });
    request.on("error", reject);
    request.end();
  });
}
|
||||
|
||||
/**
 * Read a fetch-style response body, refusing anything larger than `maximum`.
 *
 * The declared `content-length` is checked first, then the actual byte count.
 *
 * @param {object} response - Object exposing `headers.get` and `arrayBuffer()`.
 * @param {number} maximum - Size limit in bytes.
 * @returns {Promise<Buffer>}
 * @throws {Error} With code "response_too_large".
 */
async function readBounded(response, maximum) {
  const tooLarge = () => fetchError("response_too_large", "Public response exceeded the size limit.");

  const declared = Number(response.headers.get("content-length"));
  if (Number.isFinite(declared) && declared > maximum) throw tooLarge();

  const bytes = Buffer.from(await response.arrayBuffer());
  if (bytes.length > maximum) throw tooLarge();
  return bytes;
}
|
||||
|
||||
/**
 * Decompress a response body according to its `Content-Encoding`.
 *
 * @param {Buffer} buffer - Raw body bytes.
 * @param {*} encoding - Header value; empty or "identity" means no decoding.
 * @param {number} maximum - Upper bound for the decoded size in bytes.
 * @returns {Buffer} Decoded body; the input buffer itself when no decoding applies.
 * @throws {Error} With code "unsupported_encoding", "decompression_failed"
 *   (zlib failure, original error attached as `cause`) or "response_too_large".
 */
function decodeBody(buffer, encoding, maximum) {
  const normalized = String(encoding || "").toLowerCase().trim();
  let output = buffer;

  if (normalized && normalized !== "identity") {
    if (!["gzip", "deflate", "br"].includes(normalized)) {
      throw fetchError("unsupported_encoding", "Unsupported response encoding.");
    }
    try {
      if (normalized === "gzip") output = zlib.gunzipSync(buffer, { maxOutputLength: maximum });
      else if (normalized === "deflate") output = zlib.inflateSync(buffer, { maxOutputLength: maximum });
      else output = zlib.brotliDecompressSync(buffer, { maxOutputLength: maximum });
    } catch (cause) {
      // zlib errors carry their own `code`, so telling them apart by
      // "has a code" never reaches this wrapper. Wrap every decoder failure.
      const error = fetchError("decompression_failed", "Response decompression failed or exceeded the size limit.");
      error.cause = cause;
      throw error;
    }
  }

  if (output.length > maximum) throw fetchError("response_too_large", "Decompressed response exceeded the size limit.");
  return output;
}
|
||||
|
||||
/**
 * Reduce a `Content-Type` header to its lowercase media type.
 *
 * @param {*} value - Header value, possibly with parameters.
 * @returns {string} Media type without parameters, or "" when absent.
 */
function contentTypeBase(value) {
  const [mediaType] = String(value || "").split(";");
  return mediaType.trim().toLowerCase();
}
|
||||
|
||||
/**
 * Pick a Buffer encoding from the `charset` parameter of a `Content-Type` header.
 *
 * @param {*} contentType - Header value.
 * @returns {string} "utf8", "ascii" or "latin1"; "utf8" for anything else.
 */
function detectCharset(contentType) {
  const declared = String(contentType || "")
    .match(/charset\s*=\s*["']?([^;"'\s]+)/i)?.[1]
    ?.toLowerCase();
  const supported = ["utf8", "utf-8", "ascii", "latin1"];
  if (!supported.includes(declared)) return "utf8";
  return declared.replace("-", "");
}
|
||||
|
||||
/**
 * Resolve a canonical link against the page URL, accepting only http(s).
 *
 * @param {*} value - Canonical href from the document; may be relative.
 * @param {string} baseUrl - URL the document was fetched from.
 * @returns {string|null} Absolute URL, or null when missing, invalid or non-http(s).
 */
function safeCanonical(value, baseUrl) {
  if (!value) return null;
  let resolved;
  try {
    resolved = new URL(value, baseUrl);
  } catch {
    return null;
  }
  const isWeb = resolved.protocol === "http:" || resolved.protocol === "https:";
  return isWeb ? resolved.href : null;
}
|
||||
|
||||
/**
 * Build the error thrown when URL policy refuses a target.
 *
 * @param {string} reason - Policy reason.
 * @returns {Error} Error with code "URL_BLOCKED" and the reason in `blockedReason`.
 */
function blockedError(reason) {
  return Object.assign(
    fetchError("URL_BLOCKED", `URL blocked by policy: ${reason}.`),
    { blockedReason: reason }
  );
}
|
||||
|
||||
/**
 * Create an Error tagged with a machine-readable `code`.
 *
 * @param {string} code
 * @param {string} message
 * @returns {Error}
 */
function fetchError(code, message) {
  const error = new Error(message);
  error.code = code;
  return error;
}
|
||||
|
||||
module.exports = {
|
||||
PageFetcher,
|
||||
SUPPORTED_CONTENT_TYPES,
|
||||
blockedError,
|
||||
contentTypeBase,
|
||||
decodeBody,
|
||||
fetchError,
|
||||
readBounded,
|
||||
safeHttpRequest
|
||||
};
|
||||
@ -1,162 +1,63 @@
|
||||
const http = require("http");
|
||||
const https = require("https");
|
||||
const net = require("net");
|
||||
const {
|
||||
defaultResolveHost,
|
||||
evaluateNetworkTarget,
|
||||
evaluateUrl,
|
||||
isPrivateAddress
|
||||
} = require("./url_policy");
|
||||
const { sanitizeText } = require("./html_extractor");
|
||||
const { decodeBody } = require("./page_fetcher");
|
||||
const { evaluateNetworkTarget } = require("./url_policy");
|
||||
|
||||
const MAX_RESPONSE_BYTES = 2 * 1024 * 1024;
|
||||
const MAX_PAGE_BYTES = 512 * 1024;
|
||||
|
||||
class SearchProvider {
|
||||
class ExternalSearchProvider {
|
||||
constructor(options = {}) {
|
||||
this.fetch = options.fetch || null;
|
||||
this.resolveHost = options.resolveHost;
|
||||
this.fetcher = options.fetcher;
|
||||
}
|
||||
|
||||
async search(query, options) {
|
||||
const endpoint = buildEndpoint(query, options);
|
||||
const response = await this.request(endpoint, options, true, MAX_RESPONSE_BYTES);
|
||||
const payload = JSON.parse(response.body.toString("utf8"));
|
||||
return normalizeProviderResults(payload, options.provider_adapter);
|
||||
}
|
||||
|
||||
async fetchPage(url, options) {
|
||||
const response = await this.request(url, options, false, MAX_PAGE_BYTES);
|
||||
const contentType = String(response.headers.get("content-type") || "").toLowerCase();
|
||||
if (!contentType.includes("text/html") && !contentType.includes("text/plain")) {
|
||||
throw new Error("Page content type is not supported.");
|
||||
async search(query, options = {}) {
|
||||
const settings = options.settings;
|
||||
if (!settings.external_provider_endpoint) {
|
||||
throw new Error("The optional external search endpoint is not configured.");
|
||||
}
|
||||
const endpoint = buildEndpoint(query, settings, options.freshness);
|
||||
const policy = await evaluateNetworkTarget(endpoint, { resolveHost: this.fetcher.resolveHost });
|
||||
if (!policy.allowed) throw new Error(`External provider blocked: ${policy.reason}.`);
|
||||
const headers = {};
|
||||
if (settings.external_provider_api_key) {
|
||||
headers[settings.external_provider_api_key_header] = [
|
||||
settings.external_provider_api_key_prefix,
|
||||
settings.external_provider_api_key
|
||||
].filter(Boolean).join(" ");
|
||||
}
|
||||
const response = await this.fetcher.request(policy.url, settings, {
|
||||
networkOnly: true,
|
||||
timeoutMs: settings.search_timeout_ms,
|
||||
maxBytes: Math.min(settings.max_fetch_bytes, 2 * 1024 * 1024),
|
||||
accept: "application/json",
|
||||
headers
|
||||
});
|
||||
if (!response.ok) throw new Error(`External search provider failed (${response.status}).`);
|
||||
const body = decodeBody(
|
||||
response.body,
|
||||
response.headers.get("content-encoding"),
|
||||
Math.min(settings.max_fetch_bytes, 2 * 1024 * 1024)
|
||||
);
|
||||
const payload = JSON.parse(body.toString("utf8"));
|
||||
return {
|
||||
url: response.url,
|
||||
text: extractPageText(response.body.toString("utf8")).slice(0, 6000)
|
||||
provider: "external_json",
|
||||
results: normalizeProviderResults(payload, settings.external_provider_adapter),
|
||||
warnings: [],
|
||||
adapter_errors: []
|
||||
};
|
||||
}
|
||||
|
||||
async request(initialUrl, options, providerRequest, maxBytes) {
|
||||
let current = initialUrl;
|
||||
const providerOrigin = providerRequest ? new URL(initialUrl).origin : null;
|
||||
for (let redirects = 0; redirects <= 3; redirects += 1) {
|
||||
const policy = providerRequest
|
||||
? await evaluateNetworkTarget(current, { resolveHost: this.resolveHost })
|
||||
: await evaluateUrl(current, {
|
||||
mode: options.policy_mode,
|
||||
rules: options.url_rules,
|
||||
resolveHost: this.resolveHost
|
||||
});
|
||||
if (!policy.allowed) throw blockedError(policy.reason);
|
||||
if (providerRequest && new URL(policy.url).origin !== providerOrigin) {
|
||||
throw blockedError("cross_origin_provider_redirect");
|
||||
}
|
||||
const controller = new AbortController();
|
||||
const timer = setTimeout(() => controller.abort(), options.search_timeout_ms);
|
||||
timer.unref?.();
|
||||
try {
|
||||
const headers = {
|
||||
Accept: providerRequest ? "application/json" : "text/html,text/plain;q=0.9",
|
||||
"User-Agent": "Lumi-AI-Web-Search/1.0"
|
||||
};
|
||||
if (providerRequest && options.provider_api_key) {
|
||||
headers[options.provider_api_key_header] = [
|
||||
options.provider_api_key_prefix,
|
||||
options.provider_api_key
|
||||
].filter(Boolean).join(" ");
|
||||
}
|
||||
const response = this.fetch
|
||||
? await this.fetch(policy.url, {
|
||||
method: "GET",
|
||||
headers,
|
||||
redirect: "manual",
|
||||
signal: controller.signal
|
||||
})
|
||||
: await safeHttpRequest(policy.url, {
|
||||
headers,
|
||||
timeoutMs: options.search_timeout_ms,
|
||||
maxBytes,
|
||||
resolveHost: this.resolveHost
|
||||
});
|
||||
if (response.status >= 300 && response.status < 400) {
|
||||
const location = response.headers.get("location");
|
||||
if (!location) throw new Error("Provider redirect did not include a location.");
|
||||
current = new URL(location, policy.url).href;
|
||||
continue;
|
||||
}
|
||||
if (!response.ok) throw new Error(`Search provider request failed (${response.status}).`);
|
||||
return {
|
||||
url: policy.url,
|
||||
headers: response.headers,
|
||||
body: response.body || await readBounded(response, maxBytes)
|
||||
};
|
||||
} finally {
|
||||
clearTimeout(timer);
|
||||
}
|
||||
}
|
||||
throw new Error("Search request exceeded the redirect limit.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Issues one GET request whose connection is pinned to a pre-vetted address.
 *
 * The hostname is resolved once through `options.resolveHost` (or the module
 * default), the first address that `isPrivateAddress` does not reject is
 * selected, and the request's `lookup` hook always answers with that address,
 * so the socket connects to the same address that was checked.
 *
 * @param {string} value - Absolute URL to request.
 * @param {object} [options]
 * @param {Function} [options.resolveHost] - `async (hostname) => string[]` resolver override.
 * @param {object} [options.headers] - Request headers, passed through unchanged.
 * @param {number} [options.maxBytes] - Largest response body accepted, in bytes.
 * @param {number} [options.timeoutMs] - Socket idle timeout in milliseconds.
 * @returns {Promise<{ok: boolean, status: number, headers: {get: Function}, body: Buffer}>}
 * @throws {Error} `blockedError("private_network")` when every resolved address is rejected;
 *   the promise also rejects on transport errors, on timeout (`name === "AbortError"`),
 *   and when the body grows past `options.maxBytes`.
 */
async function safeHttpRequest(value, options = {}) {
  const url = new URL(value);
  // URL keeps the brackets around IPv6 literals; the resolver and net APIs want them removed.
  const hostname = url.hostname.replace(/^\[|\]$/g, "");
  const resolveHost = options.resolveHost || defaultResolveHost;
  const addresses = await resolveHost(hostname);
  const address = addresses.find((entry) => !isPrivateAddress(entry));
  if (!address) throw blockedError("private_network");
  const family = net.isIP(address);
  const transport = url.protocol === "https:" ? https : http;

  // Answer every lookup with the vetted address. Node may ask for the full
  // address list ({ all: true }), in which case the callback expects an array.
  const pinnedLookup = (_hostname, lookupOptions, callback) => {
    const done = typeof lookupOptions === "function" ? lookupOptions : callback;
    if (typeof lookupOptions === "object" && lookupOptions?.all) {
      done(null, [{ address, family }]);
      return;
    }
    done(null, address, family);
  };

  return new Promise((resolve, reject) => {
    // Reject directly as well as destroying the request: once headers have
    // arrived, the destroy error is reported on the response, not the request.
    const fail = (error) => {
      request.destroy(error);
      reject(error);
    };
    const request = transport.request({
      protocol: url.protocol,
      hostname,
      port: url.port || undefined,
      method: "GET",
      path: `${url.pathname}${url.search}`,
      headers: options.headers,
      // SNI must carry the original name even though the socket targets the pinned IP.
      servername: url.protocol === "https:" && !net.isIP(hostname) ? hostname : undefined,
      lookup: pinnedLookup
    }, (response) => {
      const chunks = [];
      let size = 0;
      response.on("data", (chunk) => {
        size += chunk.length;
        if (size > options.maxBytes) {
          fail(new Error("Provider response is too large."));
          return;
        }
        chunks.push(chunk);
      });
      // A body that is cut short surfaces here; without this the promise would stay pending.
      response.on("error", reject);
      response.on("end", () => resolve({
        ok: response.statusCode >= 200 && response.statusCode < 300,
        status: response.statusCode,
        headers: { get: (name) => response.headers[String(name).toLowerCase()] || null },
        body: Buffer.concat(chunks)
      }));
    });
    request.setTimeout(options.timeoutMs, () => {
      const error = new Error("Search provider timed out.");
      error.name = "AbortError";
      fail(error);
    });
    request.on("error", reject);
    request.end();
  });
}
|
||||
|
||||
/**
 * Builds the provider search URL for a query.
 *
 * The endpoint may contain a literal `{query}` placeholder; otherwise the
 * query is added as a search parameter. Both generations of setting names
 * that appear for this function are honoured (`external_provider_*` first,
 * then the older `provider_*` / `safe_search` / `settings.freshness`).
 *
 * @param {string} query - Search text.
 * @param {object} settings - Tool settings (endpoint, query parameter, safe search, max_results).
 * @param {string} [freshness=""] - Optional time range; truncated to 32 characters.
 * @returns {string} Absolute URL (`URL#href`).
 * @throws {Error} When no endpoint is configured.
 * @throws {TypeError} When the configured endpoint is not a valid URL.
 */
function buildEndpoint(query, settings, freshness = "") {
  const configured = settings.external_provider_endpoint || settings.provider_endpoint;
  if (!configured) throw new Error("Search provider endpoint is not configured.");
  const templated = configured.includes("{query}");
  const endpoint = templated
    ? configured.replaceAll("{query}", encodeURIComponent(query))
    : configured;
  const url = new URL(endpoint);
  if (!templated) {
    const parameter = settings.external_provider_query_parameter ||
      settings.provider_query_parameter ||
      "q";
    url.searchParams.set(parameter, query);
  }
  url.searchParams.set("format", "json");
  url.searchParams.set("safesearch", safeSearchValue(settings.safe_search_level ?? settings.safe_search));
  url.searchParams.set("count", String(settings.max_results));
  const range = freshness || settings.freshness;
  if (range) url.searchParams.set("time_range", String(range).slice(0, 32));
  return url.href;
}
|
||||
|
||||
@ -164,50 +65,29 @@ function normalizeProviderResults(payload, adapter) {
|
||||
const rows = adapter === "searxng_json"
|
||||
? payload?.results
|
||||
: payload?.results || payload?.items || payload?.web?.results?.value;
|
||||
if (!Array.isArray(rows)) throw new Error("Search provider response does not contain a supported result list.");
|
||||
return rows.map((row, index) => ({
|
||||
title: sanitizeText(row.title || row.name || "Untitled result", 240),
|
||||
url: String(row.url || row.link || ""),
|
||||
snippet: sanitizeText(row.content || row.snippet || row.description || "", 800),
|
||||
source_type: sanitizeText(row.source_type || row.category || row.engine || "", 80) || null,
|
||||
date: normalizeDate(row.publishedDate || row.published_date || row.date),
|
||||
relevance_score: finiteScore(row.score, index)
|
||||
})).filter((row) => row.url);
|
||||
if (!Array.isArray(rows)) throw new Error("External provider returned an unsupported result list.");
|
||||
return rows.map((row, index) => {
|
||||
const url = String(row.url || row.link || "");
|
||||
let domain = "";
|
||||
try { domain = new URL(url).hostname; } catch {}
|
||||
return {
|
||||
title: sanitizeText(row.title || row.name || "Untitled result", 240),
|
||||
url,
|
||||
domain,
|
||||
snippet: sanitizeText(row.content || row.snippet || row.description || "", 900),
|
||||
date: normalizeDate(row.publishedDate || row.published_date || row.date),
|
||||
rank: index + 1,
|
||||
source: sanitizeText(row.engine || row.source || "external_json", 80),
|
||||
raw_source_id: sanitizeText(row.id || "", 120) || null,
|
||||
relevance_score: finiteScore(row.score, index)
|
||||
};
|
||||
}).filter((row) => /^https?:\/\//i.test(row.url));
|
||||
}
|
||||
|
||||
/**
 * Reads a response body while enforcing an upper bound on its size.
 *
 * A declared `content-length` above the bound is rejected before any data is
 * read. When the body exposes a stream reader it is consumed chunk by chunk
 * and cancelled as soon as the bound is crossed, so an oversized body is not
 * held in memory first; otherwise the body is read with `arrayBuffer()`.
 *
 * @param {{headers: {get: Function}, body?: object, arrayBuffer?: Function}} response
 * @param {number} maxBytes - Largest accepted body size in bytes.
 * @returns {Promise<Buffer>} The complete body.
 * @throws {Error} "Provider response is too large." when the bound is exceeded.
 */
async function readBounded(response, maxBytes) {
  const declared = Number(response.headers.get("content-length"));
  if (Number.isFinite(declared) && declared > maxBytes) throw new Error("Provider response is too large.");

  const reader = response.body?.getReader?.();
  if (!reader) {
    const buffer = Buffer.from(await response.arrayBuffer());
    if (buffer.length > maxBytes) throw new Error("Provider response is too large.");
    return buffer;
  }

  const chunks = [];
  let size = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    size += value.byteLength;
    if (size > maxBytes) {
      // Best effort: the size error below is what the caller needs to see.
      await reader.cancel().catch(() => {});
      throw new Error("Provider response is too large.");
    }
    chunks.push(value);
  }
  return Buffer.concat(chunks);
}
|
||||
|
||||
/**
 * Reduces an HTML document to plain text.
 *
 * Script and style elements are removed together with their content, then
 * HTML comments, then all remaining tags; the remainder goes through
 * `sanitizeText` with a 12000 character cap.
 *
 * @param {*} value - HTML source; `null`/`undefined` are treated as empty.
 * @returns {string} Plain text.
 */
function extractPageText(value) {
  const visible = String(value ?? "")
    // End tags may carry whitespace or junk before ">" (e.g. "</script >").
    .replace(/<script\b[^>]*>[\s\S]*?<\/script\b[^>]*>/gi, " ")
    .replace(/<style\b[^>]*>[\s\S]*?<\/style\b[^>]*>/gi, " ")
    // Comments go before the generic tag pass so a ">" inside one cannot end the match early.
    .replace(/<!--[\s\S]*?-->/g, " ")
    .replace(/<[^>]+>/g, " ");
  return sanitizeText(visible, 12000);
}
|
||||
|
||||
/**
 * Normalises untrusted text for display.
 *
 * Removes tag-like spans, decodes entities through `decodeEntities`, replaces
 * control characters with spaces, collapses whitespace, trims, and truncates
 * to `maximum` UTF-16 code units.
 *
 * @param {*} value - Input; falsy values are treated as the empty string.
 * @param {number} maximum - Maximum length of the result.
 * @returns {string} Cleaned text no longer than `maximum`.
 */
function sanitizeText(value, maximum) {
  const text = decodeEntities(String(value || "").replace(/<[^>]+>/g, " "))
    .replace(/[\u0000-\u001f\u007f]/g, " ")
    .replace(/\s+/g, " ")
    .trim()
    .slice(0, maximum);
  // The cut may land between the two halves of a surrogate pair; drop a dangling high half.
  return /[\uD800-\uDBFF]$/.test(text) ? text.slice(0, -1) : text;
}
|
||||
|
||||
/**
 * Decodes the five basic HTML entities.
 *
 * `&amp;` is decoded last so already-escaped text such as `&amp;lt;` becomes
 * `&lt;` rather than being decoded twice into `<`.
 *
 * @param {string} value - Text that may contain entities.
 * @returns {string} Text with the entities replaced by their characters.
 */
function decodeEntities(value) {
  return value
    .replaceAll("&lt;", "<")
    .replaceAll("&gt;", ">")
    .replaceAll("&quot;", "\"")
    .replaceAll("&#39;", "'")
    .replaceAll("&#x27;", "'")
    .replaceAll("&apos;", "'")
    .replaceAll("&amp;", "&");
}

/**
 * Maps a safe-search level to the provider's numeric string.
 *
 * @param {string} level - "off", "strict", or anything else.
 * @returns {string} "0" for "off", "2" for "strict", otherwise "1".
 */
function safeSearchValue(level) {
  if (level === "off") return "0";
  if (level === "strict") return "2";
  return "1";
}
|
||||
|
||||
function normalizeDate(value) {
|
||||
@ -218,31 +98,13 @@ function normalizeDate(value) {
|
||||
|
||||
/**
 * Returns a numeric relevance score for a result row.
 *
 * A finite provider score is used as-is. A missing or non-numeric score falls
 * back to a positional score that drops by 0.1 per rank and never goes below 0.1.
 *
 * @param {*} value - Score reported by the provider, if any.
 * @param {number} index - Zero-based position of the row in the provider's list.
 * @returns {number}
 */
function finiteScore(value, index) {
  // null and "" would coerce to 0 and masquerade as a real score.
  const missing = value === null || value === undefined || value === "";
  const number = missing ? NaN : Number(value);
  return Number.isFinite(number) ? number : Math.max(0.1, 1 - index * 0.1);
}

/**
 * Maps a safe-search level to the provider's numeric string.
 *
 * NOTE(review): another `safeSearchValue` with a different fallback ("1")
 * appears earlier in this text; confirm which definition is meant to survive.
 *
 * @param {string} level - "off", "moderate", or anything else.
 * @returns {string} "0" for "off", "1" for "moderate", otherwise "2".
 */
function safeSearchValue(level) {
  if (level === "off") return "0";
  if (level === "moderate") return "1";
  return "2";
}

/**
 * Creates the error used when URL policy rejects a request.
 *
 * @param {string} reason - Machine-readable reason, e.g. "private_network".
 * @returns {Error} Error with `code === "URL_BLOCKED"` and `blockedReason === reason`.
 */
function blockedError(reason) {
  const error = new Error(`URL blocked by policy: ${reason}.`);
  error.code = "URL_BLOCKED";
  error.blockedReason = reason;
  return error;
}
|
||||
|
||||
module.exports = {
|
||||
MAX_PAGE_BYTES,
|
||||
MAX_RESPONSE_BYTES,
|
||||
SearchProvider,
|
||||
blockedError,
|
||||
ExternalSearchProvider,
|
||||
SearchProvider: ExternalSearchProvider,
|
||||
buildEndpoint,
|
||||
extractPageText,
|
||||
normalizeProviderResults,
|
||||
readBounded,
|
||||
sanitizeText,
|
||||
safeHttpRequest
|
||||
safeSearchValue
|
||||
};
|
||||
|
||||
80
plugins/lumi_ai_web_search/backend/rate_limits.js
Normal file
80
plugins/lumi_ai_web_search/backend/rate_limits.js
Normal file
@ -0,0 +1,80 @@
|
||||
/**
 * In-memory sliding-window rate limiter with a 60 second window.
 *
 * Each request is checked against three buckets (user, origin, origin+server)
 * and is recorded in all of them only when every bucket admits it.
 */
class ToolRateLimits {
  /**
   * @param {object} [options]
   * @param {Function} [options.now] - Clock returning epoch milliseconds; defaults to `Date.now`.
   */
  constructor(options = {}) {
    this.now = options.now || Date.now;
    this.buckets = new Map();
  }

  /**
   * Checks all three limits and, when none is exhausted, records the request.
   *
   * @param {{actor: string, origin: string, server: string}} identity
   * @param {{per_user_per_minute: number, per_origin_per_minute: number, per_server_per_minute: number}} settings
   * @returns {{allowed: boolean, retry_after_seconds: number}} When blocked,
   *   `retry_after_seconds` is the longest wait among the exhausted buckets.
   */
  consume({ actor, origin, server }, settings) {
    const checks = [
      [`user:${actor}`, settings.per_user_per_minute],
      [`origin:${origin}`, settings.per_origin_per_minute],
      [`server:${origin}:${server}`, settings.per_server_per_minute]
    ];
    // Track the verdict separately from the wait time so the decision never
    // depends on the truthiness of a number.
    let blocked = false;
    let retryAfter = 0;
    for (const [key, maximum] of checks) {
      const result = this.inspect(key, maximum);
      if (result.allowed) continue;
      blocked = true;
      retryAfter = Math.max(retryAfter, result.retry_after_seconds);
    }
    if (blocked) return { allowed: false, retry_after_seconds: retryAfter };
    for (const [key] of checks) this.record(key);
    return { allowed: true, retry_after_seconds: 0 };
  }

  /**
   * Reports whether one more request fits in a bucket, without recording it.
   * Expired timestamps are dropped and a bucket left empty is removed.
   *
   * @param {string} key - Bucket key.
   * @param {number} maximum - Requests allowed per 60 seconds; anything that is
   *   not greater than the current count (including 0) blocks.
   * @returns {{allowed: boolean, retry_after_seconds: number}}
   */
  inspect(key, maximum) {
    const now = this.now();
    const cutoff = now - 60000;
    const recent = (this.buckets.get(key) || []).filter((timestamp) => timestamp > cutoff);
    if (recent.length) this.buckets.set(key, recent);
    else this.buckets.delete(key);
    if (recent.length < maximum) return { allowed: true, retry_after_seconds: 0 };
    // With nothing in the window there is no oldest entry to wait on; report a full window.
    const waitMs = recent.length ? recent[0] + 60000 - now : 60000;
    return {
      allowed: false,
      retry_after_seconds: Math.max(1, Math.ceil(waitMs / 1000))
    };
  }

  /**
   * Appends the current time to a bucket.
   *
   * @param {string} key - Bucket key.
   */
  record(key) {
    const recent = this.buckets.get(key) || [];
    recent.push(this.now());
    this.buckets.set(key, recent);
  }
}
|
||||
|
||||
/**
 * Limits how many callbacks run at once and queues the overflow (first in, first out).
 */
class ToolConcurrency {
  /**
   * @param {number} [maximum=3] - Callbacks allowed to run concurrently; at least 1.
   * @param {number} [maximumQueue=20] - Callbacks allowed to wait; 0 disables
   *   queueing, non-numeric values fall back to 20.
   */
  constructor(maximum = 3, maximumQueue = 20) {
    this.maximum = Math.max(1, Number(maximum) || 3);
    // parseFloat keeps an explicit 0 distinct from "not a number".
    const queueLimit = Number.parseFloat(maximumQueue);
    this.maximumQueue = Number.isNaN(queueLimit) ? 20 : Math.max(0, queueLimit);
    this.active = 0;
    this.queue = [];
  }

  /**
   * Runs the callback now if a slot is free, otherwise queues it.
   *
   * @param {Function} callback - Function returning a value or a promise.
   * @returns {Promise<*>} Settles with the callback's outcome. Rejects with an
   *   error carrying `code === "concurrency_limited"` when the queue is full.
   */
  run(callback) {
    if (this.active < this.maximum) return this.start(callback);
    if (this.queue.length >= this.maximumQueue) {
      return Promise.reject(Object.assign(
        new Error("Web lookup concurrency queue is full."),
        { code: "concurrency_limited" }
      ));
    }
    return new Promise((resolve, reject) => {
      this.queue.push({ callback, resolve, reject });
    });
  }

  /**
   * Occupies a slot for the duration of the callback, then releases it and
   * starts queued work.
   *
   * @param {Function} callback
   * @returns {Promise<*>}
   */
  async start(callback) {
    this.active += 1;
    try {
      return await callback();
    } finally {
      this.active -= 1;
      this.drain();
    }
  }

  /** Starts queued callbacks while slots are free. */
  drain() {
    while (this.active < this.maximum && this.queue.length) {
      const next = this.queue.shift();
      this.start(next.callback).then(next.resolve, next.reject);
    }
  }
}
|
||||
|
||||
module.exports = { ToolConcurrency, ToolRateLimits };
|
||||
@ -1,4 +1,5 @@
|
||||
const { sanitizeText } = require("./provider_adapter");
|
||||
const { sanitizeText } = require("./html_extractor");
|
||||
const { sourceLinksEnabled } = require("./settings");
|
||||
|
||||
const REASONS = new Set([
|
||||
"fact_lookup",
|
||||
@ -13,39 +14,67 @@ function formatResults(rows, options = {}) {
|
||||
const reason = REASONS.has(options.reason) ? options.reason : "general_lookup";
|
||||
const origin = normalizeOrigin(options.origin);
|
||||
const budget = outputBudget(options.settings, origin);
|
||||
const sourceLimit = origin === "twitch" ? 2 : origin === "discord" ? 4 : options.settings.max_results;
|
||||
const sourceLimit = origin === "twitch" ? 1 : origin === "discord" ? 4 : options.settings.max_results;
|
||||
const maximum = Math.max(1, Math.min(
|
||||
options.settings.max_results,
|
||||
Number(options.maxResults) || options.settings.max_results,
|
||||
sourceLimit
|
||||
));
|
||||
const snippetLimit = reason === "fact_lookup" ? 180
|
||||
: ["resource_lookup", "troubleshooting", "documentation_lookup"].includes(reason) ? 300
|
||||
: 240;
|
||||
const ordered = prioritize(rows, reason).slice(0, sourceLimit);
|
||||
: ["resource_lookup", "troubleshooting", "documentation_lookup"].includes(reason) ? 320
|
||||
: 260;
|
||||
const ordered = prioritize(rows, reason).slice(0, maximum);
|
||||
const results = [];
|
||||
let used = 0;
|
||||
for (const row of ordered) {
|
||||
for (const [index, row] of ordered.entries()) {
|
||||
const url = sourceLinksEnabled(options.settings, origin) ? row.url : null;
|
||||
const normalized = {
|
||||
title: sanitizeText(row.title, 180),
|
||||
url: options.settings.show_source_links ? row.url : null,
|
||||
url,
|
||||
domain: safeDomain(row.url),
|
||||
snippet: sanitizeText(row.page_excerpt || row.snippet, snippetLimit),
|
||||
source_type: row.source_type || inferSourceType(row.url),
|
||||
snippet: sanitizeText(row.snippet, snippetLimit),
|
||||
date: row.date || null,
|
||||
relevance_score: Number.isFinite(Number(row.relevance_score)) ? Number(row.relevance_score) : null
|
||||
rank: Number(row.rank) || index + 1,
|
||||
source: sanitizeText(row.source || inferSourceType(row.url), 80),
|
||||
raw_source_id: sanitizeText(row.raw_source_id || "", 120) || null,
|
||||
relevance_score: Number.isFinite(Number(row.relevance_score)) ? Number(row.relevance_score) : null,
|
||||
allowed: true
|
||||
};
|
||||
const cost = normalized.title.length + normalized.snippet.length + (normalized.url?.length || 0) + 30;
|
||||
const cost = normalized.title.length + normalized.snippet.length + (normalized.url?.length || 0) + 40;
|
||||
if (results.length && used + cost > budget) break;
|
||||
results.push(normalized);
|
||||
used += cost;
|
||||
}
|
||||
const condensedText = buildCondensedText(results, reason, origin, budget);
|
||||
const condensed = buildCondensedText(results, reason, origin, budget);
|
||||
return {
|
||||
reason,
|
||||
origin,
|
||||
output_budget_chars: budget,
|
||||
truncated: ordered.length > results.length || condensedText.truncated,
|
||||
condensed_text: condensedText.text,
|
||||
truncated: ordered.length > results.length || condensed.truncated,
|
||||
condensed_text: condensed.text,
|
||||
results
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Shapes a fetched page for output to a given origin.
 *
 * The extracted text is limited to the smaller of the configured
 * `max_extracted_chars` and the origin's output budget, but never below 200
 * characters. URLs are included only when source links are enabled for the origin.
 *
 * @param {object} page - Fetch result (url, final_url, title, description,
 *   extracted_text, content_type, fetched_at, extraction_status, dates, truncated).
 * @param {{origin?: string, settings: object}} options
 * @returns {object} Sanitised page fields plus a `truncated` flag.
 */
function formatFetchedPage(page, options = {}) {
  const origin = normalizeOrigin(options.origin);
  const budget = outputBudget(options.settings, origin);
  // A missing or invalid max_extracted_chars must not turn the budget into NaN.
  const configured = Number(options.settings.max_extracted_chars);
  const ceiling = Number.isFinite(configured) && configured > 0 ? configured : budget;
  const textBudget = Math.max(200, Math.min(ceiling, budget));
  const linksVisible = sourceLinksEnabled(options.settings, origin);
  // extracted_text may be absent when extraction did not succeed.
  const rawLength = String(page.extracted_text || "").length;
  return {
    url: linksVisible ? page.url : null,
    final_url: linksVisible ? page.final_url : null,
    title: sanitizeText(page.title, 240),
    description: sanitizeText(page.description, 500),
    extracted_text: sanitizeText(page.extracted_text, textBudget),
    content_type: page.content_type,
    fetched_at: page.fetched_at,
    extraction_status: page.extraction_status,
    published_at: page.published_at || null,
    updated_at: page.updated_at || null,
    truncated: rawLength > textBudget || page.truncated === true
  };
}
|
||||
|
||||
function buildCondensedText(results, reason, origin, budget) {
|
||||
const lines = results.map((result, index) => {
|
||||
const date = result.date ? ` (${result.date.slice(0, 10)})` : "";
|
||||
@ -56,7 +85,7 @@ function buildCondensedText(results, reason, origin, budget) {
|
||||
let text = lines.join(origin === "twitch" ? " | " : "\n");
|
||||
let truncated = false;
|
||||
if (text.length > budget) {
|
||||
text = `${text.slice(0, Math.max(0, budget - 18)).trimEnd()}...`;
|
||||
text = `${text.slice(0, Math.max(0, budget - 3)).trimEnd()}...`;
|
||||
truncated = true;
|
||||
}
|
||||
return { text, truncated };
|
||||
@ -67,7 +96,8 @@ function prioritize(rows, reason) {
|
||||
if (["documentation_lookup", "troubleshooting"].includes(reason)) {
|
||||
values.sort((left, right) =>
|
||||
authorityScore(right) - authorityScore(left) ||
|
||||
Number(right.relevance_score || 0) - Number(left.relevance_score || 0)
|
||||
Number(right.relevance_score || 0) - Number(left.relevance_score || 0) ||
|
||||
Number(left.rank || 999) - Number(right.rank || 999)
|
||||
);
|
||||
} else if (reason === "news_or_recent") {
|
||||
values.sort((left, right) =>
|
||||
@ -81,7 +111,7 @@ function prioritize(rows, reason) {
|
||||
/**
 * Returns the output character budget for an origin.
 *
 * Uses `<origin>_output_chars`, then `other_output_chars`, then 700. Values
 * that are not positive finite numbers are skipped.
 *
 * @param {object} settings - Tool settings.
 * @param {string} origin - Normalised origin name.
 * @returns {number} Budget in characters.
 */
function outputBudget(settings, origin) {
  const candidates = [settings[`${origin}_output_chars`], settings.other_output_chars];
  for (const candidate of candidates) {
    const budget = Number(candidate);
    if (Number.isFinite(budget) && budget > 0) return budget;
  }
  return 700;
}
|
||||
|
||||
function normalizeOrigin(value) {
|
||||
@ -97,12 +127,14 @@ function safeDomain(value) {
|
||||
/**
 * Classifies a URL by its domain.
 *
 * @param {string} value - Result URL.
 * @returns {string} "authoritative" for docs./developer./support. hosts and
 *   .gov/.edu domains, "repository" for the listed code and package hosts
 *   (the domain itself or a subdomain), otherwise "web".
 */
function inferSourceType(value) {
  const domain = safeDomain(value);
  if (/^(docs|developer|support)\./i.test(domain) || /\.(gov|edu)$/i.test(domain)) return "authoritative";
  // Require a label boundary so look-alike domains such as "evilgithub.com" do not match.
  if (/(^|\.)(github\.com|gitlab\.com|huggingface\.co|npmjs\.com|pypi\.org)$/i.test(domain)) return "repository";
  return "web";
}
|
||||
|
||||
/**
 * Scores a row 1 when it comes from a trusted kind of source, else 0.
 *
 * A row qualifies when its `source` label is one of the trusted labels or
 * when `inferSourceType` classifies its URL as anything other than "web".
 *
 * @param {{source?: string, url?: string}} row
 * @returns {number} 1 or 0.
 */
function authorityScore(row) {
  const source = String(row.source || "").toLowerCase();
  const trustedLabel = ["official", "authoritative", "documentation", "repository"].includes(source);
  return (trustedLabel || inferSourceType(row.url) !== "web") ? 1 : 0;
}
|
||||
|
||||
function dateValue(value) {
|
||||
@ -113,6 +145,7 @@ function dateValue(value) {
|
||||
module.exports = {
|
||||
REASONS,
|
||||
buildCondensedText,
|
||||
formatFetchedPage,
|
||||
formatResults,
|
||||
inferSourceType,
|
||||
normalizeOrigin,
|
||||
|
||||
@ -1,134 +1,303 @@
|
||||
const crypto = require("crypto");
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const { SearchProvider } = require("./provider_adapter");
|
||||
const { formatResults, normalizeOrigin } = require("./result_formatter");
|
||||
const { readSettings } = require("./settings");
|
||||
const { ToolCache } = require("./cache");
|
||||
const { LumiSearchBroker } = require("./lumi_search_broker");
|
||||
const { PageFetcher } = require("./page_fetcher");
|
||||
const { ExternalSearchProvider } = require("./provider_adapter");
|
||||
const { ToolConcurrency, ToolRateLimits } = require("./rate_limits");
|
||||
const {
|
||||
REASONS,
|
||||
formatFetchedPage,
|
||||
formatResults,
|
||||
normalizeOrigin
|
||||
} = require("./result_formatter");
|
||||
const {
|
||||
capabilityEnabled,
|
||||
readSettings,
|
||||
readStatus,
|
||||
writeStatus
|
||||
} = require("./settings");
|
||||
const { evaluateUrl } = require("./url_policy");
|
||||
|
||||
class WebSearchTool {
|
||||
constructor(options = {}) {
|
||||
this.dataDir = options.dataDir;
|
||||
this.provider = options.provider || new SearchProvider(options);
|
||||
this.now = options.now || Date.now;
|
||||
this.cache = new Map();
|
||||
this.rateLimits = new Map();
|
||||
this.fetcher = options.fetcher || new PageFetcher(options);
|
||||
this.broker = options.broker || new LumiSearchBroker({ fetcher: this.fetcher });
|
||||
this.externalProvider = options.externalProvider || new ExternalSearchProvider({ fetcher: this.fetcher });
|
||||
this.cache = options.cache || new ToolCache({
|
||||
directory: path.join(this.dataDir, "cache"),
|
||||
now: this.now
|
||||
});
|
||||
this.rateLimits = options.rateLimits || new ToolRateLimits({ now: this.now });
|
||||
this.concurrency = options.concurrency || new ToolConcurrency(3, 20);
|
||||
}
|
||||
|
||||
async run(input = {}) {
|
||||
const started = this.now();
|
||||
const settings = readSettings(this.dataDir);
|
||||
const query = String(input.query || "").trim().slice(0, 500);
|
||||
const reason = String(input.reason || "general_lookup");
|
||||
const origin = trustedOrigin(input.ctx, input.origin);
|
||||
const actor = String(input.user?.id || input.user?.username || "unknown").slice(0, 120);
|
||||
const server = String(input.ctx?.server_id || input.ctx?.channel_id || "direct").slice(0, 120);
|
||||
const auditBase = { query, reason, origin, actor, server };
|
||||
if (!settings.enabled) return this.finish(blockedResult(query, reason, "tool_disabled", settings, started, this.now()), auditBase);
|
||||
if (!settings.allowed_origins.includes(origin)) {
|
||||
return this.finish(blockedResult(query, reason, "origin_not_allowed", settings, started, this.now()), auditBase);
|
||||
}
|
||||
if (!query) return this.finish(blockedResult(query, reason, "query_required", settings, started, this.now()), auditBase);
|
||||
if (!this.consumeRateLimit(`${origin}:${server}:${actor}`, settings.requests_per_minute)) {
|
||||
return this.finish(blockedResult(query, reason, "rate_limited", settings, started, this.now()), auditBase);
|
||||
}
|
||||
async search(input = {}) {
|
||||
return this.runConcurrent(input, "search", () => this.performSearch(input));
|
||||
}
|
||||
|
||||
async performSearch(input = {}) {
|
||||
const context = this.context(input, "search");
|
||||
const blocked = this.preflight(context);
|
||||
if (blocked) return this.finish(blocked, context);
|
||||
const query = cleanQuery(input.query);
|
||||
if (!query) return this.finish(this.blocked(context, "query_required"), context);
|
||||
context.query = query;
|
||||
context.reason = normalizeReason(input.reason);
|
||||
const maximum = Math.max(1, Math.min(
|
||||
context.settings.max_results,
|
||||
Number.parseInt(input.max_results, 10) || context.settings.max_results
|
||||
));
|
||||
const cacheKey = JSON.stringify([
|
||||
query.toLowerCase(), reason, input.freshness || "", settings.provider_endpoint,
|
||||
settings.policy_mode, settings.url_rules, settings.safe_search, settings.max_results,
|
||||
origin, settings[`${origin}_output_chars`], settings.show_source_links,
|
||||
input.requested_depth || "search", settings.allow_full_page_fetch
|
||||
"search", query.toLowerCase(), context.reason, String(input.freshness || ""),
|
||||
maximum, context.settings.provider, context.settings.policy_mode,
|
||||
context.settings.url_rules, context.origin
|
||||
]);
|
||||
const cached = this.cache.get(cacheKey);
|
||||
if (cached && cached.expiresAt > this.now()) {
|
||||
return this.finish({ ...cached.value, cache_hit: true, timing_ms: this.now() - started }, auditBase);
|
||||
}
|
||||
const cached = this.cache.get(cacheKey, context.settings.cache_ttl_seconds);
|
||||
if (cached) return this.finish({
|
||||
...cached,
|
||||
cache_hit: true,
|
||||
timing_ms: this.now() - context.started
|
||||
}, context);
|
||||
try {
|
||||
const discovered = await this.provider.search(query, {
|
||||
...settings,
|
||||
freshness: input.freshness
|
||||
const provider = context.settings.provider === "external_json"
|
||||
? this.externalProvider
|
||||
: this.broker;
|
||||
const discovered = await provider.search(query, {
|
||||
settings: context.settings,
|
||||
freshness: String(input.freshness || "").slice(0, 40)
|
||||
});
|
||||
const allowed = [];
|
||||
for (const row of discovered) {
|
||||
if (allowed.length >= settings.max_results) break;
|
||||
const warnings = [...(discovered.warnings || [])];
|
||||
let blockedResults = 0;
|
||||
for (const row of discovered.results || []) {
|
||||
if (allowed.length >= maximum) break;
|
||||
const policy = await evaluateUrl(row.url, {
|
||||
mode: settings.policy_mode,
|
||||
rules: settings.url_rules,
|
||||
resolveHost: this.provider.resolveHost
|
||||
mode: context.settings.policy_mode,
|
||||
rules: context.settings.url_rules,
|
||||
resolveHost: this.fetcher.resolveHost
|
||||
});
|
||||
if (!policy.allowed) continue;
|
||||
const normalized = { ...row, url: policy.url };
|
||||
if (input.requested_depth === "full_page" && settings.allow_full_page_fetch) {
|
||||
try {
|
||||
const page = await this.provider.fetchPage(policy.url, settings);
|
||||
normalized.url = page.url;
|
||||
normalized.page_excerpt = page.text;
|
||||
} catch (error) {
|
||||
if (error.code === "URL_BLOCKED") continue;
|
||||
}
|
||||
if (!policy.allowed) {
|
||||
blockedResults += 1;
|
||||
warnings.push(`Blocked result from ${safeDomain(row.url) || "unknown source"}: ${policy.reason}.`);
|
||||
continue;
|
||||
}
|
||||
allowed.push(normalized);
|
||||
allowed.push({ ...row, url: policy.url });
|
||||
}
|
||||
const formatted = formatResults(allowed, { reason, origin, settings });
|
||||
const value = {
|
||||
query,
|
||||
reason: formatted.reason,
|
||||
status: allowed.length ? "ok" : "no_results",
|
||||
blocked_reason: null,
|
||||
const formatted = formatResults(allowed, {
|
||||
reason: context.reason,
|
||||
origin: context.origin,
|
||||
settings: context.settings,
|
||||
maxResults: maximum
|
||||
});
|
||||
const fetchedPages = [];
|
||||
if (input.target_url) {
|
||||
try {
|
||||
const target = await this.fetcher.fetchPage(input.target_url, context.settings);
|
||||
fetchedPages.push(formatFetchedPage(target, {
|
||||
origin: context.origin,
|
||||
settings: context.settings
|
||||
}));
|
||||
} catch (error) {
|
||||
warnings.push(`Target URL fetch failed: ${safeFailure(error)}.`);
|
||||
}
|
||||
}
|
||||
const adapterErrors = discovered.adapter_errors || [];
|
||||
const status = formatted.results.length
|
||||
? adapterErrors.length ? "partial" : "ok"
|
||||
: blockedResults > 0
|
||||
? "blocked"
|
||||
: adapterErrors.length
|
||||
? "unavailable"
|
||||
: "no_results";
|
||||
const result = this.result(context, {
|
||||
status,
|
||||
blocked_reason: status === "blocked" ? "all_results_blocked" : null,
|
||||
provider: discovered.provider || context.settings.provider,
|
||||
result_count: formatted.results.length,
|
||||
results: formatted.results,
|
||||
fetched_pages: fetchedPages,
|
||||
warnings,
|
||||
errors: adapterErrors,
|
||||
condensed_text: formatted.condensed_text,
|
||||
output_budget_chars: formatted.output_budget_chars,
|
||||
truncated: formatted.truncated,
|
||||
timing_ms: this.now() - started,
|
||||
cache_hit: false,
|
||||
policy_mode: settings.policy_mode
|
||||
};
|
||||
if (settings.cache_ttl_seconds > 0 && allowed.length) {
|
||||
this.cache.set(cacheKey, {
|
||||
expiresAt: this.now() + settings.cache_ttl_seconds * 1000,
|
||||
value
|
||||
});
|
||||
}
|
||||
return this.finish(value, auditBase);
|
||||
truncated: formatted.truncated
|
||||
});
|
||||
if (context.settings.cache_ttl_seconds > 0 && formatted.results.length) this.cache.set(cacheKey, result);
|
||||
return this.finish(result, context);
|
||||
} catch (error) {
|
||||
return this.finish({
|
||||
query,
|
||||
reason,
|
||||
status: error.code === "URL_BLOCKED" ? "blocked" : "unavailable",
|
||||
blocked_reason: error.blockedReason || null,
|
||||
error: cleanError(error),
|
||||
return this.finish(this.failure(context, error), context);
|
||||
}
|
||||
}
|
||||
|
||||
async fetchUrl(input = {}) {
|
||||
return this.runConcurrent(input, "fetch_url", () => this.fetchCapability(input, "fetch_url"));
|
||||
}
|
||||
|
||||
async summarizeUrl(input = {}) {
|
||||
return this.runConcurrent(input, "summarize_url", () => this.fetchCapability(input, "summarize_url"));
|
||||
}
|
||||
|
||||
async runConcurrent(input, capability, callback) {
|
||||
try {
|
||||
return await this.concurrency.run(callback);
|
||||
} catch (error) {
|
||||
if (error?.code !== "concurrency_limited") throw error;
|
||||
const context = this.context(input, capability);
|
||||
context.query = cleanQuery(input.query || input.url || input.target_url);
|
||||
return this.finish(this.blocked(context, "concurrency_limited", {
|
||||
retry_after_seconds: 2
|
||||
}), context);
|
||||
}
|
||||
}
|
||||
|
||||
async fetchCapability(input, capability) {
|
||||
const context = this.context(input, capability);
|
||||
const blocked = this.preflight(context);
|
||||
if (blocked) return this.finish(blocked, context);
|
||||
const targetUrl = String(input.url || input.target_url || "").trim().slice(0, 2048);
|
||||
if (!targetUrl) return this.finish(this.blocked(context, "url_required"), context);
|
||||
context.query = targetUrl;
|
||||
context.reason = normalizeReason(input.reason || (
|
||||
capability === "summarize_url" ? "resource_lookup" : "general_lookup"
|
||||
));
|
||||
const cacheKey = JSON.stringify([
|
||||
capability, targetUrl, context.settings.policy_mode,
|
||||
context.settings.url_rules, context.origin
|
||||
]);
|
||||
const cached = this.cache.get(cacheKey, context.settings.cache_ttl_seconds);
|
||||
if (cached) return this.finish({
|
||||
...cached,
|
||||
cache_hit: true,
|
||||
timing_ms: this.now() - context.started
|
||||
}, context);
|
||||
try {
|
||||
const page = await this.fetcher.fetchPage(targetUrl, context.settings);
|
||||
const formatted = formatFetchedPage(page, {
|
||||
origin: context.origin,
|
||||
settings: context.settings
|
||||
});
|
||||
const result = this.result(context, {
|
||||
status: formatted.extraction_status === "ok" ? "ok" : "no_results",
|
||||
provider: "direct_http",
|
||||
result_count: 0,
|
||||
results: [],
|
||||
condensed_text: "",
|
||||
timing_ms: this.now() - started,
|
||||
cache_hit: false,
|
||||
policy_mode: settings.policy_mode
|
||||
}, auditBase);
|
||||
fetched_pages: [formatted],
|
||||
warnings: formatted.truncated ? ["Extracted page text was truncated to the configured origin budget."] : [],
|
||||
errors: [],
|
||||
condensed_text: capability === "summarize_url"
|
||||
? formatted.extracted_text
|
||||
: [formatted.title, formatted.description, formatted.extracted_text].filter(Boolean).join("\n")
|
||||
});
|
||||
if (context.settings.cache_ttl_seconds > 0 && formatted.extraction_status === "ok") {
|
||||
this.cache.set(cacheKey, result);
|
||||
}
|
||||
return this.finish(result, context);
|
||||
} catch (error) {
|
||||
return this.finish(this.failure(context, error), context);
|
||||
}
|
||||
}
|
||||
|
||||
consumeRateLimit(key, maximum) {
|
||||
const cutoff = this.now() - 60000;
|
||||
const recent = (this.rateLimits.get(key) || []).filter((timestamp) => timestamp > cutoff);
|
||||
if (recent.length >= maximum) {
|
||||
this.rateLimits.set(key, recent);
|
||||
return false;
|
||||
}
|
||||
recent.push(this.now());
|
||||
this.rateLimits.set(key, recent);
|
||||
return true;
|
||||
context(input, capability) {
|
||||
const settings = readSettings(this.dataDir);
|
||||
const origin = normalizeOrigin(input.ctx?.origin || input.ctx?.platform || input.origin || "other");
|
||||
return {
|
||||
started: this.now(),
|
||||
capability,
|
||||
settings,
|
||||
origin,
|
||||
actor: String(input.user?.id || input.user?.username || "unknown").slice(0, 120),
|
||||
username: String(input.user?.username || "").slice(0, 120),
|
||||
role: input.user?.isAdmin ? "admin" : input.user?.isMod ? "mod" : "user",
|
||||
server: String(input.ctx?.server_id || input.ctx?.channel_id || "direct").slice(0, 120),
|
||||
query: "",
|
||||
reason: normalizeReason(input.reason)
|
||||
};
|
||||
}
|
||||
|
||||
finish(result, base) {
|
||||
result.user_message ||= userMessage(result);
|
||||
this.audit({
|
||||
...base,
|
||||
status: result.status,
|
||||
allowed: result.status === "ok" || result.status === "no_results",
|
||||
blocked_reason: result.blocked_reason || null,
|
||||
result_count: result.result_count,
|
||||
timing_ms: result.timing_ms,
|
||||
cache_hit: result.cache_hit
|
||||
preflight(context) {
|
||||
if (!capabilityEnabled(context.settings, context.capability)) {
|
||||
return this.blocked(context, "capability_disabled");
|
||||
}
|
||||
if (!context.settings.allowed_origins.includes(context.origin)) {
|
||||
return this.blocked(context, "origin_not_allowed");
|
||||
}
|
||||
const rate = this.rateLimits.consume(context, context.settings);
|
||||
if (!rate.allowed) {
|
||||
return this.blocked(context, "rate_limited", {
|
||||
retry_after_seconds: rate.retry_after_seconds
|
||||
});
|
||||
}
|
||||
if (context.settings.provider === "external_json" &&
|
||||
context.capability === "search" &&
|
||||
!context.settings.external_provider_endpoint) {
|
||||
return this.blocked(context, "external_provider_not_configured");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
result(context, patch = {}) {
|
||||
return {
|
||||
status: "ok",
|
||||
query: context.query,
|
||||
reason: context.reason,
|
||||
provider: context.settings.provider,
|
||||
policy_mode: context.settings.policy_mode,
|
||||
cache_hit: false,
|
||||
timing_ms: Math.max(0, this.now() - context.started),
|
||||
result_count: 0,
|
||||
blocked_reason: null,
|
||||
results: [],
|
||||
fetched_pages: [],
|
||||
warnings: [],
|
||||
errors: [],
|
||||
...patch
|
||||
};
|
||||
}
|
||||
|
||||
blocked(context, reason, patch = {}) {
|
||||
return this.result(context, {
|
||||
status: "blocked",
|
||||
blocked_reason: reason,
|
||||
...patch
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Converts a thrown error into a result.
 *
 * @param {object} context - Request context.
 * @param {Error} error - The caught error.
 * @returns {object} Result with status "blocked" when `error.code` is
 *   "URL_BLOCKED", otherwise "unavailable"; `errors` holds the
 *   `safeFailure` form of the error.
 */
failure(context, error) {
  const policyBlock = error?.code === "URL_BLOCKED";
  return this.result(context, {
    status: policyBlock ? "blocked" : "unavailable",
    blocked_reason: error?.blockedReason || null,
    errors: [safeFailure(error)]
  });
}
|
||||
|
||||
finish(result, context) {
|
||||
result.timing_ms = Math.max(0, this.now() - context.started);
|
||||
result.user_message = userMessage(result);
|
||||
const audit = {
|
||||
actor: context.actor,
|
||||
username: context.username || null,
|
||||
role: context.role,
|
||||
origin: context.origin,
|
||||
server: context.server,
|
||||
capability: context.capability,
|
||||
query_hash: queryHash(context.query),
|
||||
query_summary: safeQuerySummary(context.query),
|
||||
reason: context.reason,
|
||||
provider: result.provider,
|
||||
policy_mode: result.policy_mode,
|
||||
policy_decision: ["ok", "partial", "no_results"].includes(result.status) ? "allowed" : "blocked",
|
||||
result_count: result.result_count,
|
||||
cache_hit: result.cache_hit,
|
||||
timing_ms: result.timing_ms,
|
||||
blocked_reason: result.blocked_reason,
|
||||
status: result.status
|
||||
};
|
||||
this.audit(audit);
|
||||
this.updateStatus(result, audit);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -139,46 +308,125 @@ class WebSearchTool {
|
||||
...entry
|
||||
})}\n`);
|
||||
}
|
||||
|
||||
updateStatus(result, audit) {
|
||||
const current = readStatus(this.dataDir);
|
||||
const metrics = {
|
||||
searches: Number(current.metrics?.searches || 0) + (audit.capability === "search" ? 1 : 0),
|
||||
fetches: Number(current.metrics?.fetches || 0) + (audit.capability !== "search" ? 1 : 0),
|
||||
successes: Number(current.metrics?.successes || 0) + (result.status === "ok" ? 1 : 0),
|
||||
blocked: Number(current.metrics?.blocked || 0) + (result.status === "blocked" ? 1 : 0),
|
||||
cache_hits: Number(current.metrics?.cache_hits || 0) + (result.cache_hit ? 1 : 0),
|
||||
failures: Number(current.metrics?.failures || 0) + (result.status === "unavailable" ? 1 : 0),
|
||||
total_search_ms: Number(current.metrics?.total_search_ms || 0) +
|
||||
(audit.capability === "search" ? result.timing_ms : 0),
|
||||
total_fetch_ms: Number(current.metrics?.total_fetch_ms || 0) +
|
||||
(audit.capability !== "search" ? result.timing_ms : 0),
|
||||
failures_by_reason: {
|
||||
...(current.metrics?.failures_by_reason || {})
|
||||
}
|
||||
};
|
||||
const failureReason = result.blocked_reason || (
|
||||
result.status === "unavailable" ? result.errors?.[0]?.reason || result.errors?.[0] || "unavailable" : null
|
||||
);
|
||||
if (failureReason) {
|
||||
const key = String(failureReason).slice(0, 120);
|
||||
metrics.failures_by_reason[key] = Number(metrics.failures_by_reason[key] || 0) + 1;
|
||||
}
|
||||
const recent = [
|
||||
{
|
||||
timestamp: new Date().toISOString(),
|
||||
capability: audit.capability,
|
||||
origin: audit.origin,
|
||||
query_summary: audit.query_summary,
|
||||
provider: audit.provider,
|
||||
status: audit.status,
|
||||
blocked_reason: audit.blocked_reason,
|
||||
timing_ms: audit.timing_ms
|
||||
},
|
||||
...(Array.isArray(current.recent) ? current.recent : [])
|
||||
].slice(0, 20);
|
||||
writeStatus(this.dataDir, {
|
||||
provider: result.provider,
|
||||
provider_health: ["ok", "partial", "no_results"].includes(result.status) ? "available" : "degraded",
|
||||
last_success_at: result.status === "ok" ? new Date().toISOString() : current.last_success_at || null,
|
||||
last_error: ["blocked", "unavailable"].includes(result.status)
|
||||
? result.blocked_reason || result.errors?.[0] || result.status
|
||||
: null,
|
||||
cache: this.cache.stats(),
|
||||
metrics,
|
||||
recent
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Resolve the origin of a call from its context.
 *
 * @param {object} [ctx] - Call context; `origin` is preferred over `platform`.
 * @param {string} [fallback] - Used when the context names neither.
 * @returns {*} Whatever `normalizeOrigin` returns for the chosen value ("other" when nothing is set).
 */
function trustedOrigin(ctx, fallback) {
  return normalizeOrigin(ctx?.origin || ctx?.platform || fallback || "other");
}

/**
 * Sanitize a raw query string.
 *
 * Control characters become spaces, whitespace runs collapse to a single
 * space, the ends are trimmed, and the result is capped at 500 characters.
 *
 * @param {*} value - Raw query; falsy values yield "".
 * @returns {string} The cleaned query.
 */
function cleanQuery(value) {
  return String(value || "")
    .replace(/[\u0000-\u001f\u007f]/g, " ")
    .replace(/\s+/g, " ")
    .trim()
    .slice(0, 500);
}
|
||||
|
||||
/**
 * Build a standalone "blocked" search result.
 *
 * @param {string} query - The query that was blocked.
 * @param {string} reason - The caller-supplied lookup reason.
 * @param {string} blockedReason - Why the call was blocked.
 * @param {object} settings - Tool settings; only `policy_mode` is read.
 * @param {number} started - Start timestamp in milliseconds.
 * @param {number} [now] - Current timestamp in milliseconds (defaults to `Date.now()`).
 * @returns {object} The blocked result.
 */
function blockedResult(query, reason, blockedReason, settings, started, now = Date.now()) {
  return {
    query,
    reason,
    status: "blocked",
    blocked_reason: blockedReason,
    result_count: 0,
    results: [],
    condensed_text: "",
    // Never report a negative duration.
    timing_ms: Math.max(0, now - started),
    cache_hit: false,
    policy_mode: settings.policy_mode
  };
}

/**
 * Map an arbitrary value onto a known lookup reason.
 *
 * @param {*} value - Candidate reason.
 * @returns {string} The value as a string when it is a member of `REASONS`, otherwise "general_lookup".
 */
function normalizeReason(value) {
  return REASONS.has(String(value || "")) ? String(value) : "general_lookup";
}
|
||||
|
||||
/**
 * Reduce a provider error to one of two fixed, non-sensitive messages.
 *
 * @param {*} error - The caught error.
 * @returns {string} A timeout message for `AbortError`, otherwise a generic "unavailable" message.
 */
function cleanError(error) {
  if (error?.name === "AbortError") return "Search provider timed out.";
  return "Search provider is unavailable.";
}

/**
 * Short, stable fingerprint of a query.
 *
 * @param {*} value - The query.
 * @returns {string|null} First 16 hex characters of the SHA-256 digest, or null for falsy input.
 */
function queryHash(value) {
  if (!value) return null;
  return crypto.createHash("sha256").update(String(value)).digest("hex").slice(0, 16);
}
|
||||
|
||||
/**
 * Produce a short, log-safe summary of a query.
 *
 * The query is cleaned with `cleanQuery`; if it is a URL carrying
 * credentials, those are removed; secret-looking query parameters are
 * redacted; the result is capped at 120 characters.
 *
 * @param {*} value - Raw query or URL.
 * @returns {string} The redacted summary.
 */
function safeQuerySummary(value) {
  let text = cleanQuery(value);
  try {
    const url = new URL(text);
    if (url.username || url.password) {
      url.username = "";
      url.password = "";
      text = url.href;
    }
  } catch {
    // Not a URL: plain-text queries are expected here and need no credential stripping.
  }
  // Redact any parameter whose name ends in key/token/secret/password, so that
  // api_key=, access_token= and client_secret= are covered as well as the bare
  // names. This matches what safeFailure already redacts in error text.
  text = text.replace(/([?&][^=&\s]*(?:key|token|secret|password)=)[^&\s]+/gi, "$1[REDACTED]");
  return text.slice(0, 120);
}
|
||||
|
||||
/**
 * Extract the hostname from a URL string.
 *
 * @param {*} value - Candidate URL.
 * @returns {string} The hostname, or "" when the value cannot be parsed as a URL.
 */
function safeDomain(value) {
  let parsed;
  try {
    parsed = new URL(value);
  } catch {
    return "";
  }
  return parsed.hostname;
}
|
||||
|
||||
/**
 * Turn an error into a short description that is safe to store and show.
 *
 * Known error codes map to fixed phrases. Any other message has URLs
 * replaced by "[url]", secret-looking `name=value` pairs redacted, and is
 * truncated to 240 characters.
 *
 * @param {*} error - The caught error (may be null or undefined).
 * @returns {string} The sanitized description.
 */
function safeFailure(error) {
  const code = error?.code;
  if (error?.name === "AbortError" || code === "timeout") return "request timed out";

  switch (code) {
    case "response_too_large":
      return "response exceeded the configured size limit";
    case "unsupported_content_type":
      return "content type is not supported";
    case "URL_BLOCKED":
      return `URL blocked: ${error.blockedReason || "safety policy"}`;
    default:
      break;
  }

  const message = String(error?.message || "provider unavailable");
  const withoutUrls = message.replace(/https?:\/\/\S+/g, "[url]");
  const redacted = withoutUrls.replace(/(key|token|secret|password)=[^&\s]+/gi, "$1=[REDACTED]");
  return redacted.slice(0, 240);
}
|
||||
|
||||
/**
 * Choose the user-facing sentence for a finished result.
 *
 * Successful statuses are checked first, then specific blocked reasons,
 * then the generic "blocked" and "unavailable" fallbacks. Every branch is
 * reachable: an earlier revision returned unconditionally before the
 * partial / concurrency / capability / external-provider cases.
 *
 * @param {object} result - Result envelope; reads `status`, `blocked_reason`,
 *   `condensed_text` and `retry_after_seconds`.
 * @returns {string} The message to show to the user.
 */
function userMessage(result) {
  const { status, blocked_reason: blockedReason } = result;

  if (status === "ok") return result.condensed_text || "The public lookup completed.";
  if (status === "partial") {
    return "The lookup returned partial results because one or more public sources were unavailable.";
  }
  if (status === "no_results") return "No permitted public results were found.";

  if (blockedReason === "rate_limited") {
    return `Web lookup is rate-limited. Retry in ${result.retry_after_seconds || 60} seconds.`;
  }
  if (blockedReason === "concurrency_limited") return "Web lookup is busy. Retry in a few seconds.";
  if (blockedReason === "origin_not_allowed") return "Web lookup is not enabled for this platform.";
  if (blockedReason === "capability_disabled") return "This web lookup capability is disabled.";
  // Kept so results that still carry the older reason code get an accurate message.
  if (blockedReason === "tool_disabled") return "Web search is disabled.";
  if (blockedReason === "external_provider_not_configured") {
    return "The optional external provider is selected but not configured. Select Lumi search broker or configure the endpoint.";
  }

  if (status === "blocked") return "The URL or lookup was blocked by the configured safety policy.";
  return "Live public verification is currently unavailable.";
}
|
||||
|
||||
// Public surface of the search-tool module: the tool class plus the
// sanitizing and formatting helpers defined in this file.
module.exports = {
|
||||
WebSearchTool,
|
||||
blockedResult,
|
||||
cleanError,
|
||||
trustedOrigin,
|
||||
cleanQuery,
|
||||
normalizeReason,
|
||||
queryHash,
|
||||
safeFailure,
|
||||
safeQuerySummary,
|
||||
userMessage
|
||||
};
|
||||
|
||||
@ -2,6 +2,7 @@ const fs = require("fs");
|
||||
const path = require("path");
|
||||
|
||||
const metadata = require("../tool_info.json");
|
||||
const ORIGINS = new Set(["webui", "discord", "twitch", "youtube", "kick", "other"]);
|
||||
|
||||
function defaults() {
|
||||
return Object.fromEntries(
|
||||
/**
 * Load the tool settings from disk.
 *
 * Stored values are passed through `migrateSettings` (so legacy keys are
 * carried over to their current names) and then through `normalizeSettings`.
 * A stale earlier `return` used to bypass the migration step entirely.
 *
 * @param {string} dataDir - Directory that holds settings.json.
 * @returns {object} Normalized settings; normalized defaults when the file
 *   is missing, unreadable, or not valid JSON.
 */
function readSettings(dataDir) {
  const fallback = defaults();
  try {
    const stored = JSON.parse(fs.readFileSync(settingsPath(dataDir), "utf8"));
    return normalizeSettings(migrateSettings(stored, fallback));
  } catch {
    // Best effort by design: any read or parse problem falls back to defaults.
    return normalizeSettings(fallback);
  }
}
|
||||
|
||||
/**
 * Return the current settings, creating settings.json from the defaults
 * when it does not exist yet.
 *
 * @param {string} dataDir - Directory that holds settings.json.
 * @returns {object} The settings that were written or read.
 */
function ensureSettings(dataDir) {
  const alreadyPresent = fs.existsSync(settingsPath(dataDir));
  if (alreadyPresent) {
    return readSettings(dataDir);
  }
  return writeSettings(dataDir, defaults());
}
|
||||
|
||||
/**
 * Migrate, normalize and persist the tool settings.
 *
 * The file is written to a temporary sibling first and then moved into
 * place with `replaceFile`. The move happens exactly once: a second rename
 * of the same temporary file would fail because the source no longer exists.
 *
 * @param {string} dataDir - Directory that holds settings.json (created if missing).
 * @param {object} value - Settings to store; legacy keys are accepted.
 * @returns {object} The normalized settings that were written.
 */
function writeSettings(dataDir, value) {
  const normalized = normalizeSettings(migrateSettings(value, defaults()));
  fs.mkdirSync(dataDir, { recursive: true });
  const file = settingsPath(dataDir);
  const temporary = `${file}.${process.pid}.tmp`;
  // The file may contain an API key, so restrict it to the owner.
  fs.writeFileSync(temporary, `${JSON.stringify(normalized, null, 2)}\n`, { mode: 0o600 });
  try {
    fs.chmodSync(temporary, 0o600);
  } catch {
    // Best effort: the mode passed to writeFileSync above already applies where supported.
  }
  replaceFile(temporary, file);
  return normalized;
}
|
||||
|
||||
/**
 * Carry settings stored under legacy key names over to the current names.
 *
 * The result starts as `fallback` overlaid with `value`. A legacy value is
 * copied to its current key only when the current key is absent (null or
 * undefined) in `value`, so explicitly stored current values always win.
 *
 * @param {object} [value] - Stored or submitted settings.
 * @param {object} [fallback] - Base values (defaults to `defaults()`).
 * @returns {object} A new settings object; inputs are not modified.
 */
function migrateSettings(value = {}, fallback = defaults()) {
  const migrated = { ...fallback, ...value };
  const missing = (key) => value[key] == null;

  if (!value.provider) {
    // A configured legacy endpoint implies the external provider was in use.
    migrated.provider = value.provider_endpoint ? "external_json" : "lumi_search_broker";
  }
  if (value.enabled != null && missing("enable_search")) {
    migrated.enable_search = value.enabled === true;
  }

  // [legacy key, current key] pairs, copied when the legacy value is truthy.
  const renames = [
    ["safe_search", "safe_search_level"],
    ["provider_adapter", "external_provider_adapter"],
    ["provider_endpoint", "external_provider_endpoint"],
    ["provider_api_key", "external_provider_api_key"],
    ["provider_api_key_header", "external_provider_api_key_header"],
    ["provider_api_key_prefix", "external_provider_api_key_prefix"],
    ["provider_query_parameter", "external_provider_query_parameter"],
    ["requests_per_minute", "per_user_per_minute"]
  ];
  for (const [legacy, current] of renames) {
    if (value[legacy] && missing(current)) {
      migrated[current] = value[legacy];
    }
  }

  if (typeof value.show_source_links === "boolean") {
    // The single legacy switch fans out to the per-origin switches.
    ["webui", "discord", "twitch"]
      .map((origin) => `show_source_links_${origin}`)
      .filter(missing)
      .forEach((key) => {
        migrated[key] = value.show_source_links;
      });
  }
  return migrated;
}
|
||||
|
||||
/**
 * Coerce a settings object into its canonical shape.
 *
 * Every key is validated independently: booleans are compared strictly,
 * enumerations fall back to a fixed default, integers are clamped through
 * `integer`, and lists are bounded through `stringList`.
 *
 * Each key is defined exactly once. The previous literal listed several
 * keys twice (the later entry silently won) and was missing a comma after
 * `requests_per_minute`. The values kept here are the ones that took
 * effect, and keys stay in the position where they first appeared.
 *
 * @param {object} value - Settings to normalize (already merged with defaults).
 * @returns {object} A new, fully populated settings object.
 */
function normalizeSettings(value) {
  const adapter = value.provider_adapter === "generic_json" ? "generic_json" : "searxng_json";
  const safeSearch = ["off", "moderate", "strict"].includes(value.safe_search) ? value.safe_search : "strict";
  const levels = ["off", "moderate", "strict"];
  const queryParameter = /^[A-Za-z][A-Za-z0-9_.-]{0,63}$/;

  return {
    enabled: value.enabled === true, // legacy key, retained for existing readers
    policy_mode: value.policy_mode === "whitelist" ? "whitelist" : "blacklist",
    provider: value.provider === "external_json" ? "external_json" : "lumi_search_broker",
    enable_search: value.enable_search !== false,
    enable_fetch_url: value.enable_fetch_url !== false,
    enable_summarize_url: value.enable_summarize_url !== false,
    enable_headless_browser_fallback: value.enable_headless_browser_fallback === true,
    url_rules: stringList(value.url_rules, 200),
    max_results: integer(value.max_results, 1, 10, 5),
    search_timeout_ms: integer(value.search_timeout_ms, 1000, 30000, 8000),
    cache_ttl_seconds: integer(value.cache_ttl_seconds, 0, 86400, 900),
    safe_search: safeSearch, // legacy key, superseded by safe_search_level
    allowed_origins: stringList(value.allowed_origins, 6).filter((origin) => ORIGINS.has(origin)),
    webui_output_chars: integer(value.webui_output_chars, 300, 12000, 2500),
    fetch_timeout_ms: integer(value.fetch_timeout_ms, 1000, 30000, 10000),
    max_fetch_bytes: integer(value.max_fetch_bytes, 65536, 4194304, 1048576),
    max_extracted_chars: integer(value.max_extracted_chars, 1000, 50000, 12000),
    max_redirects: integer(value.max_redirects, 0, 8, 3),
    safe_search_level: levels.includes(value.safe_search_level) ? value.safe_search_level : "moderate",
    show_source_links_webui: value.show_source_links_webui !== false,
    show_source_links_discord: value.show_source_links_discord !== false,
    show_source_links_twitch: value.show_source_links_twitch === true,
    discord_output_chars: integer(value.discord_output_chars, 200, 4000, 1200),
    twitch_output_chars: integer(value.twitch_output_chars, 120, 1000, 450),
    youtube_output_chars: integer(value.youtube_output_chars, 120, 3000, 1200),
    kick_output_chars: integer(value.kick_output_chars, 120, 1000, 450),
    other_output_chars: integer(value.other_output_chars, 120, 3000, 700),
    // Legacy provider keys, superseded by the external_provider_* keys below.
    provider_adapter: adapter,
    provider_endpoint: String(value.provider_endpoint || "").trim(),
    provider_api_key: String(value.provider_api_key || "").trim(),
    provider_api_key_header: value.provider_api_key_header === "Authorization" ? "Authorization" : "X-API-Key",
    provider_api_key_prefix: String(value.provider_api_key_prefix || "").trim().slice(0, 32),
    provider_query_parameter: queryParameter.test(String(value.provider_query_parameter || ""))
      ? String(value.provider_query_parameter)
      : "q",
    show_source_links: value.show_source_links !== false, // legacy key
    allow_full_page_fetch: value.allow_full_page_fetch === true, // legacy key
    requests_per_minute: integer(value.requests_per_minute, 1, 60, 6), // legacy key
    per_user_per_minute: integer(value.per_user_per_minute, 1, 60, 6),
    per_origin_per_minute: integer(value.per_origin_per_minute, 1, 300, 30),
    per_server_per_minute: integer(value.per_server_per_minute, 1, 300, 20),
    external_provider_adapter: value.external_provider_adapter === "generic_json"
      ? "generic_json"
      : "searxng_json",
    external_provider_endpoint: String(value.external_provider_endpoint || "").trim(),
    external_provider_api_key: String(value.external_provider_api_key || "").trim(),
    external_provider_api_key_header: value.external_provider_api_key_header === "Authorization"
      ? "Authorization"
      : "X-API-Key",
    external_provider_api_key_prefix: String(value.external_provider_api_key_prefix || "").trim().slice(0, 32),
    external_provider_query_parameter: queryParameter.test(String(value.external_provider_query_parameter || ""))
      ? String(value.external_provider_query_parameter)
      : "q"
  };
}
|
||||
|
||||
/**
 * Report whether a capability is switched on in the settings.
 *
 * @param {object} settings - Normalized tool settings.
 * @param {string} capability - "search", "fetch_url" or "summarize_url".
 * @returns {boolean} True only when the matching switch is exactly `true`;
 *   false for unknown capabilities.
 */
function capabilityEnabled(settings, capability) {
  const switches = new Map([
    ["search", settings.enable_search],
    ["fetch_url", settings.enable_fetch_url],
    ["summarize_url", settings.enable_summarize_url]
  ]);
  return switches.get(capability) === true;
}
|
||||
|
||||
/**
 * Report whether a capability can actually be used right now.
 *
 * A capability must be enabled; in addition, "search" with the
 * "external_json" provider requires a configured external endpoint.
 *
 * @param {object} settings - Normalized tool settings.
 * @param {string} capability - "search", "fetch_url" or "summarize_url".
 * @returns {boolean} True when the capability is usable.
 */
function capabilityAvailable(settings, capability) {
  if (!capabilityEnabled(settings, capability)) {
    return false;
  }
  const needsEndpoint = capability === "search" && settings.provider === "external_json";
  const endpointMissing = !settings.external_provider_endpoint;
  return !(needsEndpoint && endpointMissing);
}
|
||||
|
||||
/**
 * Look up the source-link switch for an origin.
 *
 * Twitch and Discord have their own switches; every other origin uses the
 * web UI switch.
 *
 * @param {object} settings - Normalized tool settings.
 * @param {string} origin - Origin name.
 * @returns {*} The stored switch value, returned as-is.
 */
function sourceLinksEnabled(settings, origin) {
  switch (origin) {
    case "twitch":
      return settings.show_source_links_twitch;
    case "discord":
      return settings.show_source_links_discord;
    default:
      return settings.show_source_links_webui;
  }
}
|
||||
|
||||
/**
 * Location of the settings file inside a data directory.
 *
 * @param {string} dataDir - Tool data directory.
 * @returns {string} Path to settings.json.
 */
function settingsPath(dataDir) {
  const fileName = "settings.json";
  return path.join(dataDir, fileName);
}
|
||||
|
||||
/**
 * Location of the status file inside a data directory.
 *
 * @param {string} dataDir - Tool data directory.
 * @returns {string} Path to status.json.
 */
function statusPath(dataDir) {
  const fileName = "status.json";
  return path.join(dataDir, fileName);
}
|
||||
|
||||
/**
 * Load the persisted status document.
 *
 * @param {string} dataDir - Tool data directory.
 * @returns {*} The parsed status.json content, or `{}` when the file is
 *   missing, unreadable, or not valid JSON.
 */
function readStatus(dataDir) {
  let raw;
  try {
    raw = fs.readFileSync(statusPath(dataDir), "utf8");
    return JSON.parse(raw);
  } catch {
    // Status is advisory only; start from an empty document on any failure.
    return {};
  }
}
|
||||
|
||||
/**
 * Merge a patch into the persisted status document and save it.
 *
 * The current document is re-read, overlaid with `patch`, stamped with a
 * fresh `updated_at`, written to a temporary sibling file, and moved into
 * place with `replaceFile`.
 *
 * @param {string} dataDir - Tool data directory (created if missing).
 * @param {object} patch - Top-level fields to overwrite.
 * @returns {object} The document that was written.
 */
function writeStatus(dataDir, patch) {
  const existing = readStatus(dataDir);
  const next = { ...existing, ...patch, updated_at: new Date().toISOString() };

  fs.mkdirSync(dataDir, { recursive: true });

  const file = statusPath(dataDir);
  const temporary = `${file}.${process.pid}.tmp`;
  const serialized = `${JSON.stringify(next, null, 2)}\n`;
  fs.writeFileSync(temporary, serialized);
  replaceFile(temporary, file);

  return next;
}
|
||||
|
||||
/**
 * Move `source` over `destination`.
 *
 * A plain rename is tried first. If it fails with EEXIST or EPERM, the
 * destination is removed and the rename is retried once. Any other error
 * is rethrown unchanged.
 *
 * @param {string} source - File to move.
 * @param {string} destination - Path to replace.
 * @throws {Error} The original rename error when it is not EEXIST or EPERM,
 *   or whatever the retry throws.
 */
function replaceFile(source, destination) {
  try {
    fs.renameSync(source, destination);
    return;
  } catch (error) {
    const retryable = ["EEXIST", "EPERM"].includes(error.code);
    if (!retryable) throw error;
  }
  fs.rmSync(destination, { force: true });
  fs.renameSync(source, destination);
}
|
||||
|
||||
function integer(value, minimum, maximum, fallback) {
|
||||
const number = Number.parseInt(value, 10);
|
||||
return Number.isFinite(number) ? Math.max(minimum, Math.min(maximum, number)) : fallback;
|
||||
@ -79,9 +183,17 @@ function stringList(value, limit) {
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
capabilityAvailable,
|
||||
capabilityEnabled,
|
||||
defaults,
|
||||
ensureSettings,
|
||||
migrateSettings,
|
||||
normalizeSettings,
|
||||
readSettings,
|
||||
readStatus,
|
||||
settingsPath,
|
||||
writeSettings
|
||||
sourceLinksEnabled,
|
||||
statusPath,
|
||||
writeSettings,
|
||||
writeStatus
|
||||
};
|
||||
|
||||
1
plugins/lumi_ai_web_search/data/cache/.gitkeep
vendored
Normal file
1
plugins/lumi_ai_web_search/data/cache/.gitkeep
vendored
Normal file
@ -0,0 +1 @@
|
||||
|
||||
@ -1,77 +1,167 @@
|
||||
const net = require("net");
|
||||
const { WebSearchTool } = require("./backend/search_tool");
|
||||
const { normalizeOrigin } = require("./backend/result_formatter");
|
||||
const { readSettings } = require("./backend/settings");
|
||||
const { isLocalHostname, isPrivateAddress } = require("./backend/url_policy");
|
||||
const {
|
||||
capabilityAvailable,
|
||||
capabilityEnabled,
|
||||
ensureSettings,
|
||||
readSettings,
|
||||
readStatus
|
||||
} = require("./backend/settings");
|
||||
|
||||
// Schema fragment for the `reason` argument shared by the registered tools:
// a required string restricted to the lookup reasons listed below.
const REASON_SCHEMA = {
  type: "string",
  required: true,
  enum: [
    "fact_lookup", "resource_lookup", "troubleshooting",
    "documentation_lookup", "news_or_recent", "general_lookup"
  ]
};
|
||||
|
||||
/**
 * Report whether the tool can be offered, with a message for the admin UI.
 *
 * Settings are created from defaults on first use. The tool is unavailable
 * only when no capability is usable. When the external provider is selected
 * for search but has no endpoint, the tool stays available (URL tools still
 * work) and the message explains what search needs.
 *
 * This replaces an interleaved version that declared `settings` twice, left
 * two `if` blocks unclosed, and still read the legacy `enabled` and
 * `provider_endpoint` keys.
 *
 * @param {{ paths: { data: string } }} options - Tool paths; only `paths.data` is read.
 * @returns {{ available: boolean, message: string }} Availability report.
 */
module.exports.checkAvailability = ({ paths }) => {
  const settings = ensureSettings(paths.data);
  const usable = ["search", "fetch_url", "summarize_url"]
    .filter((capability) => capabilityAvailable(settings, capability));

  if (!usable.length) {
    return {
      available: false,
      message: "All web-search capabilities are disabled in tool settings."
    };
  }

  const externalSearchUnconfigured = settings.provider === "external_json" &&
    settings.enable_search &&
    !settings.external_provider_endpoint;
  if (externalSearchUnconfigured) {
    return {
      available: true,
      message: "Explicit URL tools are available. Search requires the selected external endpoint or Lumi search broker."
    };
  }

  return {
    available: true,
    message: settings.provider === "lumi_search_broker"
      ? "Self-contained Lumi search broker is ready; no external provider setup is required."
      : "Optional external JSON provider is configured."
  };
};
|
||||
|
||||
/**
 * Register the web-search tools that are currently usable.
 *
 * Each capability ("search", "fetch_url", "summarize_url") is registered
 * only when `capabilityAvailable` accepts it for the current settings. All
 * tools share the same role, permission naming, audit category and
 * permission checks; only the descriptive fields, schema and handler differ.
 *
 * This replaces an interleaved version that contained an unclosed legacy
 * `registerTool({` call, duplicate `reason` / `freshness` /
 * `output_expectations` / `workflow_handler` keys, and array and object
 * entries without separating commas.
 *
 * @param {{ registerTool: Function, paths: { data: string } }} options -
 *   Host registration callback and tool paths.
 */
module.exports.register = ({ registerTool, paths }) => {
  ensureSettings(paths.data);
  const tool = new WebSearchTool({ dataDir: paths.data });

  // Registers one capability with the settings that every tool shares.
  const register = (capability, definition) => {
    const settings = readSettings(paths.data);
    if (!capabilityAvailable(settings, capability)) return;
    registerTool({
      ...definition,
      required_role: "user",
      required_permission: `lumi_ai_web_search.${capability}`,
      audit_category: "web_search",
      confirmation_required: false,
      risk_level: "low",
      read_only: true,
      // Settings are re-read on every check so changes apply without re-registering.
      origin_check: ({ context }) => originAllowed(paths.data, context, capability),
      prompt_permission_check: ({ user }) => Boolean(user?.id),
      permission_check: ({ user, context }) =>
        Boolean(user?.id) && originAllowed(paths.data, context, capability)
    });
  };

  register("search", {
    tool_id: "web_search.search",
    display_name: "Search the public web",
    description: "Search current public web information when verified Lumi-local context is insufficient or the user requests current, external, sourced, or verified facts.",
    use_cases: [
      "Current, recent, niche, or likely outdated public facts",
      "Verification, citations, comparisons, changelogs, releases, policies, and public documentation",
      "Current third-party information about Twitch, Discord, YouTube, Kick, Throne, Gitea, llama.cpp, Hugging Face, hardware, software, APIs, and services"
    ],
    output_expectations: "Returns compact policy-filtered structured results. Use only returned facts and URLs in the final answer; state uncertainty when results are incomplete.",
    schema: {
      query: { type: "string", required: true },
      reason: REASON_SCHEMA,
      freshness: { type: "string", required: false },
      max_results: { type: "integer", required: false },
      target_url: { type: "string", required: false }
    },
    workflow_handler: ({ arguments: args, user, ctx }) => tool.search({ ...args, user, ctx })
  });

  register("fetch_url", {
    tool_id: "web_search.fetch_url",
    display_name: "Fetch a public URL",
    description: "Safely fetch and extract readable information from an explicit public HTTP or HTTPS URL supplied by the user.",
    use_cases: [
      "Read an explicit public URL without search discovery",
      "Inspect public documentation, release notes, articles, or status pages",
      "Verify the current contents of a user-supplied page"
    ],
    output_expectations: "Returns sanitized page metadata and bounded readable text. Never claim JavaScript execution or browser interaction.",
    schema: {
      url: { type: "string", required: true },
      reason: { ...REASON_SCHEMA, required: false }
    },
    workflow_handler: ({ arguments: args, user, ctx }) => tool.fetchUrl({ ...args, user, ctx })
  });

  register("summarize_url", {
    tool_id: "web_search.summarize_url",
    display_name: "Summarize a public URL",
    description: "Safely fetch an explicit public HTTP or HTTPS URL and return compact extracted content for a concise summary.",
    use_cases: [
      "Summarize a user-supplied public article, documentation page, or release note",
      "Condense a public page without discovering unrelated search results"
    ],
    output_expectations: "Returns bounded sanitized source text and metadata. The assistant writes the final concise summary using only that content.",
    schema: {
      url: { type: "string", required: true },
      reason: { ...REASON_SCHEMA, required: false }
    },
    workflow_handler: ({ arguments: args, user, ctx }) => tool.summarizeUrl({ ...args, user, ctx })
  });
};
|
||||
|
||||
/**
 * Assemble the diagnostics snapshot for the tool.
 *
 * Combines the current settings with the persisted status document and
 * derives three figures from the stored counters: average search time,
 * average fetch time, and the cache hit rate as a percentage with one
 * decimal place.
 *
 * @param {{ paths: { data: string } }} options - Tool paths; only `paths.data` is read.
 * @returns {object} Diagnostics payload.
 */
module.exports.diagnostics = ({ paths }) => {
  const settings = readSettings(paths.data);
  const status = readStatus(paths.data);
  const metrics = status.metrics || {};

  const capabilities = [];
  for (const capability of ["search", "fetch_url", "summarize_url"]) {
    capabilities.push({
      tool_id: `web_search.${capability}`,
      enabled: capabilityEnabled(settings, capability),
      available: capabilityAvailable(settings, capability)
    });
  }

  // Rounded mean duration; zero when nothing has been counted yet.
  const average = (totalMs, count) => (count ? Math.round(Number(totalMs || 0) / Number(count)) : 0);

  const calls = Number(metrics.searches || 0) + Number(metrics.fetches || 0);
  const cacheHitRate = calls
    ? Math.round(Number(metrics.cache_hits || 0) / calls * 1000) / 10
    : 0;

  const recent = Array.isArray(status.recent) ? status.recent.slice(0, 10) : [];

  return {
    provider: settings.provider,
    provider_health: status.provider_health || "not_tested",
    policy_mode: settings.policy_mode,
    allowed_origins: settings.allowed_origins,
    capabilities,
    last_success_at: status.last_success_at || null,
    last_error: status.last_error || null,
    cache: status.cache || { entries: 0, bytes: 0 },
    metrics: {
      ...metrics,
      average_search_ms: average(metrics.total_search_ms, metrics.searches),
      average_fetch_ms: average(metrics.total_fetch_ms, metrics.fetches),
      cache_hit_rate: cacheHitRate
    },
    recent
  };
};
|
||||
|
||||
/**
 * Decide whether a capability may be used from the caller's origin.
 *
 * @param {string} dataDir - Tool data directory; settings are re-read on every call.
 * @param {object} [context] - Call context; `origin` is preferred over `platform`, default "other".
 * @param {string} capability - Capability being checked.
 * @returns {boolean} True when the capability is available and the
 *   normalized origin is listed in `allowed_origins`.
 */
function originAllowed(dataDir, context, capability) {
  const settings = readSettings(dataDir);
  const origin = normalizeOrigin(context?.origin || context?.platform || "other");
  if (!capabilityAvailable(settings, capability)) {
    return false;
  }
  return settings.allowed_origins.includes(origin);
}
|
||||
|
||||
11
plugins/lumi_ai_web_search/public/settings-modal.css
Normal file
11
plugins/lumi_ai_web_search/public/settings-modal.css
Normal file
@ -0,0 +1,11 @@
|
||||
/* Styles for the web-search settings view: status grid, capability badges,
   recent-call list, and the test form with its output area. */
.lumi-web-search-status { display: grid; gap: 12px; margin-bottom: 16px; }
|
||||
.lumi-web-search-status-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); gap: 8px; }
|
||||
.lumi-web-search-status-grid > div { display: grid; gap: 3px; padding: 9px; border: 1px solid var(--border); border-radius: 7px; background: var(--surface-2); }
|
||||
.lumi-web-search-status-grid span { color: var(--ink-soft); font-size: 11px; text-transform: uppercase; letter-spacing: .04em; }
|
||||
.lumi-web-search-capabilities { display: flex; flex-wrap: wrap; gap: 6px; }
|
||||
/* The recent-call list scrolls inside a fixed maximum height. */
.lumi-web-search-recent { display: grid; gap: 5px; max-height: 180px; overflow: auto; margin-top: 8px; font-size: 12px; }
|
||||
.lumi-web-search-recent > div { padding: 7px; border-left: 3px solid var(--border); background: var(--surface-2); overflow-wrap: anywhere; }
|
||||
.lumi-web-search-test { display: grid; grid-template-columns: minmax(220px, 1fr) 130px auto; align-items: end; gap: 8px; }
|
||||
.lumi-web-search-test label { display: grid; gap: 4px; font-weight: 800; }
|
||||
.lumi-web-search-test-output { max-height: 280px; overflow: auto; white-space: pre-wrap; }
|
||||
/* At 760px and below, the test form collapses to a single column. */
@media (max-width: 760px) { .lumi-web-search-test { grid-template-columns: 1fr; } }
|
||||
@ -1,4 +1,103 @@
|
||||
// Create the shared tool-settings registry on first use, then publish this
// tool's frozen entry with example URL-policy rules.
window.LumiAiToolSettings = window.LumiAiToolSettings || {};
|
||||
window.LumiAiToolSettings.lumi_ai_web_search = Object.freeze({
|
||||
policyExamples: ["docs.example.com", "*.example.com/docs/*", "https://example.com/resources/"]
|
||||
});
|
||||
(() => {
|
||||
if (window.LumiWebSearchSettingsLoaded) return;
|
||||
window.LumiWebSearchSettingsLoaded = true;
|
||||
|
||||
// Populate the status panel whenever the settings view for this tool opens.
// Events for other tools, and views without the expected root element, are ignored.
window.addEventListener("lumi-ai-tool-settings-open", (event) => {
  const detail = event.detail;
  if (detail?.toolId !== "lumi_ai_web_search") return;

  const root = detail.root?.querySelector("[data-web-search-settings]");
  if (!root) return;

  const payload = detail.payload || {};
  const values = payload.values || {};
  const status = payload.status || {};
  const show = (key, value) => set(root, key, value);

  show("provider", status.provider || values.provider || "lumi_search_broker");
  show("provider_health", status.provider_health || "not_tested");
  show("policy_mode", values.policy_mode || "blacklist");
  show(
    "last_success_at",
    status.last_success_at ? new Date(status.last_success_at).toLocaleString() : "Never"
  );
  show("last_error", status.last_error || "None");
  show("cache", `${status.cache?.entries || 0} entries, ${formatBytes(status.cache?.bytes || 0)}`);

  renderCapabilities(root, values);
  renderRecent(root, status.recent || []);
  bindTest(root);
});
|
||||
|
||||
/**
 * Write a value into the status field identified by `key`.
 *
 * @param {ParentNode} root - Settings view root.
 * @param {string} key - Value of the field's `data-web-status` attribute.
 * @param {*} value - Value to display; converted with `String`.
 */
function set(root, key, value) {
  const selector = `[data-web-status="${key}"]`;
  const target = root.querySelector(selector);
  if (!target) return;
  target.textContent = String(value);
}
|
||||
|
||||
/**
 * Render one badge per capability showing whether it is enabled.
 *
 * Does nothing when the view has no `[data-web-capabilities]` container,
 * matching how `set` and `bindTest` treat missing elements; previously a
 * missing container threw and aborted the rest of the panel setup.
 *
 * @param {ParentNode} root - Settings view root.
 * @param {object} values - Settings values; reads the three `enable_*` switches.
 */
function renderCapabilities(root, values) {
  const target = root.querySelector("[data-web-capabilities]");
  if (!target) return;
  target.replaceChildren();

  const capabilities = [
    ["web_search.search", values.enable_search],
    ["web_search.fetch_url", values.enable_fetch_url],
    ["web_search.summarize_url", values.enable_summarize_url]
  ];
  for (const [id, enabled] of capabilities) {
    const badge = document.createElement("span");
    badge.className = `ai-tag ${enabled ? "installed" : ""}`.trim();
    // textContent keeps the label as plain text.
    badge.textContent = `${id}: ${enabled ? "enabled" : "disabled"}`;
    target.append(badge);
  }
}
|
||||
|
||||
/**
 * Renders up to ten recent tool calls into `[data-web-recent]`, one line per
 * call, or a placeholder message when there is nothing to show.
 *
 * @param {ParentNode} root - Settings panel root to search within.
 * @param {Array<object>} rows - Recent call records; reads `timestamp`,
 *   `capability`, `origin`, `status`, `blocked_reason`, `timing_ms` and
 *   `query_summary`. Anything that is not an array is treated as empty.
 */
function renderRecent(root, rows) {
  const target = root.querySelector("[data-web-recent]");
  // A view without the list container must not throw: the caller still has to
  // bind the test form after this function returns.
  if (!target) return;
  target.replaceChildren();

  // Status data comes from a JSON payload, so guard against a non-array
  // `recent` value and against null entries inside it.
  const calls = Array.isArray(rows)
    ? rows.filter((row) => row && typeof row === "object")
    : [];
  if (calls.length === 0) {
    target.textContent = "No calls recorded.";
    return;
  }

  for (const row of calls.slice(0, 10)) {
    const item = document.createElement("div");
    item.textContent = [
      row.timestamp ? new Date(row.timestamp).toLocaleString() : "",
      row.capability,
      row.origin,
      row.status,
      row.blocked_reason,
      `${row.timing_ms || 0} ms`,
      row.query_summary
    ].filter(Boolean).join(" | "); // empty fields are dropped, not shown as gaps
    target.append(item);
  }
}
|
||||
|
||||
/**
 * Binds the panel's test form (once per form element) so that submitting it
 * posts the query to the assistant test endpoint and shows the JSON response,
 * or the error message, in the output element.
 *
 * @param {ParentNode} root - Settings panel root containing
 *   `[data-web-search-test]` and, optionally, `[data-web-search-test-output]`.
 */
function bindTest(root) {
  const form = root.querySelector("[data-web-search-test]");
  const output = root.querySelector("[data-web-search-test-output]");
  // The settings-open event can fire repeatedly for the same DOM; the data
  // flag prevents stacking duplicate submit handlers on one form.
  if (!form || form.dataset.bound === "1") return;
  form.dataset.bound = "1";

  // Display helper that tolerates a view without an output element, so the
  // async submit handler never rejects on a null reference.
  const show = (text) => {
    if (!output) return;
    output.hidden = false;
    output.textContent = text;
  };

  form.addEventListener("submit", async (event) => {
    event.preventDefault();
    const data = new FormData(form);
    show("Running through discovery, prompt exposure, and the normal tool pipeline...");
    try {
      const response = await fetch("/plugins/lumi_ai/assistant/test", {
        method: "POST",
        headers: { "Content-Type": "application/json", Accept: "application/json" },
        body: JSON.stringify({
          role: "admin",
          origin: data.get("origin"),
          message: data.get("query"),
          allow_tools: true,
          show_raw_prompt: true,
          show_raw_output: false
        })
      });

      // Error responses are not guaranteed to carry a JSON body; parse
      // defensively so the intended failure message is shown instead of a
      // raw JSON syntax error.
      let payload;
      let parsed = true;
      try {
        payload = await response.json();
      } catch {
        parsed = false;
      }
      if (!response.ok) {
        throw new Error((parsed && payload?.error) || "Web-search test failed.");
      }
      if (!parsed) {
        throw new Error("Web-search test returned an unreadable response.");
      }
      show(JSON.stringify(payload, null, 2));
    } catch (error) {
      show(error.message);
    }
  });
}
|
||||
|
||||
/**
 * Formats a byte count for display as `B`, `KB` or `MB` (binary multiples,
 * one decimal place for KB/MB). Non-numeric input is treated as zero.
 *
 * @param {*} value - Byte count, coerced with `Number()`.
 * @returns {string} Human-readable size such as `"1.5 KB"`.
 */
function formatBytes(value) {
  const size = Number(value) || 0;
  // Largest unit first so the first matching threshold wins.
  const units = [
    [1048576, "MB"],
    [1024, "KB"]
  ];
  for (const [threshold, label] of units) {
    if (size >= threshold) {
      return `${(size / threshold).toFixed(1)} ${label}`;
    }
  }
  return `${size} B`;
}
|
||||
})();
|
||||
|
||||
@ -1,86 +1,132 @@
|
||||
# Lumi AI Web Search
|
||||
|
||||
`lumi_ai_web_search` is an AI tool plugin for controlled current-information lookup. It is loaded only by Lumi AI's tool manager and is not an ordinary core plugin.
|
||||
`lumi_ai_web_search` gives Lumi Assistant controlled public web search and safe URL reading without requiring an API key, manually installed search service, or separate provider setup.
|
||||
|
||||
## Installation and enablement
|
||||
## Default behavior
|
||||
|
||||
1. Install this directory as `plugins/lumi_ai_web_search/`.
|
||||
2. Install Lumi AI `0.8.0` or newer.
|
||||
3. Open **Plugins -> Lumi AI -> Tools**.
|
||||
4. Select **Settings** for Lumi AI Web Search.
|
||||
5. Configure the provider and URL policy, turn on **Web search enabled**, and save.
|
||||
6. Select **Enable** in the Tools list.
|
||||
The default provider is `lumi_search_broker`. It lives entirely inside this plugin and uses lightweight public search endpoints through replaceable adapters:
|
||||
|
||||
The tool is not registered with the assistant while disabled. If its internal enabled setting or provider endpoint is missing, Lumi AI marks it unavailable without preventing Lumi from starting.
|
||||
- DuckDuckGo HTML search
|
||||
- Bing RSS fallback
|
||||
|
||||
## Provider
|
||||
Adapters return a stable internal result shape. If one source times out, rate-limits requests, blocks automated access, or changes markup, the broker records the adapter error and tries the next source. Optional external JSON providers remain available as an advanced mode.
|
||||
|
||||
The initial adapters accept JSON from a configured public endpoint:
|
||||
Fresh installations default to:
|
||||
|
||||
- `searxng_json` reads the SearxNG `results` array.
|
||||
- `generic_json` reads `results`, `items`, or `web.results.value`.
|
||||
- Search, explicit URL fetch, and URL summarization enabled
|
||||
- Blacklist policy with no custom blocks
|
||||
- WebUI, Discord, and Twitch origins allowed
|
||||
- No headless browser
|
||||
- No external endpoint or API key
|
||||
|
||||
The configured query parameter defaults to `q`. The adapter adds `format=json`, safe-search level, and result count. API keys are stored in `data/settings.json` with restricted file permissions where supported. The settings API never returns the key and a blank save keeps the existing secret.
|
||||
The parent Lumi AI tool enable state is preserved. Existing installations that were explicitly disabled remain disabled.
|
||||
|
||||
Provider requests use a strict timeout, a 2 MiB response limit, and at most three redirects. No page JavaScript is executed.
|
||||
## Registered tools
|
||||
|
||||
The plugin registers enabled capabilities independently:
|
||||
|
||||
- `web_search.search`: discover current or external public information
|
||||
- `web_search.fetch_url`: safely read an explicit public URL
|
||||
- `web_search.summarize_url`: safely extract compact content from an explicit public URL for summarization
|
||||
|
||||
Disabling search does not disable explicit URL fetch or summarization. Lumi AI prompt diagnostics show each capability as registered/exposed or hidden with its reason.
|
||||
|
||||
Trusted `ctx`, actor, role, origin, channel, and server details are supplied by Lumi at execution time. The model cannot provide or override them.
|
||||
|
||||
## When Lumi should search
|
||||
|
||||
Lumi Assistant is instructed to search for current, recent, niche, externally verifiable, or likely outdated facts. Search is also appropriate when a user asks to verify, confirm, look up, cite, find the latest, compare current options, or inspect public third-party information.
|
||||
|
||||
Lumi-local routes, plugin data, corrections, and help answers continue to use verified Lumi context first. Casual chat, creative writing, rewriting, translation, and formatting do not trigger search unless current factual support is needed. An explicit request not to search is respected.
|
||||
|
||||
## Safe URL fetching
|
||||
|
||||
Every search result, explicit URL, selected page, and redirect is checked before use. The fetcher:
|
||||
|
||||
- Allows only HTTP and HTTPS
|
||||
- Rejects URL credentials
|
||||
- Resolves DNS before connecting and pins the request to a verified public address
|
||||
- Blocks localhost, loopback, private, carrier-grade NAT, link-local, multicast, reserved, and metadata targets
|
||||
- Rechecks policy after every redirect
|
||||
- Limits redirects, time, compressed bytes, decompressed bytes, and extracted characters
|
||||
- Accepts readable HTML, plain text, XML, RSS, and Atom content only
|
||||
- Never executes JavaScript
|
||||
|
||||
Security blocks override administrator whitelist rules.
|
||||
|
||||
## URL policy
|
||||
|
||||
The default is an empty whitelist, so no result URL is usable until an administrator adds explicit rules. Rules support:
|
||||
Blacklist mode allows safe public URLs except matching rules. Whitelist mode allows only matching rules.
|
||||
|
||||
Rules may be:
|
||||
|
||||
- Domain: `docs.example.com`
|
||||
- Domain and subdomains: `example.com`
|
||||
- Subdomain wildcard: `*.example.com`
|
||||
- Domain including subdomains: `example.com`
|
||||
- Wildcard subdomain: `*.example.com`
|
||||
- Path prefix: `example.com/docs`
|
||||
- Full wildcard pattern: `https://*.example.com/resources/*`
|
||||
- Full pattern: `https://*.example.com/resources/*`
|
||||
|
||||
Whitelist mode permits only matching result, page, and redirect URLs. Blacklist mode permits public URLs except matching rules.
|
||||
Tracking parameters such as `utm_*`, `fbclid`, and `gclid` are removed from normalized search results where safe.
|
||||
|
||||
Independent hard network rules always block:
|
||||
## Extraction and result processing
|
||||
|
||||
- `localhost`, `.localhost`, `.local`, and known metadata hostnames
|
||||
- Private, loopback, carrier-grade NAT, link-local, multicast, and reserved IP ranges
|
||||
- DNS names resolving to private or otherwise unsafe addresses
|
||||
- URL credentials
|
||||
- Non-HTTP/HTTPS protocols
|
||||
The HTML extractor removes scripts, styles, navigation, footers, forms, hidden content, and other non-readable elements. It prefers `<main>` or `<article>`, then falls back to headings, metadata, and body text.
|
||||
|
||||
The same checks run before each page fetch and after every redirect. Administrator rules cannot override these blocks.
|
||||
Normal tool results never include raw HTML. Structured results contain:
|
||||
|
||||
## Tool behavior
|
||||
- Status, query, reason, provider, policy mode, cache state, timing, counts, warnings, and errors
|
||||
- Normalized title, permitted URL, domain, snippet, date, rank, source, source ID, relevance score, and policy state
|
||||
- Fetched page URL, final URL, title, description, bounded readable text, content type, fetch time, and extraction state
|
||||
|
||||
The registered tool ID is `lumi_ai_web_search.search`. It accepts:
|
||||
Lumi AI passes this structured result back to the model to produce the final natural answer. Normal users do not see raw tool JSON.
|
||||
|
||||
- `query`
|
||||
- `reason`: `fact_lookup`, `resource_lookup`, `troubleshooting`, `documentation_lookup`, `news_or_recent`, or `general_lookup`
|
||||
- Optional `requested_depth`: `search` or `full_page`
|
||||
- Optional `freshness`
|
||||
## Settings
|
||||
|
||||
The assistant should use this tool only for current or external information that is not available in verified local Lumi context.
|
||||
Open **Plugins -> Lumi AI -> Tools -> Lumi AI Web Search -> Settings**.
|
||||
|
||||
The tool registers as a read-only lookup. This allows permitted Discord, Twitch, YouTube, Kick, and other contexts to use it even though those contexts cannot run WebUI action tools. Lumi AI evaluates the configured origin allowlist before including the tool in the model prompt and again before execution.
|
||||
The tool-owned settings panel shows:
|
||||
|
||||
Results are sanitized and returned as structured data rather than raw provider JSON. Each result contains a title, permitted URL or no URL when links are disabled, domain, condensed snippet, source type, date, and relevance score. Documentation and troubleshooting searches prioritize authoritative sources; recent searches prioritize dated sources.
|
||||
- Provider and provider health
|
||||
- Search/fetch/summarize capability states
|
||||
- Policy mode and allowed origins
|
||||
- Last successful request and last error
|
||||
- Cache count and size
|
||||
- Recent redacted calls
|
||||
- A test field that uses Lumi Assistant's normal tool pipeline
|
||||
|
||||
Optional full-page mode extracts bounded visible text only when the administrator enables it. It does not automate a browser, submit forms, execute scripts, or follow unrestricted links.
|
||||
Settings include capability toggles, URL policy, timeouts, byte/text limits, redirects, cache TTL, safe-search level, origins, source-link controls, per-origin output budgets, three rate-limit scopes, and optional external provider fields. **Reset to defaults** restores the no-key broker configuration.
|
||||
|
||||
## Origin limits and rate limits
|
||||
## Output limits
|
||||
|
||||
Allowed origins and output budgets are independently configurable for WebUI, Discord, Twitch, YouTube, Kick, and other sources. Trusted runtime context determines the origin; a model-provided origin cannot elevate access.
|
||||
Results are condensed before returning to Lumi AI:
|
||||
|
||||
Twitch is limited to compact output and at most two source references. Discord permits moderate detail. WebUI permits richer summaries and more results. The tool also applies a per-actor, per-origin, per-server/channel rolling request limit.
|
||||
- WebUI may use richer context and multiple sources
|
||||
- Discord receives compact context
|
||||
- Twitch receives a very short result with at most one source reference, and only when source links are enabled
|
||||
|
||||
## Auditing and storage
|
||||
The final Lumi response formatter still applies the platform's authoritative message limit.
|
||||
|
||||
All writable data remains under this plugin:
|
||||
## Caching, limits, and diagnostics
|
||||
|
||||
- `data/settings.json`: normalized settings and provider secret
|
||||
- `data/audit.jsonl`: query, reason, actor, origin, server/channel, policy outcome, result count, cache status, and timing
|
||||
Cache entries are stored in `data/cache/` and expire according to the configured TTL. Rate limits apply independently per actor, origin, and server/channel. Rate-limited results include a retry-after value.
|
||||
|
||||
Provider credentials are not written to audit records or returned in tool results. Updates preserve `data/` by default.
|
||||
`data/status.json` stores provider health, aggregate counters, cache status, and recent redacted calls. `data/audit.jsonl` records actor, role, origin, capability, safe query summary/hash, reason, provider, policy decision, result count, cache state, timing, blocked reason, and status. Full page content and secrets are not logged.
|
||||
|
||||
## Security boundary
|
||||
## Optional external provider
|
||||
|
||||
The plugin has no shell, SQL, arbitrary filesystem, browser automation, or code-execution feature. Network access is limited to the configured public search provider and policy-approved public result pages. Lumi AI's backend role and permission checks remain authoritative.
|
||||
Select `external_json` only when an administrator explicitly wants a compatible SearxNG or generic JSON endpoint. External provider settings are advanced and are not required for default operation. Selecting external mode without an endpoint blocks only search discovery; explicit URL tools remain available.
|
||||
|
||||
## Browser and sidecar modes
|
||||
|
||||
Headless browser fallback is disabled and not implemented as a default path. The setting is reserved for a future restricted Lumi-managed runtime. A future local sidecar can be added behind the provider abstraction without changing tool contracts; default operation will continue to work without it.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- **Search unavailable but URL fetch works:** a public search adapter may be blocked or rate-limited. Check provider health and recent adapter errors.
|
||||
- **URL blocked:** inspect blacklist/whitelist rules. Hard private-network blocks cannot be overridden.
|
||||
- **External provider not configured:** select `lumi_search_broker` or configure the optional endpoint.
|
||||
- **Rate limited:** wait for the returned retry-after interval.
|
||||
- **No readable content:** the page may be JavaScript-only or use an unsupported content type. Browser fallback is intentionally off.
|
||||
|
||||
## Verification
|
||||
|
||||
@ -90,4 +136,4 @@ Run:
|
||||
node plugins/lumi_ai_web_search/tests/verify.js
|
||||
```
|
||||
|
||||
The suite covers whitelist/blacklist matching, hard private-network blocks, redirect checks, reason-aware formatting, origin budgets, provider failures, settings effects, registration availability, and audits.
|
||||
The suite covers default no-key availability, three-capability registration, prompt exposure compatibility, broker adapters, URL policy, redirects, private-network blocking, readable extraction, output budgets, caching, rate limits, audits, and clean failure behavior.
|
||||
|
||||
@ -2,35 +2,371 @@ const assert = require("assert");
|
||||
const fs = require("fs");
|
||||
const os = require("os");
|
||||
const path = require("path");
|
||||
const { SearchProvider } = require("../backend/provider_adapter");
|
||||
const zlib = require("zlib");
|
||||
const { parseBingRss } = require("../backend/adapters/bing_rss_search");
|
||||
const { parseDuckDuckGoHtml } = require("../backend/adapters/generic_html_search");
|
||||
const { ToolCache } = require("../backend/cache");
|
||||
const { extractReadableHtml } = require("../backend/html_extractor");
|
||||
const { LumiSearchBroker, normalizeBrokerResults } = require("../backend/lumi_search_broker");
|
||||
const { PageFetcher } = require("../backend/page_fetcher");
|
||||
const { ToolConcurrency } = require("../backend/rate_limits");
|
||||
const { formatResults } = require("../backend/result_formatter");
|
||||
const { WebSearchTool } = require("../backend/search_tool");
|
||||
const { readSettings, writeSettings } = require("../backend/settings");
|
||||
const { defaults, readSettings, writeSettings } = require("../backend/settings");
|
||||
const { evaluateUrl, matchesRule } = require("../backend/url_policy");
|
||||
const { ToolRegistry } = require("../../lumi_ai/backend/tool_router");
|
||||
const { ToolInstaller } = require("../../lumi_ai/backend/tool_installer");
|
||||
const { ToolLoader } = require("../../lumi_ai/backend/tool_loader");
|
||||
const { ToolManager } = require("../../lumi_ai/backend/tool_manager");
|
||||
const { ToolSettings } = require("../../lumi_ai/backend/tool_settings");
|
||||
const { buildPrompt } = require("../../lumi_ai/backend/prompt_builder");
|
||||
const plugin = require("../index");
|
||||
|
||||
// Stub DNS resolver handed to the code under test: every hostname resolves to
// one fixed public IPv4 address, so no real DNS lookup is needed.
const PUBLIC_DNS = () => Promise.resolve(["93.184.216.34"]);
|
||||
|
||||
async function run() {
|
||||
verifyDefaultsAndMigration();
|
||||
await verifyPolicy();
|
||||
await verifyRedirectPolicy();
|
||||
verifyExtractionAndAdapters();
|
||||
await verifyFetcher();
|
||||
verifyFormatting();
|
||||
await verifySearchFlow();
|
||||
await verifySearchAndCache();
|
||||
await verifyProviderFailure();
|
||||
await verifyFetchCapabilities();
|
||||
await verifyRateLimit();
|
||||
await verifyConcurrencyLimit();
|
||||
await verifyLoaderLifecycle();
|
||||
verifyRegistrationAvailability();
|
||||
verifyRegistrationAndPrompt();
|
||||
verifyStaticFiles();
|
||||
console.log("Lumi AI Web Search verification passed.");
|
||||
}
|
||||
|
||||
/**
 * Verifies the settings defaults for an empty data directory, then verifies
 * that a legacy `settings.json` (with `enabled`, `policy_mode`, `url_rules`
 * and an empty `provider_endpoint`) is read back with its explicit choices
 * preserved and the broker provider selected.
 */
function verifyDefaultsAndMigration() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-defaults-"));
  // Clean up in `finally` so a failing assertion does not leave the
  // temporary directory behind.
  try {
    const fresh = readSettings(root);
    assert.equal(fresh.provider, "lumi_search_broker");
    assert.equal(fresh.enable_search, true);
    assert.equal(fresh.policy_mode, "blacklist");
    assert.deepEqual(fresh.allowed_origins, ["webui", "discord", "twitch"]);

    // Legacy-shaped settings: the old `enabled: false` flag is expected to
    // come back as `enable_search: false`.
    fs.writeFileSync(path.join(root, "settings.json"), JSON.stringify({
      enabled: false,
      policy_mode: "whitelist",
      url_rules: ["docs.example.com"],
      provider_endpoint: ""
    }));
    const migrated = readSettings(root);
    assert.equal(migrated.provider, "lumi_search_broker");
    assert.equal(migrated.enable_search, false);
    assert.equal(migrated.policy_mode, "whitelist");
    assert.deepEqual(migrated.url_rules, ["docs.example.com"]);
  } finally {
    fs.rmSync(root, { recursive: true, force: true });
  }
}
|
||||
|
||||
/**
 * Verifies URL policy decisions: whitelist and blacklist rule matching, the
 * hard blocks for local/private/metadata targets and non-HTTP schemes, the
 * block on hostnames that resolve to a private address, and path-prefix
 * rule matching on subdomains.
 */
async function verifyPolicy() {
  // Evaluate one URL under a policy; DNS resolves publicly unless overridden.
  const decide = (url, mode, rules, resolveHost = PUBLIC_DNS) =>
    evaluateUrl(url, { mode, rules, resolveHost });

  const whitelisted = await decide("https://docs.example.com/guide", "whitelist", ["*.example.com/*"]);
  assert.equal(whitelisted.allowed, true);

  const offList = await decide("https://unrelated.test/guide", "whitelist", ["*.example.com/*"]);
  assert.equal(offList.reason, "not_whitelisted");

  const blacklisted = await decide("https://ads.example.com/tracker", "blacklist", ["*.example.com/tracker*"]);
  assert.equal(blacklisted.reason, "blacklisted");

  // These must be rejected even with an empty blacklist and public DNS.
  const hardBlocked = [
    "http://127.0.0.1/",
    "http://10.1.2.3/",
    "http://169.254.169.254/latest/meta-data/",
    "http://localhost/",
    "file:///etc/passwd",
    "javascript:alert(1)",
    "data:text/plain,no"
  ];
  for (const target of hardBlocked) {
    const verdict = await decide(target, "blacklist", []);
    assert.equal(verdict.allowed, false, target);
  }

  // A public-looking hostname whose DNS answer is a private address.
  const rebound = await decide("https://dns-rebind.example/", "blacklist", [], async () => ["10.0.0.8"]);
  assert.equal(rebound.reason, "private_network");

  const guideUrl = new URL("https://docs.example.com/guide/start");
  assert.equal(matchesRule(guideUrl, "example.com/guide"), true);
}
|
||||
|
||||
/**
 * Verifies readable-HTML extraction (title, body text, noise removal,
 * publication date), the DuckDuckGo HTML and Bing RSS result parsers, and
 * broker-level normalization that collapses duplicate URLs which differ only
 * by a tracking parameter.
 */
function verifyExtractionAndAdapters() {
  const pageHtml = `
<html><head><title> Example & Test </title>
<meta name="description" content="Useful description">
<meta property="article:published_time" content="2026-06-10T12:00:00Z"></head>
<body><nav>Noise</nav><main><h1>Heading</h1><p>Readable body text.</p>
<script>secret()</script></main><footer>Noise</footer></body></html>`;
  const readable = extractReadableHtml(pageHtml, { maxChars: 500 });
  assert.equal(readable.title, "Example & Test");
  assert.match(readable.extracted_text, /Readable body text/);
  // Script content and nav/footer text must not appear in the extracted text.
  assert.doesNotMatch(readable.extracted_text, /secret|Noise/);
  assert.equal(readable.published_at, "2026-06-10T12:00:00.000Z");

  const duckResults = parseDuckDuckGoHtml(`
<a class="result__a" href="/l/?uddg=https%3A%2F%2Fdocs.example.com%2Fguide">Official docs</a>
<a class="result__snippet">Current documentation result.</a>`);
  const firstDuck = duckResults[0];
  // The redirect wrapper's `uddg` parameter is decoded to the target URL.
  assert.equal(firstDuck.url, "https://docs.example.com/guide");
  assert.equal(firstDuck.snippet, "Current documentation result.");

  const bingResults = parseBingRss(`
<rss><channel><item><title>Release</title><link>https://example.com/release</link>
<description>Latest release details.</description><pubDate>Wed, 10 Jun 2026 12:00:00 GMT</pubDate></item></channel></rss>`);
  const firstBing = bingResults[0];
  assert.equal(firstBing.domain, undefined);
  assert.equal(firstBing.url, "https://example.com/release");

  // Same page reported by two sources, once with a tracking parameter.
  const duplicates = [
    { title: "Docs", url: "https://example.com/a?utm_source=x", snippet: "A", source: "one" },
    { title: "Docs", url: "https://example.com/a", snippet: "B", source: "two" }
  ];
  const merged = normalizeBrokerResults(duplicates);
  assert.equal(merged.length, 1);
  assert.equal(merged[0].url, "https://example.com/a");
}
|
||||
|
||||
/**
 * Verifies PageFetcher with stubbed network calls: a permitted redirect to a
 * gzip-compressed HTML page is followed and extracted, a redirect to a
 * loopback address is rejected, and a body larger than `max_fetch_bytes` is
 * rejected.
 */
async function verifyFetcher() {
  const pageHtml = "<html><head><title>Page title</title></head><body><main>Readable page.</main></body></html>";
  const gzipped = zlib.gzipSync(Buffer.from(pageHtml));

  // First request answers with a redirect; every later one serves the page.
  let requests = 0;
  const redirectingFetch = async () => {
    requests += 1;
    if (requests === 1) {
      return response({ status: 302, headers: { location: "https://docs.example.com/final" } });
    }
    return response({
      headers: { "content-type": "text/html; charset=utf-8", "content-encoding": "gzip" },
      body: gzipped
    });
  };
  const followingFetcher = new PageFetcher({ resolveHost: PUBLIC_DNS, fetch: redirectingFetch });
  const whitelistSettings = testSettings({
    policy_mode: "whitelist",
    url_rules: ["*.example.com/*"]
  });
  const fetched = await followingFetcher.fetchPage("https://docs.example.com/start", whitelistSettings);
  assert.equal(fetched.final_url, "https://docs.example.com/final");
  assert.equal(fetched.title, "Page title");
  assert.match(fetched.extracted_text, /Readable page/);

  // Redirect target is a loopback address and must be refused.
  const loopbackFetcher = new PageFetcher({
    resolveHost: PUBLIC_DNS,
    fetch: async () => response({ status: 302, headers: { location: "http://127.0.0.1/private" } })
  });
  await assert.rejects(
    () => loopbackFetcher.fetchPage("https://docs.example.com/start", testSettings()),
    /blocked by policy/i
  );

  // 70000-byte body against a 65536-byte fetch limit.
  const largeBodyFetcher = new PageFetcher({
    resolveHost: PUBLIC_DNS,
    fetch: async () => response({
      headers: { "content-type": "text/html" },
      body: Buffer.alloc(70000, "a")
    })
  });
  await assert.rejects(
    () => largeBodyFetcher.fetchPage("https://example.com/large", testSettings({ max_fetch_bytes: 65536 })),
    /size limit/i
  );
}
|
||||
|
||||
/**
 * Verifies per-origin result formatting: each origin's condensed text stays
 * within its configured character budget, Twitch output is cut to a single
 * result without a URL, and a documentation lookup ranks the documentation
 * source first.
 */
function verifyFormatting() {
  const candidates = [
    result("Community", "https://community.example.com/post", "Community context.", "web"),
    result("Official docs", "https://docs.example.com/guide", "Official answer.", "documentation"),
    result("Recent", "https://news.example.com/update", "Recent update.", "news", "2026-06-12")
  ];
  const settings = testSettings();
  const formatFor = (reason, origin) => formatResults(candidates, { reason, origin, settings });

  const forTwitch = formatFor("fact_lookup", "twitch");
  const forDiscord = formatFor("resource_lookup", "discord");
  const forWebui = formatFor("documentation_lookup", "webui");

  assert(forTwitch.condensed_text.length <= settings.twitch_output_chars);
  assert.equal(forTwitch.results.length, 1);
  assert.equal(forTwitch.results[0].url, null);

  assert(forDiscord.condensed_text.length <= settings.discord_output_chars);

  assert(forWebui.condensed_text.length <= settings.webui_output_chars);
  assert.equal(forWebui.results[0].title, "Official docs");
}
|
||||
|
||||
/**
 * Verifies the search capability end to end against a stub broker: results
 * are sanitized and policy-filtered, an identical second request is served
 * from cache without calling the broker again, and the audit log records the
 * call without containing the result snippet.
 */
async function verifySearchAndCache() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-search-"));
  // Clean up in `finally` so a failing assertion does not leave the
  // temporary directory behind.
  try {
    writeSettings(root, testSettings({
      policy_mode: "whitelist",
      url_rules: ["*.example.com/*"],
      cache_ttl_seconds: 60
    }));

    // Stub broker that counts invocations and returns one usable result plus
    // one loopback URL that must never reach the caller.
    let calls = 0;
    const broker = {
      async search() {
        calls += 1;
        return {
          provider: "lumi_search_broker",
          results: [
            result("<b>Verified fact</b>", "https://docs.example.com/fact", "The answer is current.", "documentation"),
            result("Blocked local", "http://127.0.0.1/private", "Never return.", "web")
          ],
          warnings: [],
          adapter_errors: []
        };
      }
    };
    const tool = new WebSearchTool({
      dataDir: root,
      broker,
      fetcher: new PageFetcher({ resolveHost: PUBLIC_DNS })
    });
    const input = {
      query: "current fact",
      reason: "fact_lookup",
      user: { id: "user-1", username: "alice" },
      ctx: { origin: "webui", server_id: "server-1" }
    };

    const first = await tool.search(input);
    assert.equal(first.status, "ok");
    assert.equal(first.provider, "lumi_search_broker");
    assert.equal(first.result_count, 1);
    // Markup in the provider title is stripped.
    assert.equal(first.results[0].title, "Verified fact");
    assert.equal(first.results.some((entry) => entry.url?.includes("127.0.0.1")), false);

    const cached = await tool.search(input);
    assert.equal(cached.cache_hit, true);
    assert.equal(calls, 1);

    // Read the audit log once and use it for both the structured and the
    // raw-text checks.
    const auditText = fs.readFileSync(path.join(root, "audit.jsonl"), "utf8");
    const audit = auditText.trim().split(/\r?\n/).map((line) => JSON.parse(line));
    assert(audit.some((entry) =>
      entry.actor === "user-1" &&
      entry.origin === "webui" &&
      entry.provider === "lumi_search_broker" &&
      entry.query_hash &&
      typeof entry.timing_ms === "number"
    ));
    // Result content must not be written to the audit log.
    assert.equal(auditText.includes("The answer is current"), false);
  } finally {
    fs.rmSync(root, { recursive: true, force: true });
  }
}
|
||||
|
||||
/**
 * Verifies the explicit-URL capabilities against a stub fetcher that always
 * returns the same page: `fetchUrl` exposes the extracted text under
 * `fetched_pages`, and `summarizeUrl` places it in `condensed_text`.
 */
async function verifyFetchCapabilities() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-fetch-"));
  // Clean up in `finally` so a failing assertion does not leave the
  // temporary directory behind.
  try {
    writeSettings(root, testSettings());
    const page = {
      url: "https://docs.example.com/page",
      final_url: "https://docs.example.com/page",
      title: "Documentation",
      description: "Description",
      headings: ["Documentation"],
      extracted_text: "Readable documentation text with the current answer.",
      content_type: "text/html",
      fetched_at: new Date().toISOString(),
      extraction_status: "ok",
      published_at: null,
      updated_at: null,
      timing_ms: 10,
      truncated: false
    };
    const tool = new WebSearchTool({
      dataDir: root,
      fetcher: {
        resolveHost: PUBLIC_DNS,
        async fetchPage() { return page; }
      }
    });

    // Distinct actors and origins are used for the two calls.
    const fetched = await tool.fetchUrl({
      url: page.url,
      user: { id: "u" },
      ctx: { origin: "discord" }
    });
    const summarized = await tool.summarizeUrl({
      url: page.url,
      user: { id: "u2" },
      ctx: { origin: "webui" }
    });

    assert.equal(fetched.status, "ok");
    assert.match(fetched.fetched_pages[0].extracted_text, /current answer/);
    assert.equal(summarized.status, "ok");
    assert.match(summarized.condensed_text, /Readable documentation/);
  } finally {
    fs.rmSync(root, { recursive: true, force: true });
  }
}
|
||||
|
||||
/**
 * Verifies clean failure behavior: when the broker returns no results and
 * reports an adapter error, the search result has status `unavailable` and a
 * user-facing message that says so.
 */
async function verifyProviderFailure() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-failure-"));
  // Clean up in `finally` so a failing assertion does not leave the
  // temporary directory behind.
  try {
    writeSettings(root, testSettings());
    const tool = new WebSearchTool({
      dataDir: root,
      // Stub broker simulating a timed-out adapter with nothing to return.
      broker: {
        async search() {
          return {
            provider: "lumi_search_broker",
            results: [],
            warnings: ["duckduckgo_html: timed out"],
            adapter_errors: [{ adapter: "duckduckgo_html", reason: "timed out" }]
          };
        }
      },
      fetcher: new PageFetcher({ resolveHost: PUBLIC_DNS })
    });

    const result = await tool.search({
      query: "current unavailable fact",
      reason: "fact_lookup",
      user: { id: "failure" },
      ctx: { origin: "webui" }
    });
    assert.equal(result.status, "unavailable");
    assert.match(result.user_message, /unavailable/i);
  } finally {
    fs.rmSync(root, { recursive: true, force: true });
  }
}
|
||||
|
||||
/**
 * Verifies per-user rate limiting: with `per_user_per_minute` set to 1, a
 * second search by the same actor is blocked as `rate_limited` and carries a
 * positive retry-after value plus a "Retry in" user message.
 */
async function verifyRateLimit() {
  const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-rate-"));
  // Clean up in `finally` so a failing assertion does not leave the
  // temporary directory behind.
  try {
    writeSettings(root, testSettings({ per_user_per_minute: 1 }));
    const tool = new WebSearchTool({
      dataDir: root,
      broker: {
        async search() {
          return { provider: "lumi_search_broker", results: [], warnings: [], adapter_errors: [] };
        }
      },
      fetcher: new PageFetcher({ resolveHost: PUBLIC_DNS })
    });
    const input = {
      query: "one",
      reason: "general_lookup",
      user: { id: "limited" },
      ctx: { origin: "webui", server_id: "server" }
    };

    // First call consumes the actor's only slot for the window.
    await tool.search(input);
    // A different query from the same actor must still be limited.
    const limited = await tool.search({ ...input, query: "two" });
    assert.equal(limited.blocked_reason, "rate_limited");
    assert(limited.retry_after_seconds > 0);
    assert.match(limited.user_message, /Retry in/);
  } finally {
    fs.rmSync(root, { recursive: true, force: true });
  }
}
|
||||
|
||||
/**
 * Verifies that `ToolConcurrency(2, 4)` never runs more than two of six
 * submitted tasks at the same time, by tracking the peak number of tasks
 * that are simultaneously in flight.
 */
async function verifyConcurrencyLimit() {
  const limiter = new ToolConcurrency(2, 4);
  let inFlight = 0;
  let peak = 0;

  // Each job holds its slot for a few milliseconds so overlaps are observable.
  const job = async () => {
    inFlight += 1;
    peak = Math.max(peak, inFlight);
    await new Promise((resolve) => setTimeout(resolve, 5));
    inFlight -= 1;
  };

  const pending = [];
  for (let index = 0; index < 6; index += 1) {
    pending.push(limiter.run(job));
  }
  await Promise.all(pending);

  assert.equal(peak, 2);
}
|
||||
|
||||
async function verifyLoaderLifecycle() {
|
||||
const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-loader-"));
|
||||
const pluginsDir = path.join(root, "plugins");
|
||||
const toolDir = path.join(pluginsDir, "lumi_ai_web_search");
|
||||
copyDirectory(path.resolve(__dirname, ".."), toolDir, new Set(["data"]));
|
||||
fs.mkdirSync(path.join(toolDir, "data"), { recursive: true });
|
||||
copyDirectory(path.resolve(__dirname, ".."), toolDir, new Set(["audit.jsonl", "status.json"]));
|
||||
fs.rmSync(path.join(toolDir, "data", "settings.json"), { force: true });
|
||||
const installer = new ToolInstaller({
|
||||
pluginsDir,
|
||||
stagingRoot: path.join(root, "staging"),
|
||||
@ -42,276 +378,166 @@ async function verifyLoaderLifecycle() {
|
||||
installer,
|
||||
settings: { getSetting: (_key, fallback) => fallback },
|
||||
stateFile: path.join(root, "enabled.json"),
|
||||
lumiAiVersion: "0.8.0",
|
||||
lumiAiVersion: "0.8.1",
|
||||
lumiVersion: "0.1.0"
|
||||
});
|
||||
const unavailable = await loader.enable("lumi_ai_web_search");
|
||||
assert.equal(unavailable.unavailable, true);
|
||||
assert.equal(registry.tools.has("lumi_ai_web_search.search"), false);
|
||||
writeSettings(path.join(toolDir, "data"), providerSettings());
|
||||
const enabled = await loader.enable("lumi_ai_web_search");
|
||||
assert.equal(enabled.loaded, true);
|
||||
assert.equal(registry.tools.has("lumi_ai_web_search.search"), true);
|
||||
await loader.loadEnabled();
|
||||
assert.equal(fs.existsSync(path.join(toolDir, "data", "settings.json")), true);
|
||||
assert.equal(loader.isEnabled("lumi_ai_web_search"), true);
|
||||
assert.equal(registry.has("web_search.search"), true);
|
||||
assert.equal(registry.has("web_search.fetch_url"), true);
|
||||
assert.equal(registry.has("web_search.summarize_url"), true);
|
||||
const manager = new ToolManager({
|
||||
loader,
|
||||
installer,
|
||||
settings: new ToolSettings({ installer }),
|
||||
repoClient: {
|
||||
async discover() {
|
||||
return {
|
||||
repository: "local",
|
||||
branch: "main",
|
||||
checked_at: new Date().toISOString(),
|
||||
cached: false,
|
||||
stale: false,
|
||||
tools: []
|
||||
};
|
||||
}
|
||||
}
|
||||
});
|
||||
const diagnostics = await manager.diagnostics({
|
||||
role: "admin",
|
||||
user: { id: "admin", isAdmin: true },
|
||||
context: { origin: "webui", permission_context: { webui_actions_allowed: true } }
|
||||
});
|
||||
const pluginDiagnostics = diagnostics.plugins.find((entry) => entry.tool_id === "lumi_ai_web_search");
|
||||
assert.equal(pluginDiagnostics.runtime_details.provider, "lumi_search_broker");
|
||||
assert.equal(pluginDiagnostics.decisions.length, 3);
|
||||
assert(pluginDiagnostics.decisions.every((decision) => decision.exposed));
|
||||
assert(diagnostics.considered_tools.includes("web_search.search"));
|
||||
assert(diagnostics.exposed_tools.includes("web_search.search"));
|
||||
const described = manager.settingsFor("lumi_ai_web_search");
|
||||
assert.equal(described.values.provider, "lumi_search_broker");
|
||||
assert(described.ui.html.includes("data-web-search-settings"));
|
||||
assert(described.ui.scripts.some((value) => value.endsWith("/settings-modal.js")));
|
||||
assert(described.ui.styles.some((value) => value.endsWith("/settings-modal.css")));
|
||||
await loader.disable("lumi_ai_web_search");
|
||||
assert.equal(registry.tools.has("lumi_ai_web_search.search"), false);
|
||||
assert.equal(registry.has("web_search.search"), false);
|
||||
fs.rmSync(root, { recursive: true, force: true });
|
||||
}
|
||||
|
||||
async function verifyPolicy() {
|
||||
let result = await evaluateUrl("https://docs.example.com/guide", {
|
||||
mode: "whitelist",
|
||||
rules: ["*.example.com/*"],
|
||||
resolveHost: PUBLIC_DNS
|
||||
});
|
||||
assert.equal(result.allowed, true);
|
||||
result = await evaluateUrl("https://unrelated.test/guide", {
|
||||
mode: "whitelist",
|
||||
rules: ["*.example.com/*"],
|
||||
resolveHost: PUBLIC_DNS
|
||||
});
|
||||
assert.equal(result.allowed, false);
|
||||
assert.equal(result.reason, "not_whitelisted");
|
||||
|
||||
result = await evaluateUrl("https://ads.example.com/tracker", {
|
||||
mode: "blacklist",
|
||||
rules: ["*.example.com/tracker*"],
|
||||
resolveHost: PUBLIC_DNS
|
||||
});
|
||||
assert.equal(result.allowed, false);
|
||||
result = await evaluateUrl("https://docs.example.org/", {
|
||||
mode: "blacklist",
|
||||
rules: ["*.example.com/tracker*"],
|
||||
resolveHost: PUBLIC_DNS
|
||||
});
|
||||
assert.equal(result.allowed, true);
|
||||
|
||||
for (const target of [
|
||||
"http://127.0.0.1/",
|
||||
"http://10.1.2.3/",
|
||||
"http://169.254.169.254/latest/meta-data/",
|
||||
"http://localhost/",
|
||||
"file:///etc/passwd"
|
||||
]) {
|
||||
result = await evaluateUrl(target, {
|
||||
mode: "blacklist",
|
||||
rules: [],
|
||||
resolveHost: PUBLIC_DNS
|
||||
});
|
||||
assert.equal(result.allowed, false, target);
|
||||
}
|
||||
result = await evaluateUrl("https://dns-rebind.example/", {
|
||||
mode: "blacklist",
|
||||
rules: [],
|
||||
resolveHost: async () => ["10.0.0.8"]
|
||||
});
|
||||
assert.equal(result.allowed, false);
|
||||
assert.equal(matchesRule(new URL("https://docs.example.com/guide/start"), "example.com/guide"), true);
|
||||
assert.equal(matchesRule(new URL("https://example.com/"), "http://example.com/"), false);
|
||||
}
|
||||
|
||||
async function verifyRedirectPolicy() {
|
||||
const provider = new SearchProvider({
|
||||
resolveHost: PUBLIC_DNS,
|
||||
fetch: async () => response({
|
||||
status: 302,
|
||||
headers: { location: "http://127.0.0.1/private" }
|
||||
})
|
||||
});
|
||||
await assert.rejects(
|
||||
() => provider.search("test", providerSettings()),
|
||||
/blocked by policy/i
|
||||
);
|
||||
let calls = 0;
|
||||
const crossOrigin = new SearchProvider({
|
||||
resolveHost: PUBLIC_DNS,
|
||||
fetch: async () => {
|
||||
calls += 1;
|
||||
return response({
|
||||
status: 302,
|
||||
headers: { location: "https://other-provider.example/search" }
|
||||
});
|
||||
}
|
||||
});
|
||||
await assert.rejects(
|
||||
() => crossOrigin.search("test", { ...providerSettings(), provider_api_key: "secret" }),
|
||||
/cross_origin_provider_redirect/i
|
||||
);
|
||||
assert.equal(calls, 1);
|
||||
}
|
||||
|
||||
function verifyFormatting() {
|
||||
const rows = [
|
||||
result("Official docs", "https://docs.example.com/guide", "A detailed official answer for the requested subject.", "documentation"),
|
||||
result("Community post", "https://community.example.com/post", "A secondary explanation with useful context.", "web"),
|
||||
result("Recent update", "https://news.example.com/update", "A recently published update.", "news", "2026-06-12")
|
||||
];
|
||||
const settings = {
|
||||
max_results: 5,
|
||||
show_source_links: true,
|
||||
twitch_output_chars: 180,
|
||||
discord_output_chars: 700,
|
||||
webui_output_chars: 3000,
|
||||
other_output_chars: 500
|
||||
};
|
||||
const fact = formatResults(rows, { reason: "fact_lookup", origin: "twitch", settings });
|
||||
const resource = formatResults(rows, { reason: "resource_lookup", origin: "discord", settings });
|
||||
const webui = formatResults(rows, { reason: "documentation_lookup", origin: "webui", settings });
|
||||
assert(fact.condensed_text.length <= 180);
|
||||
assert(fact.results.length <= 2);
|
||||
assert(resource.condensed_text.length <= 700);
|
||||
assert(webui.condensed_text.length > fact.condensed_text.length);
|
||||
assert.equal(webui.results[0].source_type, "documentation");
|
||||
}
|
||||
|
||||
async function verifySearchFlow() {
|
||||
const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-search-"));
|
||||
const settings = {
|
||||
...providerSettings(),
|
||||
enabled: true,
|
||||
policy_mode: "whitelist",
|
||||
url_rules: ["*.example.com/*"],
|
||||
allowed_origins: ["webui", "discord", "twitch"],
|
||||
cache_ttl_seconds: 60
|
||||
};
|
||||
writeSettings(root, settings);
|
||||
let calls = 0;
|
||||
const provider = {
|
||||
resolveHost: PUBLIC_DNS,
|
||||
async search() {
|
||||
calls += 1;
|
||||
return [
|
||||
result("<b>Verified fact</b>", "https://docs.example.com/fact", "The <em>answer</em> is current.", "documentation"),
|
||||
result("Blocked local", "http://127.0.0.1/private", "Must never be returned.", "web")
|
||||
];
|
||||
},
|
||||
async fetchPage() {
|
||||
return { url: "https://docs.example.com/fact", text: "Expanded public page text." };
|
||||
}
|
||||
};
|
||||
const tool = new WebSearchTool({ dataDir: root, provider });
|
||||
const first = await tool.run({
|
||||
query: "current fact",
|
||||
reason: "fact_lookup",
|
||||
user: { id: "user-1" },
|
||||
ctx: { origin: "webui", server_id: "server-1" }
|
||||
});
|
||||
assert.equal(first.status, "ok");
|
||||
assert.equal(first.result_count, 1);
|
||||
assert.equal(first.results[0].title, "Verified fact");
|
||||
assert.equal(first.results.some((entry) => entry.url?.includes("127.0.0.1")), false);
|
||||
const cached = await tool.run({
|
||||
query: "current fact",
|
||||
reason: "fact_lookup",
|
||||
user: { id: "user-1" },
|
||||
ctx: { origin: "webui", server_id: "server-1" }
|
||||
});
|
||||
assert.equal(cached.cache_hit, true);
|
||||
assert.equal(calls, 1);
|
||||
|
||||
const twitch = await tool.run({
|
||||
query: "current fact twitch",
|
||||
reason: "resource_lookup",
|
||||
user: { id: "user-1" },
|
||||
ctx: { origin: "twitch", channel_id: "channel-1" }
|
||||
});
|
||||
assert(twitch.condensed_text.length <= readSettings(root).twitch_output_chars);
|
||||
assert.equal(calls, 2);
|
||||
|
||||
writeSettings(root, { ...readSettings(root), allowed_origins: ["webui"] });
|
||||
const blockedOrigin = await tool.run({
|
||||
query: "current fact",
|
||||
reason: "fact_lookup",
|
||||
origin: "webui",
|
||||
user: { id: "user-1" },
|
||||
ctx: { origin: "discord" }
|
||||
});
|
||||
assert.equal(blockedOrigin.status, "blocked");
|
||||
assert.equal(blockedOrigin.blocked_reason, "origin_not_allowed");
|
||||
|
||||
const failing = new WebSearchTool({
|
||||
dataDir: root,
|
||||
provider: {
|
||||
resolveHost: PUBLIC_DNS,
|
||||
async search() { throw new Error("provider secret https://provider.example/api?token=secret"); }
|
||||
}
|
||||
});
|
||||
const failed = await failing.run({
|
||||
query: "failure",
|
||||
reason: "general_lookup",
|
||||
user: { id: "user-2" },
|
||||
ctx: { origin: "webui" }
|
||||
});
|
||||
assert.equal(failed.status, "unavailable");
|
||||
assert.equal(failed.error.includes("provider.example"), false);
|
||||
|
||||
const audit = fs.readFileSync(path.join(root, "audit.jsonl"), "utf8")
|
||||
.trim().split(/\r?\n/).map(JSON.parse);
|
||||
assert(audit.some((entry) =>
|
||||
entry.query === "current fact" &&
|
||||
entry.actor === "user-1" &&
|
||||
entry.origin === "webui" &&
|
||||
typeof entry.timing_ms === "number"
|
||||
));
|
||||
fs.rmSync(root, { recursive: true, force: true });
|
||||
}
|
||||
|
||||
function verifyRegistrationAvailability() {
|
||||
function verifyRegistrationAndPrompt() {
|
||||
const root = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-web-register-"));
|
||||
assert.equal(plugin.checkAvailability({ paths: { data: root } }).available, false);
|
||||
writeSettings(root, { ...providerSettings(), enabled: true });
|
||||
writeSettings(root, testSettings());
|
||||
assert.equal(plugin.checkAvailability({ paths: { data: root } }).available, true);
|
||||
const definitions = [];
|
||||
plugin.register({
|
||||
paths: { data: root },
|
||||
registerTool: (definition) => definitions.push(definition)
|
||||
});
|
||||
assert.equal(definitions.length, 1);
|
||||
assert.equal(definitions[0].tool_id, "lumi_ai_web_search.search");
|
||||
assert.equal(definitions[0].read_only, true);
|
||||
assert.equal(definitions[0].origin_check({
|
||||
assert.deepEqual(definitions.map((definition) => definition.tool_id), [
|
||||
"web_search.search",
|
||||
"web_search.fetch_url",
|
||||
"web_search.summarize_url"
|
||||
]);
|
||||
assert(definitions.every((definition) => definition.read_only === true));
|
||||
assert(definitions[0].origin_check({
|
||||
context: { origin: "discord", permission_context: { webui_actions_allowed: false } }
|
||||
}), true);
|
||||
assert.equal(definitions[0].permission_check({
|
||||
user: { id: "user" },
|
||||
context: { origin: "discord", permission_context: { webui_actions_allowed: false } }
|
||||
}), true);
|
||||
}));
|
||||
const registry = new ToolRegistry(() => {});
|
||||
const metadata = require("../tool_info.json");
|
||||
const { registerManagedTool } = require("../../lumi_ai/backend/tool_registry");
|
||||
for (const definition of definitions) registerManagedTool(registry, metadata, definition);
|
||||
const exposed = registry.inspect({
|
||||
role: "admin",
|
||||
user: { id: "admin", isAdmin: true },
|
||||
context: { origin: "webui", permission_context: { webui_actions_allowed: true } }
|
||||
}).exposed;
|
||||
const prompt = buildPrompt({
|
||||
config: { support_scope: {}, instructions: { roleplay_intensity: 0 } },
|
||||
role: "admin",
|
||||
message: "Find the latest release",
|
||||
tools: exposed,
|
||||
originContext: { origin: "webui" }
|
||||
});
|
||||
assert(prompt.includes('"tool_id":"web_search.search"'));
|
||||
assert(prompt.includes("WEB SEARCH DECISION RULES"));
|
||||
assert(prompt.includes('{"type":"tool_call","tool":"tool_id","arguments":{}}'));
|
||||
|
||||
writeSettings(root, testSettings({ enable_search: false }));
|
||||
const fetchOnly = [];
|
||||
plugin.register({
|
||||
paths: { data: root },
|
||||
registerTool: (definition) => fetchOnly.push(definition)
|
||||
});
|
||||
assert.equal(fetchOnly.some((definition) => definition.tool_id === "web_search.search"), false);
|
||||
assert.equal(fetchOnly.some((definition) => definition.tool_id === "web_search.fetch_url"), true);
|
||||
|
||||
writeSettings(root, testSettings({
|
||||
provider: "external_json",
|
||||
external_provider_endpoint: ""
|
||||
}));
|
||||
const externalWithoutEndpoint = [];
|
||||
plugin.register({
|
||||
paths: { data: root },
|
||||
registerTool: (definition) => externalWithoutEndpoint.push(definition)
|
||||
});
|
||||
assert.equal(externalWithoutEndpoint.some((definition) => definition.tool_id === "web_search.search"), false);
|
||||
assert.equal(externalWithoutEndpoint.some((definition) => definition.tool_id === "web_search.fetch_url"), true);
|
||||
fs.rmSync(root, { recursive: true, force: true });
|
||||
}
|
||||
|
||||
function verifyStaticFiles() {
|
||||
const root = path.resolve(__dirname, "..");
|
||||
const metadata = JSON.parse(fs.readFileSync(path.join(root, "tool_info.json"), "utf8"));
|
||||
assert.equal(metadata.tool_id, "lumi_ai_web_search");
|
||||
assert.equal(metadata.settings_schema.policy_mode.default, "whitelist");
|
||||
assert(fs.existsSync(path.join(root, "readme.md")));
|
||||
assert(fs.readFileSync(path.join(root, "views", "settings-modal.ejs"), "utf8").includes("settings_schema"));
|
||||
const metadata = require("../tool_info.json");
|
||||
assert.equal(metadata.version, "1.1.0");
|
||||
assert.equal(metadata.settings_schema.provider.default, "lumi_search_broker");
|
||||
assert.equal(metadata.settings_schema.policy_mode.default, "blacklist");
|
||||
assert.equal(metadata.default_enabled, true);
|
||||
assert(fs.existsSync(path.join(root, "backend", "lumi_search_broker.js")));
|
||||
assert(fs.existsSync(path.join(root, "backend", "page_fetcher.js")));
|
||||
assert(fs.existsSync(path.join(root, "public", "settings-modal.css")));
|
||||
assert(fs.readFileSync(path.join(root, "readme.md"), "utf8").includes("without requiring an API key"));
|
||||
}
|
||||
|
||||
function providerSettings() {
|
||||
function testSettings(overrides = {}) {
|
||||
return {
|
||||
...Object.fromEntries(
|
||||
Object.entries(require("../tool_info.json").settings_schema).map(([key, field]) => [key, structuredClone(field.default)])
|
||||
),
|
||||
provider_endpoint: "https://search.example.net/search",
|
||||
enabled: true,
|
||||
allowed_origins: ["webui", "discord", "twitch"],
|
||||
url_rules: ["*.example.com/*"]
|
||||
...defaults(),
|
||||
...overrides,
|
||||
provider: overrides.provider || "lumi_search_broker",
|
||||
enable_search: overrides.enable_search ?? true,
|
||||
enable_fetch_url: overrides.enable_fetch_url ?? true,
|
||||
enable_summarize_url: overrides.enable_summarize_url ?? true,
|
||||
policy_mode: overrides.policy_mode || "blacklist",
|
||||
allowed_origins: overrides.allowed_origins || ["webui", "discord", "twitch"]
|
||||
};
|
||||
}
|
||||
|
||||
function result(title, url, snippet, sourceType, date = null) {
|
||||
return { title, url, snippet, source_type: sourceType, date, relevance_score: 0.9 };
|
||||
function result(title, url, snippet, source, date = null) {
|
||||
return {
|
||||
title,
|
||||
url,
|
||||
domain: new URL(url).hostname,
|
||||
snippet,
|
||||
date,
|
||||
rank: 1,
|
||||
source,
|
||||
raw_source_id: "test",
|
||||
relevance_score: 0.9
|
||||
};
|
||||
}
|
||||
|
||||
function response({ status = 200, headers = {}, body = "" }) {
|
||||
const normalized = Object.fromEntries(Object.entries(headers).map(([key, value]) => [key.toLowerCase(), value]));
|
||||
function response({ status = 200, headers = {}, body = Buffer.from("") }) {
|
||||
const normalized = Object.fromEntries(
|
||||
Object.entries(headers).map(([key, value]) => [key.toLowerCase(), value])
|
||||
);
|
||||
const buffer = Buffer.from(body);
|
||||
return {
|
||||
ok: status >= 200 && status < 300,
|
||||
status,
|
||||
headers: {
|
||||
get(name) { return normalized[String(name).toLowerCase()] || null; }
|
||||
},
|
||||
async arrayBuffer() { return Buffer.from(body); }
|
||||
async arrayBuffer() { return buffer; }
|
||||
};
|
||||
}
|
||||
|
||||
@ -321,7 +547,7 @@ function copyDirectory(source, destination, ignored = new Set()) {
|
||||
if (ignored.has(entry.name)) continue;
|
||||
const from = path.join(source, entry.name);
|
||||
const to = path.join(destination, entry.name);
|
||||
if (entry.isDirectory()) copyDirectory(from, to);
|
||||
if (entry.isDirectory()) copyDirectory(from, to, ignored);
|
||||
else if (entry.isFile()) fs.copyFileSync(from, to);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,31 +1,51 @@
|
||||
{
|
||||
"tool_id": "lumi_ai_web_search",
|
||||
"tool_namespace": "web_search",
|
||||
"name": "lumi_ai_web_search",
|
||||
"display_name": "Lumi AI Web Search",
|
||||
"version": "1.0.1",
|
||||
"description": "Controlled current-information search for Lumi Assistant with URL policy, origin budgets, and source normalization.",
|
||||
"version": "1.1.0",
|
||||
"description": "Self-contained, policy-controlled public web search and safe URL extraction for Lumi Assistant. The default provider requires no API key or external service setup.",
|
||||
"scope": {
|
||||
"label": "Assistant web lookup",
|
||||
"label": "Assistant public web lookup",
|
||||
"required_role": "user"
|
||||
},
|
||||
"permissions": {
|
||||
"required_role": "user",
|
||||
"permission": "lumi_ai_web_search.search"
|
||||
"permission": "lumi_ai_web_search.use"
|
||||
},
|
||||
"capabilities": [
|
||||
"Current web search through an administrator-configured JSON search provider",
|
||||
"No-key public web search through the tool-contained Lumi search broker",
|
||||
"Safe explicit URL fetch and readable-text extraction",
|
||||
"Condensed URL summarization",
|
||||
"Whitelist or blacklist URL policy with wildcard rules",
|
||||
"Optional bounded page excerpt fetching",
|
||||
"Context-aware condensed results for WebUI and chat platforms"
|
||||
"Context-aware output budgets, caching, rate limits, diagnostics, and audits"
|
||||
],
|
||||
"registered_capabilities": [
|
||||
{
|
||||
"tool_id": "web_search.search",
|
||||
"enabled_setting": "enable_search",
|
||||
"description": "Search the public web for current or external information."
|
||||
},
|
||||
{
|
||||
"tool_id": "web_search.fetch_url",
|
||||
"enabled_setting": "enable_fetch_url",
|
||||
"description": "Fetch and extract readable information from an explicit public URL."
|
||||
},
|
||||
{
|
||||
"tool_id": "web_search.summarize_url",
|
||||
"enabled_setting": "enable_summarize_url",
|
||||
"description": "Fetch an explicit public URL and return compact structured content for summarization."
|
||||
}
|
||||
],
|
||||
"limitations": [
|
||||
"Requires an administrator-configured search provider endpoint",
|
||||
"Does not provide browser automation or execute page scripts",
|
||||
"Private, local, link-local, metadata, and non-HTTP targets are always blocked",
|
||||
"Search quality and freshness depend on the configured provider"
|
||||
"Search sources may rate-limit, block automated requests, or change markup",
|
||||
"No JavaScript execution or browser automation is enabled by default",
|
||||
"Private, local, link-local, metadata, credential-bearing, and non-HTTP targets are always blocked",
|
||||
"Search quality and freshness depend on public source availability"
|
||||
],
|
||||
"tool_type": "web_search",
|
||||
"owning_plugin": "lumi_ai_web_search",
|
||||
"default_enabled": true,
|
||||
"entrypoints": {
|
||||
"backend": "index.js"
|
||||
},
|
||||
@ -35,9 +55,28 @@
|
||||
"views": [
|
||||
"views/settings-modal.ejs"
|
||||
],
|
||||
"settings_ui": {
|
||||
"view": "views/settings-modal.ejs",
|
||||
"scripts": [
|
||||
"settings-modal.js"
|
||||
],
|
||||
"styles": [
|
||||
"settings-modal.css"
|
||||
]
|
||||
},
|
||||
"settings_migrator": "backend/settings.js",
|
||||
"diagnostic_settings": [
|
||||
"provider",
|
||||
"policy_mode",
|
||||
"allowed_origins",
|
||||
"enable_search",
|
||||
"enable_fetch_url",
|
||||
"enable_summarize_url"
|
||||
],
|
||||
"status_file": "data/status.json",
|
||||
"dependencies": [],
|
||||
"minimum_lumi_version": "0.1.0",
|
||||
"minimum_lumi_ai_version": "0.8.0",
|
||||
"minimum_lumi_ai_version": "0.8.1",
|
||||
"required_plugins": [
|
||||
"core",
|
||||
"lumi_ai"
|
||||
@ -47,36 +86,63 @@
|
||||
"confirmation_required": false,
|
||||
"data_paths": [
|
||||
"data/settings.json",
|
||||
"data/audit.jsonl"
|
||||
"data/status.json",
|
||||
"data/audit.jsonl",
|
||||
"data/cache"
|
||||
],
|
||||
"preserve_on_update": [
|
||||
"data"
|
||||
],
|
||||
"update_notes": "Registers as a read-only lookup tool with context-aware prompt exposure for Lumi AI 0.8.0.",
|
||||
"update_notes": "Adds a self-contained no-key search broker plus independent search, fetch URL, and summarize URL capabilities.",
|
||||
"author": "Lumi",
|
||||
"homepage": "https://git.rolfsvaag.no/Rolfsvaag_Datateknikk/Lumi",
|
||||
"repository_path": "plugins/lumi_ai_web_search",
|
||||
"settings_schema": {
|
||||
"enabled": {
|
||||
"provider": {
|
||||
"type": "enum",
|
||||
"label": "Provider",
|
||||
"description": "The default Lumi search broker is contained in this plugin and requires no API key or separate service.",
|
||||
"options": [
|
||||
"lumi_search_broker",
|
||||
"external_json"
|
||||
],
|
||||
"default": "lumi_search_broker"
|
||||
},
|
||||
"enable_search": {
|
||||
"type": "boolean",
|
||||
"label": "Web search enabled",
|
||||
"description": "The tool remains unavailable to Lumi Assistant until this and the parent Tools enable state are both on.",
|
||||
"label": "Enable web search",
|
||||
"default": true
|
||||
},
|
||||
"enable_fetch_url": {
|
||||
"type": "boolean",
|
||||
"label": "Enable explicit URL fetch",
|
||||
"default": true
|
||||
},
|
||||
"enable_summarize_url": {
|
||||
"type": "boolean",
|
||||
"label": "Enable URL summarization",
|
||||
"default": true
|
||||
},
|
||||
"enable_headless_browser_fallback": {
|
||||
"type": "boolean",
|
||||
"label": "Headless browser fallback",
|
||||
"description": "Reserved for a future Lumi-managed restricted browser runtime. HTTP mode works without it.",
|
||||
"default": false
|
||||
},
|
||||
"policy_mode": {
|
||||
"type": "enum",
|
||||
"label": "Search policy",
|
||||
"description": "Whitelist permits only matching result/page URLs. Blacklist permits URLs except matching rules.",
|
||||
"label": "URL policy",
|
||||
"description": "Blacklist allows safe public URLs except matching rules. Whitelist permits only matching rules.",
|
||||
"options": [
|
||||
"whitelist",
|
||||
"blacklist"
|
||||
"blacklist",
|
||||
"whitelist"
|
||||
],
|
||||
"default": "whitelist"
|
||||
"default": "blacklist"
|
||||
},
|
||||
"url_rules": {
|
||||
"type": "string_list",
|
||||
"label": "URL policy rules",
|
||||
"description": "One domain, URL, path prefix, or * wildcard pattern per line. Example: docs.example.com or *.example.com/docs/*.",
|
||||
"description": "One domain, subdomain, URL, path prefix, or * wildcard pattern per line.",
|
||||
"default": [],
|
||||
"rows": 6
|
||||
},
|
||||
@ -94,14 +160,42 @@
|
||||
"minimum": 1000,
|
||||
"maximum": 30000
|
||||
},
|
||||
"fetch_timeout_ms": {
|
||||
"type": "integer",
|
||||
"label": "Page fetch timeout (ms)",
|
||||
"default": 10000,
|
||||
"minimum": 1000,
|
||||
"maximum": 30000
|
||||
},
|
||||
"max_fetch_bytes": {
|
||||
"type": "integer",
|
||||
"label": "Maximum fetched bytes",
|
||||
"default": 1048576,
|
||||
"minimum": 65536,
|
||||
"maximum": 4194304
|
||||
},
|
||||
"max_extracted_chars": {
|
||||
"type": "integer",
|
||||
"label": "Maximum extracted characters",
|
||||
"default": 12000,
|
||||
"minimum": 1000,
|
||||
"maximum": 50000
|
||||
},
|
||||
"max_redirects": {
|
||||
"type": "integer",
|
||||
"label": "Maximum redirects",
|
||||
"default": 3,
|
||||
"minimum": 0,
|
||||
"maximum": 8
|
||||
},
|
||||
"cache_ttl_seconds": {
|
||||
"type": "integer",
|
||||
"label": "Result cache TTL (seconds)",
|
||||
"default": 300,
|
||||
"label": "Cache TTL (seconds)",
|
||||
"default": 900,
|
||||
"minimum": 0,
|
||||
"maximum": 3600
|
||||
"maximum": 86400
|
||||
},
|
||||
"safe_search": {
|
||||
"safe_search_level": {
|
||||
"type": "enum",
|
||||
"label": "Safe search",
|
||||
"options": [
|
||||
@ -109,7 +203,7 @@
|
||||
"moderate",
|
||||
"strict"
|
||||
],
|
||||
"default": "strict"
|
||||
"default": "moderate"
|
||||
},
|
||||
"allowed_origins": {
|
||||
"type": "multi_select",
|
||||
@ -123,13 +217,30 @@
|
||||
"other"
|
||||
],
|
||||
"default": [
|
||||
"webui"
|
||||
"webui",
|
||||
"discord",
|
||||
"twitch"
|
||||
]
|
||||
},
|
||||
"show_source_links_webui": {
|
||||
"type": "boolean",
|
||||
"label": "Show source links in WebUI",
|
||||
"default": true
|
||||
},
|
||||
"show_source_links_discord": {
|
||||
"type": "boolean",
|
||||
"label": "Show source links in Discord",
|
||||
"default": true
|
||||
},
|
||||
"show_source_links_twitch": {
|
||||
"type": "boolean",
|
||||
"label": "Show source links in Twitch",
|
||||
"default": false
|
||||
},
|
||||
"webui_output_chars": {
|
||||
"type": "integer",
|
||||
"label": "WebUI output budget",
|
||||
"default": 4000,
|
||||
"default": 2500,
|
||||
"minimum": 300,
|
||||
"maximum": 12000
|
||||
},
|
||||
@ -143,90 +254,91 @@
|
||||
"twitch_output_chars": {
|
||||
"type": "integer",
|
||||
"label": "Twitch output budget",
|
||||
"default": 350,
|
||||
"default": 450,
|
||||
"minimum": 120,
|
||||
"maximum": 1000
|
||||
},
|
||||
"youtube_output_chars": {
|
||||
"type": "integer",
|
||||
"label": "YouTube output budget",
|
||||
"default": 500,
|
||||
"default": 1200,
|
||||
"minimum": 120,
|
||||
"maximum": 1500
|
||||
"maximum": 3000
|
||||
},
|
||||
"kick_output_chars": {
|
||||
"type": "integer",
|
||||
"label": "Kick output budget",
|
||||
"default": 350,
|
||||
"default": 450,
|
||||
"minimum": 120,
|
||||
"maximum": 1000
|
||||
},
|
||||
"other_output_chars": {
|
||||
"type": "integer",
|
||||
"label": "Other output budget",
|
||||
"default": 500,
|
||||
"default": 700,
|
||||
"minimum": 120,
|
||||
"maximum": 2000
|
||||
"maximum": 3000
|
||||
},
|
||||
"provider_adapter": {
|
||||
"per_user_per_minute": {
|
||||
"type": "integer",
|
||||
"label": "Requests per user/minute",
|
||||
"default": 6,
|
||||
"minimum": 1,
|
||||
"maximum": 60
|
||||
},
|
||||
"per_origin_per_minute": {
|
||||
"type": "integer",
|
||||
"label": "Requests per origin/minute",
|
||||
"default": 30,
|
||||
"minimum": 1,
|
||||
"maximum": 300
|
||||
},
|
||||
"per_server_per_minute": {
|
||||
"type": "integer",
|
||||
"label": "Requests per server/minute",
|
||||
"default": 20,
|
||||
"minimum": 1,
|
||||
"maximum": 300
|
||||
},
|
||||
"external_provider_adapter": {
|
||||
"type": "enum",
|
||||
"label": "Provider adapter",
|
||||
"label": "Advanced external adapter",
|
||||
"options": [
|
||||
"searxng_json",
|
||||
"generic_json"
|
||||
],
|
||||
"default": "searxng_json"
|
||||
},
|
||||
"provider_endpoint": {
|
||||
"external_provider_endpoint": {
|
||||
"type": "string",
|
||||
"label": "Provider endpoint",
|
||||
"description": "HTTPS is recommended. The endpoint must be publicly routable and return JSON.",
|
||||
"label": "Advanced external endpoint",
|
||||
"description": "Optional. Not required by the default Lumi search broker.",
|
||||
"default": ""
|
||||
},
|
||||
"provider_api_key": {
|
||||
"external_provider_api_key": {
|
||||
"type": "string",
|
||||
"label": "Provider API key",
|
||||
"description": "Stored in the plugin data directory and never returned by the settings API.",
|
||||
"label": "Advanced external API key",
|
||||
"default": "",
|
||||
"secret": true
|
||||
},
|
||||
"provider_api_key_header": {
|
||||
"external_provider_api_key_header": {
|
||||
"type": "enum",
|
||||
"label": "API key header",
|
||||
"label": "External API key header",
|
||||
"options": [
|
||||
"X-API-Key",
|
||||
"Authorization"
|
||||
],
|
||||
"default": "X-API-Key"
|
||||
},
|
||||
"provider_api_key_prefix": {
|
||||
"external_provider_api_key_prefix": {
|
||||
"type": "string",
|
||||
"label": "API key prefix",
|
||||
"description": "For example: Bearer",
|
||||
"label": "External API key prefix",
|
||||
"default": ""
|
||||
},
|
||||
"provider_query_parameter": {
|
||||
"external_provider_query_parameter": {
|
||||
"type": "string",
|
||||
"label": "Query parameter",
|
||||
"label": "External query parameter",
|
||||
"default": "q"
|
||||
},
|
||||
"show_source_links": {
|
||||
"type": "boolean",
|
||||
"label": "Show source links",
|
||||
"default": true
|
||||
},
|
||||
"allow_full_page_fetch": {
|
||||
"type": "boolean",
|
||||
"label": "Allow page excerpts",
|
||||
"description": "Allows bounded text extraction after search discovery. URL policy and redirect checks still apply.",
|
||||
"default": false
|
||||
},
|
||||
"requests_per_minute": {
|
||||
"type": "integer",
|
||||
"label": "Requests per actor/minute",
|
||||
"default": 6,
|
||||
"minimum": 1,
|
||||
"maximum": 60
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,32 @@
|
||||
<p class="hint">
|
||||
Lumi AI renders this tool's settings from <code>tool_info.json</code> <code>settings_schema</code>. Provider secrets are write-only,
|
||||
and policy changes apply to WebUI and platform tool calls after saving.
|
||||
</p>
|
||||
<section class="lumi-web-search-status" data-web-search-settings>
|
||||
<div class="lumi-web-search-status-grid">
|
||||
<div><span>Provider</span><strong data-web-status="provider">lumi_search_broker</strong></div>
|
||||
<div><span>Provider health</span><strong data-web-status="provider_health">Not tested</strong></div>
|
||||
<div><span>Policy</span><strong data-web-status="policy_mode">blacklist</strong></div>
|
||||
<div><span>Last success</span><strong data-web-status="last_success_at">Never</strong></div>
|
||||
<div><span>Last error</span><strong data-web-status="last_error">None</strong></div>
|
||||
<div><span>Cache</span><strong data-web-status="cache">0 entries</strong></div>
|
||||
</div>
|
||||
<div class="lumi-web-search-capabilities" data-web-capabilities></div>
|
||||
<details>
|
||||
<summary>Recent web tool calls</summary>
|
||||
<div class="lumi-web-search-recent" data-web-recent>No calls recorded.</div>
|
||||
</details>
|
||||
<form class="lumi-web-search-test" data-web-search-test>
|
||||
<label>
|
||||
Test through Lumi Assistant
|
||||
<input name="query" type="text" value="Find the latest public Lumi Bot information" maxlength="500" required />
|
||||
</label>
|
||||
<label>
|
||||
Origin
|
||||
<select name="origin">
|
||||
<option value="webui">WebUI</option>
|
||||
<option value="discord">Discord</option>
|
||||
<option value="twitch">Twitch</option>
|
||||
</select>
|
||||
</label>
|
||||
<button class="button subtle" type="submit">Run normal tool pipeline</button>
|
||||
</form>
|
||||
<pre class="lumi-web-search-test-output" data-web-search-test-output hidden></pre>
|
||||
<p class="hint">The default Lumi search broker requires no API key or separately installed search service. Trusted origin and user context are supplied by Lumi, not by model arguments.</p>
|
||||
</section>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user