// File-viewer metadata (not part of the program): 168 lines, 6.6 KiB, JavaScript
const { WebSearchTool } = require("./backend/search_tool");
|
|
const { normalizeOrigin } = require("./backend/result_formatter");
|
|
const {
|
|
capabilityAvailable,
|
|
capabilityEnabled,
|
|
ensureSettings,
|
|
readSettings,
|
|
readStatus
|
|
} = require("./backend/settings");
|
|
|
|
/**
 * Schema fragment for the `reason` argument shared by the web-search tools.
 *
 * The value is a string restricted to the categories listed in `enum`. It is
 * marked required here; tool definitions that want it optional copy this
 * object and override `required`.
 */
const REASON_SCHEMA = {
  type: "string",
  required: true,
  enum: [
    "fact_lookup",
    "resource_lookup",
    "troubleshooting",
    "documentation_lookup",
    "news_or_recent",
    "general_lookup"
  ]
};
|
|
|
|
/**
 * Reports whether any web-search capability can be offered.
 *
 * @param {object} options
 * @param {object} options.paths - Plugin paths; `paths.data` is passed to `ensureSettings`.
 * @returns {{available: boolean, message: string}} `available` is false only
 *   when none of the three capabilities passes `capabilityAvailable`.
 */
module.exports.checkAvailability = ({ paths }) => {
  const settings = ensureSettings(paths.data);

  const availableCapabilities = ["search", "fetch_url", "summarize_url"]
    .filter((capability) => capabilityAvailable(settings, capability));

  if (availableCapabilities.length === 0) {
    return {
      available: false,
      message: "All web-search capabilities are disabled in tool settings."
    };
  }

  // Search is switched on for the external JSON provider, but no endpoint has
  // been configured; still report available, with a message pointing at the gap.
  const searchLacksEndpoint =
    settings.provider === "external_json" &&
    settings.enable_search &&
    !settings.external_provider_endpoint;

  if (searchLacksEndpoint) {
    return {
      available: true,
      message: "Explicit URL tools are available. Search requires the selected external endpoint or Lumi search broker."
    };
  }

  const usesBroker = settings.provider === "lumi_search_broker";
  return {
    available: true,
    message: usesBroker
      ? "Self-contained Lumi search broker is ready; no external provider setup is required."
      : "Optional external JSON provider is configured."
  };
};
|
|
|
|
/**
 * Registers the web-search tools (`search`, `fetch_url`, `summarize_url`)
 * with the host through `registerTool`.
 *
 * A tool is registered only when `capabilityAvailable` accepts its capability
 * for the settings read at registration time. Every registered tool shares
 * the same role, audit, risk and permission metadata; only the per-tool
 * definition (id, texts, schema, handler) differs.
 *
 * @param {object} options
 * @param {Function} options.registerTool - Host callback receiving one tool definition object.
 * @param {object} options.paths - Plugin paths; `paths.data` is the settings/data directory.
 * @returns {void}
 */
module.exports.register = ({ registerTool, paths }) => {
  ensureSettings(paths.data);
  const tool = new WebSearchTool({ dataDir: paths.data });

  // Read settings once for all three registrations instead of once per tool.
  const settings = readSettings(paths.data);

  // Named distinctly from the exported `register` to avoid shadowing confusion.
  const registerCapability = (capability, definition) => {
    if (!capabilityAvailable(settings, capability)) return;

    registerTool({
      ...definition,
      required_role: "user",
      required_permission: `lumi_ai_web_search.${capability}`,
      audit_category: "web_search",
      confirmation_required: false,
      risk_level: "low",
      read_only: true,
      // The checks below re-read settings at call time via originAllowed.
      origin_check: ({ context }) => originAllowed(paths.data, context, capability),
      prompt_permission_check: ({ user }) => Boolean(user?.id),
      // Requires both an identified user and an allowed origin.
      permission_check: ({ user, context }) =>
        Boolean(user?.id) && originAllowed(paths.data, context, capability)
    });
  };

  registerCapability("search", {
    tool_id: "web_search.search",
    display_name: "Search the public web",
    description: "Search current public web information when verified Lumi-local context is insufficient or the user requests current, external, sourced, or verified facts.",
    use_cases: [
      "Current, recent, niche, or likely outdated public facts",
      "Verification, citations, comparisons, changelogs, releases, policies, and public documentation",
      "Current third-party information about Twitch, Discord, YouTube, Kick, Throne, Gitea, llama.cpp, Hugging Face, hardware, software, APIs, and services"
    ],
    output_expectations: "Returns compact policy-filtered structured results. Use only returned facts and URLs in the final answer; state uncertainty when results are incomplete.",
    schema: {
      query: { type: "string", required: true },
      reason: REASON_SCHEMA,
      freshness: { type: "string", required: false },
      max_results: { type: "integer", required: false },
      target_url: { type: "string", required: false }
    },
    workflow_handler: ({ arguments: args, user, ctx }) => tool.search({ ...args, user, ctx })
  });

  registerCapability("fetch_url", {
    tool_id: "web_search.fetch_url",
    display_name: "Fetch a public URL",
    description: "Safely fetch and extract readable information from an explicit public HTTP or HTTPS URL supplied by the user.",
    use_cases: [
      "Read an explicit public URL without search discovery",
      "Inspect public documentation, release notes, articles, or status pages",
      "Verify the current contents of a user-supplied page"
    ],
    output_expectations: "Returns sanitized page metadata and bounded readable text. Never claim JavaScript execution or browser interaction.",
    schema: {
      url: { type: "string", required: true },
      reason: { ...REASON_SCHEMA, required: false }
    },
    workflow_handler: ({ arguments: args, user, ctx }) => tool.fetchUrl({ ...args, user, ctx })
  });

  registerCapability("summarize_url", {
    tool_id: "web_search.summarize_url",
    display_name: "Summarize a public URL",
    description: "Safely fetch an explicit public HTTP or HTTPS URL and return compact extracted content for a concise summary.",
    use_cases: [
      "Summarize a user-supplied public article, documentation page, or release note",
      "Condense a public page without discovering unrelated search results"
    ],
    output_expectations: "Returns bounded sanitized source text and metadata. The assistant writes the final concise summary using only that content.",
    schema: {
      url: { type: "string", required: true },
      reason: { ...REASON_SCHEMA, required: false }
    },
    workflow_handler: ({ arguments: args, user, ctx }) => tool.summarizeUrl({ ...args, user, ctx })
  });
};
|
|
|
|
/**
 * Builds a diagnostics snapshot from the stored settings and status.
 *
 * @param {object} options
 * @param {object} options.paths - Plugin paths; `paths.data` is passed to
 *   `readSettings` and `readStatus`.
 * @returns {object} Provider, policy and origin settings, per-capability
 *   enabled/available flags, last success/error, cache summary, the stored
 *   metrics plus derived averages and cache hit rate, and up to ten entries
 *   from `status.recent`.
 */
module.exports.diagnostics = ({ paths }) => {
  const settings = readSettings(paths.data);
  const status = readStatus(paths.data);
  const metrics = status.metrics || {};

  // Coerce a stored counter to a finite number; anything else counts as 0 so
  // the derived values below can never become NaN or Infinity.
  const toNumber = (value) => {
    const numeric = Number(value);
    return Number.isFinite(numeric) ? numeric : 0;
  };

  // Rounded mean duration; 0 when there were no operations to average over.
  const averageMs = (totalMs, count) =>
    (count > 0 ? Math.round(toNumber(totalMs) / count) : 0);

  const searches = toNumber(metrics.searches);
  const fetches = toNumber(metrics.fetches);
  const lookups = searches + fetches;

  const capabilities = ["search", "fetch_url", "summarize_url"].map((capability) => ({
    tool_id: `web_search.${capability}`,
    enabled: capabilityEnabled(settings, capability),
    available: capabilityAvailable(settings, capability)
  }));

  return {
    provider: settings.provider,
    provider_health: status.provider_health || "not_tested",
    policy_mode: settings.policy_mode,
    allowed_origins: settings.allowed_origins,
    capabilities,
    last_success_at: status.last_success_at || null,
    last_error: status.last_error || null,
    cache: status.cache || { entries: 0, bytes: 0 },
    metrics: {
      ...metrics,
      average_search_ms: averageMs(metrics.total_search_ms, searches),
      average_fetch_ms: averageMs(metrics.total_fetch_ms, fetches),
      // Percentage with one decimal place: round(ratio * 1000) / 10.
      cache_hit_rate: lookups > 0
        ? Math.round((toNumber(metrics.cache_hits) / lookups) * 1000) / 10
        : 0
    },
    recent: Array.isArray(status.recent) ? status.recent.slice(0, 10) : []
  };
};
|
|
|
|
/**
 * Decides whether a capability may be used from the caller's origin.
 *
 * Settings are read on every call. The origin is taken from `context.origin`,
 * then `context.platform`, then the literal "other", and passed through
 * `normalizeOrigin` before being looked up in `settings.allowed_origins`.
 *
 * @param {string} dataDir - Directory handed to `readSettings`.
 * @param {object} [context] - Call context; may be missing.
 * @param {string} capability - Capability name checked with `capabilityAvailable`.
 * @returns {boolean} True only when the capability is available and the
 *   normalized origin is listed in `allowed_origins`.
 */
function originAllowed(dataDir, context, capability) {
  const settings = readSettings(dataDir);
  if (!capabilityAvailable(settings, capability)) return false;

  // Fail closed: a missing or malformed allow-list must deny access rather
  // than throw from inside a permission check.
  const allowedOrigins = settings.allowed_origins;
  if (!Array.isArray(allowedOrigins)) return false;

  const origin = normalizeOrigin(context?.origin || context?.platform || "other");
  return allowedOrigins.includes(origin);
}
|