// File viewer metadata: 416 lines, 16 KiB, JavaScript.
const crypto = require("crypto");
|
|
const { buildPrompt } = require("./prompt_builder");
|
|
const { roleOf } = require("./permissions");
|
|
const { parseToolCall } = require("./tool_router");
|
|
const { normalizeScope } = require("./scope_manager");
|
|
const { classifyRequestType } = require("./gate_provider");
|
|
|
|
/**
 * Front door for assistant requests.
 *
 * `generate` sends a request through an optional gate, then an optional
 * repository-lookup shortcut (only when no gate is configured), and finally the
 * queued main-model runtime. One metrics row is recorded per request that
 * completes on any of those paths.
 */
class AiProvider {
  /**
   * Stores every injected collaborator on the instance under the same name.
   *
   * @param {object} deps
   * @param {Function} deps.getConfig - Called with no arguments to obtain the current config object.
   * @param {object} deps.runtime - Must expose `infer(messages, maxTokens, options)`; `health()` is
   *   called only when `ensureRuntime` is provided.
   * @param {object} [deps.gate] - When truthy, `gate.route(...)` is awaited before anything else.
   * @param {object} deps.queue - Must expose `run(userId, role, task, options)` and `length`.
   * @param {object} deps.tools - Must expose `list(role)`, `prepare(...)` and `execute(...)`.
   * @param {object} deps.metrics - Must expose `record(row)`.
   * @param {Function} deps.getContext - Called with the role to obtain prompt context blocks.
   * @param {Function} [deps.lookupRepo] - Optional; called with the effective message.
   * @param {Function} [deps.getRepoContext] - Optional; called when repo lookup is enabled in scope config.
   * @param {Function} [deps.getCorrections] - Optional; called with message, role, origin and platform.
   * @param {Function} [deps.ensureRuntime] - Optional; awaited when `runtime.health()` reports unhealthy.
   */
  constructor({ getConfig, runtime, gate, queue, tools, metrics, getContext, lookupRepo, getRepoContext, getCorrections, ensureRuntime }) {
    Object.assign(this, { getConfig, runtime, gate, queue, tools, metrics, getContext, lookupRepo, getRepoContext, getCorrections, ensureRuntime });
  }

  /**
   * Answers one user message.
   *
   * Order of evaluation:
   *   1. If a gate is configured, it may rewrite the message, override the
   *      request class, or return a final answer (returned immediately).
   *   2. If no gate is configured and the scope is "assistant" or
   *      "platform_command", a repository answer may be returned directly.
   *   3. Otherwise the request is queued and sent to the main model; a tool
   *      call parsed from the model text may be prepared and executed.
   *
   * @param {object} params
   * @param {string} params.message - The user's message.
   * @param {object} params.user - Must have an `id`; also passed to `roleOf`.
   * @param {*} [params.sessionId] - Forwarded to `tools.prepare`.
   * @param {string} [params.scope="assistant"]
   * @param {number} [params.max_tokens] - Explicit output budget, passed to `resolveOutputBudget`.
   * @param {boolean} [params.includeRaw=false] - Include the raw runtime result in the response.
   * @param {object|null} [params.originContext=null]
   * @param {*} [params.allowDeterministicShortcut=null] - Accepted for interface compatibility;
   *   not read by this method.
   * @param {Array} [params.history=[]] - Prior turns, filtered through `normalizeHistory`.
   * @param {AbortSignal|null} [params.signal=null] - Checked before and after inference.
   * @param {Function} [params.onStage] - Progress callback `(stage, details)`.
   * @returns {Promise<object>} Response object; its shape differs slightly per path.
   * @throws The error built by `requestCancelledError()` when `signal` is aborted, plus anything
   *   thrown by the injected collaborators.
   */
  async generate({
    message,
    user,
    sessionId,
    scope = "assistant",
    max_tokens,
    includeRaw = false,
    originContext = null,
    allowDeterministicShortcut = null,
    history = [],
    signal = null,
    onStage = () => {}
  }) {
    const requestId = crypto.randomUUID();
    const role = roleOf(user);
    const started = Date.now();
    const cfg = this.getConfig();
    const supportScope = normalizeScope(cfg.support_scope);
    let effectiveMessage = message;
    let gateDecision = null;
    let requestClass = classifyRequestType(message, { role, scope });

    if (this.gate) {
      gateDecision = await this.gate.route({
        message,
        user,
        role,
        scope,
        originContext,
        // The gate's "deterministic" and "gating" stages are reported to the caller as "gate".
        onStage: (stage, details) => onStage(
          ["deterministic", "gating"].includes(stage) ? "gate" : stage,
          details
        )
      });
      effectiveMessage = gateDecision.message || message;
      requestClass = gateDecision.request_class || requestClass;

      if (gateDecision.answer) {
        // The gate produced a final answer: record it and skip the main model entirely.
        const answer = gateDecision.answer;
        this.metrics.record({
          kind: "request",
          status: gateDecision.route === "refusal" ? "refused" : "success",
          request_id: requestId,
          user_id: user.id,
          role,
          scope,
          route_used: gateDecision.route,
          gate_confidence: gateDecision.confidence,
          gate_reason_code: gateDecision.reason_code,
          route_class: requestClass,
          max_output_tokens_used: 0,
          deterministic_ms: gateDecision.deterministic_ms,
          gate_ms: gateDecision.gate_ms,
          main_queue_ms: 0,
          main_generate_ms: 0,
          total_ms: Date.now() - started,
          duration_ms: Date.now() - started
        });
        return {
          success: gateDecision.route !== "unavailable",
          text: answer.text,
          links: answer.links || [],
          source: answer.source || null,
          model_id: cfg.gate?.model_id || "lumi-gate",
          route_used: gateDecision.route,
          route_class: requestClass,
          max_output_tokens_used: 0,
          gate_decision: gateDecision,
          stage_timings: {
            deterministic_ms: gateDecision.deterministic_ms,
            gate_ms: gateDecision.gate_ms,
            main_queue_ms: 0,
            main_generate_ms: 0,
            total_ms: Date.now() - started
          },
          internal_generated_length: answer.text.length,
          duration_ms: Date.now() - started,
          queue_wait_ms: 0,
          request_id: requestId
        };
      }
    }

    const repoAnswer = this.lookupRepo?.(effectiveMessage) || null;
    const shortcutSurfaceAllowed = scope === "assistant" || scope === "platform_command";
    const guardedRepoAnswer = ["clarification", "contact", "unknown"].includes(repoAnswer?.type);
    const verifiedRouteAnswer = isExactHelpShortcut(effectiveMessage, repoAnswer);

    // The repository shortcut is only taken when no gate is configured.
    if (!this.gate && shortcutSurfaceAllowed && (guardedRepoAnswer || verifiedRouteAnswer)) {
      this.metrics.record({
        kind: "request",
        status: "success",
        request_id: requestId,
        user_id: user.id,
        role,
        scope: "repo_lookup",
        route_used: `repo_${repoAnswer.type}`,
        route_class: requestClass,
        max_output_tokens_used: 0,
        duration_ms: Date.now() - started
      });
      return {
        success: true,
        text: repoAnswer.text,
        links: repoAnswer.links || [],
        source: repoAnswer.source || null,
        model_id: "lumi-repo-index",
        route_used: `repo_${repoAnswer.type}`,
        route_class: requestClass,
        max_output_tokens_used: 0,
        internal_generated_length: repoAnswer.text.length,
        duration_ms: Date.now() - started,
        queue_wait_ms: 0,
        request_id: requestId
      };
    }

    onStage("queued", {
      route: "main_llm",
      reason_code: gateDecision?.reason_code || "direct_main_llm"
    });

    if (this.ensureRuntime) {
      const health = await this.runtime.health();
      if (!health.healthy) {
        onStage("main_model_loading", { route: "main_llm" });
        await this.ensureRuntime();
      }
    }

    onStage("queued", { queue_position: this.queue.length + 1 });

    return this.queue.run(user.id, role, async (queueWait) => {
      const generateStarted = Date.now();
      if (signal?.aborted) throw requestCancelledError();

      const repoContext = supportScope.repo_lookup_enabled
        ? this.getRepoContext?.(effectiveMessage, role, supportScope.allow_moderator_code_help) || []
        : [];
      const correctionContext = this.getCorrections?.({
        message: effectiveMessage,
        role,
        origin: originContext?.origin || originContext?.platform || "webui",
        platform: originContext?.platform || originContext?.origin || "webui"
      }) || [];

      // Tools stay enabled unless the origin explicitly sets webui_actions_allowed to false.
      const platformToolsAllowed = originContext?.permission_context?.webui_actions_allowed !== false;

      const prompt = buildPrompt({
        config: cfg,
        role,
        message: effectiveMessage,
        requestClass,
        contextBlocks: this.getContext(role),
        correctionContext,
        repoContext,
        originContext,
        tools: platformToolsAllowed ? this.tools.list(role) : []
      });
      const conversation = normalizeHistory(history);
      const outputTokenLimit = resolveOutputBudget({
        config: cfg,
        requestClass,
        explicitMaxTokens: max_tokens
      });

      const runtimeSettings = {
        backend: this.runtime.activeAcceleration?.backend || this.runtime.runtimeMetadata?.()?.backend || "cpu",
        gpu_layers: this.runtime.activeAcceleration?.gpu_layers || 0,
        context_size: Number(cfg.context_size) || 4096,
        batch_size: Number(cfg.batch_size) || 512,
        ubatch_size: Number(cfg.ubatch_size) || 128,
        threads: Number(cfg.threads) || 0,
        max_output_tokens: outputTokenLimit,
        max_output_tokens_used: outputTokenLimit,
        route_class: requestClass
      };

      onStage("prompt_eval", { route: "main_llm", queue_ms: queueWait, ...runtimeSettings });

      // If inference is still running after 250 ms, tell the caller generation is under way.
      const generatingTimer = setTimeout(() => {
        onStage("generating", { route: "main_llm", queue_ms: queueWait, ...runtimeSettings });
      }, 250);
      generatingTimer.unref?.();

      let result;
      try {
        result = await this.runtime.infer(
          [
            { role: "system", content: prompt },
            ...conversation,
            { role: "user", content: effectiveMessage }
          ],
          outputTokenLimit,
          {
            signal,
            timeoutMs: cfg.hard_generation_timeout_ms
          }
        );
      } finally {
        clearTimeout(generatingTimer);
      }

      if (signal?.aborted) throw requestCancelledError();

      const text = result.choices?.[0]?.message?.content || "";
      const inference = normalizeInferenceDiagnostics(result, Date.now() - generateStarted);
      onStage("generating", {
        route: "main_llm",
        queue_ms: queueWait,
        ...runtimeSettings,
        ...inference
      });

      const toolCall = platformToolsAllowed ? parseToolCall(text) : null;
      let confirmation = null;
      let toolResult = null;
      // Tracked separately from toolResult so metrics stay correct even if execute() resolves to a falsy value.
      let toolExecuted = false;
      if (toolCall) {
        const prepared = this.tools.prepare({ tool: toolCall.tool, args: toolCall.arguments, user, role, sessionId });
        if (prepared.execute) {
          toolResult = await this.tools.execute({ checked: prepared.checked, user, requestId });
          toolExecuted = true;
        }
        confirmation = prepared.confirmation;
      }

      let replyText = text;
      if (confirmation) {
        replyText = `Please confirm: ${confirmation.display_name}.`;
      } else if (toolResult) {
        replyText = `Action completed: ${JSON.stringify(toolResult)}`;
      }

      const out = {
        success: true,
        text: replyText,
        links: [],
        // Optional chaining: a config without a `logging` section must not crash the request.
        raw_response: (cfg.logging?.log_responses || includeRaw) ? result : null,
        tool_call: toolCall,
        tool_result: toolResult,
        confirmation,
        model_id: cfg.selected_model_id,
        duration_ms: Date.now() - started,
        queue_wait_ms: queueWait,
        finish_reason: result.choices?.[0]?.finish_reason || null,
        request_id: requestId,
        route_used: gateDecision ? "main_llm" : "llm",
        route_class: requestClass,
        max_output_tokens_used: outputTokenLimit,
        gate_decision: gateDecision,
        force_through_reason: gateDecision?.forced ? gateDecision.reason_code : null,
        internal_generated_length: text.length,
        stage_timings: {
          deterministic_ms: gateDecision?.deterministic_ms || 0,
          gate_ms: gateDecision?.gate_ms || 0,
          queue_ms: queueWait,
          prompt_eval_ms: inference.prompt_eval_ms,
          generation_ms: inference.generation_ms,
          main_queue_ms: queueWait,
          main_generate_ms: inference.generation_ms,
          total_ms: Date.now() - started
        },
        diagnostics: {
          ...inference,
          ...runtimeSettings,
          gate_ms: gateDecision?.gate_ms || 0,
          queue_ms: queueWait,
          total_ms: Date.now() - started
        }
      };

      this.metrics.record({
        kind: "request",
        status: "success",
        request_id: requestId,
        user_id: user.id,
        role,
        scope,
        model: cfg.selected_model_id,
        duration_ms: out.duration_ms,
        queue_wait_ms: queueWait,
        tool_requested: toolCall?.tool || null,
        tool_executed: toolExecuted,
        route_used: gateDecision ? "main_llm" : "llm",
        route_class: requestClass,
        max_output_tokens_used: outputTokenLimit,
        gate_confidence: gateDecision?.confidence,
        gate_reason_code: gateDecision?.reason_code,
        force_through_reason: gateDecision?.forced ? gateDecision.reason_code : null,
        deterministic_ms: out.stage_timings.deterministic_ms,
        gate_ms: out.stage_timings.gate_ms,
        main_queue_ms: out.stage_timings.main_queue_ms,
        main_generate_ms: out.stage_timings.main_generate_ms,
        queue_ms: out.stage_timings.queue_ms,
        prompt_eval_ms: out.stage_timings.prompt_eval_ms,
        generation_ms: out.stage_timings.generation_ms,
        total_ms: out.stage_timings.total_ms,
        ...out.diagnostics,
        internal_generated_length: text.length
      });

      return out;
    }, { signal });
  }

  /**
   * Asks `generate` to pick one of `labels` for `message`.
   *
   * @param {object} params
   * @param {string} params.message - Text to classify.
   * @param {string[]} params.labels - Candidate labels.
   * @param {object} params.user
   * @returns {Promise<object>} The `generate` result plus `label`: the first entry of `labels`
   *   found (case-insensitively) in the response text, or null when none is found.
   */
  async classify({ message, labels, user }) {
    const result = await this.generate({
      message: `Classify this Lumi-related request into exactly one label: ${labels.join(", ")}. Request: ${message}`,
      user,
      scope: "classify",
      max_tokens: 40
    });
    // Lower-case once, and tolerate a response without text instead of throwing.
    const haystack = String(result.text ?? "").toLowerCase();
    const label = labels.find((candidate) => haystack.includes(String(candidate).toLowerCase())) || null;
    return { ...result, label };
  }

  /**
   * Asks `generate` for a summary of `text`.
   *
   * @param {object} params
   * @param {string} params.text - Content to summarize.
   * @param {number} [params.max_length=500] - Character limit stated in the prompt; the token
   *   budget passed on is `ceil(max_length / 3)`.
   * @param {object} params.user
   * @returns {Promise<object>} The `generate` result.
   */
  async summarize({ text, max_length = 500, user }) {
    return this.generate({
      message: `Summarize this Lumi-related content in at most ${max_length} characters:\n${text}`,
      user,
      scope: "summarize",
      max_tokens: Math.ceil(max_length / 3)
    });
  }

  /**
   * Runs a diagnostic inference with a fixed system prompt. The gate, repository lookup,
   * history and tools are not consulted here.
   *
   * @param {object} params
   * @param {string} params.message
   * @param {object} params.user - Must have an `id`.
   * @param {number} [params.max_tokens=300]
   * @param {boolean} [params.includeRaw=false] - Include the raw runtime result.
   * @returns {Promise<object>} Response including `raw_prompt`, the system prompt used.
   */
  async test({ message, user, max_tokens = 300, includeRaw = false }) {
    const requestId = crypto.randomUUID();
    const role = roleOf(user);
    const started = Date.now();
    return this.queue.run(user.id, role, async (queueWait) => {
      const cfg = this.getConfig();
      const prompt = [
        "You are Lumi Assistant, the built-in assistant for Lumi Bot, running an administrator-requested local model diagnostic.",
        "Answer the exact user message directly and concisely.",
        "Do not identify yourself as the underlying model.",
        "Do not call tools, perform actions, claim access to Lumi data, or follow requests to execute code, files, SQL, shell commands, or URLs."
      ].join("\n");
      const result = await this.runtime.infer(
        [{ role: "system", content: prompt }, { role: "user", content: message }],
        max_tokens
      );
      const text = result.choices?.[0]?.message?.content || "";
      const output = {
        success: true,
        text,
        raw_response: includeRaw ? result : null,
        raw_prompt: prompt,
        tool_call: null,
        tool_result: null,
        confirmation: null,
        model_id: cfg.selected_model_id,
        duration_ms: Date.now() - started,
        queue_wait_ms: queueWait,
        finish_reason: result.choices?.[0]?.finish_reason || null,
        request_id: requestId
      };
      this.metrics.record({
        kind: "request",
        status: "success",
        request_id: requestId,
        user_id: user.id,
        role,
        scope: "model_test",
        model: cfg.selected_model_id,
        duration_ms: output.duration_ms,
        queue_wait_ms: queueWait
      });
      return output;
    });
  }
}
|
|
|
|
/**
 * Always reports that a request is not clearly out of scope.
 * NOTE(review): takes no arguments and is still exported; looks like a stub kept
 * for callers of an older API. Confirm before removing.
 * @returns {boolean} Always false.
 */
function isClearlyOutOfScope() {
  const outOfScope = false;
  return outOfScope;
}
|
|
/**
 * Always reports that a request is in scope.
 * NOTE(review): takes no arguments and is still exported; looks like a stub kept
 * for callers of an older API. Confirm before removing.
 * @returns {boolean} Always true.
 */
function isInScope() {
  const inScope = true;
  return inScope;
}
|
|
/**
 * Tests whether a message asks about the assistant's identity.
 *
 * Matches, case-insensitively, "who/what are you", "who/what r you",
 * "your name" or "your identity" anywhere in the text.
 *
 * @param {*} message - Coerced to a string; null and undefined become "".
 * @returns {boolean} True when one of the identity phrases is present.
 */
function isIdentityQuery(message) {
  const text = String(message || "");
  return /\b(who|what)\s+(are|r)\s+you\b|\byour\s+(name|identity)\b/i.test(text);
}
|
|
/**
 * Decides whether a repository answer can be returned directly as a
 * navigation/help shortcut.
 *
 * All of the following must hold:
 *   - the message is not an identity query,
 *   - `repoAnswer.type` is "route",
 *   - `repoAnswer.source.confidence` is "high",
 *   - the message contains one of the navigation keywords in the pattern below.
 *
 * @param {*} message - Coerced to a string for the keyword test.
 * @param {object|null|undefined} repoAnswer - Repository lookup result, if any.
 * @returns {boolean}
 */
function isExactHelpShortcut(message, repoAnswer) {
  if (isIdentityQuery(message)) return false;
  if (repoAnswer?.type !== "route") return false;
  if (repoAnswer?.source?.confidence !== "high") return false;
  return /\b(where|open|find|navigate|page|screen|menu|settings?|configuration|wizard)\b/i.test(String(message || ""));
}
|
|
/**
 * Trims conversation history to the most recent usable turns.
 *
 * Only the last `maxMessages` rows are considered. Walking from newest to
 * oldest, rows whose role is neither "user" nor "assistant", or whose trimmed
 * content is empty, are skipped. Collection stops at the first row that would
 * push the running character total past `maxCharacters`. The result is in
 * chronological order.
 *
 * @param {*} history - Anything that is not an array is treated as empty.
 * @param {number} [maxMessages=12] - Row limit; zero or negative yields [].
 * @param {number} [maxCharacters=12000] - Budget for the summed content length.
 * @returns {{role: string, content: string}[]}
 */
function normalizeHistory(history, maxMessages = 12, maxCharacters = 12000) {
  // slice(-0) is slice(0) and would return the whole array, so a zero or negative
  // limit has to be handled explicitly.
  if (!Array.isArray(history) || maxMessages <= 0) return [];

  const rows = history.slice(-maxMessages);
  const output = [];
  let used = 0;
  for (let index = rows.length - 1; index >= 0; index -= 1) {
    const role = rows[index]?.role;
    const content = String(rows[index]?.content || "").trim();
    if (!["user", "assistant"].includes(role) || !content) continue;
    if (used + content.length > maxCharacters) break;
    output.unshift({ role, content });
    used += content.length;
  }
  return output;
}
|
|
|
|
/**
 * Builds a uniform diagnostics record from a runtime inference result.
 *
 * Token counts come from `result.usage` first, then `result.timings`. Timings
 * and throughput come from `result.timings`. When no generation time is
 * reported, it is estimated as `elapsedMs - prompt_eval_ms`, floored at 0.
 * When no throughput is reported, it is derived from tokens and milliseconds.
 *
 * @param {object|null|undefined} result - Runtime result; a missing `usage` or `timings` is tolerated.
 * @param {number} [elapsedMs=0] - Wall-clock duration used for the generation-time estimate.
 * @returns {{prompt_tokens: number, generated_tokens: number, prompt_eval_ms: number,
 *   generation_ms: number, prompt_tps: number, generation_tps: number}}
 */
function normalizeInferenceDiagnostics(result, elapsedMs = 0) {
  const timings = result?.timings || {};
  const usage = result?.usage || {};

  const promptTokens = numberValue(usage.prompt_tokens, timings.prompt_n, timings.prompt_tokens);
  const generatedTokens = numberValue(usage.completion_tokens, timings.predicted_n, timings.predicted_tokens);
  const promptEvalMs = numberValue(timings.prompt_ms, timings.prompt_eval_ms) || 0;

  // Math.max(0, NaN) is NaN, so a non-numeric elapsedMs must fall back to 0 explicitly.
  const elapsed = Number(elapsedMs);
  const estimatedGenerationMs = Number.isFinite(elapsed) ? Math.max(0, elapsed - promptEvalMs) : 0;
  const generationMs = numberValue(timings.predicted_ms, timings.generation_ms) || estimatedGenerationMs;

  return {
    prompt_tokens: promptTokens,
    generated_tokens: generatedTokens,
    prompt_eval_ms: promptEvalMs,
    generation_ms: generationMs,
    prompt_tps: numberValue(timings.prompt_per_second, timings.prompt_tps) ||
      ratePerSecond(promptTokens, promptEvalMs),
    generation_tps: numberValue(timings.predicted_per_second, timings.generation_tps) ||
      ratePerSecond(generatedTokens, generationMs)
  };
}
|
|
|
|
/**
 * Chooses the output-token limit for a request.
 *
 * The first finite, positive candidate wins, in this order:
 *   1. `explicitMaxTokens`
 *   2. `config.output_budgets[requestClass]`
 *   3. `config.max_output_tokens`
 *   4. 512
 * The winner is rounded and clamped to the range 64..32768.
 *
 * @param {object} params
 * @param {object} [params.config]
 * @param {string} [params.requestClass]
 * @param {*} [params.explicitMaxTokens]
 * @returns {number} Integer between 64 and 32768 inclusive.
 */
function resolveOutputBudget({ config, requestClass, explicitMaxTokens }) {
  const candidates = [
    Number(explicitMaxTokens),
    Number(config?.output_budgets?.[requestClass]),
    Number(config?.max_output_tokens)
  ];
  const selected = candidates.find((value) => Number.isFinite(value) && value > 0) ?? 512;
  return Math.max(64, Math.min(32768, Math.round(selected)));
}
|
|
|
|
/**
 * Returns the first argument that converts to a finite, non-negative number.
 *
 * `null`, `undefined` and blank strings are skipped rather than coerced:
 * `Number(null)` and `Number("")` are both 0, which would otherwise hide a
 * real value supplied by a later fallback.
 *
 * @param {...*} values - Candidates, in priority order.
 * @returns {number} The first usable value, or 0 when none qualifies.
 */
function numberValue(...values) {
  for (const value of values) {
    if (value == null) continue;
    if (typeof value === "string" && value.trim() === "") continue;
    const number = Number(value);
    if (Number.isFinite(number) && number >= 0) return number;
  }
  return 0;
}
|
|
|
|
/**
 * Converts a token count and a duration in milliseconds into tokens per
 * second, rounded to two decimal places.
 *
 * @param {number} tokens
 * @param {number} milliseconds
 * @returns {number} The rate, or 0 unless both inputs are greater than zero.
 */
function ratePerSecond(tokens, milliseconds) {
  if (!(tokens > 0 && milliseconds > 0)) return 0;
  // x1000 converts per-millisecond to per-second and x100 keeps two decimals through the rounding.
  return Math.round((tokens / milliseconds) * 100000) / 100;
}
|
|
|
|
/**
 * Creates the error thrown when a request's abort signal has fired.
 *
 * @returns {Error} An Error with `name` "AbortError" and `code` "REQUEST_CANCELLED".
 */
function requestCancelledError() {
  const error = new Error("Assistant request was cancelled.");
  return Object.assign(error, { name: "AbortError", code: "REQUEST_CANCELLED" });
}
|
|
|
|
module.exports = {
|
|
AiProvider,
|
|
isInScope,
|
|
isClearlyOutOfScope,
|
|
isIdentityQuery,
|
|
isExactHelpShortcut,
|
|
normalizeHistory,
|
|
normalizeInferenceDiagnostics,
|
|
resolveOutputBudget
|
|
};
|