// Lumi/plugins/lumi_ai/backend/ai_provider.js
// 2026-06-25 14:10:04 +02:00
//
// 854 lines
// 32 KiB
// JavaScript

const crypto = require("crypto");
const { buildPrompt, buildToolResultInstruction } = require("./prompt_builder");
const {
buildControllerDecision,
okfLimitForController,
outputBudgetForController
} = require("./controller");
const { roleOf } = require("./permissions");
const { parseToolCallResult } = require("./tool_router");
const { normalizeScope } = require("./scope_manager");
const { classifyRequestType } = require("./gate_provider");
/**
 * Coordinates a single assistant request end to end: optional gate routing,
 * controller decision, repo-index shortcuts, queued main-model inference and
 * an optional tool call with a follow-up formatting inference.
 *
 * Every collaborator is injected through the constructor and stored on the
 * instance under the same name.
 */
class AiProvider {
  /**
   * @param {object} deps - Collaborators copied onto the instance.
   * @param {Function} deps.getConfig - Returns the current configuration object.
   * @param {object} deps.runtime - Model runtime; `health()` and `infer()` are called on it.
   * @param {object} [deps.gate] - Optional gate; when present `route()` is called first.
   * @param {object} deps.queue - Request queue; `run()` and `length` are used.
   * @param {object} deps.tools - Tool registry; `inspect()`, `prepare()` and `execute()` are used.
   * @param {object} deps.metrics - Metrics sink; `record()` is called with plain objects.
   * @param {Function} deps.getContext - Returns context blocks; may expose `lastDiagnostics`.
   * @param {Function} [deps.lookupRepo] - Optional repo-index lookup for shortcut answers.
   * @param {Function} [deps.getRepoContext] - Optional repo context provider.
   * @param {Function} [deps.getCorrections] - Optional corrections provider.
   * @param {Function} [deps.ensureRuntime] - Optional hook awaited when the runtime is unhealthy.
   */
  constructor({ getConfig, runtime, gate, queue, tools, metrics, getContext, lookupRepo, getRepoContext, getCorrections, ensureRuntime }) {
    Object.assign(this, { getConfig, runtime, gate, queue, tools, metrics, getContext, lookupRepo, getRepoContext, getCorrections, ensureRuntime });
  }

  /**
   * Answers one user message.
   *
   * Flow: record the prompt, let the gate (if any) route or answer directly,
   * try a repo-index shortcut when no gate is configured, then queue a
   * main-model inference. When tools are allowed and the model emits a tool
   * call, the tool is prepared/executed and, for read-only tools, a second
   * inference formats the tool result.
   *
   * @param {object} params
   * @param {string} params.message - Raw user message.
   * @param {object} params.user - Requesting user; `id` is used for queueing and metrics.
   * @param {string} [params.sessionId] - Forwarded to `tools.prepare`.
   * @param {string} [params.scope="assistant"] - Request scope label.
   * @param {number} [params.max_tokens] - Explicit output budget passed to the budget resolver.
   * @param {boolean} [params.includeRaw=false] - Include the raw model response(s) in the result.
   * @param {object|null} [params.originContext=null] - Origin/platform details of the request.
   * @param {*} [params.allowDeterministicShortcut=null] - Accepted for interface compatibility; not read here.
   * @param {Array} [params.history=[]] - Prior conversation rows, normalized before use.
   * @param {AbortSignal|null} [params.signal=null] - Cancellation signal.
   * @param {Function} [params.onStage] - Progress callback `(stage, details)`.
   * @param {boolean} [params.allowTools=true] - Whether tool exposure and tool-call parsing run.
   * @param {boolean} [params.includePrompt=false] - Include the built system prompt in the result.
   * @returns {Promise<object>} Result object with `text`, routing details, timings and diagnostics.
   * @throws {Error} Cancellation error (`code: "REQUEST_CANCELLED"`) when the signal is aborted,
   *   or whatever the main inference throws (after an error metric is recorded).
   */
  async generate({
    message,
    user,
    sessionId,
    scope = "assistant",
    max_tokens,
    includeRaw = false,
    originContext = null,
    allowDeterministicShortcut = null,
    history = [],
    signal = null,
    onStage = () => {},
    allowTools = true,
    includePrompt = false
  }) {
    const requestId = crypto.randomUUID();
    const role = roleOf(user);
    const started = Date.now();
    const cfg = this.getConfig();
    const supportScope = normalizeScope(cfg.support_scope);
    let effectiveMessage = message;
    let gateDecision = null;
    let requestClass = classifyRequestType(message, { role, scope });
    let controllerDecision = null;
    this.metrics.record({
      kind: "prompt",
      status: "received",
      request_id: requestId,
      user_id: user?.id,
      role,
      scope,
      origin: originContext?.origin || originContext?.platform || "webui",
      platform: originContext?.platform || originContext?.origin || "webui",
      prompt: redactPrompt(message),
      prompt_tokens: estimateTokens(message)
    });

    if (this.gate) {
      gateDecision = await this.gate.route({
        message,
        user,
        role,
        scope,
        originContext,
        // Gate-internal stage names are collapsed into a single "gate" stage for listeners.
        onStage: (stage, details) => onStage(
          ["deterministic", "gating"].includes(stage) ? "gate" : stage,
          details
        )
      });
      // The gate may rewrite the message and reclassify the request.
      effectiveMessage = gateDecision.message || message;
      requestClass = gateDecision.request_class || requestClass;
      this.metrics.record({
        kind: "gate_summary",
        status: "success",
        request_id: requestId,
        user_id: user?.id,
        role,
        scope,
        route_used: gateDecision.route,
        confidence: gateDecision.confidence,
        reason_code: gateDecision.reason_code,
        route_class: requestClass,
        gate_error: gateDecision.gate_error || null,
        deterministic_ms: gateDecision.deterministic_ms,
        gate_ms: gateDecision.gate_ms,
        platform: originContext?.origin || originContext?.platform || "webui"
      });
      controllerDecision = buildControllerDecision({
        message: effectiveMessage,
        role,
        scope,
        originContext,
        gateDecision,
        requestClass,
        config: cfg
      });
      this.metrics.record(controllerMetricEntry({
        requestId,
        user,
        role,
        scope,
        originContext,
        decision: controllerDecision
      }));
      onStage("controller", controllerStageDetails(controllerDecision));

      // The gate produced the answer itself: return without touching the main model.
      if (gateDecision.answer) {
        const answer = gateDecision.answer;
        this.metrics.record({
          kind: "request",
          status: gateDecision.route === "refusal" ? "refused" : "success",
          request_id: requestId,
          // Optional access keeps this record consistent with the earlier
          // metrics in this method, which all tolerate a missing user.
          user_id: user?.id,
          role,
          scope,
          route_used: gateDecision.route,
          gate_confidence: gateDecision.confidence,
          gate_reason_code: gateDecision.reason_code,
          route_class: requestClass,
          max_output_tokens_used: 0,
          deterministic_ms: gateDecision.deterministic_ms,
          gate_ms: gateDecision.gate_ms,
          main_queue_ms: 0,
          main_generate_ms: 0,
          total_ms: Date.now() - started,
          duration_ms: Date.now() - started
        });
        return {
          success: gateDecision.route !== "unavailable",
          text: answer.text,
          links: answer.links || [],
          source: answer.source || null,
          model_id: cfg.gate?.model_id || "lumi-gate",
          route_used: gateDecision.route,
          route_class: requestClass,
          controller_decision: controllerDecision,
          max_output_tokens_used: 0,
          gate_decision: gateDecision,
          stage_timings: {
            deterministic_ms: gateDecision.deterministic_ms,
            gate_ms: gateDecision.gate_ms,
            main_queue_ms: 0,
            main_generate_ms: 0,
            total_ms: Date.now() - started
          },
          internal_generated_length: answer.text.length,
          duration_ms: Date.now() - started,
          queue_wait_ms: 0,
          request_id: requestId
        };
      }
    }

    // Repo-index shortcut: only considered when no gate is configured and the
    // scope is one of the two surfaces that allow it.
    const repoAnswer = this.lookupRepo?.(effectiveMessage) || null;
    const shortcutSurfaceAllowed = scope === "assistant" || scope === "platform_command";
    const guardedRepoAnswer = ["clarification", "contact", "unknown"].includes(repoAnswer?.type);
    const verifiedRouteAnswer = isExactHelpShortcut(effectiveMessage, repoAnswer);
    if (!this.gate && shortcutSurfaceAllowed && (guardedRepoAnswer || verifiedRouteAnswer)) {
      this.metrics.record({
        kind: "request",
        status: "success",
        request_id: requestId,
        user_id: user?.id,
        role,
        scope: "repo_lookup",
        route_used: `repo_${repoAnswer.type}`,
        route_class: requestClass,
        max_output_tokens_used: 0,
        duration_ms: Date.now() - started
      });
      return {
        success: true,
        text: repoAnswer.text,
        links: repoAnswer.links || [],
        source: repoAnswer.source || null,
        model_id: "lumi-repo-index",
        route_used: `repo_${repoAnswer.type}`,
        route_class: requestClass,
        controller_decision: buildControllerDecision({
          message: effectiveMessage,
          role,
          scope,
          originContext,
          requestClass,
          config: cfg
        }),
        max_output_tokens_used: 0,
        internal_generated_length: repoAnswer.text.length,
        duration_ms: Date.now() - started,
        queue_wait_ms: 0,
        request_id: requestId
      };
    }

    onStage("queued", {
      route: "main_llm",
      reason_code: gateDecision?.reason_code || "direct_main_llm"
    });

    // Without a gate no controller decision exists yet; build and report it now.
    if (!controllerDecision) {
      controllerDecision = buildControllerDecision({
        message: effectiveMessage,
        role,
        scope,
        originContext,
        requestClass,
        config: cfg
      });
      this.metrics.record(controllerMetricEntry({
        requestId,
        user,
        role,
        scope,
        originContext,
        decision: controllerDecision
      }));
      onStage("controller", controllerStageDetails(controllerDecision));
    }

    // Only probe runtime health when a hook exists that can react to an unhealthy runtime.
    if (this.ensureRuntime) {
      const health = await this.runtime.health();
      if (!health.healthy) {
        onStage("main_model_loading", { route: "main_llm" });
        await this.ensureRuntime();
      }
    }

    onStage("queued", { queue_position: this.queue.length + 1 });
    return this.queue.run(user.id, role, async (queueWait) => {
      const generateStarted = Date.now();
      if (signal?.aborted) throw requestCancelledError();

      const repoContext = supportScope.repo_lookup_enabled
        ? this.getRepoContext?.(effectiveMessage, role, supportScope.allow_moderator_code_help) || []
        : [];
      const correctionContext = this.getCorrections?.({
        message: effectiveMessage,
        role,
        origin: originContext?.origin || originContext?.platform || "webui",
        platform: originContext?.platform || originContext?.origin || "webui"
      }) || [];
      const toolExposure = allowTools
        ? this.tools.inspect({ role, user, context: originContext })
        : { considered: [], exposed: [] };
      this.metrics.record({
        kind: "tool_exposure",
        status: allowTools ? "evaluated" : "disabled",
        request_id: requestId,
        user_id: user.id,
        role,
        origin: originContext?.origin || originContext?.platform || "other",
        considered_tools: toolExposure.considered.map((decision) => decision.tool.tool_id),
        exposed_tools: toolExposure.exposed.map((tool) => tool.tool_id),
        rejected_tools: toolExposure.considered
          .filter((decision) => !decision.exposed)
          .map((decision) => ({ tool_id: decision.tool.tool_id, reason: decision.reason }))
      });

      // Context retrieval is best effort: a failure is recorded in metrics and
      // the request continues with no context blocks.
      let contextBlocks = [];
      let contextFailure = null;
      let contextDiagnostics = [];
      const okfLimit = okfLimitForController(controllerDecision);
      try {
        contextBlocks = this.getContext({
          role,
          user,
          message: effectiveMessage,
          originContext,
          scope,
          okf_retrieval: controllerDecision.okf_retrieval,
          limit: okfLimit,
          controller: controllerDecision
        }) || [];
        contextDiagnostics = Array.isArray(this.getContext?.lastDiagnostics)
          ? this.getContext.lastDiagnostics
          : [];
      } catch (error) {
        contextFailure = error;
        contextBlocks = [];
      }
      const okfDiagnostics = summarizeContextDiagnostics(contextDiagnostics);
      this.metrics.record({
        kind: "okf_retrieval",
        status: contextFailure ? "failed" : "success",
        request_id: requestId,
        user_id: user.id,
        role,
        scope,
        okf_retrieval_depth: controllerDecision.okf_retrieval,
        okf_match_count: Array.isArray(contextBlocks) ? contextBlocks.length : 0,
        okf_candidate_count: okfDiagnostics.candidate_count,
        okf_query: okfDiagnostics.query || effectiveMessage,
        okf_limit: okfLimit,
        okf_provider_count: okfDiagnostics.provider_count,
        fallback_reason: controllerDecision.fallback_used ? controllerDecision.reason_code : null,
        gate_reason_code: gateDecision?.reason_code || null,
        gate_error: gateDecision?.gate_error || null,
        error_code: contextFailure?.code || null,
        message: contextFailure?.message ? String(contextFailure.message).slice(0, 300) : null
      });

      const prompt = buildPrompt({
        config: cfg,
        role,
        message: effectiveMessage,
        requestClass,
        contextBlocks,
        correctionContext,
        repoContext,
        originContext: originContext ? {
          ...originContext,
          response_profile: controllerDecision.source_profile
        } : null,
        controllerDecision,
        tools: toolExposure.exposed
      });
      this.metrics.record({
        kind: "prompt_build",
        status: "success",
        request_id: requestId,
        user_id: user.id,
        role,
        scope,
        context_block_count: contextBlocks.length,
        correction_count: correctionContext.length,
        repo_context_count: repoContext.length,
        prompt_tokens: estimateTokens(prompt),
        controller_complexity: controllerDecision.complexity,
        okf_retrieval_depth: controllerDecision.okf_retrieval
      });

      const conversation = normalizeHistory(history);
      const outputTokenLimit = resolveOutputBudget({
        config: cfg,
        requestClass,
        explicitMaxTokens: max_tokens,
        controllerDecision
      });
      const runtimeSettings = {
        backend: this.runtime.activeAcceleration?.backend || this.runtime.runtimeMetadata?.()?.backend || "cpu",
        gpu_layers: this.runtime.activeAcceleration?.gpu_layers || 0,
        context_size: Number(cfg.context_size) || 4096,
        batch_size: Number(cfg.batch_size) || 512,
        ubatch_size: Number(cfg.ubatch_size) || 128,
        threads: Number(cfg.threads) || 0,
        max_output_tokens: outputTokenLimit,
        max_output_tokens_used: outputTokenLimit,
        route_class: requestClass,
        controller_complexity: controllerDecision.complexity,
        okf_retrieval_depth: controllerDecision.okf_retrieval,
        controller_intent: controllerDecision.intent,
        source_profile: controllerDecision.source_profile.source
      };
      onStage("prompt_eval", { route: "main_llm", queue_ms: queueWait, ...runtimeSettings });
      // Announce "generating" after 250 ms if the inference is still running;
      // the timer is cleared in `finally` and unref'd so it never holds the process open.
      const generatingTimer = setTimeout(() => {
        onStage("generating", { route: "main_llm", queue_ms: queueWait, ...runtimeSettings });
      }, 250);
      generatingTimer.unref?.();

      let result;
      try {
        this.metrics.record({
          kind: "model_request",
          status: "started",
          request_id: requestId,
          user_id: user.id,
          role,
          scope,
          model: cfg.selected_model_id,
          controller_complexity: controllerDecision.complexity,
          okf_retrieval_depth: controllerDecision.okf_retrieval,
          prompt_tokens: estimateTokens(prompt)
        });
        result = await this.runtime.infer(
          [
            { role: "system", content: prompt },
            ...conversation,
            { role: "user", content: effectiveMessage }
          ],
          outputTokenLimit,
          {
            signal,
            timeoutMs: cfg.hard_generation_timeout_ms
          }
        );
      } catch (error) {
        this.metrics.record({
          kind: "error",
          status: "failed",
          request_id: requestId,
          user_id: user.id,
          role,
          scope,
          error_code: error.code || error.name || "model_request_failed",
          message: String(error.message || "Main model request failed.").slice(0, 300)
        });
        throw error;
      } finally {
        clearTimeout(generatingTimer);
      }
      if (signal?.aborted) throw requestCancelledError();

      const initialText = result.choices?.[0]?.message?.content || "";
      const initialInference = normalizeInferenceDiagnostics(result, Date.now() - generateStarted);
      onStage("generating", {
        route: "main_llm",
        queue_ms: queueWait,
        ...runtimeSettings,
        ...initialInference
      });

      const parsedToolCall = allowTools ? parseToolCallResult(initialText) : { status: "none", call: null };
      const toolCall = parsedToolCall.call;
      let confirmation = null;
      let toolResult = null;
      let finalText = initialText;
      let finalResult = null;
      let finalInference = emptyInferenceDiagnostics();
      let selectedTool = null;
      let rejectedReason = null;
      let toolExecutionMs = 0;

      if (parsedToolCall.status === "malformed") {
        rejectedReason = "malformed_tool_call";
        finalText = "I could not validate the requested tool call. Please retry or clarify the request.";
        this.metrics.record({
          kind: "tool_decision",
          status: "rejected",
          request_id: requestId,
          user_id: user.id,
          origin: originContext?.origin || originContext?.platform || "other",
          rejected_reason: rejectedReason
        });
      } else if (toolCall) {
        selectedTool = toolCall.tool;
        try {
          const prepared = this.tools.prepare({
            tool: toolCall.tool,
            args: toolCall.arguments,
            user,
            role,
            sessionId,
            context: originContext
          });
          confirmation = prepared.confirmation;
          if (prepared.execute) {
            const executionStarted = Date.now();
            onStage("tool_running", { selected_tool: selectedTool });
            try {
              toolResult = await this.tools.execute({
                checked: prepared.checked,
                user,
                requestId,
                context: originContext
              });
            } catch (error) {
              // Execution errors are replaced by a generic failure result so
              // internal error details never reach the model or the user.
              toolResult = {
                status: "failed",
                error: "The selected tool failed to complete."
              };
              rejectedReason = error.code || "execution_failed";
            }
            toolExecutionMs = Date.now() - executionStarted;
            if (prepared.checked.def.read_only) {
              // Read-only tools get a second inference that turns the tool result into prose.
              onStage("formatting", { selected_tool: selectedTool, tool_execution_ms: toolExecutionMs });
              const finalStarted = Date.now();
              try {
                finalResult = await this.runtime.infer(
                  [
                    { role: "system", content: prompt },
                    ...conversation,
                    { role: "user", content: effectiveMessage },
                    { role: "assistant", content: initialText },
                    {
                      role: "user",
                      content: buildToolResultInstruction({
                        tool: prepared.checked.def,
                        result: toolResult,
                        originContext
                      })
                    }
                  ],
                  outputTokenLimit,
                  {
                    signal,
                    timeoutMs: cfg.hard_generation_timeout_ms
                  }
                );
                finalInference = normalizeInferenceDiagnostics(finalResult, Date.now() - finalStarted);
                const candidate = finalResult.choices?.[0]?.message?.content || "";
                // A second tool call in the formatting pass is not followed; fall back to a canned message.
                const repeatedCall = parseToolCallResult(candidate);
                finalText = repeatedCall.status === "none"
                  ? candidate
                  : fallbackToolMessage(toolResult);
              } catch {
                // A cancelled request must surface as a cancellation, exactly
                // like the first inference, instead of a "successful" fallback.
                if (signal?.aborted) throw requestCancelledError();
                finalText = fallbackToolMessage(toolResult);
              }
            } else {
              finalText = safeActionResult(toolResult);
            }
          } else {
            finalText = `Please confirm: ${confirmation.display_name}.`;
          }
        } catch (error) {
          // Let cancellation pass through; everything else is a tool rejection.
          if (error?.code === "REQUEST_CANCELLED") throw error;
          rejectedReason = error.code || "tool_rejected";
          finalText = `I could not use ${toolCall.tool}: ${safeToolError(error)}`;
        }
      }

      const inference = combineInferenceDiagnostics(initialInference, finalInference);
      let fallbackReason = null;
      if (!String(finalText || "").trim()) {
        fallbackReason = "empty_model_response";
        rejectedReason = rejectedReason || fallbackReason;
        finalText = "Lumi did not return a usable answer. Please retry, rephrase, or check Work History for the failed stage.";
        this.metrics.record({
          kind: "fallback",
          status: "partial",
          request_id: requestId,
          user_id: user.id,
          role,
          scope,
          fallback_reason: fallbackReason
        });
      }

      const out = {
        success: true,
        text: finalText,
        links: [],
        // `logging` is read defensively so a config without that section
        // does not fail an otherwise finished request.
        raw_response: cfg.logging?.log_responses || includeRaw
          ? finalResult ? { initial: result, final: finalResult } : result
          : null,
        raw_prompt: includePrompt ? prompt : undefined,
        tool_call: toolCall,
        tool_result: toolResult,
        confirmation,
        model_id: cfg.selected_model_id,
        duration_ms: Date.now() - started,
        queue_wait_ms: queueWait,
        finish_reason: (finalResult || result).choices?.[0]?.finish_reason || null,
        request_id: requestId,
        route_used: gateDecision ? "main_llm" : "llm",
        route_class: requestClass,
        controller_decision: controllerDecision,
        max_output_tokens_used: outputTokenLimit,
        gate_decision: gateDecision,
        force_through_reason: gateDecision?.forced ? gateDecision.reason_code : null,
        internal_generated_length: initialText.length + String(finalText || "").length,
        fallback_reason: fallbackReason,
        stage_timings: {
          deterministic_ms: gateDecision?.deterministic_ms || 0,
          gate_ms: gateDecision?.gate_ms || 0,
          queue_ms: queueWait,
          prompt_eval_ms: inference.prompt_eval_ms,
          generation_ms: inference.generation_ms,
          main_queue_ms: queueWait,
          main_generate_ms: inference.generation_ms,
          total_ms: Date.now() - started
        },
        diagnostics: {
          ...inference,
          ...runtimeSettings,
          controller_reason_code: controllerDecision.reason_code,
          controller_confidence: controllerDecision.confidence,
          source_profile: controllerDecision.source_profile,
          gate_ms: gateDecision?.gate_ms || 0,
          queue_ms: queueWait,
          total_ms: Date.now() - started
        }
      };
      this.metrics.record({
        kind: "request",
        status: "success",
        request_id: requestId,
        user_id: user.id,
        role,
        scope,
        model: cfg.selected_model_id,
        duration_ms: out.duration_ms,
        queue_wait_ms: queueWait,
        tool_requested: toolCall?.tool || null,
        considered_tools: toolExposure.considered.map((decision) => decision.tool.tool_id),
        exposed_tools: toolExposure.exposed.map((tool) => tool.tool_id),
        selected_tool: selectedTool,
        rejected_reason: rejectedReason,
        fallback_reason: fallbackReason,
        execution_ms: toolExecutionMs,
        tool_executed: Boolean(toolResult),
        route_used: gateDecision ? "main_llm" : "llm",
        route_class: requestClass,
        controller_route: controllerDecision.route,
        controller_intent: controllerDecision.intent,
        controller_complexity: controllerDecision.complexity,
        controller_reason_code: controllerDecision.reason_code,
        controller_confidence: controllerDecision.confidence,
        okf_retrieval_depth: controllerDecision.okf_retrieval,
        source_profile: controllerDecision.source_profile.source,
        target_final_chars: controllerDecision.source_profile.target_chars,
        hard_final_chars: controllerDecision.source_profile.hard_chars,
        controller_fallback_used: controllerDecision.fallback_used,
        max_output_tokens_used: outputTokenLimit,
        gate_confidence: gateDecision?.confidence,
        gate_reason_code: gateDecision?.reason_code,
        gate_error: gateDecision?.gate_error || null,
        force_through_reason: gateDecision?.forced ? gateDecision.reason_code : null,
        deterministic_ms: out.stage_timings.deterministic_ms,
        gate_ms: out.stage_timings.gate_ms,
        main_queue_ms: out.stage_timings.main_queue_ms,
        main_generate_ms: out.stage_timings.main_generate_ms,
        queue_ms: out.stage_timings.queue_ms,
        prompt_eval_ms: out.stage_timings.prompt_eval_ms,
        generation_ms: out.stage_timings.generation_ms,
        total_ms: out.stage_timings.total_ms,
        ...out.diagnostics,
        internal_generated_length: out.internal_generated_length
      });
      return out;
    }, { signal });
  }

  /**
   * Asks the model to pick exactly one of `labels` for `message`.
   *
   * The label is recovered by case-insensitive substring search in the
   * answer. A matching label that is merely a fragment of a longer matching
   * label (e.g. "bug" inside "bug_report") is ignored, so the more specific
   * label wins; otherwise the first match in `labels` order is returned.
   *
   * @param {object} params
   * @param {string} params.message - Request text to classify.
   * @param {string[]} params.labels - Candidate labels.
   * @param {object} params.user - Requesting user, forwarded to `generate`.
   * @returns {Promise<object>} The `generate` result plus `label` (string or null).
   */
  async classify({ message, labels, user }) {
    const result = await this.generate({
      message: `Classify this Lumi-related request into exactly one label: ${labels.join(", ")}. Request: ${message}`,
      user,
      scope: "classify",
      max_tokens: 40
    });
    // Tolerate a result without text instead of throwing on `.toLowerCase()`.
    const answer = String(result?.text ?? "").toLowerCase();
    const matched = labels.filter((label) => answer.includes(label.toLowerCase()));
    const label = matched.find((candidate) => {
      const needle = candidate.toLowerCase();
      return !matched.some((other) => other.length > candidate.length && other.toLowerCase().includes(needle));
    });
    return { ...result, label: label || null };
  }

  /**
   * Requests a summary of `text` limited to `max_length` characters.
   * The token budget is `ceil(max_length / 3)`.
   *
   * @param {object} params
   * @param {string} params.text - Content to summarize.
   * @param {number} [params.max_length=500] - Character limit stated in the prompt.
   * @param {object} params.user - Requesting user, forwarded to `generate`.
   * @returns {Promise<object>} The `generate` result.
   */
  async summarize({ text, max_length = 500, user }) {
    return this.generate({
      message: `Summarize this Lumi-related content in at most ${max_length} characters:\n${text}`,
      user,
      scope: "summarize",
      max_tokens: Math.ceil(max_length / 3)
    });
  }

  /**
   * Runs an administrator diagnostic against the model.
   *
   * With `allowTools` the request goes through `generate` (scope
   * "model_test", prompt included). Without tools it bypasses gate,
   * controller and context entirely and sends a fixed diagnostic system
   * prompt straight to the runtime through the queue.
   *
   * @param {object} params
   * @param {string} params.message - Message to send.
   * @param {object} params.user - Requesting user.
   * @param {number} [params.max_tokens=300] - Output token budget.
   * @param {boolean} [params.includeRaw=false] - Include the raw model response.
   * @param {boolean} [params.allowTools=false] - Exercise the full tool-enabled pipeline.
   * @param {object|null} [params.originContext=null] - Forwarded to `generate` when tools are enabled.
   * @returns {Promise<object>} Result object including a `tools_notice` string.
   */
  async test({ message, user, max_tokens = 300, includeRaw = false, allowTools = false, originContext = null }) {
    if (allowTools) {
      const result = await this.generate({
        message,
        user,
        sessionId: `admin-test:${user.id}:${Date.now()}`,
        scope: "model_test",
        max_tokens,
        includeRaw,
        includePrompt: true,
        originContext,
        allowTools: true
      });
      return { ...result, tools_notice: "Tools were enabled for this test." };
    }
    const requestId = crypto.randomUUID();
    const role = roleOf(user);
    const started = Date.now();
    return this.queue.run(user.id, role, async (queueWait) => {
      const cfg = this.getConfig();
      const prompt = [
        "You are Lumi Assistant, the built-in assistant for Lumi Bot, running an administrator-requested local model diagnostic.",
        "Answer the exact user message directly and concisely.",
        "Do not identify yourself as the underlying model.",
        "Do not call tools, perform actions, claim access to Lumi data, or follow requests to execute code, files, SQL, shell commands, or URLs."
      ].join("\n");
      const result = await this.runtime.infer([{ role: "system", content: prompt }, { role: "user", content: message }], max_tokens);
      const text = result.choices?.[0]?.message?.content || "";
      const output = {
        success: true,
        text,
        raw_response: includeRaw ? result : null,
        raw_prompt: prompt,
        tool_call: null,
        tool_result: null,
        confirmation: null,
        model_id: cfg.selected_model_id,
        duration_ms: Date.now() - started,
        queue_wait_ms: queueWait,
        finish_reason: result.choices?.[0]?.finish_reason || null,
        request_id: requestId,
        tools_notice: "Tools were disabled for this test; this result does not exercise tool discovery or execution."
      };
      this.metrics.record({
        kind: "request",
        status: "success",
        request_id: requestId,
        user_id: user.id,
        role,
        scope: "model_test",
        model: cfg.selected_model_id,
        duration_ms: output.duration_ms,
        queue_wait_ms: queueWait
      });
      return output;
    });
  }
}
/**
 * Creates an inference-diagnostics record with every counter set to zero.
 * Used as the neutral element when no second inference took place.
 *
 * @returns {{prompt_tokens: number, generated_tokens: number, prompt_eval_ms: number,
 *   generation_ms: number, prompt_tps: number, generation_tps: number}}
 */
function emptyInferenceDiagnostics() {
  const metricNames = [
    "prompt_tokens",
    "generated_tokens",
    "prompt_eval_ms",
    "generation_ms",
    "prompt_tps",
    "generation_tps"
  ];
  return Object.fromEntries(metricNames.map((metric) => [metric, 0]));
}
/**
 * Adds the token counts and timings of two inference-diagnostics records and
 * recomputes both throughput rates from the summed values.
 *
 * @param {object} initial - Diagnostics of the first inference.
 * @param {object} final - Diagnostics of the follow-up inference.
 * @returns {object} Combined diagnostics with the same six fields.
 */
function combineInferenceDiagnostics(initial, final) {
  // Missing or falsy fields count as zero on either side.
  const total = (field) => Number(initial[field] || 0) + Number(final[field] || 0);
  const combined = {
    prompt_tokens: total("prompt_tokens"),
    generated_tokens: total("generated_tokens"),
    prompt_eval_ms: total("prompt_eval_ms"),
    generation_ms: total("generation_ms")
  };
  combined.prompt_tps = ratePerSecond(combined.prompt_tokens, combined.prompt_eval_ms);
  combined.generation_tps = ratePerSecond(combined.generated_tokens, combined.generation_ms);
  return combined;
}
/**
 * Picks a canned user-facing message for a tool result when the model could
 * not produce a formatted answer. A `user_message` on the result always wins.
 *
 * @param {object|null|undefined} result - Tool result; `user_message` and `status` are read.
 * @returns {string} Message text.
 */
function fallbackToolMessage(result) {
  if (result?.user_message) {
    return String(result.user_message);
  }
  switch (result?.status) {
    case "blocked":
      return "The requested lookup was blocked by the configured tool policy.";
    case "failed":
    case "unavailable":
      return "The requested tool is currently unavailable.";
    case "no_results":
      return "The tool completed but found no usable results.";
    default:
      return "The tool completed, but I could not format a final response.";
  }
}
/**
 * Produces the user-facing outcome text for a non-read-only tool (an action).
 * A `user_message` on the result always wins.
 *
 * Besides "failed", the statuses "blocked" and "unavailable" are reported as
 * a failure: the sibling `fallbackToolMessage` treats both as non-success
 * outcomes, so announcing "completed successfully" for them would be wrong.
 *
 * @param {object|null|undefined} result - Tool result; `user_message` and `status` are read.
 * @returns {string} Message text.
 */
function safeActionResult(result) {
  if (result?.user_message) return String(result.user_message);
  const unsuccessfulStatuses = ["failed", "blocked", "unavailable"];
  if (unsuccessfulStatuses.includes(result?.status)) return "The action failed.";
  return "The action completed successfully.";
}
/**
 * Maps a tool error code to a fixed, user-safe explanation fragment.
 * Unknown or missing codes yield the generic "the tool is unavailable."
 *
 * Only own keys of the message table are honoured: a plain property lookup
 * would also resolve inherited names such as "constructor" or "__proto__"
 * and return a function/object instead of text.
 *
 * @param {{code?: string}|null|undefined} error - Error raised by the tool layer.
 * @returns {string} Lower-case sentence fragment ending in a period.
 */
function safeToolError(error) {
  const messages = {
    not_registered: "the tool is not registered.",
    permission_blocked: "permission was denied.",
    origin_blocked: "the tool is unavailable for this origin.",
    scope_blocked: "the tool is outside this request context.",
    schema_invalid: "the tool arguments were invalid."
  };
  const code = error?.code;
  const isKnown = typeof code === "string" && Object.prototype.hasOwnProperty.call(messages, code);
  return isKnown ? messages[code] : "the tool is unavailable.";
}
/**
 * Scope check stub: takes no arguments and always reports `false`.
 *
 * @returns {boolean} Always `false`.
 */
function isClearlyOutOfScope() {
  const outOfScope = false;
  return outOfScope;
}
/**
 * Scope check stub: takes no arguments and always reports `true`.
 *
 * @returns {boolean} Always `true`.
 */
function isInScope() {
  const inScope = true;
  return inScope;
}
/**
 * Tells whether a message asks about the assistant's identity, i.e. contains
 * "who/what are|r you" or "your name/identity" (case-insensitive).
 *
 * @param {*} message - Message text; falsy values are treated as an empty string.
 * @returns {boolean} True when the identity pattern matches.
 */
function isIdentityQuery(message) {
  const text = String(message || "");
  const identityPattern = /\b(who|what)\s+(are|r)\s+you\b|\byour\s+(name|identity)\b/i;
  return identityPattern.test(text);
}
/**
 * Decides whether a repo-index answer may be returned directly as a help
 * shortcut. All of the following must hold: the answer is of type "route",
 * its source confidence is "high", the message is not an identity question,
 * and the message contains a navigation-style keyword.
 *
 * @param {*} message - User message.
 * @param {object|null|undefined} repoAnswer - Repo lookup result; `type` and `source.confidence` are read.
 * @returns {boolean} True when the shortcut is allowed.
 */
function isExactHelpShortcut(message, repoAnswer) {
  const isRouteAnswer = repoAnswer?.type === "route";
  if (!isRouteAnswer || isIdentityQuery(message)) {
    return false;
  }
  const isHighConfidence = repoAnswer?.source?.confidence === "high";
  if (!isHighConfidence) {
    return false;
  }
  const navigationPattern = /\b(where|open|find|navigate|page|screen|menu|settings?|configuration|wizard)\b/i;
  return navigationPattern.test(String(message || ""));
}
/**
 * Reduces a conversation history to the most recent usable messages.
 *
 * Only the last `maxMessages` rows are considered. Walking from newest to
 * oldest, rows whose role is not "user"/"assistant" or whose trimmed content
 * is empty are skipped; collection stops at the first message that would push
 * the running character total above `maxCharacters`. Output keeps the
 * original chronological order.
 *
 * @param {Array<{role: string, content: *}>} history - Prior messages; non-arrays yield `[]`.
 * @param {number} [maxMessages=12] - Maximum number of trailing rows to inspect.
 * @param {number} [maxCharacters=12000] - Character budget across kept messages.
 * @returns {Array<{role: string, content: string}>} Normalized messages.
 */
function normalizeHistory(history, maxMessages = 12, maxCharacters = 12000) {
  // `slice(-0)` is `slice(0)` (everything) and `slice(-(-n))` drops the oldest
  // rows instead of limiting, so a limit below 1 must short-circuit to "none".
  const messageLimit = Math.floor(Number(maxMessages));
  if (!Array.isArray(history) || !(messageLimit >= 1)) return [];
  const rows = history.slice(-messageLimit);
  const output = [];
  let used = 0;
  for (let index = rows.length - 1; index >= 0; index -= 1) {
    const role = rows[index]?.role;
    const content = String(rows[index]?.content || "").trim();
    if (!["user", "assistant"].includes(role) || !content) continue;
    // Stop rather than skip, so the kept tail never leaps over an omitted message.
    if (used + content.length > maxCharacters) break;
    output.unshift({ role, content });
    used += content.length;
  }
  return output;
}
/**
 * Extracts token counts, timings and throughput from a model response into
 * the uniform six-field diagnostics record.
 *
 * Token counts prefer `usage.*` and fall back to `timings.*`. When the
 * response carries no generation time, it is derived as the elapsed wall
 * time minus the prompt evaluation time (never below zero). Throughput
 * values reported by the runtime are preferred; otherwise they are computed.
 *
 * @param {object|null|undefined} result - Model response; `timings` and `usage` are read.
 * @param {number} [elapsedMs=0] - Wall-clock duration of the inference call.
 * @returns {object} Diagnostics record.
 */
function normalizeInferenceDiagnostics(result, elapsedMs = 0) {
  const timings = result?.timings || {};
  const usage = result?.usage || {};

  const promptTokens = numberValue(usage.prompt_tokens, timings.prompt_n, timings.prompt_tokens);
  const generatedTokens = numberValue(usage.completion_tokens, timings.predicted_n, timings.predicted_tokens);

  const promptEvalMs = numberValue(timings.prompt_ms, timings.prompt_eval_ms) || 0;
  let generationMs = numberValue(timings.predicted_ms, timings.generation_ms);
  if (!generationMs) {
    generationMs = Math.max(0, Number(elapsedMs) - promptEvalMs);
  }

  let promptTps = numberValue(timings.prompt_per_second, timings.prompt_tps);
  if (!promptTps) {
    promptTps = ratePerSecond(promptTokens, promptEvalMs);
  }
  let generationTps = numberValue(timings.predicted_per_second, timings.generation_tps);
  if (!generationTps) {
    generationTps = ratePerSecond(generatedTokens, generationMs);
  }

  return {
    prompt_tokens: promptTokens,
    generated_tokens: generatedTokens,
    prompt_eval_ms: promptEvalMs,
    generation_ms: generationMs,
    prompt_tps: promptTps,
    generation_tps: generationTps
  };
}
/**
 * Resolves the output token budget for a request by delegating to
 * `outputBudgetForController` with the four recognised options.
 *
 * @param {object} [options]
 * @param {object} [options.config] - Current configuration.
 * @param {string} [options.requestClass] - Request classification.
 * @param {number} [options.explicitMaxTokens] - Caller-supplied budget, if any.
 * @param {object} [options.controllerDecision] - Controller decision for the request.
 * @returns {*} Whatever `outputBudgetForController` returns.
 */
function resolveOutputBudget(options = {}) {
  const { config, requestClass, explicitMaxTokens, controllerDecision } = options;
  const budgetRequest = { config, requestClass, explicitMaxTokens, controllerDecision };
  return outputBudgetForController(budgetRequest);
}
/**
 * Builds the "controller_decision" metrics record for a request.
 *
 * @param {object} params
 * @param {string} params.requestId - Request identifier.
 * @param {object} [params.user] - Requesting user; only `id` is read.
 * @param {string} params.role - Resolved role of the user.
 * @param {string} params.scope - Request scope.
 * @param {object|null} [params.originContext] - Origin details; `origin` is preferred over `platform`.
 * @param {object} params.decision - Controller decision; must carry `source_profile`.
 * @returns {object} Flat metrics entry.
 */
function controllerMetricEntry({ requestId, user, role, scope, originContext, decision }) {
  const profile = decision.source_profile;
  const platform = originContext?.origin || originContext?.platform || "other";
  const identity = {
    kind: "controller_decision",
    status: "success",
    request_id: requestId,
    user_id: user?.id,
    role,
    scope
  };
  const controllerFields = {
    route_used: decision.route,
    controller_route: decision.route,
    controller_intent: decision.intent,
    controller_complexity: decision.complexity,
    controller_reason_code: decision.reason_code,
    controller_confidence: decision.confidence,
    okf_retrieval_depth: decision.okf_retrieval,
    answer_style: decision.answer_style
  };
  const profileFields = {
    source_profile: profile.source,
    target_final_chars: profile.target_chars,
    hard_final_chars: profile.hard_chars
  };
  const riskFields = {
    permission_sensitive: decision.permission_sensitive,
    admin_only: decision.admin_only,
    risk_of_private_data: decision.risk_of_private_data,
    controller_fallback_used: decision.fallback_used,
    gate_reason_code: decision.gate_reason_code
  };
  return { ...identity, ...controllerFields, ...profileFields, ...riskFields, platform };
}
/**
 * Projects a controller decision onto the detail payload sent with the
 * "controller" stage notification.
 *
 * @param {object} decision - Controller decision; must carry `source_profile`.
 * @returns {object} Flat stage-details object.
 */
function controllerStageDetails(decision) {
  const { source_profile: profile } = decision;
  const details = {
    route: decision.route,
    controller_intent: decision.intent,
    controller_complexity: decision.complexity,
    controller_reason_code: decision.reason_code,
    controller_confidence: decision.confidence,
    okf_retrieval_depth: decision.okf_retrieval,
    answer_style: decision.answer_style
  };
  details.source_profile = profile.source;
  details.target_final_chars = profile.target_chars;
  details.hard_final_chars = profile.hard_chars;
  details.controller_fallback_used = decision.fallback_used;
  return details;
}
/**
 * Condenses context-provider diagnostics rows into a small summary.
 *
 * Non-object rows are ignored. Rows whose `provider` or `kind` is "okf"
 * contribute to the candidate total; the query is taken from the first OKF
 * row, or from the first row of any kind when there is no OKF row.
 *
 * @param {Array<object>} [rows=[]] - Diagnostics rows; non-arrays count as empty.
 * @returns {{query: (string|null), candidate_count: number, provider_count: number}}
 */
function summarizeContextDiagnostics(rows = []) {
  const entries = [];
  const okfEntries = [];
  let candidateTotal = 0;
  if (Array.isArray(rows)) {
    for (const row of rows) {
      if (!row || typeof row !== "object") continue;
      entries.push(row);
      if (row.provider === "okf" || row.kind === "okf") {
        okfEntries.push(row);
        candidateTotal += Number(row.candidate_count) || 0;
      }
    }
  }
  const preferred = okfEntries[0] || entries[0] || {};
  let query = null;
  if (preferred.query) {
    // Cap the stored query text at 500 characters.
    query = String(preferred.query).slice(0, 500);
  }
  return {
    query,
    candidate_count: candidateTotal,
    provider_count: entries.length
  };
}
/**
 * Returns the first candidate that is a finite, non-negative number, or 0
 * when none qualifies.
 *
 * Only numbers, bigints and non-blank strings are considered. `null`,
 * `undefined`, `""`, booleans and objects are skipped, because `Number()`
 * coerces several of them to 0, which would end the search early and hide a
 * valid later candidate (e.g. `usage.prompt_tokens: null` masking
 * `timings.prompt_n`).
 *
 * @param {...*} values - Candidates in priority order.
 * @returns {number} First usable value, or 0.
 */
function numberValue(...values) {
  for (const value of values) {
    const kind = typeof value;
    const isNumericInput = kind === "number" || kind === "bigint" ||
      (kind === "string" && value.trim() !== "");
    if (!isNumericInput) continue;
    const number = Number(value);
    if (Number.isFinite(number) && number >= 0) return number;
  }
  return 0;
}
/**
 * Converts a token count and a duration in milliseconds into tokens per
 * second, rounded to two decimal places. Returns 0 unless both inputs are
 * greater than zero.
 *
 * @param {number} tokens - Number of tokens processed.
 * @param {number} milliseconds - Time spent, in milliseconds.
 * @returns {number} Tokens per second, or 0.
 */
function ratePerSecond(tokens, milliseconds) {
  const measurable = tokens > 0 && milliseconds > 0;
  if (!measurable) {
    return 0;
  }
  // tokens/ms * 1000 = tokens/s; the extra factor 100 (undone below) keeps two decimals.
  const scaled = (tokens / milliseconds) * 100000;
  return Math.round(scaled) / 100;
}
/**
 * Creates the error thrown when an assistant request is cancelled.
 *
 * @returns {Error} Error named "AbortError" with code "REQUEST_CANCELLED".
 */
function requestCancelledError() {
  const cancellation = new Error("Assistant request was cancelled.");
  cancellation.name = "AbortError";
  cancellation.code = "REQUEST_CANCELLED";
  return cancellation;
}
/**
 * Rough token estimate: one token per four characters, rounded up, with a
 * minimum of 1 for any non-blank text and 0 for blank or falsy input.
 *
 * @param {*} value - Text to measure; coerced with `String`.
 * @returns {number} Estimated token count.
 */
function estimateTokens(value) {
  const text = String(value || "");
  const isBlank = text.trim().length === 0;
  return isBlank ? 0 : Math.max(1, Math.ceil(text.length / 4));
}
/**
 * Prepares prompt text for metrics storage: e-mail addresses become
 * "[email]", values following token/secret/password/api key/client secret
 * assignments become "[redacted]", and the result is cut to 6000 characters.
 *
 * @param {*} value - Prompt text; falsy values are treated as an empty string.
 * @returns {string} Redacted, length-limited text.
 */
function redactPrompt(value) {
  const raw = String(value || "");
  const withoutEmails = raw.replace(/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g, "[email]");
  const withoutSecrets = withoutEmails.replace(
    /\b((?:token|secret|password|api[_ -]?key|client[_ -]?secret)\s*[:=]\s*)\S+/gi,
    "$1[redacted]"
  );
  return withoutSecrets.slice(0, 6000);
}
// Public surface of this module: the provider class plus the helper
// functions listed below, exposed by name.
module.exports = {
AiProvider,
isInScope,
isClearlyOutOfScope,
isIdentityQuery,
isExactHelpShortcut,
normalizeHistory,
normalizeInferenceDiagnostics,
resolveOutputBudget
};