From 3c0cd409a25e0c67a76dacb675486b114f1b20eb Mon Sep 17 00:00:00 2001 From: Franz Rolfsvaag Date: Fri, 12 Jun 2026 19:27:43 +0200 Subject: [PATCH] Improve Lumi AI request handling and review workflows --- .gitignore | 1 + plugins/lumi_ai/README.md | 24 +- plugins/lumi_ai/backend/ai_provider.js | 281 +++++++- .../lumi_ai/backend/assistant_visibility.js | 18 +- plugins/lumi_ai/backend/cache.js | 70 ++ plugins/lumi_ai/backend/config_manager.js | 119 ++++ plugins/lumi_ai/backend/corrections.js | 226 ++++++ plugins/lumi_ai/backend/evals.js | 141 ++++ plugins/lumi_ai/backend/feedback.js | 204 ++++++ plugins/lumi_ai/backend/gate_provider.js | 398 +++++++++++ plugins/lumi_ai/backend/hardware.js | 70 +- plugins/lumi_ai/backend/metrics.js | 170 ++++- plugins/lumi_ai/backend/paths.js | 5 +- plugins/lumi_ai/backend/prompt_builder.js | 29 +- plugins/lumi_ai/backend/queue_manager.js | 32 +- plugins/lumi_ai/backend/repo_indexer.js | 2 +- plugins/lumi_ai/backend/request_jobs.js | 155 +++++ plugins/lumi_ai/backend/response_formatter.js | 6 +- plugins/lumi_ai/backend/runtime_manager.js | 279 +++++++- plugins/lumi_ai/backend/storage.js | 13 +- plugins/lumi_ai/backend/training_export.js | 55 ++ plugins/lumi_ai/data/corrections/.gitkeep | 1 + plugins/lumi_ai/data/evals/.gitkeep | 1 + plugins/lumi_ai/data/exports/.gitkeep | 1 + plugins/lumi_ai/data/feedback/.gitkeep | 1 + plugins/lumi_ai/index.js | 639 +++++++++++++++-- plugins/lumi_ai/plugin.json | 2 +- plugins/lumi_ai/public/assistant.css | 15 + plugins/lumi_ai/public/assistant.js | 267 ++++++- plugins/lumi_ai/public/improvement-center.css | 21 + plugins/lumi_ai/public/improvement-center.js | 17 + plugins/lumi_ai/public/settings.js | 14 +- plugins/lumi_ai/tests/verify.js | 652 +++++++++++++++++- plugins/lumi_ai/views/assistant-panel.ejs | 1 + plugins/lumi_ai/views/improvement-center.ejs | 226 ++++++ plugins/lumi_ai/views/settings.ejs | 111 ++- src/web/server.js | 7 + 37 files changed, 4118 insertions(+), 156 deletions(-) create mode 
100644 plugins/lumi_ai/backend/cache.js create mode 100644 plugins/lumi_ai/backend/corrections.js create mode 100644 plugins/lumi_ai/backend/evals.js create mode 100644 plugins/lumi_ai/backend/feedback.js create mode 100644 plugins/lumi_ai/backend/gate_provider.js create mode 100644 plugins/lumi_ai/backend/request_jobs.js create mode 100644 plugins/lumi_ai/backend/training_export.js create mode 100644 plugins/lumi_ai/data/corrections/.gitkeep create mode 100644 plugins/lumi_ai/data/evals/.gitkeep create mode 100644 plugins/lumi_ai/data/exports/.gitkeep create mode 100644 plugins/lumi_ai/data/feedback/.gitkeep create mode 100644 plugins/lumi_ai/public/improvement-center.css create mode 100644 plugins/lumi_ai/public/improvement-center.js create mode 100644 plugins/lumi_ai/views/improvement-center.ejs diff --git a/.gitignore b/.gitignore index 12cf31f..07581c2 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ plugins/*/data/** npm-debug.log security-audit-*.json security-audit-*.md +taskfile.txt diff --git a/plugins/lumi_ai/README.md b/plugins/lumi_ai/README.md index 0c34e94..66a7ec8 100644 --- a/plugins/lumi_ai/README.md +++ b/plugins/lumi_ai/README.md @@ -1,5 +1,21 @@ # Lumi AI +## Improvement Center + +The Improvement Center at `/plugins/lumi_ai/improvement_center` stores end-user response feedback, supports moderator verification with an administrator-managed trusted reviewer list, and reserves approval, editing, deletion, promotion, eval runs, and exports for administrators. + +Approved corrections are staged until an administrator selects **Save Corrections**. Active entries are constrained by minimum role, origin, and platform, and verified route links are checked against the local Lumi repository index. Manual instruction and DPO JSONL exports include approved examples only and never start training. 
+ +## Lightweight request gate + +When Lumi AI is enabled and its runtime is running, a separate CPU-oriented gate model stays active beside the main model. The gate can serve high-confidence answers from the verified Lumi route/help/plugin index and plugin-local safe cache, or route the request to clarification, refusal, unavailable, or the main LLM. + +The gate never executes tools. Repeated or explicitly forced prompts, low-confidence requests, user-specific data, economy, moderation, permissions, and action requests always continue to the main LLM after normal access controls and rate limits pass. + +Rate-limited WebUI users receive a live retry countdown. Send and retry controls remain disabled until the server-provided cooldown expires. + +Gate inference is bounded to the configured 1-5 second timeout and uses a compact classification-only prompt. Complex, ambiguous, code, and troubleshooting requests bypass the gate. WebUI requests use background jobs with live stage polling, so cold starts and long main-model generations do not hold a proxy-facing request open. + `lumi_ai` is a standalone Lumi plugin that manages a local `llama.cpp` inference process and adds a scoped AI Assistant to the WebUI. ## Install and configure @@ -30,6 +46,12 @@ Downloads are written to `data/tmp/`, verified against a pinned SHA-256 digest, Models use pinned Hugging Face repository commits. The runtime uses a pinned official `ggml-org/llama.cpp` GitHub release because the llama.cpp project does not publish authoritative multi-platform runtime archives on Hugging Face. This is the only download-source exception; the archive URL, version, size, and SHA-256 are pinned in `runtime_manifest.json`. +Long main-model requests run as cancellable background jobs. The WebUI polls job state, shows a recoverable soft-timeout panel, and leaves generation running until the configured hard generation timeout or an explicit cancel. 
+ +GPU diagnostics separate total/free VRAM, Lumi-managed model allocation, and estimated external VRAM pressure. A loaded managed model is not counted as external pressure when calculating the safe GPU allocation. + +Main-model requests use configurable token budgets by request class: navigation/help, simple answers, code/custom commands, admin diagnostics, and explicitly requested long answers. Polling shows live elapsed time and the selected budget, while metrics record the request class, token speeds, and validated stage timings. + The runtime binds only to `127.0.0.1` on an ephemeral port. It is never exposed on `0.0.0.0`. Before loading a model, Lumi AI runs `llama-server --help` as a smoke test. Failed launches and exits are decoded into plugin-local diagnostics, including Windows NTSTATUS values such as `0xC0000005 / STATUS_ACCESS_VIOLATION`. The admin page provides remediation steps, raw stdout/stderr tails, model verification, and a redacted diagnostics bundle. @@ -110,4 +132,4 @@ Run: node plugins/lumi_ai/tests/verify.js ``` -The verification covers path confinement, size formatting, GPU intent and actual allocation, pagination, model and log deletion safety, assistant role access, tool schema and permission checks, queue limits, refusal behavior, and runtime resume persistence. +The verification covers path confinement, size formatting, GPU intent and actual allocation, pagination, model and log deletion safety, assistant role access, Improvement Center permissions and activation, approved-only exports, tool schema and permission checks, queue limits, refusal behavior, and runtime resume persistence. 
diff --git a/plugins/lumi_ai/backend/ai_provider.js b/plugins/lumi_ai/backend/ai_provider.js index f31e301..210386d 100644 --- a/plugins/lumi_ai/backend/ai_provider.js +++ b/plugins/lumi_ai/backend/ai_provider.js @@ -3,10 +3,11 @@ const { buildPrompt } = require("./prompt_builder"); const { roleOf } = require("./permissions"); const { parseToolCall } = require("./tool_router"); const { normalizeScope } = require("./scope_manager"); +const { classifyRequestType } = require("./gate_provider"); class AiProvider { - constructor({ getConfig, runtime, queue, tools, metrics, getContext, lookupRepo, getRepoContext }) { - Object.assign(this, { getConfig, runtime, queue, tools, metrics, getContext, lookupRepo, getRepoContext }); + constructor({ getConfig, runtime, gate, queue, tools, metrics, getContext, lookupRepo, getRepoContext, getCorrections, ensureRuntime }) { + Object.assign(this, { getConfig, runtime, gate, queue, tools, metrics, getContext, lookupRepo, getRepoContext, getCorrections, ensureRuntime }); } async generate({ @@ -18,21 +19,86 @@ class AiProvider { includeRaw = false, originContext = null, allowDeterministicShortcut = null, - history = [] + history = [], + signal = null, + onStage = () => {} }) { const requestId = crypto.randomUUID(); const role = roleOf(user); const started = Date.now(); const cfg = this.getConfig(); const supportScope = normalizeScope(cfg.support_scope); - const repoAnswer = this.lookupRepo?.(message) || null; + let effectiveMessage = message; + let gateDecision = null; + let requestClass = classifyRequestType(message, { role, scope }); + if (this.gate) { + gateDecision = await this.gate.route({ + message, + user, + role, + scope, + originContext, + onStage: (stage, details) => onStage( + ["deterministic", "gating"].includes(stage) ? 
"gate" : stage, + details + ) + }); + effectiveMessage = gateDecision.message || message; + requestClass = gateDecision.request_class || requestClass; + if (gateDecision.answer) { + const answer = gateDecision.answer; + this.metrics.record({ + kind: "request", + status: gateDecision.route === "refusal" ? "refused" : "success", + request_id: requestId, + user_id: user.id, + role, + scope, + route_used: gateDecision.route, + gate_confidence: gateDecision.confidence, + gate_reason_code: gateDecision.reason_code, + route_class: requestClass, + max_output_tokens_used: 0, + deterministic_ms: gateDecision.deterministic_ms, + gate_ms: gateDecision.gate_ms, + main_queue_ms: 0, + main_generate_ms: 0, + total_ms: Date.now() - started, + duration_ms: Date.now() - started + }); + return { + success: gateDecision.route !== "unavailable", + text: answer.text, + links: answer.links || [], + source: answer.source || null, + model_id: cfg.gate?.model_id || "lumi-gate", + route_used: gateDecision.route, + route_class: requestClass, + max_output_tokens_used: 0, + gate_decision: gateDecision, + stage_timings: { + deterministic_ms: gateDecision.deterministic_ms, + gate_ms: gateDecision.gate_ms, + main_queue_ms: 0, + main_generate_ms: 0, + total_ms: Date.now() - started + }, + internal_generated_length: answer.text.length, + duration_ms: Date.now() - started, + queue_wait_ms: 0, + request_id: requestId + }; + } + } + const repoAnswer = this.lookupRepo?.(effectiveMessage) || null; const shortcutSurfaceAllowed = scope === "assistant" || scope === "platform_command"; const guardedRepoAnswer = ["clarification", "contact", "unknown"].includes(repoAnswer?.type); - const verifiedRouteAnswer = isExactHelpShortcut(message, repoAnswer); - if (shortcutSurfaceAllowed && (guardedRepoAnswer || verifiedRouteAnswer)) { + const verifiedRouteAnswer = isExactHelpShortcut(effectiveMessage, repoAnswer); + if (!this.gate && shortcutSurfaceAllowed && (guardedRepoAnswer || verifiedRouteAnswer)) { 
this.metrics.record({ kind: "request", status: "success", request_id: requestId, user_id: user.id, - role, scope: "repo_lookup", route_used: `repo_${repoAnswer.type}`, duration_ms: Date.now() - started + role, scope: "repo_lookup", route_used: `repo_${repoAnswer.type}`, + route_class: requestClass, max_output_tokens_used: 0, duration_ms: Date.now() - started }); return { success: true, @@ -41,6 +107,8 @@ class AiProvider { source: repoAnswer.source || null, model_id: "lumi-repo-index", route_used: `repo_${repoAnswer.type}`, + route_class: requestClass, + max_output_tokens_used: 0, internal_generated_length: repoAnswer.text.length, duration_ms: Date.now() - started, queue_wait_ms: 0, @@ -48,31 +116,90 @@ class AiProvider { }; } + onStage("queued", { + route: "main_llm", + reason_code: gateDecision?.reason_code || "direct_main_llm" + }); + if (this.ensureRuntime) { + const health = await this.runtime.health(); + if (!health.healthy) { + onStage("main_model_loading", { route: "main_llm" }); + await this.ensureRuntime(); + } + } + onStage("queued", { queue_position: this.queue.length + 1 }); return this.queue.run(user.id, role, async (queueWait) => { + const generateStarted = Date.now(); + if (signal?.aborted) throw requestCancelledError(); const repoContext = supportScope.repo_lookup_enabled - ? this.getRepoContext?.(message, role, supportScope.allow_moderator_code_help) || [] + ? 
this.getRepoContext?.(effectiveMessage, role, supportScope.allow_moderator_code_help) || [] : []; + const correctionContext = this.getCorrections?.({ + message: effectiveMessage, + role, + origin: originContext?.origin || originContext?.platform || "webui", + platform: originContext?.platform || originContext?.origin || "webui" + }) || []; const platformToolsAllowed = originContext?.permission_context?.webui_actions_allowed !== false; const prompt = buildPrompt({ config: cfg, role, - message, + message: effectiveMessage, + requestClass, contextBlocks: this.getContext(role), + correctionContext, repoContext, originContext, tools: platformToolsAllowed ? this.tools.list(role) : [] }); const conversation = normalizeHistory(history); - const internalBudget = Math.max(2000, Math.min(64000, Number(cfg.internal_generation_char_budget) || 16000)); - const result = await this.runtime.infer( - [ - { role: "system", content: prompt }, - ...conversation, - { role: "user", content: message } - ], - max_tokens || Math.min(8192, Math.ceil(internalBudget / 3)) - ); + const outputTokenLimit = resolveOutputBudget({ + config: cfg, + requestClass, + explicitMaxTokens: max_tokens + }); + const runtimeSettings = { + backend: this.runtime.activeAcceleration?.backend || this.runtime.runtimeMetadata?.()?.backend || "cpu", + gpu_layers: this.runtime.activeAcceleration?.gpu_layers || 0, + context_size: Number(cfg.context_size) || 4096, + batch_size: Number(cfg.batch_size) || 512, + ubatch_size: Number(cfg.ubatch_size) || 128, + threads: Number(cfg.threads) || 0, + max_output_tokens: outputTokenLimit, + max_output_tokens_used: outputTokenLimit, + route_class: requestClass + }; + onStage("prompt_eval", { route: "main_llm", queue_ms: queueWait, ...runtimeSettings }); + const generatingTimer = setTimeout(() => { + onStage("generating", { route: "main_llm", queue_ms: queueWait, ...runtimeSettings }); + }, 250); + generatingTimer.unref?.(); + let result; + try { + result = await this.runtime.infer( 
+ [ + { role: "system", content: prompt }, + ...conversation, + { role: "user", content: effectiveMessage } + ], + outputTokenLimit, + { + signal, + timeoutMs: cfg.hard_generation_timeout_ms + } + ); + } finally { + clearTimeout(generatingTimer); + } + if (signal?.aborted) throw requestCancelledError(); const text = result.choices?.[0]?.message?.content || ""; + const inference = normalizeInferenceDiagnostics(result, Date.now() - generateStarted); + onStage("generating", { + route: "main_llm", + queue_ms: queueWait, + ...runtimeSettings, + ...inference + }); const toolCall = platformToolsAllowed ? parseToolCall(text) : null; let confirmation = null; let toolResult = null; @@ -95,17 +222,53 @@ class AiProvider { queue_wait_ms: queueWait, finish_reason: result.choices?.[0]?.finish_reason || null, request_id: requestId, - route_used: "llm", - internal_generated_length: text.length + route_used: gateDecision ? "main_llm" : "llm", + route_class: requestClass, + max_output_tokens_used: outputTokenLimit, + gate_decision: gateDecision, + force_through_reason: gateDecision?.forced ? 
gateDecision.reason_code : null, + internal_generated_length: text.length, + stage_timings: { + deterministic_ms: gateDecision?.deterministic_ms || 0, + gate_ms: gateDecision?.gate_ms || 0, + queue_ms: queueWait, + prompt_eval_ms: inference.prompt_eval_ms, + generation_ms: inference.generation_ms, + main_queue_ms: queueWait, + main_generate_ms: inference.generation_ms, + total_ms: Date.now() - started + }, + diagnostics: { + ...inference, + ...runtimeSettings, + gate_ms: gateDecision?.gate_ms || 0, + queue_ms: queueWait, + total_ms: Date.now() - started + } }; this.metrics.record({ kind: "request", status: "success", request_id: requestId, user_id: user.id, role, scope, model: cfg.selected_model_id, duration_ms: out.duration_ms, queue_wait_ms: queueWait, - tool_requested: toolCall?.tool || null, tool_executed: false, route_used: "llm", + tool_requested: toolCall?.tool || null, tool_executed: false, + route_used: gateDecision ? "main_llm" : "llm", + route_class: requestClass, + max_output_tokens_used: outputTokenLimit, + gate_confidence: gateDecision?.confidence, + gate_reason_code: gateDecision?.reason_code, + force_through_reason: gateDecision?.forced ? 
gateDecision.reason_code : null, + deterministic_ms: out.stage_timings.deterministic_ms, + gate_ms: out.stage_timings.gate_ms, + main_queue_ms: out.stage_timings.main_queue_ms, + main_generate_ms: out.stage_timings.main_generate_ms, + queue_ms: out.stage_timings.queue_ms, + prompt_eval_ms: out.stage_timings.prompt_eval_ms, + generation_ms: out.stage_timings.generation_ms, + total_ms: out.stage_timings.total_ms, + ...out.diagnostics, internal_generated_length: text.length }); return out; - }); + }, { signal }); } async classify({ message, labels, user }) { @@ -177,4 +340,76 @@ function normalizeHistory(history, maxMessages = 12, maxCharacters = 12000) { return output; } -module.exports = { AiProvider, isInScope, isClearlyOutOfScope, isIdentityQuery, isExactHelpShortcut, normalizeHistory }; +function normalizeInferenceDiagnostics(result, elapsedMs = 0) { + const timings = result?.timings || {}; + const usage = result?.usage || {}; + const promptTokens = numberValue( + usage.prompt_tokens, + timings.prompt_n, + timings.prompt_tokens + ); + const generatedTokens = numberValue( + usage.completion_tokens, + timings.predicted_n, + timings.predicted_tokens + ); + const promptEvalMs = numberValue(timings.prompt_ms, timings.prompt_eval_ms) || 0; + const generationMs = numberValue(timings.predicted_ms, timings.generation_ms) || + Math.max(0, Number(elapsedMs) - promptEvalMs); + return { + prompt_tokens: promptTokens, + generated_tokens: generatedTokens, + prompt_eval_ms: promptEvalMs, + generation_ms: generationMs, + prompt_tps: numberValue(timings.prompt_per_second, timings.prompt_tps) || + ratePerSecond(promptTokens, promptEvalMs), + generation_tps: numberValue(timings.predicted_per_second, timings.generation_tps) || + ratePerSecond(generatedTokens, generationMs) + }; +} + +function resolveOutputBudget({ config, requestClass, explicitMaxTokens }) { + const requested = Number(explicitMaxTokens); + const classBudget = Number(config?.output_budgets?.[requestClass]); + const 
legacyFallback = Number(config?.max_output_tokens); + const selected = Number.isFinite(requested) && requested > 0 + ? requested + : Number.isFinite(classBudget) && classBudget > 0 + ? classBudget + : Number.isFinite(legacyFallback) && legacyFallback > 0 + ? legacyFallback + : 512; + return Math.max(64, Math.min(32768, Math.round(selected))); +} + +function numberValue(...values) { + for (const value of values) { + const number = Number(value); + if (Number.isFinite(number) && number >= 0) return number; + } + return 0; +} + +function ratePerSecond(tokens, milliseconds) { + return tokens > 0 && milliseconds > 0 + ? Math.round((tokens / milliseconds) * 100000) / 100 + : 0; +} + +function requestCancelledError() { + return Object.assign( + new Error("Assistant request was cancelled."), + { name: "AbortError", code: "REQUEST_CANCELLED" } + ); +} + +module.exports = { + AiProvider, + isInScope, + isClearlyOutOfScope, + isIdentityQuery, + isExactHelpShortcut, + normalizeHistory, + normalizeInferenceDiagnostics, + resolveOutputBudget +}; diff --git a/plugins/lumi_ai/backend/assistant_visibility.js b/plugins/lumi_ai/backend/assistant_visibility.js index 9464a15..5f26318 100644 --- a/plugins/lumi_ai/backend/assistant_visibility.js +++ b/plugins/lumi_ai/backend/assistant_visibility.js @@ -62,7 +62,18 @@ function buildVisibilityDiagnostics({ panel_html_returned: Boolean(frontend.panel_html_returned), mount_successful: Boolean(frontend.mount_successful) }; - const backendAvailable = Object.values(backend).every(Boolean); + const coldStartAvailable = Boolean( + backend.plugin_enabled && + backend.assistant_enabled && + backend.role_allowed && + backend.user_logged_in && + backend.model_installed && + backend.runtime_installed && + runtimeHealth.state === "stopped" && + runtimeHealth.runtime_usable !== false && + runtimeHealth.last_self_test?.success !== false + ); + const backendAvailable = Object.values(backend).every(Boolean) || coldStartAvailable; const reasonOrder = [ 
"user_logged_in", "plugin_enabled", "assistant_enabled", "role_allowed", "model_installed", "runtime_installed", "runtime_running", "runtime_healthy" @@ -70,8 +81,9 @@ function buildVisibilityDiagnostics({ const firstFailure = reasonOrder.find((key) => !backend[key]) || null; return { available: backendAvailable, - status: backendAvailable ? "healthy" : "offline", - reason_code: firstFailure, + status: coldStartAvailable ? "cold_start" : backendAvailable ? "healthy" : "offline", + reason_code: coldStartAvailable ? null : firstFailure, + cold_start_available: coldStartAvailable, conditions: CONDITION_KEYS.map((key) => ({ key, passed: conditions[key] })), permission, updated_at: new Date().toISOString() diff --git a/plugins/lumi_ai/backend/cache.js b/plugins/lumi_ai/backend/cache.js new file mode 100644 index 0000000..2c5f7d0 --- /dev/null +++ b/plugins/lumi_ai/backend/cache.js @@ -0,0 +1,70 @@ +const fs = require("fs"); +const crypto = require("crypto"); +const { resolveData } = require("./paths"); + +class SafeAnswerCache { + constructor(getConfig) { + this.getConfig = getConfig; + this.file = resolveData("cache", "gate-answers.json"); + } + + key({ message, role, platform }) { + const normalized = String(message || "").toLowerCase().replace(/\s+/g, " ").trim(); + return crypto.createHash("sha256") + .update(`gate-cache-v2\n${role || "user"}\n${platform || "webui"}\n${normalized}`) + .digest("hex"); + } + + get(input) { + const entry = this.read().entries[this.key(input)]; + if (!entry) return null; + if (new Date(entry.expires_at).getTime() <= Date.now()) { + this.delete(input); + return null; + } + return entry; + } + + set(input, answer) { + if (!answer?.text || answer.safe !== true) return null; + const ttlSeconds = Math.max(30, Number(this.getConfig()?.gate?.cache_ttl_seconds) || 3600); + const store = this.read(); + const key = this.key(input); + store.entries[key] = { + text: String(answer.text), + links: Array.isArray(answer.links) ? 
answer.links.slice(0, 8) : [], + source: answer.source || null, + created_at: new Date().toISOString(), + expires_at: new Date(Date.now() + ttlSeconds * 1000).toISOString() + }; + this.write(store); + return store.entries[key]; + } + + delete(input) { + const store = this.read(); + delete store.entries[this.key(input)]; + this.write(store); + } + + read() { + try { + const parsed = JSON.parse(fs.readFileSync(this.file, "utf8")); + return { entries: parsed.entries || {} }; + } catch { + return { entries: {} }; + } + } + + write(store) { + const tmp = `${this.file}.${process.pid}.${crypto.randomBytes(4).toString("hex")}.tmp`; + try { + fs.writeFileSync(tmp, `${JSON.stringify(store, null, 2)}\n`); + fs.renameSync(tmp, this.file); + } finally { + fs.rmSync(tmp, { force: true }); + } + } +} + +module.exports = { SafeAnswerCache }; diff --git a/plugins/lumi_ai/backend/config_manager.js b/plugins/lumi_ai/backend/config_manager.js index 50f4e0a..b63df86 100644 --- a/plugins/lumi_ai/backend/config_manager.js +++ b/plugins/lumi_ai/backend/config_manager.js @@ -13,11 +13,41 @@ const DEFAULT_CONFIG = { concurrency: 1, max_queue_length: 8, request_timeout_ms: 120000, + ui_soft_timeout_ms: 45000, + hard_generation_timeout_ms: 600000, + max_output_tokens: 2048, + output_budgets: { + navigation_help: 256, + simple_answer: 512, + code_custom_command: 896, + admin_debug: 1280, + explicit_long: 2048 + }, + batch_size: 512, + ubatch_size: 128, per_user_requests_per_minute: 6, admin_bypass_rate_limit: false, assistant_enabled: true, assistant_debug_logging: false, assistant_visibility: { admins: true, mods: false, users: false }, + improvement: { + allow_moderators_to_review_responses: false, + trusted_moderator_reviewers: [], + corrections_enabled: true + }, + gate: { + model_id: "smollm2-360m-q8", + context_size: 1024, + threads: 2, + timeout_ms: 3000, + high_confidence_threshold: 0.88, + main_llm_threshold: 0.72, + predefined_enabled: true, + cache_ttl_seconds: 3600, + 
repeat_force_window_seconds: 90, + similarity_threshold: 0.86, + force_prefix: "force ai:" + }, commands: { enabled: true, triggers: ["assistant", "lumi"], @@ -71,6 +101,9 @@ function getConfig() { max_answer_length: config.instructions?.maximum_answer_length }); config.assistant_visibility = { ...DEFAULT_CONFIG.assistant_visibility, ...(config.assistant_visibility || {}) }; + config.improvement = mergeImprovement(config.improvement); + config.output_budgets = mergeOutputBudgets(config.output_budgets); + config.gate = mergeGate(config.gate); config.instructions = { ...DEFAULT_CONFIG.instructions, ...(config.instructions || {}) }; config.logging = { ...DEFAULT_CONFIG.logging, ...(config.logging || {}) }; config.commands = mergeCommands(config.commands); @@ -88,8 +121,34 @@ function saveConfig(value) { 2000, Math.min(64000, Number(value.internal_generation_char_budget) || DEFAULT_CONFIG.internal_generation_char_budget) ); + merged.ui_soft_timeout_ms = boundedNumber( + value.ui_soft_timeout_ms, + 5000, + 300000, + DEFAULT_CONFIG.ui_soft_timeout_ms + ); + merged.hard_generation_timeout_ms = boundedNumber( + value.hard_generation_timeout_ms ?? 
value.request_timeout_ms, + 30000, + 3600000, + DEFAULT_CONFIG.hard_generation_timeout_ms + ); + merged.max_output_tokens = boundedNumber( + value.max_output_tokens, + 64, + 32768, + DEFAULT_CONFIG.max_output_tokens + ); + merged.output_budgets = mergeOutputBudgets(value.output_budgets); + merged.batch_size = boundedNumber(value.batch_size, 32, 4096, DEFAULT_CONFIG.batch_size); + merged.ubatch_size = Math.min( + merged.batch_size, + boundedNumber(value.ubatch_size, 16, 4096, DEFAULT_CONFIG.ubatch_size) + ); delete merged.gpu_workload_percent; merged.assistant_visibility = { ...DEFAULT_CONFIG.assistant_visibility, ...(value.assistant_visibility || {}) }; + merged.improvement = mergeImprovement(value.improvement); + merged.gate = mergeGate(value.gate); merged.support_scope = normalizeScope(value.support_scope); merged.instructions = { ...DEFAULT_CONFIG.instructions, ...(value.instructions || {}) }; merged.logging = { ...DEFAULT_CONFIG.logging, ...(value.logging || {}) }; @@ -121,4 +180,64 @@ function mergeCommands(value = {}) { }; } +function mergeGate(value = {}) { + return { + ...DEFAULT_CONFIG.gate, + ...value, + context_size: Math.max(512, Math.min(4096, Number(value.context_size) || DEFAULT_CONFIG.gate.context_size)), + threads: Math.max(1, Math.min(16, Number(value.threads) || DEFAULT_CONFIG.gate.threads)), + timeout_ms: boundedGateNumber(value.timeout_ms, 1000, 5000, DEFAULT_CONFIG.gate.timeout_ms), + high_confidence_threshold: clampConfidence(value.high_confidence_threshold, DEFAULT_CONFIG.gate.high_confidence_threshold), + main_llm_threshold: clampConfidence(value.main_llm_threshold, DEFAULT_CONFIG.gate.main_llm_threshold), + cache_ttl_seconds: Math.max(30, Math.min(604800, Number(value.cache_ttl_seconds) || DEFAULT_CONFIG.gate.cache_ttl_seconds)), + repeat_force_window_seconds: boundedGateNumber( + value.repeat_force_window_seconds, + 0, + 3600, + DEFAULT_CONFIG.gate.repeat_force_window_seconds + ), + similarity_threshold: 
clampConfidence(value.similarity_threshold, DEFAULT_CONFIG.gate.similarity_threshold), + force_prefix: String(value.force_prefix ?? DEFAULT_CONFIG.gate.force_prefix).trim().slice(0, 40) + }; +} + +function mergeOutputBudgets(value = {}) { + return Object.fromEntries( + Object.entries(DEFAULT_CONFIG.output_budgets).map(([key, fallback]) => [ + key, + boundedNumber(value?.[key], 64, 32768, fallback) + ]) + ); +} + +function mergeImprovement(value = {}) { + return { + ...DEFAULT_CONFIG.improvement, + ...value, + allow_moderators_to_review_responses: value.allow_moderators_to_review_responses === true, + corrections_enabled: value.corrections_enabled !== false, + trusted_moderator_reviewers: [...new Set( + (Array.isArray(value.trusted_moderator_reviewers) ? value.trusted_moderator_reviewers : []) + .map((entry) => String(entry || "").trim()) + .filter(Boolean) + .slice(0, 100) + )] + }; +} + +function clampConfidence(value, fallback) { + const number = Number(value); + return Number.isFinite(number) ? Math.max(0, Math.min(1, number)) : fallback; +} + +function boundedGateNumber(value, min, max, fallback) { + const number = Number(value); + return Number.isFinite(number) ? Math.max(min, Math.min(max, number)) : fallback; +} + +function boundedNumber(value, min, max, fallback) { + const number = Number(value); + return Number.isFinite(number) ? 
Math.max(min, Math.min(max, Math.round(number))) : fallback; +} + module.exports = { DEFAULT_CONFIG, getConfig, saveConfig, getRuntimeState, saveRuntimeState, readJson, writeJson }; diff --git a/plugins/lumi_ai/backend/corrections.js b/plugins/lumi_ai/backend/corrections.js new file mode 100644 index 0000000..fbb2a9c --- /dev/null +++ b/plugins/lumi_ai/backend/corrections.js @@ -0,0 +1,226 @@ +const fs = require("fs"); +const crypto = require("crypto"); +const { resolveData } = require("./paths"); +const { roleAllows } = require("./permissions"); +const { similarity, isSensitiveRequest } = require("./gate_provider"); +const { atomicJson, paginate } = require("./feedback"); + +const PROMOTION_TARGETS = Object.freeze([ + "correction", + "route_alias", + "predefined_answer", + "eval_case", + "training_export" +]); + +class CorrectionStore { + constructor({ getConfig, verifyLink = () => false, file } = {}) { + this.getConfig = getConfig || (() => ({})); + this.verifyLink = verifyLink; + this.file = file || resolveData("corrections", "corrections.json"); + } + + createFromFeedback(feedback, values, actor) { + if (!feedback || feedback.status !== "approved") { + throw new Error("Only approved feedback can be implemented."); + } + const target = PROMOTION_TARGETS.includes(values.target) ? 
values.target : "correction"; + const answer = clean(values.corrected_answer || feedback.optional_correction, 16000); + if (!answer && target !== "eval_case") throw new Error("A corrected answer is required."); + const expectedLink = clean(values.expected_link, 2000); + this.validateLinks(answer, expectedLink); + if (target === "route_alias" && !expectedLink) throw new Error("Route aliases require a verified Lumi route."); + if (target === "predefined_answer" && isSensitiveRequest(feedback.user_message) && values.explicitly_safe !== true) { + throw new Error("Sensitive or user-specific predefined answers must be explicitly marked safe."); + } + const entry = { + id: crypto.randomUUID(), + source_feedback_id: feedback.id, + prompt: feedback.user_message, + corrected_answer: answer, + rejected_answer: feedback.assistant_answer, + target, + route_alias: clean(values.route_alias, 500), + expected_link: expectedLink, + min_role: normalizeRole(values.min_role || feedback.role), + permission_scope: { + origin: clean(values.permission_origin || feedback.origin, 80) || "any", + platform: clean(values.permission_platform || feedback.platform, 80) || "any" + }, + explicitly_safe: values.explicitly_safe === true, + enabled: values.enabled !== false, + approved: true, + approved_by: String(actor.id), + approved_at: new Date().toISOString(), + verified_by: null, + verified_at: null, + active: false, + created_at: new Date().toISOString(), + updated_at: new Date().toISOString() + }; + const store = this.read(); + store.entries.unshift(entry); + this.write(store); + return entry; + } + + list({ page = 1, pageSize = 20 } = {}) { + return paginate(this.read().entries, page, pageSize); + } + + all() { + return this.read().entries; + } + + get(id) { + return this.read().entries.find((entry) => entry.id === id) || null; + } + + update(id, values) { + return this.mutate(id, (entry) => { + const correctedAnswer = clean(values.corrected_answer ?? 
entry.corrected_answer, 16000); + const expectedLink = clean(values.expected_link ?? entry.expected_link, 2000); + this.validateLinks(correctedAnswer, expectedLink); + if (entry.target === "route_alias" && !expectedLink) throw new Error("Route aliases require a verified Lumi route."); + return { + ...entry, + corrected_answer: correctedAnswer, + route_alias: clean(values.route_alias ?? entry.route_alias, 500), + expected_link: expectedLink, + min_role: normalizeRole(values.min_role || entry.min_role), + permission_scope: { + origin: clean(values.permission_origin ?? entry.permission_scope?.origin, 80) || "any", + platform: clean(values.permission_platform ?? entry.permission_scope?.platform, 80) || "any" + }, + explicitly_safe: values.explicitly_safe ?? entry.explicitly_safe, + enabled: values.enabled ?? entry.enabled, + active: false, + updated_at: new Date().toISOString() + }; + }); + } + + validateLinks(answer, expectedLink) { + const links = [expectedLink, ...internalRouteReferences(answer)].filter(Boolean); + if (links.some((link) => !this.verifyLink(link))) { + throw new Error("Internal correction links must match a verified Lumi route."); + } + } + + verify(id, actor) { + return this.mutate(id, (entry) => ({ + ...entry, + approved: true, + verified_by: String(actor.id), + verified_at: new Date().toISOString(), + active: false, + updated_at: new Date().toISOString() + })); + } + + setEnabled(id, enabled) { + return this.mutate(id, (entry) => ({ + ...entry, + enabled: Boolean(enabled), + active: false, + updated_at: new Date().toISOString() + })); + } + + saveCorrections(actor) { + const store = this.read(); + let active = 0; + for (const entry of store.entries) { + entry.active = Boolean(entry.enabled && entry.approved); + entry.saved_by = String(actor.id); + entry.saved_at = new Date().toISOString(); + if (entry.active) active += 1; + } + this.write(store); + return { total: store.entries.length, active }; + } + + match(input, limit = 4) { + if 
(this.getConfig()?.improvement?.corrections_enabled === false) return []; + const role = normalizeRole(input.role); + return this.read().entries + .filter((entry) => entry.active && entry.enabled && entry.approved) + .filter((entry) => roleAllows(role, entry.min_role)) + .filter((entry) => scopeAllows(entry.permission_scope, input)) + .map((entry) => ({ ...entry, score: similarity(entry.prompt, input.message) })) + .filter((entry) => entry.score >= 0.45) + .sort((left, right) => right.score - left.score) + .slice(0, limit); + } + + findPredefined(input) { + return this.match(input, 10).find((entry) => + ["predefined_answer", "route_alias"].includes(entry.target) && + entry.explicitly_safe === true && + entry.score >= 0.82 + ) || null; + } + + context(input, limit = 4) { + return this.match(input, limit) + .filter((entry) => ["correction", "route_alias", "predefined_answer"].includes(entry.target)) + .map((entry) => [ + `Reviewed correction for a similar request (minimum role: ${entry.min_role}):`, + `Request: ${entry.prompt}`, + `Approved answer: ${entry.corrected_answer}`, + entry.expected_link ? `Verified link: ${entry.expected_link}` : "" + ].filter(Boolean).join("\n")); + } + + delete(id) { + const store = this.read(); + const before = store.entries.length; + store.entries = store.entries.filter((entry) => entry.id !== id); + if (before === store.entries.length) return false; + this.write(store); + return true; + } + + mutate(id, updater) { + const store = this.read(); + const index = store.entries.findIndex((entry) => entry.id === id); + if (index < 0) throw new Error("Correction was not found."); + store.entries[index] = updater(store.entries[index]); + this.write(store); + return store.entries[index]; + } + + read() { + try { + const parsed = JSON.parse(fs.readFileSync(this.file, "utf8")); + return { entries: Array.isArray(parsed.entries) ? 
parsed.entries : [] }; + } catch { + return { entries: [] }; + } + } + + write(store) { + atomicJson(this.file, { entries: store.entries.slice(0, 5000) }); + } +} + +function scopeAllows(scope = {}, input = {}) { + const originAllowed = !scope.origin || scope.origin === "any" || scope.origin === input.origin; + const platformAllowed = !scope.platform || scope.platform === "any" || scope.platform === input.platform; + return originAllowed && platformAllowed; +} + +function normalizeRole(value) { + return ["admin", "mod", "user"].includes(value) ? value : "user"; +} + +function clean(value, max) { + return String(value || "").trim().slice(0, max); +} + +function internalRouteReferences(value) { + return [...String(value || "").matchAll(/(?:^|[\s("'`])((?:GET\s+)?\/[a-z0-9_./:-]+)/gi)] + .map((match) => match[1].replace(/[.,;:!?]+$/, "")); +} + +module.exports = { PROMOTION_TARGETS, CorrectionStore, scopeAllows, internalRouteReferences }; diff --git a/plugins/lumi_ai/backend/evals.js b/plugins/lumi_ai/backend/evals.js new file mode 100644 index 0000000..f3db9d9 --- /dev/null +++ b/plugins/lumi_ai/backend/evals.js @@ -0,0 +1,141 @@ +const fs = require("fs"); +const crypto = require("crypto"); +const { resolveData } = require("./paths"); +const { atomicJson, paginate } = require("./feedback"); + +class EvalStore { + constructor(options = {}) { + this.casesFile = options.casesFile || resolveData("evals", "cases.json"); + this.resultsFile = options.resultsFile || resolveData("evals", "results.json"); + } + + add(values, actor) { + const prompt = clean(values.prompt, 6000); + if (!prompt) throw new Error("Eval prompt is required."); + const entry = { + id: crypto.randomUUID(), + prompt, + role: normalizeRole(values.role), + origin: clean(values.origin, 80) || "webui", + expected_behavior: clean(values.expected_behavior, 8000), + forbidden_behavior: clean(values.forbidden_behavior, 8000), + expected_link: clean(values.expected_link, 2000), + notes: clean(values.notes, 
4000), + created_by: String(actor.id), + created_at: new Date().toISOString() + }; + const store = this.readCases(); + store.entries.unshift(entry); + atomicJson(this.casesFile, store); + return entry; + } + + list({ page = 1, pageSize = 20 } = {}) { + return paginate(this.readCases().entries, page, pageSize); + } + + results(limit = 100) { + return this.readResults().entries.slice(0, limit); + } + + delete(id) { + const store = this.readCases(); + const before = store.entries.length; + store.entries = store.entries.filter((entry) => entry.id !== id); + if (store.entries.length === before) return false; + atomicJson(this.casesFile, store); + return true; + } + + async runAll({ provider, actor }) { + const results = []; + for (const testCase of this.readCases().entries) { + const simulatedUser = { + id: `eval:${actor.id}`, + username: "lumi-eval", + isAdmin: testCase.role === "admin", + isMod: testCase.role === "mod" + }; + try { + const response = await provider.generate({ + message: testCase.prompt, + user: simulatedUser, + sessionId: `eval:${testCase.id}:${Date.now()}`, + scope: "eval", + originContext: { + origin: testCase.origin, + platform: testCase.origin, + role: testCase.role, + permission_context: { webui_actions_allowed: false } + } + }); + results.push(evaluateCase(testCase, response.text, response.links)); + } catch (error) { + results.push({ + case_id: testCase.id, + prompt: testCase.prompt, + status: "manual_review", + error: error.message, + run_at: new Date().toISOString() + }); + } + } + const store = this.readResults(); + store.entries = [...results, ...store.entries].slice(0, 1000); + atomicJson(this.resultsFile, store); + return results; + } + + readCases() { + return readStore(this.casesFile); + } + + readResults() { + return readStore(this.resultsFile); + } +} + +function evaluateCase(testCase, answer, links = []) { + const text = String(answer || ""); + const expected = splitChecks(testCase.expected_behavior); + const forbidden = 
splitChecks(testCase.forbidden_behavior); + const expectedPass = expected.every((check) => text.toLowerCase().includes(check.toLowerCase())); + const forbiddenPass = forbidden.every((check) => !text.toLowerCase().includes(check.toLowerCase())); + const linkPass = !testCase.expected_link || + text.includes(testCase.expected_link) || + links.some((link) => link.href === testCase.expected_link); + const hasAutomatedChecks = expected.length || forbidden.length || testCase.expected_link; + return { + case_id: testCase.id, + prompt: testCase.prompt, + status: !hasAutomatedChecks ? "manual_review" : expectedPass && forbiddenPass && linkPass ? "pass" : "fail", + expected_pass: expectedPass, + forbidden_pass: forbiddenPass, + link_pass: linkPass, + answer: text.slice(0, 16000), + run_at: new Date().toISOString() + }; +} + +function splitChecks(value) { + return String(value || "").split(/\r?\n|;/).map((entry) => entry.trim()).filter(Boolean); +} + +function readStore(file) { + try { + const parsed = JSON.parse(fs.readFileSync(file, "utf8")); + return { entries: Array.isArray(parsed.entries) ? parsed.entries : [] }; + } catch { + return { entries: [] }; + } +} + +function clean(value, max) { + return String(value || "").trim().slice(0, max); +} + +function normalizeRole(value) { + return ["admin", "mod", "user"].includes(value) ? 
value : "user"; +} + +module.exports = { EvalStore, evaluateCase }; diff --git a/plugins/lumi_ai/backend/feedback.js b/plugins/lumi_ai/backend/feedback.js new file mode 100644 index 0000000..65c116b --- /dev/null +++ b/plugins/lumi_ai/backend/feedback.js @@ -0,0 +1,204 @@ +const fs = require("fs"); +const crypto = require("crypto"); +const { resolveData } = require("./paths"); +const { roleOf } = require("./permissions"); + +const FEEDBACK_TAGS = Object.freeze([ + "good", + "bad", + "wrong_link", + "hallucinated", + "too_generic", + "unsafe", + "should_clarify", + "bad_code", + "wrong_scope" +]); + +class FeedbackStore { + constructor(options = {}) { + this.file = options.file || resolveData("feedback", "reviews.json"); + } + + capture(input, actor) { + const tag = FEEDBACK_TAGS.includes(input.feedback_tag) ? input.feedback_tag : null; + if (!tag) throw new Error("Unknown feedback tag."); + const entry = { + id: crypto.randomUUID(), + user_message: clean(input.user_message, 6000), + assistant_answer: clean(input.assistant_answer, 16000), + route_used: clean(input.route_used, 120), + role: normalizeRole(input.role), + origin: clean(input.origin, 80) || "webui", + platform: clean(input.platform, 80) || "webui", + model: clean(input.model, 200), + timestamp: validDate(input.timestamp) || new Date().toISOString(), + feedback_tag: tag, + optional_correction: clean(input.optional_correction, 16000), + status: "pending", + submitted_by: String(actor?.id || "anonymous"), + reviewed_by: null, + reviewed_at: null, + verified_by: null, + verified_at: null, + review_notes: "", + export_approved: false + }; + if (!entry.user_message || !entry.assistant_answer) { + throw new Error("Feedback requires the user message and assistant answer."); + } + const store = this.read(); + store.entries.unshift(entry); + this.write(store); + return entry; + } + + list({ page = 1, pageSize = 20, status = "" } = {}) { + const filtered = this.read().entries.filter((entry) => !status || 
entry.status === status); + return paginate(filtered, page, pageSize); + } + + all() { + return this.read().entries; + } + + get(id) { + return this.read().entries.find((entry) => entry.id === id) || null; + } + + edit(id, values, actor) { + return this.mutate(id, (entry) => ({ + ...entry, + feedback_tag: FEEDBACK_TAGS.includes(values.feedback_tag) ? values.feedback_tag : entry.feedback_tag, + optional_correction: clean(values.optional_correction, 16000), + review_notes: clean(values.review_notes, 4000), + reviewed_by: String(actor.id), + reviewed_at: new Date().toISOString() + })); + } + + setStatus(id, status, actor, notes = "") { + if (!["pending", "flagged", "verified", "approved", "rejected"].includes(status)) { + throw new Error("Invalid review status."); + } + return this.mutate(id, (entry) => ({ + ...entry, + status, + review_notes: clean(notes, 4000) || entry.review_notes, + reviewed_by: String(actor.id), + reviewed_at: new Date().toISOString() + })); + } + + verify(id, actor, notes = "") { + return this.mutate(id, (entry) => ({ + ...entry, + status: entry.status === "rejected" ? 
"rejected" : "verified", + review_notes: clean(notes, 4000) || entry.review_notes, + verified_by: String(actor.id), + verified_at: new Date().toISOString() + })); + } + + markExportApproved(id, actor) { + return this.mutate(id, (entry) => ({ + ...entry, + export_approved: true, + reviewed_by: entry.reviewed_by || String(actor.id), + reviewed_at: entry.reviewed_at || new Date().toISOString() + })); + } + + delete(id) { + const store = this.read(); + const before = store.entries.length; + store.entries = store.entries.filter((entry) => entry.id !== id); + if (store.entries.length === before) return false; + this.write(store); + return true; + } + + mutate(id, updater) { + const store = this.read(); + const index = store.entries.findIndex((entry) => entry.id === id); + if (index < 0) throw new Error("Feedback review was not found."); + store.entries[index] = updater(store.entries[index]); + this.write(store); + return store.entries[index]; + } + + read() { + try { + const parsed = JSON.parse(fs.readFileSync(this.file, "utf8")); + return { entries: Array.isArray(parsed.entries) ? 
parsed.entries : [] }; + } catch { + return { entries: [] }; + } + } + + write(store) { + atomicJson(this.file, { entries: store.entries.slice(0, 5000) }); + } +} + +function improvementAccess(user, config = {}) { + const role = roleOf(user); + const improvement = config.improvement || {}; + const trusted = role === "mod" && + (improvement.trusted_moderator_reviewers || []).map(String).includes(String(user?.id)); + const allowed = role === "admin" || + (role === "mod" && improvement.allow_moderators_to_review_responses === true); + return { + allowed, + role, + trusted, + can_submit: allowed, + can_flag: allowed, + can_verify: role === "admin" || trusted, + can_approve: role === "admin", + can_edit: role === "admin", + can_delete: role === "admin", + can_implement: role === "admin", + can_export: role === "admin", + can_run_evals: role === "admin" + }; +} + +function paginate(rows, pageValue, pageSizeValue) { + const pageSize = Math.max(1, Math.min(100, Number.parseInt(pageSizeValue, 10) || 20)); + const pages = Math.max(1, Math.ceil(rows.length / pageSize)); + const page = Math.min(pages, Math.max(1, Number.parseInt(pageValue, 10) || 1)); + const start = (page - 1) * pageSize; + return { entries: rows.slice(start, start + pageSize), page, pages, page_size: pageSize, total: rows.length }; +} + +function atomicJson(file, value) { + const tmp = `${file}.${process.pid}.${crypto.randomBytes(4).toString("hex")}.tmp`; + try { + fs.writeFileSync(tmp, `${JSON.stringify(value, null, 2)}\n`); + fs.renameSync(tmp, file); + } finally { + fs.rmSync(tmp, { force: true }); + } +} + +function clean(value, max) { + return String(value || "").trim().slice(0, max); +} + +function normalizeRole(value) { + return ["admin", "mod", "user"].includes(value) ? value : "user"; +} + +function validDate(value) { + const date = new Date(value); + return Number.isNaN(date.getTime()) ? 
null : date.toISOString(); +} + +module.exports = { + FEEDBACK_TAGS, + FeedbackStore, + improvementAccess, + paginate, + atomicJson +}; diff --git a/plugins/lumi_ai/backend/gate_provider.js b/plugins/lumi_ai/backend/gate_provider.js new file mode 100644 index 0000000..8137ae3 --- /dev/null +++ b/plugins/lumi_ai/backend/gate_provider.js @@ -0,0 +1,398 @@ +const SAFE_ROUTES = new Set([ + "cached_answer", + "predefined_answer", + "main_llm", + "clarification", + "refusal", + "unavailable" +]); + +class GateProvider { + constructor({ getConfig, runtime, lookupRepo, lookupCorrection, cache, metrics }) { + Object.assign(this, { getConfig, runtime, lookupRepo, lookupCorrection, cache, metrics }); + this.recentPrompts = new Map(); + } + + async route({ message, user, role, scope, originContext, onStage = () => {} }) { + const started = Date.now(); + const cfg = this.getConfig(); + const gate = cfg.gate || {}; + const prepared = stripForcePrefix(message, gate.force_prefix); + const context = { + message: prepared.message, + role, + platform: originContext?.platform || originContext?.origin || "webui" + }; + const requestClass = classifyRequestType(context.message, { role, scope }); + onStage("deterministic"); + const forceReason = prepared.forced + ? "explicit_force_prefix" + : this.isRepeat(context, user?.id, scope, gate) + ? 
"repeat_prompt_force" + : null; + this.remember(context.message, user?.id, scope, gate); + + if (forceReason) { + return this.finish({ + route: "main_llm", + confidence: 1, + reason_code: forceReason, + message: context.message, + forced: true, + request_class: requestClass, + deterministic_ms: Date.now() - started, + gate_ms: 0 + }, started, context); + } + + if (isSensitiveRequest(context.message)) { + return this.finish({ + route: "main_llm", + confidence: 1, + reason_code: "sensitive_or_user_specific", + message: context.message, + request_class: requestClass, + deterministic_ms: Date.now() - started, + gate_ms: 0 + }, started, context); + } + + const reviewed = gate.predefined_enabled !== false + ? this.lookupCorrection?.({ + ...context, + origin: originContext?.origin || context.platform + }) + : null; + if (reviewed) { + return this.finish({ + route: "predefined_answer", + confidence: reviewed.score, + reason_code: `approved_${reviewed.target}`, + message: context.message, + answer: { + text: reviewed.corrected_answer, + links: reviewed.expected_link + ? [{ label: reviewed.route_alias || "Open verified Lumi page", href: reviewed.expected_link }] + : [], + source: { type: "approved_correction", id: reviewed.id }, + safe: true + }, + request_class: requestClass, + deterministic_ms: Date.now() - started, + gate_ms: 0 + }, started, context); + } + + const cached = gate.predefined_enabled !== false ? this.cache?.get(context) : null; + if (cached) { + return this.finish({ + route: "cached_answer", + confidence: 1, + reason_code: "exact_cache_hit", + message: context.message, + answer: cached, + request_class: requestClass, + deterministic_ms: Date.now() - started, + gate_ms: 0 + }, started, context); + } + + const repoAnswer = gate.predefined_enabled !== false + ? 
this.lookupRepo?.(context.message) || null + : null; + if (isExactPredefinedQuery(context.message, repoAnswer)) { + const answer = { + text: repoAnswer.text, + links: repoAnswer.links || [], + source: repoAnswer.source || null, + safe: true + }; + this.cache?.set(context, answer); + return this.finish({ + route: "predefined_answer", + confidence: 1, + reason_code: `exact_verified_${repoAnswer.type}`, + message: context.message, + answer, + request_class: "navigation_help", + deterministic_ms: Date.now() - started, + gate_ms: 0 + }, started, context); + } + + if (isComplexOrAmbiguous(context.message)) { + return this.finish({ + route: "main_llm", + confidence: 1, + reason_code: "deterministic_complexity_escalation", + message: context.message, + request_class: requestClass, + deterministic_ms: Date.now() - started, + gate_ms: 0 + }, started, context); + } + + const deterministicMs = Date.now() - started; + const gateStarted = Date.now(); + onStage("gating"); + let classification; + try { + classification = await this.classify(context); + } catch (error) { + classification = { + route: "main_llm", + confidence: 0, + reason_code: isTimeoutError(error) ? 
"gate_timeout_escalated" : "gate_error_escalated", + gate_error: error.message + }; + } + + const normalized = normalizeDecision(classification); + const mainThreshold = Math.max(0.1, Math.min(0.95, Number(gate.main_llm_threshold) || 0.72)); + const highThreshold = Math.max(0.5, Math.min(0.99, Number(gate.high_confidence_threshold) || 0.88)); + let decision = normalized; + + if (["refusal", "unavailable"].includes(decision.route) && decision.confidence < highThreshold) { + decision = { + route: "main_llm", + confidence: decision.confidence, + reason_code: "terminal_route_low_confidence" + }; + } else if (["cached_answer", "predefined_answer"].includes(decision.route)) { + decision = { + route: "main_llm", + confidence: decision.confidence, + reason_code: "gate_cannot_authorize_predefined" + }; + } else if ( + (decision.confidence < mainThreshold || !SAFE_ROUTES.has(decision.route)) && + !["gate_timeout_escalated", "gate_error_escalated"].includes(decision.reason_code) + ) { + decision = { + route: "main_llm", + confidence: decision.confidence, + reason_code: "low_confidence" + }; + } else if (decision.route === "refusal") { + decision.answer = { + text: cfg.instructions?.out_of_scope_response || "I cannot help with that request.", + links: [], + safe: true + }; + } else if (decision.route === "unavailable") { + decision.answer = { + text: cfg.commands?.unavailable_message || "Lumi Assistant is currently unavailable.", + links: [], + safe: true + }; + } else if (decision.route === "clarification") { + decision.route = "main_llm"; + decision.reason_code = "clarification_requires_main_llm"; + } + + return this.finish({ + ...decision, + message: context.message, + request_class: requestClass, + deterministic_ms: deterministicMs, + gate_ms: Date.now() - gateStarted + }, started, context); + } + + async classify(context) { + if (this.runtime.status().state !== "running") throw new Error("Gate runtime is unavailable."); + const timeoutMs = Math.max(1000, Math.min(5000, 
Number(this.getConfig().gate?.timeout_ms) || 3000)); + const prompt = [ + "Classify only. JSON only.", + "Routes: main_llm, refusal, unavailable.", + "Escalate uncertainty or complexity to main_llm.", + '{"route":"main_llm","confidence":0.0,"reason_code":"short_code"}' + ].join("\n"); + const result = await withTimeout(this.runtime.infer([ + { role: "system", content: prompt }, + { role: "user", content: String(context.message).slice(0, 1000) } + ], 64, timeoutMs), timeoutMs); + return parseDecision(result.choices?.[0]?.message?.content); + } + + isRepeat(context, userId, scope, gate) { + const configuredWindow = Number(gate.repeat_force_window_seconds); + const windowMs = Math.max(0, Number.isFinite(configuredWindow) ? configuredWindow : 90) * 1000; + if (!windowMs) return false; + const key = `${userId || "anonymous"}:${scope || "assistant"}`; + const rows = (this.recentPrompts.get(key) || []).filter((entry) => Date.now() - entry.at <= windowMs); + const threshold = Math.max(0.5, Math.min(1, Number(gate.similarity_threshold) || 0.86)); + return rows.some((entry) => similarity(entry.message, context.message) >= threshold); + } + + remember(message, userId, scope, gate) { + const key = `${userId || "anonymous"}:${scope || "assistant"}`; + const configuredWindow = Number(gate.repeat_force_window_seconds); + const windowMs = Math.max(1, Number.isFinite(configuredWindow) ? 
configuredWindow : 90) * 1000; + const rows = (this.recentPrompts.get(key) || []) + .filter((entry) => Date.now() - entry.at <= windowMs) + .slice(-9); + rows.push({ message, at: Date.now() }); + this.recentPrompts.set(key, rows); + } + + finish(decision, started, context) { + const output = { + route: decision.route, + confidence: Number(decision.confidence) || 0, + reason_code: decision.reason_code || "unspecified", + answer: decision.answer || null, + message: decision.message || context.message, + forced: Boolean(decision.forced), + request_class: normalizeRequestClass(decision.request_class), + deterministic_ms: Math.max(0, Number(decision.deterministic_ms) || 0), + gate_ms: Math.max(0, Number(decision.gate_ms) || 0), + duration_ms: Date.now() - started + }; + this.metrics.record({ + kind: "gate_decision", + status: "success", + route_used: output.route, + confidence: output.confidence, + reason_code: output.reason_code, + request_class: output.request_class, + route_class: output.request_class, + deterministic_ms: output.deterministic_ms, + gate_ms: output.gate_ms, + duration_ms: output.duration_ms, + platform: context.platform + }); + return output; + } +} + +function parseDecision(value) { + const text = String(value || "").trim(); + const match = text.match(/\{[\s\S]*\}/); + if (!match) throw new Error("Gate model returned invalid JSON."); + return JSON.parse(match[0]); +} + +function normalizeDecision(value = {}) { + return { + route: SAFE_ROUTES.has(value.route) ? value.route : "main_llm", + confidence: Math.max(0, Math.min(1, Number(value.confidence) || 0)), + reason_code: /^[a-z0-9_]{2,80}$/.test(String(value.reason_code || "")) + ? 
value.reason_code + : "invalid_reason_code" + }; +} + +function stripForcePrefix(message, prefix = "force ai:") { + const text = String(message || "").trim(); + const configured = String(prefix || "").trim(); + if (!configured || !text.toLowerCase().startsWith(configured.toLowerCase())) { + return { message: text, forced: false }; + } + return { message: text.slice(configured.length).trim() || text, forced: true }; +} + +function isSensitiveRequest(message) { + return /\b(delete|remove|ban|timeout|moderate|transfer|pay|give|balance|inventory|economy|points|currency|database|file|execute|run|install|api|token|password|secret|permission|role|my|mine|our|ours|their|theirs|this user|user id|username)\b/i + .test(String(message || "")); +} + +function isCacheSafeRepoAnswer(answer) { + if (!answer?.text) return false; + if (answer.type === "route") return answer.source?.confidence === "high"; + return ["contact", "unknown"].includes(answer.type); +} + +function isExactPredefinedQuery(message, answer) { + if (!isCacheSafeRepoAnswer(answer)) return false; + if (isComplexOrAmbiguous(message)) return false; + if (answer.type === "contact") return true; + if (answer.type !== "route" || answer.source?.confidence !== "high") return false; + return /\b(where|open|find|navigate|page|screen|menu|settings?|configuration|wizard|location)\b/i + .test(String(message || "")); +} + +function isComplexOrAmbiguous(message) { + const text = String(message || ""); + if (text.length > 500 || text.split(/\s+/).length > 70) return true; + return /\b(why|explain|debug|diagnos|troubleshoot|fix|error|failed|failure|code|javascript|python|implement|design|compare|analy[sz]e|step by step|multi[- ]?step|architecture|configure and|set up and|what should|this|that|it)\b/i + .test(text); +} + +function isTimeoutError(error) { + return error?.name === "TimeoutError" || error?.name === "AbortError" || /timed?\s*out|timeout/i.test(error?.message || ""); +} + +function classifyRequestType(message, { role = 
"user", scope = "assistant" } = {}) { + const text = String(message || "").trim(); + if (/\b(explicitly|please|give|write|provide|show)\b[\s\S]{0,60}\b(long|detailed|comprehensive|thorough|in[- ]depth)\b|\b(full (analysis|report|guide|explanation)|in detail|very detailed|long answer)\b/i.test(text)) { + return "explicit_long"; + } + if (/\b(custom command|javascript|python|code block|function run\s*\(|def run\s*\(|implement|write code|script)\b/i.test(text)) { + return "code_custom_command"; + } + if ( + role === "admin" && + (scope === "model_test" || /\b(debug|diagnos|troubleshoot|stack trace|runtime|backend|database|logs?|metrics?|configuration|config|error|failed|failure)\b/i.test(text)) + ) { + return "admin_debug"; + } + if (/\b(where|open|find|navigate|page|screen|menu|settings?|configuration|wizard|location|link|path)\b/i.test(text)) { + return "navigation_help"; + } + return "simple_answer"; +} + +function normalizeRequestClass(value) { + return [ + "navigation_help", + "simple_answer", + "code_custom_command", + "admin_debug", + "explicit_long" + ].includes(value) ? 
value : "simple_answer"; +} + +function withTimeout(promise, timeoutMs) { + let timer; + const timeout = new Promise((_, reject) => { + timer = setTimeout(() => { + reject(Object.assign(new Error(`Gate timed out after ${timeoutMs}ms.`), { name: "TimeoutError" })); + }, timeoutMs); + }); + return Promise.race([promise, timeout]).finally(() => clearTimeout(timer)); +} + +function similarity(left, right) { + const a = tokens(left); + const b = tokens(right); + if (!a.size || !b.size) return 0; + let intersection = 0; + for (const token of a) if (b.has(token)) intersection += 1; + return intersection / (a.size + b.size - intersection); +} + +function tokens(value) { + const ignored = new Set([ + "a", "an", "are", "can", "could", "do", "find", "for", "how", "i", "in", "is", + "me", "of", "please", "the", "this", "to", "where", "would", "you" + ]); + return new Set( + String(value || "").toLowerCase().split(/[^a-z0-9]+/) + .filter((token) => token && !ignored.has(token)) + ); +} + +module.exports = { + GateProvider, + parseDecision, + stripForcePrefix, + isSensitiveRequest, + similarity, + isCacheSafeRepoAnswer, + isExactPredefinedQuery, + isComplexOrAmbiguous, + classifyRequestType, + normalizeRequestClass, + withTimeout +}; diff --git a/plugins/lumi_ai/backend/hardware.js b/plugins/lumi_ai/backend/hardware.js index db76d76..209861d 100644 --- a/plugins/lumi_ai/backend/hardware.js +++ b/plugins/lumi_ai/backend/hardware.js @@ -207,19 +207,20 @@ function calculateGpuCapacity({ model, contextSize, gpu, backend, managedUsageMb const contextMb = estimateContextMemoryMb(model, contextSize); const overheadMb = supported ? Math.max(256, modelMb * 0.08) : 0; const fullOffloadMb = Math.ceil(modelMb + contextMb + overheadMb); - const observedAvailableVramMb = Math.floor( - gpu?.available_vram_mb || - (gpu?.vram_mb ? 
gpu.vram_mb * 0.9 : 0) - ); - const totalUsableVramMb = Math.floor((gpu?.vram_mb || observedAvailableVramMb) * 0.9); + const totalVramMb = Math.max(0, Number(gpu?.vram_mb) || 0); + const hasObservedFree = Number.isFinite(Number(gpu?.available_vram_mb)); + const observedAvailableVramMb = Math.max(0, Math.floor( + hasObservedFree ? Number(gpu.available_vram_mb) : totalVramMb * 0.9 + )); + const totalUsableVramMb = Math.floor((totalVramMb || observedAvailableVramMb) * 0.9); + const managedModelVramMb = Math.max(0, Number(managedUsageMb) || 0); + const observedUsedVramMb = Math.max(0, totalVramMb - observedAvailableVramMb); const externalUsageMb = Math.max( 0, - (gpu?.vram_mb || 0) - observedAvailableVramMb - Math.max(0, Number(managedUsageMb) || 0) - ); - const safeAvailableVramMb = Math.max( - 0, - Math.min(totalUsableVramMb, (gpu?.vram_mb || totalUsableVramMb) - externalUsageMb) + observedUsedVramMb - managedModelVramMb ); + // The managed model's live allocation remains part of its own usable budget. + const safeAvailableVramMb = Math.max(0, totalUsableVramMb - externalUsageMb); const maxPercent = supported && fullOffloadMb > 0 ? Math.max(0, Math.min(100, Math.floor((safeAvailableVramMb / fullOffloadMb) * 100))) : 0; @@ -227,9 +228,13 @@ function calculateGpuCapacity({ model, contextSize, gpu, backend, managedUsageMb supported, backend: supported ? 
requestedBackend : "cpu", max_percent: maxPercent, + total_vram_mb: totalVramMb, + free_vram_mb: observedAvailableVramMb, + managed_model_vram_mb: managedModelVramMb, + external_vram_estimate_mb: externalUsageMb, available_vram_mb: observedAvailableVramMb, safe_available_vram_mb: safeAvailableVramMb, - managed_gpu_memory_mb: Math.max(0, Number(managedUsageMb) || 0), + managed_gpu_memory_mb: managedModelVramMb, external_gpu_memory_mb: externalUsageMb, estimated_full_offload_mb: fullOffloadMb, estimated_context_mb: contextMb, @@ -237,8 +242,10 @@ function calculateGpuCapacity({ model, contextSize, gpu, backend, managedUsageMb ? "No supported GPU detected. CPU fallback will be used." : !supported ? "The detected GPU does not match the installed runtime backend. CPU fallback will be used." - : maxPercent < 100 - ? "Available VRAM limits maximum GPU acceleration." + : externalUsageMb > 0 && maxPercent < 100 + ? "External VRAM usage limits maximum GPU acceleration." + : maxPercent < 100 + ? "The selected model and context exceed the GPU's safe VRAM budget." : null }; } @@ -252,7 +259,9 @@ function estimateAllocation({ model, contextSize, gpu, backend, intentPercent, w : 0; const clampedReason = actual < intent ? capacity.supported - ? "external_vram_pressure" + ? capacity.external_vram_estimate_mb > 0 + ? 
"external_vram_pressure" + : "model_vram_capacity" : "gpu_backend_unavailable" : null; return { @@ -261,18 +270,48 @@ function estimateAllocation({ model, contextSize, gpu, backend, intentPercent, w gpu_allocation_actual_percent: actual, gpu_allocation_max_safe_percent: capacity.max_percent, gpu_allocation_clamped_reason: clampedReason, + intended_gpu_allocation: intent, + actual_gpu_allocation: actual, workload_percent: actual, gpu_layers: gpuLayers, estimated_gpu_memory_mb: Math.ceil(capacity.estimated_full_offload_mb * actual / 100), estimated_cpu_memory_mb: Math.ceil((model?.size || 0) / 1048576 * (1 - actual / 100)), warning: clampedReason === "external_vram_pressure" ? `Actual GPU allocation is limited to ${actual}% because other processes are using VRAM. Your ${intent}% intent is preserved.` + : clampedReason === "model_vram_capacity" + ? `Actual GPU allocation is limited to ${actual}% because the selected model and context exceed the safe VRAM budget.` : clampedReason === "gpu_backend_unavailable" ? 
`GPU acceleration intent is ${intent}%, but the installed runtime cannot use the detected GPU.` : capacity.warning }; } +function performanceTuningHints({ model, config, gpu, allocation, generationTps = 0 }) { + const hints = []; + const maxOutput = Math.max( + Number(config?.max_output_tokens) || 0, + ...Object.values(config?.output_budgets || {}).map((value) => Number(value) || 0) + ); + const context = Number(config?.context_size) || 0; + const concurrency = Number(config?.concurrency) || 1; + if (maxOutput > 2048) hints.push("Lower maximum output tokens to reduce long generation time."); + if (context > 8192) hints.push("Lower context size if prompt evaluation is slow."); + if (allocation?.gpu_allocation_actual_percent < allocation?.gpu_allocation_intent_percent) { + hints.push("Review GPU layers/offload and external VRAM use before increasing allocation."); + } + if (concurrency > 1) hints.push("Reduce concurrent AI jobs when generation throughput is low."); + if (/GTX\s*1060/i.test(String(gpu?.model || "")) && Number(gpu?.vram_mb) <= 6144) { + hints.push("On a GTX 1060 6GB, a full VRAM graph can be normal managed-model allocation rather than external pressure."); + } + if ((model?.size || 0) >= 3.5 * 1024 ** 3) { + hints.push("This model may generate slowly on older GPUs even when its VRAM allocation is expected."); + } + if (generationTps > 0 && generationTps < 8) { + hints.push(`Recent generation speed is ${generationTps.toFixed(1)} tok/s. 
Lower class output budgets, reduce context, improve GPU offload, or reduce concurrency.`); + } + return hints; +} + function estimateContextMemoryMb(model, contextSize) { const context = Math.max(512, Number(contextSize) || model?.default_context || 4096); const scale = Math.max(1, (model?.gpu_layers || 24) / 24); @@ -347,5 +386,6 @@ module.exports = { detectGpus, selectRuntimeTarget, calculateGpuCapacity, - estimateAllocation + estimateAllocation, + performanceTuningHints }; diff --git a/plugins/lumi_ai/backend/metrics.js b/plugins/lumi_ai/backend/metrics.js index 480c15b..8fa4dac 100644 --- a/plugins/lumi_ai/backend/metrics.js +++ b/plugins/lumi_ai/backend/metrics.js @@ -5,7 +5,7 @@ const historyFile = () => resolveData("metrics", "history.jsonl"); const stateFile = () => resolveData("metrics", "summary.json"); function getSummary() { try { return JSON.parse(fs.readFileSync(stateFile(), "utf8")); } - catch { return { total_requests:0, successful:0, failed:0, refusals:0, tool_suggestions:0, tool_executions:0, tool_denials:0, confirmation_cancellations:0, timeout_count:0, runtime_crash_count:0, runtime_self_test_total:0, runtime_self_test_failed_total:0, runtime_start_attempt_total:0, runtime_start_failed_total:0, verified_downloads:0, failed_downloads:0, requests_by_role:{}, requests_by_scope:{}, requests_by_route:{}, runtime_exit_code_counts:{}, durations:[], queue_wait_total_ms:0 }; } + catch { return { total_requests:0, successful:0, failed:0, refusals:0, gate_decisions:0, tool_suggestions:0, tool_executions:0, tool_denials:0, confirmation_cancellations:0, timeout_count:0, runtime_crash_count:0, runtime_self_test_total:0, runtime_self_test_failed_total:0, runtime_start_attempt_total:0, runtime_start_failed_total:0, verified_downloads:0, failed_downloads:0, requests_by_role:{}, requests_by_scope:{}, requests_by_route:{}, gate_reason_codes:{}, runtime_exit_code_counts:{}, stage_totals:{}, stage_samples:0, slow_requests:[], durations:[], queue_wait_total_ms:0 }; } 
} function record(entry) { const summary = getSummary(); @@ -20,6 +20,13 @@ function record(entry) { if (entry.role) summary.requests_by_role[entry.role] = (summary.requests_by_role[entry.role] || 0) + 1; if (entry.scope) summary.requests_by_scope[entry.scope] = (summary.requests_by_scope[entry.scope] || 0) + 1; } + if (entry.kind === "gate_decision") { + summary.gate_decisions = (summary.gate_decisions || 0) + 1; + summary.gate_reason_codes ||= {}; + if (entry.reason_code) { + summary.gate_reason_codes[entry.reason_code] = (summary.gate_reason_codes[entry.reason_code] || 0) + 1; + } + } if (entry.route_used) { summary.requests_by_route[entry.route_used] = (summary.requests_by_route[entry.route_used] || 0) + 1; } @@ -43,25 +50,128 @@ function record(entry) { } if (entry.kind === "download" && entry.status === "success") summary.verified_downloads += 1; if (entry.kind === "download" && entry.status === "failed") summary.failed_downloads += 1; - if (entry.duration_ms != null) summary.durations.push(entry.duration_ms); + summary.durations = (Array.isArray(summary.durations) ? 
summary.durations : []) + .filter(isValidTiming); + if (isValidTiming(entry.duration_ms)) summary.durations.push(Number(entry.duration_ms)); summary.durations = summary.durations.slice(-500); - if (entry.queue_wait_ms) summary.queue_wait_total_ms += entry.queue_wait_ms; + if (isValidTiming(entry.queue_wait_ms)) { + summary.queue_wait_total_ms = Math.max(0, Number(summary.queue_wait_total_ms) || 0) + Number(entry.queue_wait_ms); + } + const stageKeys = [ + "deterministic_ms", "gate_ms", "queue_ms", "prompt_eval_ms", "generation_ms", + "main_queue_ms", "main_generate_ms", "total_ms" + ]; + if (entry.kind === "request" && stageKeys.some((key) => isValidTiming(entry[key]))) { + summary.stage_totals ||= {}; + if (!summary.stage_counts) { + const legacySamples = Math.max(0, Number(summary.stage_samples) || 0); + summary.stage_counts = Object.fromEntries( + Object.keys(summary.stage_totals) + .filter((key) => isValidTiming(summary.stage_totals[key])) + .map((key) => [key, legacySamples]) + ); + } + for (const key of stageKeys) { + if (!isValidTiming(entry[key])) continue; + summary.stage_totals[key] = Math.max(0, Number(summary.stage_totals[key]) || 0) + Number(entry[key]); + summary.stage_counts[key] = Math.max(0, Number(summary.stage_counts[key]) || 0) + 1; + } + } + const totalCandidate = entry.total_ms ?? entry.duration_ms; + const totalMs = isValidTiming(totalCandidate) ? Number(totalCandidate) : null; + if (entry.kind === "request" && totalMs != null && totalMs >= 30000) { + summary.slow_requests ||= []; + summary.slow_requests.unshift({ + timestamp: new Date().toISOString(), + request_id: entry.request_id || null, + route_used: entry.route_used || null, + route_class: entry.route_class || null, + reason_code: entry.gate_reason_code || entry.reason_code || null, + deterministic_ms: entry.deterministic_ms || 0, + gate_ms: entry.gate_ms || 0, + queue_ms: entry.queue_ms ?? entry.main_queue_ms ?? 
0, + prompt_eval_ms: entry.prompt_eval_ms || 0, + generation_ms: entry.generation_ms ?? entry.main_generate_ms ?? 0, + main_queue_ms: entry.main_queue_ms ?? entry.queue_ms ?? 0, + main_generate_ms: entry.main_generate_ms ?? entry.generation_ms ?? 0, + prompt_tokens: entry.prompt_tokens || 0, + generated_tokens: entry.generated_tokens || 0, + prompt_tps: entry.prompt_tps || 0, + generation_tps: entry.generation_tps || 0, + backend: entry.backend || null, + gpu_layers: entry.gpu_layers || 0, + context_size: entry.context_size || 0, + max_output_tokens_used: entry.max_output_tokens_used ?? entry.max_output_tokens ?? 0, + frontend_soft_timeout: Boolean(entry.frontend_soft_timeout), + total_ms: totalMs, + risk_504: totalMs >= 45000 + }); + summary.slow_requests = summary.slow_requests.slice(0, 25); + } fs.writeFileSync(stateFile(), JSON.stringify(summary, null, 2)); fs.appendFileSync(historyFile(), `${JSON.stringify({ timestamp:new Date().toISOString(), ...entry })}\n`); } function report() { - const s = getSummary(); const sorted=[...s.durations].sort((a,b)=>a-b); - return { ...s, average_response_ms: sorted.length ? Math.round(sorted.reduce((a,b)=>a+b,0)/sorted.length) : 0, median_response_ms: sorted.length ? sorted[Math.floor(sorted.length/2)] : 0 }; + return summarizeMetrics(getSummary()); +} +function summarizeMetrics(s = {}) { + const sorted = (Array.isArray(s.durations) ? s.durations : []) + .filter(isValidTiming) + .map(Number) + .sort((a,b)=>a-b); + const average_stage_ms = Object.fromEntries( + Object.entries(s.stage_totals || {}) + .filter(([, value]) => isValidTiming(value)) + .map(([key, value]) => { + const legacyCount = Math.max(0, Number(s.stage_samples) || 0); + const count = Math.max(0, Number(s.stage_counts?.[key]) || legacyCount); + return [key, count ? Math.max(0, Math.round(Number(value) / count)) : 0]; + }) + ); + return { + ...s, + durations: sorted, + average_stage_ms, + average_response_ms: sorted.length ? 
Math.max(0, Math.round(sorted.reduce((a,b)=>a+b,0)/sorted.length)) : 0, + median_response_ms: sorted.length ? Math.max(0, sorted[Math.floor(sorted.length/2)]) : 0 + }; } function history(limit=100) { - try { return fs.readFileSync(historyFile(),"utf8").trim().split(/\r?\n/).filter(Boolean).slice(-limit).reverse().map(JSON.parse); } catch { return []; } + try { + return fs.readFileSync(historyFile(),"utf8").trim().split(/\r?\n/) + .filter(Boolean) + .map(parseHistoryRow) + .filter(Boolean) + .slice(-limit) + .reverse(); + } catch { return []; } } function historyPage(page = 1, pageSize = 25) { const safePage = Math.max(1, Number.parseInt(page, 10) || 1); const safeSize = Math.max(1, Math.min(100, Number.parseInt(pageSize, 10) || 25)); try { - const rows = fs.readFileSync(historyFile(), "utf8").trim().split(/\r?\n/).filter(Boolean); - return paginateRows(rows, safePage, safeSize, JSON.parse); + const rows = fs.readFileSync(historyFile(), "utf8").trim().split(/\r?\n/) + .filter(Boolean) + .map(parseHistoryRow) + .filter(Boolean); + return paginateRows(rows, safePage, safeSize); + } catch { + return { entries: [], page: 1, pages: 1, page_size: safeSize, total: 0 }; + } +} +function slowRequestsPage(page = 1, pageSize = 15) { + const safePage = Math.max(1, Number.parseInt(page, 10) || 1); + const safeSize = Math.max(1, Math.min(100, Number.parseInt(pageSize, 10) || 15)); + try { + const rows = fs.readFileSync(historyFile(), "utf8").trim().split(/\r?\n/) + .filter(Boolean) + .map(parseHistoryRow) + .filter((entry) => { + const total = entry?.total_ms ?? 
entry?.duration_ms; + return entry?.kind === "request" && isValidTiming(total) && Number(total) >= 30000; + }) + .map(normalizeSlowEntry); + return paginateRows(rows, safePage, safeSize); } catch { return { entries: [], page: 1, pages: 1, page_size: safeSize, total: 0 }; } @@ -80,4 +190,46 @@ function paginateRows(rows, page = 1, pageSize = 25, map = (value) => value) { total }; } -module.exports = { record, report, history, historyPage, paginateRows }; + +function parseHistoryRow(value) { + try { return JSON.parse(value); } + catch { return null; } +} + +function normalizeSlowEntry(entry) { + const totalMs = Number(entry.total_ms ?? entry.duration_ms); + return { + ...entry, + total_ms: totalMs, + queue_ms: validOrZero(entry.queue_ms ?? entry.main_queue_ms), + prompt_eval_ms: validOrZero(entry.prompt_eval_ms), + generation_ms: validOrZero(entry.generation_ms ?? entry.main_generate_ms), + gate_ms: validOrZero(entry.gate_ms), + prompt_tokens: validOrZero(entry.prompt_tokens), + generated_tokens: validOrZero(entry.generated_tokens), + prompt_tps: validOrZero(entry.prompt_tps), + generation_tps: validOrZero(entry.generation_tps), + max_output_tokens_used: validOrZero(entry.max_output_tokens_used ?? entry.max_output_tokens), + risk_504: totalMs >= 45000 + }; +} + +function isValidTiming(value) { + const number = Number(value); + return Number.isFinite(number) && number >= 0; +} + +function validOrZero(value) { + return isValidTiming(value) ? 
Number(value) : 0; +} + +module.exports = { + record, + report, + history, + historyPage, + slowRequestsPage, + paginateRows, + isValidTiming, + summarizeMetrics +}; diff --git a/plugins/lumi_ai/backend/paths.js b/plugins/lumi_ai/backend/paths.js index 31779cf..2822c61 100644 --- a/plugins/lumi_ai/backend/paths.js +++ b/plugins/lumi_ai/backend/paths.js @@ -3,7 +3,10 @@ const fs = require("fs"); const PLUGIN_ROOT = path.resolve(__dirname, ".."); const PLUGIN_DATA = path.join(PLUGIN_ROOT, "data"); -const DIRS = ["config", "models", "runtime", "logs", "metrics", "rag", "repo_index", "cache", "tmp", "diagnostics"]; +const DIRS = [ + "config", "models", "runtime", "logs", "metrics", "rag", "repo_index", "cache", "tmp", + "diagnostics", "feedback", "corrections", "evals", "exports" +]; function ensureDataDirs() { for (const dir of DIRS) fs.mkdirSync(path.join(PLUGIN_DATA, dir), { recursive: true }); diff --git a/plugins/lumi_ai/backend/prompt_builder.js b/plugins/lumi_ai/backend/prompt_builder.js index bdff082..797d81f 100644 --- a/plugins/lumi_ai/backend/prompt_builder.js +++ b/plugins/lumi_ai/backend/prompt_builder.js @@ -4,7 +4,7 @@ const { PLUGIN_ROOT } = require("./paths"); const { buildPolicy } = require("./scope_manager"); function readTemplate(name){ return fs.readFileSync(path.join(PLUGIN_ROOT,"templates",name),"utf8").trim(); } -function buildPrompt({ config, role, message, contextBlocks=[], tools=[], repoContext=[], originContext=null }) { +function buildPrompt({ config, role, message, requestClass = "simple_answer", contextBlocks=[], correctionContext=[], tools=[], repoContext=[], originContext=null }) { const policy = buildPolicy({ scope: config.support_scope, role }); const moderatorCodeAllowed = role === "mod" && config.support_scope?.allow_moderator_code_help === true; const sections=[ @@ -19,14 +19,37 @@ function buildPrompt({ config, role, message, contextBlocks=[], tools=[], repoCo : "Provide user-facing navigation and operational help only. 
Do not expose repository paths, source filenames, HTTP methods, route implementation details, or internal code structure."}`, originContext ? `REQUEST ORIGIN AND FORMAT LIMITS:\n${JSON.stringify(originContext)}\nRespect these platform limits. Do not expose WebUI-only actions when webui_actions_allowed is false.` : "", `RESPONSE STYLE:\n${policy.style}\nAim to keep the final user-facing answer within ${policy.max_answer_length} characters when practical. This is a style target, not a limit on reasoning, retrieved context, or prompt construction.\nRoleplay intensity: ${config.instructions.roleplay_intensity || 0}/10.`, - `CUSTOM COMMAND OUTPUT RULES (hard requirements):\nWhen the user asks for a Lumi custom command, custom JavaScript command, or custom Python command:\n- Give a concise explanation before or after the code.\n- Always put the complete runnable command body in a fenced code block labeled javascript or python.\n- Prefer JavaScript and the exact top-level shape function run(ctx) { ... }.\n- Lumi custom commands return the reply value. Return a string, number, boolean, or an object with a content property; do not call ctx.reply because it is not part of the custom-command context.\n- Use def run(ctx): ... only when the user explicitly requests Python or Python is clearly required. Python custom commands are supported, but JavaScript is the default.\n- Do not use export, export default, import, require, module.exports, or other module syntax unless verified Lumi repository context explicitly requires it.\n- Use only standard language and runtime features by default.\n- Do not use non-standard modules such as opencv, numpy, requests, discord.py, or similar libraries unless the user explicitly confirms they are installed and supported.\n- Available ctx fields are platform, user, message, args, and argsText. 
Do not invent additional context APIs.\n- Preserve required async or sync behavior.\n- Do not perform destructive actions, bypass permissions, or access unavailable services.`, + `REQUEST CLASS AND CONCISION POLICY:\nRequest class: ${requestClass}.\n${requestClassPolicy(requestClass)}`, + `CUSTOM COMMAND OUTPUT RULES (hard requirements):\nWhen the user asks for a Lumi custom command, custom JavaScript command, or custom Python command:\n- Put the complete runnable command first in a fenced code block labeled javascript or python.\n- Add at most 3 short notes after the code unless the user asks for detail.\n- Do not repeat lists or data outside the code when they are already present in the code.\n- Prefer JavaScript and the exact top-level shape function run(ctx) { ... }.\n- Lumi custom commands return the reply value. Return a string, number, boolean, or an object with a content property; do not call ctx.reply because it is not part of the custom-command context.\n- Use def run(ctx): ... only when the user explicitly requests Python or Python is clearly required. Python custom commands are supported, but JavaScript is the default.\n- Do not use export, export default, import, require, module.exports, or other module syntax unless verified Lumi repository context explicitly requires it.\n- Use only standard language and runtime features by default.\n- Do not use non-standard modules such as opencv, numpy, requests, discord.py, or similar libraries unless the user explicitly confirms they are installed and supported.\n- Available ctx fields are platform, user, message, args, and argsText. 
Do not invent additional context APIs.\n- Preserve required async or sync behavior.\n- Do not perform destructive actions, bypass permissions, or access unavailable services.`, `VERIFICATION AND HALLUCINATION RULES (hard requirements):\n- For Lumi features, routes, page locations, plugins, commands, and help topics, rely on VERIFIED LUMI REPOSITORY CONTEXT and SAFE LUMI CONTEXT.\n- If the requested Lumi capability or location is not present in verified context, say it was not found or cannot be verified. Do not invent a route, menu, setting, workflow, or capability.\n- Do not invent WebUI messaging, notification, direct-message, or Throne contact workflows.\n- For contacting OokamiKunTV, Jenni, administrators, moderators, or community staff, recommend the Discord community server unless verified context provides a specific internal workflow.\n- Ask a clarifying question when the target feature, platform, setting, or page is ambiguous.`, config.instructions.community_tone ? `COMMUNITY TONE:\n${config.instructions.community_tone}` : "", `ADMIN CUSTOM INSTRUCTIONS (below hard safety rules, above normal style guidance):\n${config.instructions.admin_custom || "(none)"}`, `VERIFIED LUMI REPOSITORY CONTEXT:\n${repoContext.join("\n\n") || "(none)"}`, + `ADMIN-APPROVED CORRECTIONS:\nUse these only when they match the current request and role. 
They never override hard safety or permissions.\n${correctionContext.join("\n\n") || "(none)"}`, `SAFE LUMI CONTEXT:\n${contextBlocks.join("\n\n") || "(none)"}`, `ALLOWED TOOLS:\n${tools.map(t=>JSON.stringify({tool_id:t.tool_id,description:t.description,schema:t.schema})).join("\n") || "(none)"}` ]; return sections.filter(Boolean).join("\n\n---\n\n"); } -module.exports = { buildPrompt }; + +function requestClassPolicy(requestClass) { + if (requestClass === "code_custom_command") { + return [ + "Put one complete runnable code block first.", + "After the code, add at most 3 short notes unless the user explicitly asks for detail.", + "Do not repeat lists, mappings, or data outside the code when they are already present in the code." + ].join("\n"); + } + if (requestClass === "navigation_help") { + return "Answer directly with the verified link or path and only the minimum extra text needed."; + } + if (requestClass === "explicit_long") { + return "A longer answer was explicitly requested, but avoid repetition and keep every section useful."; + } + if (requestClass === "admin_debug") { + return "Lead with the likely cause or next diagnostic action, then provide concise evidence and steps."; + } + return "Answer directly and concisely. 
Avoid unnecessary preambles, repetition, and broad background."; +} + +module.exports = { buildPrompt, requestClassPolicy }; diff --git a/plugins/lumi_ai/backend/queue_manager.js b/plugins/lumi_ai/backend/queue_manager.js index 78e9d24..48645dc 100644 --- a/plugins/lumi_ai/backend/queue_manager.js +++ b/plugins/lumi_ai/backend/queue_manager.js @@ -1,24 +1,48 @@ class RequestQueue { constructor(getConfig) { this.getConfig=getConfig; this.active=0; this.pending=[]; this.rate=new Map(); } get length(){ return this.pending.length; } - async run(userId, role, fn) { + async run(userId, role, fn, { signal } = {}) { const cfg=this.getConfig(); this.checkRate(userId,role,cfg); if(this.pending.length >= cfg.max_queue_length) throw Object.assign(new Error("AI is busy right now. Try again in a moment."),{code:"QUEUE_FULL"}); + if (signal?.aborted) throw cancelledError(); const queuedAt=Date.now(); - return new Promise((resolve,reject)=>{ this.pending.push({fn,resolve,reject,queuedAt}); this.drain(); }); + return new Promise((resolve,reject)=>{ + const job={fn,resolve,reject,queuedAt,signal,onAbort:null}; + this.pending.push(job); + if (signal) { + job.onAbort=()=>{ + const index=this.pending.indexOf(job); + if(index>=0)this.pending.splice(index,1); + reject(cancelledError()); + }; + signal.addEventListener("abort",job.onAbort,{once:true}); + if(signal.aborted){job.onAbort();return;} + } + this.drain(); + }); } checkRate(userId,role,cfg) { if(role==="admin" && cfg.admin_bypass_rate_limit) return; const now=Date.now(), key=`${role}:${userId}`, rows=(this.rate.get(key)||[]).filter(t=>now-t<60000); - if(rows.length >= cfg.per_user_requests_per_minute) throw Object.assign(new Error("AI rate limit reached. Try again shortly."),{code:"RATE_LIMIT"}); + if(rows.length >= cfg.per_user_requests_per_minute) { + const retryAfter = Math.max(1, Math.ceil((60000 - (now - rows[0])) / 1000)); + throw Object.assign( + new Error(`AI rate limit reached. 
Try again in ${retryAfter}s.`), + { code:"RATE_LIMIT", retry_after_seconds: retryAfter } + ); + } rows.push(now); this.rate.set(key,rows); } drain(){ const limit=Math.max(1,Number(this.getConfig().concurrency)||1); while(this.activejob.fn(Date.now()-job.queuedAt)).then(job.resolve,job.reject).finally(()=>{this.active--;this.drain();}); } } } +function cancelledError(){return Object.assign(new Error("Assistant request was cancelled."),{name:"AbortError",code:"REQUEST_CANCELLED"});} module.exports = { RequestQueue }; diff --git a/plugins/lumi_ai/backend/repo_indexer.js b/plugins/lumi_ai/backend/repo_indexer.js index c45971d..4af822b 100644 --- a/plugins/lumi_ai/backend/repo_indexer.js +++ b/plugins/lumi_ai/backend/repo_indexer.js @@ -266,7 +266,7 @@ function scoreRoute(route, terms) { } function augmentedRoutes(index) { - return [...(index.routes || [])].filter((route) => route.method === "GET"); + return [...(index?.routes || [])].filter((route) => route.method === "GET"); } function verifiedRoutePaths(index = loadIndex()) { diff --git a/plugins/lumi_ai/backend/request_jobs.js b/plugins/lumi_ai/backend/request_jobs.js new file mode 100644 index 0000000..d0ff3d0 --- /dev/null +++ b/plugins/lumi_ai/backend/request_jobs.js @@ -0,0 +1,155 @@ +const crypto = require("crypto"); + +class AssistantRequestJobs { + constructor({ ttlMs = 15 * 60 * 1000, maxJobs = 200 } = {}) { + this.ttlMs = ttlMs; + this.maxJobs = maxJobs; + this.jobs = new Map(); + } + + create({ userId, execute, metadata = {} }) { + this.prune(); + const id = crypto.randomUUID(); + const controller = new AbortController(); + const job = { + id, + user_id: String(userId), + state: "queued", + stage: "queued", + created_at: Date.now(), + updated_at: Date.now(), + result: null, + error: null, + retry_after_seconds: null, + details: sanitizeDetails(metadata), + controller, + frontend_soft_timeout_at: null + }; + this.jobs.set(id, job); + setImmediate(async () => { + try { + const result = await execute( + 
(stage, details = {}) => this.update(id, stage, details), + controller.signal + ); + if (job.state === "cancelled") return; + Object.assign(job, { + state: "complete", + stage: "done", + result, + updated_at: Date.now() + }); + } catch (error) { + if (job.state === "cancelled") return; + const cancelled = error.code === "REQUEST_CANCELLED" || error.name === "AbortError"; + Object.assign(job, { + state: cancelled ? "cancelled" : "error", + stage: cancelled ? "cancelled" : "error", + error: cancelled ? "Assistant request was cancelled." : error.message || "Lumi Assistant could not complete the request.", + retry_after_seconds: error.retry_after_seconds || null, + details: sanitizeDetails({ ...job.details, error_code: error.code }), + updated_at: Date.now() + }); + } + }); + return this.publicJob(job); + } + + update(id, stage, details = {}) { + const job = this.jobs.get(id); + if (!job || ["complete", "error", "cancelled"].includes(job.state)) return null; + job.state = stage === "queued" ? 
"queued" : "running"; + job.stage = stage; + job.updated_at = Date.now(); + job.details = sanitizeDetails({ ...job.details, ...details }); + return this.publicJob(job); + } + + get(id, userId) { + this.prune(); + const job = this.jobs.get(id); + if (!job || job.user_id !== String(userId)) return null; + return this.publicJob(job); + } + + cancel(id, userId, { admin = false } = {}) { + this.prune(); + const job = this.jobs.get(id); + if (!job || (!admin && job.user_id !== String(userId))) return null; + if (["complete", "error", "cancelled"].includes(job.state)) return this.publicJob(job); + job.state = "cancelled"; + job.stage = "cancelled"; + job.error = "Assistant request was cancelled."; + job.updated_at = Date.now(); + job.controller.abort(); + return this.publicJob(job); + } + + markSoftTimeout(id, userId) { + const job = this.jobs.get(id); + if (!job || job.user_id !== String(userId)) return null; + job.frontend_soft_timeout_at = Date.now(); + job.details = sanitizeDetails({ ...job.details, frontend_soft_timeout: true }); + job.updated_at = Date.now(); + return this.publicJob(job); + } + + diagnostics(limit = 25) { + this.prune(); + return [...this.jobs.values()] + .sort((left, right) => right.created_at - left.created_at) + .slice(0, limit) + .map((job) => ({ ...this.publicJob(job), user_id: job.user_id })); + } + + publicJob(job) { + return { + id: job.id, + state: job.state, + stage: job.stage, + created_at: job.created_at, + updated_at: job.updated_at, + elapsed_ms: Math.max(0, (["complete", "error", "cancelled"].includes(job.state) ? job.updated_at : Date.now()) - job.created_at), + still_running: ["queued", "running"].includes(job.state), + frontend_soft_timeout_at: job.frontend_soft_timeout_at, + details: job.details || {}, + result: job.state === "complete" ? job.result : null, + error: ["error", "cancelled"].includes(job.state) ? 
job.error : null, + retry_after_seconds: job.retry_after_seconds + }; + } + + prune(now = Date.now()) { + for (const [id, job] of this.jobs) { + if ( + ["complete", "error", "cancelled"].includes(job.state) && + now - job.updated_at > this.ttlMs + ) this.jobs.delete(id); + } + if (this.jobs.size <= this.maxJobs) return; + const oldest = [...this.jobs.values()] + .filter((job) => ["complete", "error", "cancelled"].includes(job.state)) + .sort((left, right) => left.updated_at - right.updated_at) + .slice(0, this.jobs.size - this.maxJobs); + for (const job of oldest) this.jobs.delete(job.id); + } +} + +function sanitizeDetails(details) { + const output = {}; + for (const key of [ + "queue_position", "gate_ms", "queue_ms", "prompt_eval_ms", "generation_ms", "total_ms", + "prompt_tokens", "generated_tokens", "prompt_tps", "generation_tps", "gpu_layers", + "context_size", "batch_size", "ubatch_size", "threads", "max_output_tokens", + "max_output_tokens_used" + ]) { + if (Number.isFinite(Number(details[key]))) output[key] = Number(details[key]); + } + for (const key of ["route", "route_class", "reason_code", "backend", "error_code"]) { + if (details[key] != null) output[key] = String(details[key]).slice(0, 120); + } + if (details.frontend_soft_timeout != null) output.frontend_soft_timeout = Boolean(details.frontend_soft_timeout); + return output; +} + +module.exports = { AssistantRequestJobs }; diff --git a/plugins/lumi_ai/backend/response_formatter.js b/plugins/lumi_ai/backend/response_formatter.js index b71f050..1c6eb2d 100644 --- a/plugins/lumi_ai/backend/response_formatter.js +++ b/plugins/lumi_ai/backend/response_formatter.js @@ -18,7 +18,7 @@ function formatAssistantResponse({ verifiedRoutes = [], role = "user", allowModeratorCodeHelp = false, - maxLength = 4000 + maxLength = null }) { const routeSet = new Set(verifiedRoutes); const collected = []; @@ -55,7 +55,9 @@ function formatAssistantResponse({ output = output.replace(/[ \t]+\n/g, "\n").replace(/\n{4,}/g, 
"\n\n\n").trim(); const originalLength = output.length; - const delivered = truncateFinal(output, maxLength); + const delivered = Number.isFinite(Number(maxLength)) && Number(maxLength) > 0 + ? truncateFinal(output, maxLength) + : output; return { text: delivered, links: uniqueLinks(collected), diff --git a/plugins/lumi_ai/backend/runtime_manager.js b/plugins/lumi_ai/backend/runtime_manager.js index 990d66c..1600e71 100644 --- a/plugins/lumi_ai/backend/runtime_manager.js +++ b/plugins/lumi_ai/backend/runtime_manager.js @@ -53,6 +53,18 @@ class RuntimeManager { gpu_allocation_clamped_reason: acceleration.gpu_allocation_clamped_reason, gpu_layers: acceleration.gpu_layers, estimated_gpu_memory_mb: acceleration.estimated_gpu_memory_mb, + estimated_cpu_memory_mb: acceleration.estimated_cpu_memory_mb, + total_vram_mb: acceleration.total_vram_mb, + free_vram_mb: acceleration.free_vram_mb, + managed_model_vram_mb: acceleration.managed_model_vram_mb, + external_vram_estimate_mb: acceleration.external_vram_estimate_mb, + intended_gpu_allocation: acceleration.gpu_allocation_intent_percent, + actual_gpu_allocation: acceleration.gpu_allocation_actual_percent, + context_size: this.getConfig().context_size, + batch_size: this.getConfig().batch_size, + ubatch_size: this.getConfig().ubatch_size, + threads: this.getConfig().threads, + max_output_tokens: this.getConfig().max_output_tokens, acceleration_warning: acceleration.warning, latest_diagnostic: getLatestDiagnostic() }; @@ -292,14 +304,256 @@ class RuntimeManager { return { ...status, healthy: false, health_status: error.name === "TimeoutError" ? 
"connection_timeout" : "connection_refused" }; } } - async infer(messages, maxTokens = 300) { + async infer(messages, maxTokens = 300, { signal = null, timeoutMs = null } = {}) { if (!this.port) throw new Error("Runtime is offline."); - const response = await fetch(`http://127.0.0.1:${this.port}/v1/chat/completions`, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ model: "local", messages, max_tokens: maxTokens, temperature: 0.2 }), signal: AbortSignal.timeout(this.getConfig().request_timeout_ms || 120000) }); - if (!response.ok) throw new Error(`Inference failed (${response.status})`); + const configuredTimeout = Number(timeoutMs ?? this.getConfig().hard_generation_timeout_ms); + const hardTimeoutMs = Math.max(30000, Math.min(3600000, Number.isFinite(configuredTimeout) ? configuredTimeout : 600000)); + const controller = new AbortController(); + let hardTimedOut = false; + const abortFromCaller = () => controller.abort(); + if (signal?.aborted) throw requestCancelledError(); + signal?.addEventListener("abort", abortFromCaller, { once: true }); + const timer = setTimeout(() => { + hardTimedOut = true; + controller.abort(); + }, hardTimeoutMs); + timer.unref?.(); + try { + const response = await fetch(`http://127.0.0.1:${this.port}/v1/chat/completions`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: "local", messages, max_tokens: maxTokens, temperature: 0.2 }), + signal: controller.signal + }); + if (!response.ok) throw new Error(`Inference failed (${response.status})`); + return response.json(); + } catch (error) { + if (hardTimedOut) { + throw Object.assign( + new Error(`Main model generation exceeded the ${Math.ceil(hardTimeoutMs / 1000)}s hard timeout.`), + { name: "TimeoutError", code: "HARD_GENERATION_TIMEOUT", timeout_ms: hardTimeoutMs } + ); + } + if (signal?.aborted || error.name === "AbortError") throw requestCancelledError(); + throw error; + } finally { + 
/**
 * Manages the lifecycle of the small CPU-only "gate" llama-server process
 * (spawn, health checks, shutdown) and forwards short chat-completion
 * requests to it over loopback HTTP.
 *
 * Collaborators are injected so the manager always reads current settings:
 *  - getConfig():   returns the plugin config; only `config.gate` is read here.
 *  - getModel(id):  returns the manifest entry for a model id (or a falsy value).
 *  - onDiagnostic:  optional sink for lifecycle events (start success/failure, exit).
 */
class GateRuntimeManager {
  constructor({ getConfig, getModel, onDiagnostic }) {
    Object.assign(this, { getConfig, getModel, onDiagnostic });
    this.child = null;
    this.port = null;
    this.startedAt = null;
    this.lastError = null;
    this.activeLogPath = null;
  }

  /** Manifest entry for the configured gate model, as returned by getModel. */
  model() {
    return this.getModel(this.getConfig().gate?.model_id);
  }

  /** Path of the gate model file under the "models" data dir, or null when no model is configured. */
  modelPath() {
    const model = this.model();
    return model ? resolveData("models", model.filename) : null;
  }

  /**
   * Synchronous snapshot of the gate runtime.
   * `healthy` is always false here; only health() probes the HTTP endpoint.
   * Memory figures are rough estimates: model file size plus a context
   * allowance scaled linearly up to 192 MB at a 4096-token context.
   */
  status() {
    const model = this.model();
    const modelPath = this.modelPath();
    const contextSize = Math.max(512, Math.min(4096, Number(this.getConfig().gate?.context_size) || 1024));
    const modelMb = model ? Math.ceil(model.size / 1048576) : 0;
    const contextMb = Math.ceil((contextSize / 4096) * 192);
    return {
      state: this.child && !this.child.killed ? "running" : this.lastError ? "error" : "stopped",
      healthy: false,
      model_id: model?.id || null,
      model_downloaded: Boolean(modelPath && fs.existsSync(modelPath)),
      port: this.port,
      pid: this.child?.pid || null,
      uptime_ms: this.startedAt ? Date.now() - this.startedAt : 0,
      last_error: this.lastError,
      estimated_cpu_memory_mb: modelMb + contextMb,
      estimated_gpu_memory_mb: 0,
      backend: "cpu",
      log_path: this.activeLogPath
    };
  }

  /**
   * Validates the gate model file: presence, byte size against the manifest,
   * the 4-byte "GGUF" magic, and the SHA-256 digest.
   * @returns {Promise<{success: boolean, message?: string, file?: string, sha256?: string}>}
   */
  async verifyModel() {
    const model = this.model();
    const file = this.modelPath();
    if (!model || !file || !fs.existsSync(file)) {
      return { success: false, message: "Gate model file is missing." };
    }
    const stat = fs.statSync(file);
    if (stat.size !== model.size) {
      return { success: false, message: `Gate model expected ${model.size} bytes, found ${stat.size}.` };
    }
    const header = Buffer.alloc(4);
    const descriptor = fs.openSync(file, "r");
    try { fs.readSync(descriptor, header, 0, 4, 0); } finally { fs.closeSync(descriptor); }
    if (header.toString("ascii") !== "GGUF") {
      return { success: false, message: "Gate model does not have a GGUF header." };
    }
    // Hashing is done last because it is the only check that reads the whole file.
    const sha256 = await hashFile(file);
    if (sha256 !== model.sha256) {
      return { success: false, message: "Gate model SHA-256 does not match the manifest.", sha256 };
    }
    return { success: true, file, sha256 };
  }

  /**
   * Spawns the gate llama-server (CPU only, gpu_layers = 0) and waits for it
   * to report healthy. Returns the current health when a child already exists.
   * @throws {Error} when the binary is missing, the model fails verification,
   *   or the health wait fails (the child is stopped before rethrowing).
   */
  async start() {
    if (this.child && !this.child.killed) return this.health();
    const binary = findRecursive(
      resolveData("runtime"),
      process.platform === "win32" ? "llama-server.exe" : "llama-server"
    );
    if (!binary) throw new Error("Runtime executable was not found for the gate.");
    const validation = await this.verifyModel();
    if (!validation.success) throw new Error(validation.message);
    const cfg = this.getConfig();
    const gate = cfg.gate || {};
    this.port = await freePort();
    // Default to at most two threads; never exceed the host's logical CPU count.
    const threads = Math.max(1, Math.min(
      os.cpus().length,
      Number(gate.threads) || Math.min(2, os.cpus().length)
    ));
    const args = buildRuntimeArgs({
      port: this.port,
      modelPath: validation.file,
      config: { context_size: Math.max(512, Math.min(4096, Number(gate.context_size) || 1024)) },
      threads,
      acceleration: { gpu_layers: 0 }
    });
    const logPath = resolveData("logs", `gate-runtime-${Date.now()}.log`);
    const log = fs.openSync(logPath, "a");
    const child = spawn(binary, args, {
      cwd: path.dirname(binary),
      stdio: ["ignore", log, log],
      windowsHide: true,
      shell: false
    });
    // The child holds its own copy of the descriptor; the parent's can be closed.
    fs.closeSync(log);
    this.child = child;
    this.activeLogPath = logPath;
    this.startedAt = Date.now();
    this.lastError = null;
    child.once("error", (error) => {
      // Flag the failure so the "exit" handler does not report it a second time.
      child.__spawnFailed = true;
      this.lastError = error.message;
      if (this.child === child) this.child = null;
      this.onDiagnostic?.({ kind: "gate_runtime", status: "failed", message: error.message });
    });
    child.once("exit", (code, signal) => {
      const expected = child.__manualStop || child.__spawnFailed;
      if (this.child === child) this.child = null;
      if (!expected) {
        this.lastError = `Gate runtime exited (${code ?? signal ?? "unknown"}).`;
        this.onDiagnostic?.({ kind: "gate_runtime", status: "failed", code, signal });
      }
    });
    try {
      await waitHealth(this, 45000);
      this.onDiagnostic?.({
        kind: "gate_runtime",
        status: "success",
        model: this.model()?.id,
        model_load_ms: Date.now() - this.startedAt
      });
      return this.health();
    } catch (error) {
      this.lastError = error.message;
      await this.stop();
      throw error;
    }
  }

  /**
   * Stops the gate process: sends the default termination signal, waits up to
   * 10 s via waitExit, then escalates to SIGKILL if the process is still alive.
   * Always clears the port, start time and log path, and returns status().
   */
  async stop() {
    if (this.child) {
      const child = this.child;
      child.__manualStop = true;
      child.kill();
      // NOTE(review): waitExit is defined elsewhere in this file; this assumes it
      // resolves (rather than rejects) when the timeout elapses — confirm.
      await waitExit(child, 10000);
      // `child.killed` only records that a signal was delivered, so it is already
      // true after the kill() above and cannot tell us whether the process died.
      // exitCode/signalCode stay null until the process has actually exited.
      const stillAlive = child.exitCode === null && child.signalCode === null;
      if (this.child === child && stillAlive) child.kill("SIGKILL");
    }
    this.child = null;
    this.port = null;
    this.startedAt = null;
    this.activeLogPath = null;
    return this.status();
  }

  /** Stops the gate process (if any) and starts a fresh one. */
  async restart() {
    await this.stop();
    return this.start();
  }

  /**
   * status() plus a live probe of the /health endpoint (2 s timeout).
   * Any fetch failure is reported as `healthy: false` rather than thrown.
   */
  async health() {
    const status = this.status();
    if (status.state !== "running") return status;
    try {
      const response = await fetch(`http://127.0.0.1:${this.port}/health`, {
        signal: AbortSignal.timeout(2000)
      });
      return { ...status, healthy: response.ok };
    } catch {
      return { ...status, healthy: false };
    }
  }

  /**
   * Sends a deterministic (temperature 0) chat completion to the gate model.
   * @param {Array<object>} messages chat messages forwarded verbatim.
   * @param {number} [maxTokens=64] completion token cap.
   * @param {?number} [timeoutMs=null] overrides `gate.timeout_ms`; the effective
   *   value is clamped to 1000–5000 ms and defaults to 3000 ms.
   * @returns {Promise<object>} parsed JSON response body.
   * @throws {Error} when no port is assigned or the HTTP status is not OK.
   */
  async infer(messages, maxTokens = 64, timeoutMs = null) {
    if (!this.port) throw new Error("Gate runtime is offline.");
    const configured = Number(timeoutMs ?? this.getConfig().gate?.timeout_ms);
    const timeout = Math.max(1000, Math.min(5000, Number.isFinite(configured) ? configured : 3000));
    const response = await fetch(`http://127.0.0.1:${this.port}/v1/chat/completions`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: "local-gate",
        messages,
        max_tokens: maxTokens,
        temperature: 0
      }),
      signal: AbortSignal.timeout(timeout)
    });
    if (!response.ok) throw new Error(`Gate inference failed (${response.status})`);
    return response.json();
  }
}

/**
 * Adds the main and gate runtimes' memory estimates and compares the totals
 * with host capacity.
 *
 * Missing, non-numeric or negative inputs count as 0. A capacity check is
 * skipped when the corresponding host figure is 0/unknown.
 *
 * @param {{main?: object, gate?: object, hardware?: object}} input
 *   `main`/`gate` supply `estimated_cpu_memory_mb` and `estimated_gpu_memory_mb`;
 *   `hardware` supplies `total_ram_mb` and `gpu.vram_mb`.
 * @returns {object} per-runtime and total estimates, `warnings` (RAM above 85%,
 *   VRAM above 90% of host capacity) and `exceeds_host_capacity`.
 */
function combinedResourceEstimate({ main, gate, hardware }) {
  const mainCpu = Math.max(0, Number(main?.estimated_cpu_memory_mb) || 0);
  const gateCpu = Math.max(0, Number(gate?.estimated_cpu_memory_mb) || 0);
  const mainGpu = Math.max(0, Number(main?.estimated_gpu_memory_mb) || 0);
  const gateGpu = Math.max(0, Number(gate?.estimated_gpu_memory_mb) || 0);
  const totalRam = mainCpu + gateCpu;
  const totalVram = mainGpu + gateGpu;
  const hostRam = Math.max(0, Number(hardware?.total_ram_mb) || 0);
  const hostVram = Math.max(0, Number(hardware?.gpu?.vram_mb) || 0);
  const warnings = [];
  if (hostRam && totalRam > hostRam * 0.85) {
    warnings.push("Gate and main model estimates exceed 85% of host RAM.");
  }
  if (hostVram && totalVram > hostVram * 0.9) {
    warnings.push("Gate and main model estimates exceed 90% of host VRAM.");
  }
  return {
    main_cpu_memory_mb: mainCpu,
    gate_cpu_memory_mb: gateCpu,
    total_cpu_memory_mb: totalRam,
    main_gpu_memory_mb: mainGpu,
    gate_gpu_memory_mb: gateGpu,
    total_gpu_memory_mb: totalVram,
    exceeds_host_capacity: warnings.length > 0,
    warnings
  };
}
/**
 * Creates the error thrown when an assistant request is cancelled.
 * The result is a plain Error tagged with name "AbortError" and
 * code "REQUEST_CANCELLED" so callers can branch on either field.
 * @returns {Error}
 */
function requestCancelledError() {
  const cancelled = new Error("Assistant request was cancelled.");
  cancelled.name = "AbortError";
  cancelled.code = "REQUEST_CANCELLED";
  return cancelled;
}
/**
 * Writes reviewer-approved feedback (optionally paired with an approved
 * correction) to timestamped JSONL training files.
 */
class TrainingExporter {
  /**
   * @param {object} deps
   * @param {{all: Function}} deps.feedback store whose all() returns feedback rows.
   * @param {{all: Function}} deps.corrections store whose all() returns correction rows.
   * @param {string} [deps.outputDir] target directory; defaults to the "exports" data dir.
   */
  constructor({ feedback, corrections, outputDir }) {
    this.feedback = feedback;
    this.corrections = corrections;
    this.outputDir = outputDir || resolveData("exports");
  }

  /**
   * Exports all approved examples in the requested format.
   *  - "instruction": rows of { instruction, input: "", output }
   *  - "dpo":         rows of { prompt, preferred_answer, rejected_answer }
   * @param {"instruction"|"dpo"} format
   * @returns {{file: string, filename: string, count: number, format: string}}
   * @throws {Error} when the format is not one of the two supported values.
   */
  export(format) {
    if (!["instruction", "dpo"].includes(format)) throw new Error("Unknown training export format.");
    const examples = approvedExamples(this.feedback.all(), this.corrections.all());
    // ":" and "." are replaced so the ISO timestamp is a valid filename on every platform.
    const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
    const filename = `lumi-ai-${format}-${timestamp}.jsonl`;
    const file = path.join(this.outputDir, filename);
    const rows = examples.map((entry) => format === "dpo"
      ? {
        prompt: entry.prompt,
        preferred_answer: entry.preferred_answer,
        rejected_answer: entry.rejected_answer
      }
      : {
        instruction: entry.prompt,
        input: "",
        output: entry.preferred_answer
      });
    // Create the target directory on demand so a fresh install or a
    // caller-supplied outputDir does not fail with ENOENT.
    fs.mkdirSync(this.outputDir, { recursive: true });
    fs.writeFileSync(file, rows.map((row) => JSON.stringify(row)).join("\n") + (rows.length ? "\n" : ""));
    return { file, filename: path.basename(file), count: rows.length, format };
  }
}

/**
 * Builds training examples from feedback rows that are both flagged
 * `export_approved` and have status "approved".
 *
 * The preferred answer is the approved correction linked through
 * `source_feedback_id` when one exists, otherwise the feedback's own
 * `optional_correction`. Rows with neither are dropped.
 *
 * @param {Array<object>} feedbackRows
 * @param {Array<object>} correctionRows
 * @returns {Array<{prompt: *, preferred_answer: *, rejected_answer: *}>}
 */
function approvedExamples(feedbackRows, correctionRows) {
  // When several approved corrections target the same feedback id, the last one wins.
  const byFeedback = new Map(
    correctionRows
      .filter((entry) => entry.approved && entry.corrected_answer)
      .map((entry) => [entry.source_feedback_id, entry])
  );
  return feedbackRows
    .filter((entry) => entry.export_approved && entry.status === "approved")
    .map((entry) => {
      const correction = byFeedback.get(entry.id);
      const preferred = correction?.corrected_answer || entry.optional_correction;
      if (!preferred) return null;
      return {
        prompt: entry.user_message,
        preferred_answer: preferred,
        rejected_answer: entry.assistant_answer
      };
    })
    .filter(Boolean);
}
getRuntimeState } = require("./backend/config_manager"); -const { detectHardware, estimateAllocation } = require("./backend/hardware"); +const { detectHardware, estimateAllocation, performanceTuningHints } = require("./backend/hardware"); const metrics = require("./backend/metrics"); const { roleOf } = require("./backend/permissions"); const { canUseAssistant } = require("./backend/assistant_permissions"); const { RequestQueue } = require("./backend/queue_manager"); const { ToolRegistry } = require("./backend/tool_router"); const { DownloadManager } = require("./backend/downloader"); -const { RuntimeManager } = require("./backend/runtime_manager"); +const { RuntimeManager, GateRuntimeManager, combinedResourceEstimate } = require("./backend/runtime_manager"); const { AiProvider } = require("./backend/ai_provider"); +const { GateProvider } = require("./backend/gate_provider"); +const { SafeAnswerCache } = require("./backend/cache"); +const { AssistantRequestJobs } = require("./backend/request_jobs"); const { getLatestDiagnostic, createDiagnosticsBundle } = require("./backend/diagnostics"); const { evaluateAssistantAvailability } = require("./backend/assistant_availability"); const { buildVisibilityDiagnostics } = require("./backend/assistant_visibility"); @@ -22,6 +25,10 @@ const { AiRateLimiter, mergeLimits } = require("./backend/rate_limits"); const { buildOriginContext, formatPlatformReply, formatPlatformReplyDetails } = require("./backend/commands"); const { AssistantPanelDiagnostics } = require("./backend/assistant_panel_diagnostics"); const { formatAssistantResponse } = require("./backend/response_formatter"); +const { FeedbackStore, FEEDBACK_TAGS, improvementAccess } = require("./backend/feedback"); +const { CorrectionStore, PROMOTION_TARGETS } = require("./backend/corrections"); +const { EvalStore } = require("./backend/evals"); +const { TrainingExporter } = require("./backend/training_export"); const storage = require("./backend/storage"); const { 
formatBytes, bytesFromMb, sanityCheckSize } = require("./backend/size_utils"); @@ -38,6 +45,11 @@ module.exports = { catch (error) { console.warn("Lumi AI repository index initialization failed", error.message); } } let config = getConfig(); + const feedbackStore = new FeedbackStore(); + const correctionStore = new CorrectionStore({ + getConfig: () => config, + verifyLink: isVerifiedImprovementLink + }); const getModel = (id) => modelManifest.models.find((model) => model.id === id); const downloads = new DownloadManager((entry) => metrics.record(entry)); const accessControl = new AiAccessControl((entry) => metrics.record(entry)); @@ -49,6 +61,7 @@ module.exports = { { endpoint: `/plugins/${PLUGIN_ID}`, user: { id: "diagnostic-user" } } ); const queue = new RequestQueue(() => config); + const requestJobs = new AssistantRequestJobs(); const tools = new ToolRegistry((entry) => metrics.record(entry)); const contextProviders = new Map(); const frontendVisibility = new Map(); @@ -62,17 +75,97 @@ module.exports = { onCrash: (message) => metrics.record({ kind: "runtime", status: "failed", runtime_crash: true, message }), onDiagnostic: (entry) => metrics.record(entry) }); + const gateRuntime = new GateRuntimeManager({ + getConfig: () => config, + getModel, + onDiagnostic: (entry) => metrics.record(entry) + }); + const gate = new GateProvider({ + getConfig: () => config, + runtime: gateRuntime, + lookupRepo: (message) => repoIndexer.lookupSupport(message), + lookupCorrection: (context) => correctionStore.findPredefined({ + message: context.message, + role: context.role, + origin: context.origin || context.platform, + platform: context.platform + }), + cache: new SafeAnswerCache(() => config), + metrics + }); + let mainStartPromise = null; + let gateStartPromise = null; + const ensureMainRuntime = async (options = {}) => { + const health = await runtime.health(); + if (health.healthy) return health; + if (!mainStartPromise) { + mainStartPromise = 
runtime.start(options).finally(() => { mainStartPromise = null; }); + } + return mainStartPromise; + }; + const ensureGateRuntime = async () => { + if (gateRuntime.status().state === "running") return gateRuntime.health(); + if (!gateStartPromise) { + gateStartPromise = gateRuntime.start().finally(() => { gateStartPromise = null; }); + } + return gateStartPromise; + }; const provider = new AiProvider({ getConfig: () => config, runtime, + gate, queue, tools, metrics, getContext: getSafeContext, lookupRepo: (message) => repoIndexer.lookupSupport(message), getRepoContext: (message, role, allowModeratorCodeHelp) => - repoIndexer.supportContext(message, repoIndexer.loadIndex(), 8, role, allowModeratorCodeHelp) + repoIndexer.supportContext(message, repoIndexer.loadIndex(), 8, role, allowModeratorCodeHelp), + getCorrections: (context) => correctionStore.context(context), + ensureRuntime: ensureMainRuntime }); + const evalStore = new EvalStore({ provider }); + const trainingExporter = new TrainingExporter({ + feedback: feedbackStore, + corrections: correctionStore + }); + const startRuntimes = async (options = {}) => { + if (config.enabled) { + ensureGateRuntime().catch((error) => { + metrics.record({ kind: "gate_runtime", status: "failed", reason_code: "gate_start_failed", message: error.message }); + }); + } + const main = await ensureMainRuntime(options); + return { ...main, gate: await gateRuntime.health() }; + }; + const stopRuntimes = async (options = {}) => { + await gateRuntime.stop(); + return runtime.stop(options); + }; + const restartRuntimes = async () => { + await gateRuntime.stop(); + const main = await runtime.restart(); + if (config.enabled) { + try { await ensureGateRuntime(); } + catch (error) { + metrics.record({ kind: "gate_runtime", status: "failed", reason_code: "gate_restart_failed", message: error.message }); + } + } + return { ...main, gate: await gateRuntime.health() }; + }; + let gateRecoveryPending = false; + const gateMonitor = 
setInterval(async () => { + if ( + gateRecoveryPending || + !config.enabled || + gateRuntime.status().state === "running" + ) return; + gateRecoveryPending = true; + try { await ensureGateRuntime(); } + catch {} + finally { gateRecoveryPending = false; } + }, 30000); + gateMonitor.unref?.(); const api = { health: () => runtime.health(), @@ -114,6 +207,7 @@ module.exports = { const runtimeTarget = getRuntimeTarget(hardware); const selectedModel = getModel(config.selected_model_id); const runtimeStatus = await runtime.health(); + const gateStatus = await gateRuntime.health(); const gpuAllocation = estimateAllocation({ model: selectedModel, contextSize: config.context_size, @@ -138,7 +232,23 @@ module.exports = { frontend: frontendVisibility.get(req.session.user.id) }); const usage = storage.storageUsage(modelManifest.models, config.selected_model_id); + const resourceEstimate = combinedResourceEstimate({ + main: runtimeStatus, + gate: gateStatus, + hardware + }); + const recentGeneration = metrics.history(100).find((entry) => + entry.kind === "request" && Number(entry.generation_tps) > 0 + ); + const tuningHints = performanceTuningHints({ + model: selectedModel, + config, + gpu: hardware.gpu, + allocation: gpuAllocation, + generationTps: Number(recentGeneration?.generation_tps) || 0 + }); const metricsPage = metrics.historyPage(req.query.metrics_page, 25); + const slowRequestsPage = metrics.slowRequestsPage(req.query.slow_page, 15); const accessPage = paginateRows(accessControl.list(), req.query.access_page, 25); const logPage = storage.listLogsPage(req.query.logs_page, 25); const runtimeFolderSize = usage.categories.runtime; @@ -168,6 +278,10 @@ module.exports = { runtimeTarget, runtimeManifest, runtimeStatus, + gateStatus, + resourceEstimate, + tuningHints, + jobDiagnostics: requestJobs.diagnostics(), gpuAllocation, assistantAvailability, visibilityDiagnostics, @@ -188,6 +302,7 @@ module.exports = { metrics: metrics.report(), history: metricsPage.entries, 
metricsPage, + slowRequestsPage, logFiles: logPage.entries, logPage, formatBytes, @@ -212,7 +327,19 @@ module.exports = { gpu_allocation_intent_percent: boundedInt(req.body.gpu_allocation_intent_percent, 0, 100, 0), concurrency: boundedInt(req.body.concurrency, 1, 8, 1), max_queue_length: boundedInt(req.body.max_queue_length, 1, 100, 8), - request_timeout_ms: boundedInt(req.body.request_timeout_ms, 5000, 600000, 120000), + request_timeout_ms: boundedInt(req.body.hard_generation_timeout_ms, 30000, 3600000, 600000), + ui_soft_timeout_ms: boundedInt(req.body.ui_soft_timeout_ms, 5000, 300000, 45000), + hard_generation_timeout_ms: boundedInt(req.body.hard_generation_timeout_ms, 30000, 3600000, 600000), + max_output_tokens: boundedInt(req.body.max_output_tokens, 64, 32768, 2048), + output_budgets: { + navigation_help: boundedInt(req.body.output_budget_navigation_help, 64, 32768, 256), + simple_answer: boundedInt(req.body.output_budget_simple_answer, 64, 32768, 512), + code_custom_command: boundedInt(req.body.output_budget_code_custom_command, 64, 32768, 896), + admin_debug: boundedInt(req.body.output_budget_admin_debug, 64, 32768, 1280), + explicit_long: boundedInt(req.body.output_budget_explicit_long, 64, 32768, 2048) + }, + batch_size: boundedInt(req.body.batch_size, 32, 4096, 512), + ubatch_size: boundedInt(req.body.ubatch_size, 16, 4096, 128), per_user_requests_per_minute: boundedInt(req.body.per_user_requests_per_minute, 1, 120, 6), admin_bypass_rate_limit: req.body.admin_bypass_rate_limit === "on", assistant_enabled: req.body.assistant_enabled === "on", @@ -258,6 +385,20 @@ module.exports = { per_channel: limitFromBody(req.body, "limit_channel", 12, 60), queue_when_limited: req.body.queue_when_limited === "on" }), + gate: { + ...config.gate, + model_id: getModel(req.body.gate_model_id)?.id || config.gate.model_id, + context_size: boundedInt(req.body.gate_context_size, 512, 4096, 1024), + threads: boundedInt(req.body.gate_threads, 1, 16, 2), + timeout_ms: 
boundedInt(req.body.gate_timeout_ms, 1000, 5000, 3000), + high_confidence_threshold: boundedNumber(req.body.gate_high_confidence_threshold, 0.5, 0.99, 0.88), + main_llm_threshold: boundedNumber(req.body.gate_main_llm_threshold, 0.1, 0.95, 0.72), + predefined_enabled: req.body.gate_predefined_enabled === "on", + cache_ttl_seconds: boundedInt(req.body.gate_cache_ttl_seconds, 30, 604800, 3600), + repeat_force_window_seconds: boundedInt(req.body.gate_repeat_force_window_seconds, 0, 3600, 90), + similarity_threshold: boundedNumber(req.body.gate_similarity_threshold, 0.5, 1, 0.86), + force_prefix: cleanText(req.body.gate_force_prefix, 40) + }, support_scope: normalizeScope({ allowed_topics: cleanText(req.body.allowed_topics, 3000), allowed_support_domains: cleanText(req.body.allowed_support_domains, 3000), @@ -286,6 +427,12 @@ module.exports = { log_tool_calls: req.body.log_tool_calls === "on", log_metrics: req.body.log_metrics === "on", log_internal_audit: req.body.log_internal_audit === "on" + }, + improvement: { + ...config.improvement, + allow_moderators_to_review_responses: req.body.allow_moderators_to_review_responses === "on", + trusted_moderator_reviewers: parseIdList(req.body.trusted_moderator_reviewers), + corrections_enabled: req.body.corrections_enabled === "on" } }); registerAssistantCommands({ @@ -303,14 +450,20 @@ module.exports = { "selected_model_id", "context_size", "threads", + "batch_size", + "ubatch_size", "gpu_allocation_intent_percent" ].some((key) => previousConfig[key] !== config[key]); - if (runtimeSettingsChanged && runtime.status().state === "running") { + const gateSettingsChanged = JSON.stringify(previousConfig.gate) !== JSON.stringify(config.gate); + const enabledChanged = previousConfig.enabled !== config.enabled; + if ((runtimeSettingsChanged || gateSettingsChanged || enabledChanged) && runtime.status().state === "running") { try { - await runtime.restart(); + await restartRuntimes(); } catch (error) { return flash(req, res, "error", 
`Settings saved, but runtime restart failed: ${error.message}`); } + } else if (!config.enabled && gateRuntime.status().state === "running") { + await gateRuntime.stop(); } return flash(req, res, "success", "Lumi AI settings saved."); }); @@ -342,8 +495,11 @@ module.exports = { if (!req.session.user?.isAdmin) return denied(res); const model = getModel(req.params.id); if (!model) return flash(req, res, "error", "Unknown model."); - if (model.id === config.selected_model_id && runtime.status().state === "running") { - return flash(req, res, "error", "Stop the runtime before replacing the selected model."); + if ( + (model.id === config.selected_model_id && runtime.status().state === "running") || + (model.id === config.gate.model_id && gateRuntime.status().state === "running") + ) { + return flash(req, res, "error", "Stop the AI runtimes before replacing an active model."); } const hardware = detectHardware(modelManifest.models); const incompatible = model.ram_gb * 1024 > hardware.total_ram_mb || model.size / 1048576 > hardware.free_disk_mb; @@ -374,7 +530,13 @@ module.exports = { requestedSurface: "webui_chat" }); if (!permission.allowed) return res.status(403).json({ error: "Access denied.", reason: permission.reason }); - res.json({ runtime: await runtime.health(), queue_length: queue.length, enabled: config.enabled, model_id: config.selected_model_id }); + res.json({ + runtime: await runtime.health(), + gate: await gateRuntime.health(), + queue_length: queue.length, + enabled: config.enabled, + model_id: config.selected_model_id + }); }); router.get("/api/downloads", (req, res) => { if (!req.session.user?.isAdmin) return res.status(403).json({ error: "Access denied." 
}); @@ -481,7 +643,9 @@ module.exports = { try { const result = storage.deleteModel(getModel(req.params.id), { selectedModelId: config.selected_model_id, + gateModelId: config.gate.model_id, runtimeRunning: runtime.status().state === "running", + gateRuntimeRunning: gateRuntime.status().state === "running", confirmed: req.body.confirm === "yes" }); return flash(req, res, "success", result.deleted ? `Model deleted. Recovered ${formatBytes(result.bytes_recovered)}.` : "Model was not installed."); @@ -505,7 +669,8 @@ module.exports = { const result = storage.cleanupStorage(categories, { models: modelManifest.models, selectedModelId: config.selected_model_id, - runtimeRunning: runtime.status().state === "running", + gateModelId: config.gate.model_id, + runtimeRunning: runtime.status().state === "running" || gateRuntime.status().state === "running", activeLogPath: runtime.activeLogPath }); return flash(req, res, "success", `Storage cleanup recovered ${formatBytes(result.recovered_bytes)}.`); @@ -545,13 +710,14 @@ module.exports = { if (!req.session.user?.isAdmin) return res.status(403).json({ error: "Access denied." }); try { const action = req.params.action; - if (!["start", "stop", "restart", "self-test", "verify-runtime", "verify-model"].includes(action)) throw new Error("Unknown runtime action."); + if (!["start", "stop", "restart", "self-test", "verify-runtime", "verify-model", "verify-gate-model"].includes(action)) throw new Error("Unknown runtime action."); const result = action === "self-test" ? await runtime.selfTest() : action === "verify-runtime" ? runtime.verifyRuntimeInstallation() : action === "verify-model" ? await runtime.verifyModel() + : action === "verify-gate-model" ? await gateRuntime.verifyModel() : action === "stop" - ? await runtime.stop({ manual: true, reason: "admin_stop" }) - : action === "restart" ? await runtime.restart() : await runtime.start(); + ? await stopRuntimes({ manual: true, reason: "admin_stop" }) + : action === "restart" ? 
await restartRuntimes() : await startRuntimes(); if (result?.success === false) return res.status(400).json({ error: result.message, diagnostic: result }); res.json(result); } catch (error) { @@ -563,7 +729,11 @@ module.exports = { const file = createDiagnosticsBundle({ config, runtimeState: getRuntimeState(), - manifest: { runtime: runtimeManifest, model: getModel(config.selected_model_id) }, + manifest: { + runtime: runtimeManifest, + model: getModel(config.selected_model_id), + gate_model: getModel(config.gate.model_id) + }, metrics: metrics.report() }); return res.download(file); @@ -588,40 +758,181 @@ module.exports = { const originContext = webOriginContext(req); const access = authorizeAiRequest({ userId: req.session.user.id, context: originContext, accessControl, rateLimiter }); if (!access.allowed) return res.status(429).json({ error: access.message, reason: access.reason, retry_after_seconds: access.retry_after_seconds }); + const requestUser = { ...req.session.user }; + const requestRole = roleOf(requestUser); + const requestConfig = config; + const requestSessionId = req.sessionID; + const history = normalizeConversationHistory(req.body.history); + const requestStarted = Date.now(); + const job = requestJobs.create({ + userId: requestUser.id, + metadata: { + context_size: requestConfig.context_size, + batch_size: requestConfig.batch_size, + ubatch_size: requestConfig.ubatch_size, + threads: requestConfig.threads + }, + execute: async (updateStage, signal) => { + try { + const result = await provider.generate({ + message, + user: requestUser, + sessionId: requestSessionId, + originContext, + allowDeterministicShortcut: requestConfig.support_scope.allow_deterministic_help_shortcuts, + history, + signal, + onStage: updateStage + }); + updateStage("formatting", { route: result.route_used, ...(result.diagnostics || {}) }); + const delivered = finalizeAssistantResult(result, { + role: requestRole, + config: requestConfig, + baseUrl: originContext.base_url, + 
maxLength: originContext.max_message_length, + requestMessage: message + }); + metrics.record({ + kind: "delivery", + status: "success", + scope: "webui_chat", + route_used: result.route_used || "llm", + route_class: result.route_class, + max_output_tokens_used: result.max_output_tokens_used, + role: requestRole, + user_id: requestUser.id, + internal_generated_length: result.internal_generated_length || String(result.text || "").length, + final_reply_length: delivered.original_final_length, + original_final_length: delivered.original_final_length, + delivered_length: delivered.delivered_length, + ...(result.stage_timings || {}) + }); + return { + ...delivered, + diagnostics: result.diagnostics || null, + feedback_context: { + user_message: message, + assistant_answer: delivered.text, + route_used: result.route_used || "main_llm", + role: requestRole, + origin: originContext.origin, + platform: originContext.platform, + model: result.model_id || requestConfig.selected_model_id, + timestamp: new Date().toISOString() + } + }; + } catch (error) { + metrics.record({ + kind: "request", + status: "failed", + user_id: requestUser.id, + role: requestRole, + message: error.message, + timeout: error.name === "TimeoutError" || error.code === "HARD_GENERATION_TIMEOUT", + cancelled: error.code === "REQUEST_CANCELLED", + total_ms: Date.now() - requestStarted, + duration_ms: Date.now() - requestStarted + }); + if (error.code === "QUEUE_FULL" && !error.retry_after_seconds) error.retry_after_seconds = 5; + throw error; + } + } + }); + return res.status(202).json({ + job_id: job.id, + state: job.state, + stage: job.stage, + status_url: `/plugins/${PLUGIN_ID}/assistant/jobs/${job.id}`, + cancel_url: `/plugins/${PLUGIN_ID}/assistant/jobs/${job.id}/cancel`, + soft_timeout_url: `/plugins/${PLUGIN_ID}/assistant/jobs/${job.id}/soft-timeout`, + ui_soft_timeout_ms: requestConfig.ui_soft_timeout_ms + }); + }); + router.post("/assistant/feedback", (req, res) => { + const permission = 
canUseAssistant({ + user: req.session.user, + config, + origin: "webui", + platform: "webui", + requestedSurface: "webui_chat" + }); + if (!permission.allowed) return res.status(403).json({ error: "Access denied." }); try { - const result = await provider.generate({ - message, - user: req.session.user, - sessionId: req.sessionID, - originContext, - allowDeterministicShortcut: config.support_scope.allow_deterministic_help_shortcuts, - history: normalizeConversationHistory(req.body.history) - }); - const delivered = finalizeAssistantResult(result, { + const entry = feedbackStore.capture({ + user_message: req.body.user_message, + assistant_answer: req.body.assistant_answer, + route_used: req.body.route_used, role: roleOf(req.session.user), - config, - baseUrl: originContext.base_url, - maxLength: originContext.max_message_length, - requestMessage: message - }); - metrics.record({ - kind: "delivery", - status: "success", - scope: "webui_chat", - route_used: result.route_used || "llm", - role: roleOf(req.session.user), - user_id: req.session.user.id, - internal_generated_length: result.internal_generated_length || String(result.text || "").length, - final_reply_length: delivered.original_final_length, - original_final_length: delivered.original_final_length, - delivered_length: delivered.delivered_length - }); - res.json(delivered); + origin: "webui", + platform: "webui", + model: req.body.model, + timestamp: req.body.timestamp, + feedback_tag: req.body.feedback_tag, + optional_correction: req.body.optional_correction + }, req.session.user); + return res.status(201).json({ success: true, id: entry.id }); } catch (error) { - metrics.record({ kind: "request", status: "failed", user_id: req.session.user.id, role: roleOf(req.session.user), message: error.message }); - res.status(error.code === "QUEUE_FULL" || error.code === "RATE_LIMIT" ? 
429 : 503).json({ error: error.message }); + return res.status(400).json({ error: error.message }); } }); + router.get("/assistant/jobs/:id", (req, res) => { + const permission = canUseAssistant({ + user: req.session.user, + config, + origin: "webui", + platform: "webui", + requestedSurface: "webui_chat" + }); + if (!permission.allowed) return res.status(403).json({ error: "Access denied." }); + const job = requestJobs.get(req.params.id, req.session.user.id); + if (!job) return res.status(404).json({ error: "Assistant request was not found or expired." }); + return res.json(job); + }); + router.post("/assistant/jobs/:id/cancel", (req, res) => { + const permission = canUseAssistant({ + user: req.session.user, + config, + origin: "webui", + platform: "webui", + requestedSurface: "webui_chat" + }); + if (!permission.allowed) return res.status(403).json({ error: "Access denied." }); + const job = requestJobs.cancel(req.params.id, req.session.user.id, { + admin: Boolean(req.session.user?.isAdmin) + }); + if (!job) return res.status(404).json({ error: "Assistant request was not found or expired." }); + metrics.record({ + kind: "request_job", + status: "cancelled", + job_id: job.id, + user_id: req.session.user.id, + stage: job.stage, + elapsed_ms: job.elapsed_ms + }); + return res.json(job); + }); + router.post("/assistant/jobs/:id/soft-timeout", (req, res) => { + const permission = canUseAssistant({ + user: req.session.user, + config, + origin: "webui", + platform: "webui", + requestedSurface: "webui_chat" + }); + if (!permission.allowed) return res.status(403).json({ error: "Access denied." }); + const job = requestJobs.markSoftTimeout(req.params.id, req.session.user.id); + if (!job) return res.status(404).json({ error: "Assistant request was not found or expired." 
}); + metrics.record({ + kind: "request_job", + status: "soft_timeout", + job_id: job.id, + user_id: req.session.user.id, + stage: job.stage, + elapsed_ms: job.elapsed_ms, + still_running: job.still_running + }); + return res.json(job); + }); router.post("/assistant/test", async (req, res) => { if (!req.session.user?.isAdmin) return res.status(403).json({ error: "Access denied." }); const message = cleanText(req.body.message, 6000); @@ -671,11 +982,195 @@ module.exports = { res.json({ success: cancelled }); }); + router.get("/improvement_center", (req, res) => { + const access = improvementAccess(req.session.user, config); + if (!access.allowed) return deniedImprovement(res); + return res.render(path.join(__dirname, "views", "improvement-center.ejs"), { + title: "Lumi AI Improvement Center", + config, + access, + feedbackTags: FEEDBACK_TAGS, + promotionTargets: PROMOTION_TARGETS, + reviews: feedbackStore.list({ + page: req.query.review_page, + pageSize: 15, + status: cleanText(req.query.status, 30) + }), + corrections: correctionStore.list({ page: req.query.correction_page, pageSize: 15 }), + evalCases: evalStore.list({ page: req.query.eval_page, pageSize: 15 }), + evalResults: evalStore.results(25), + formatDate + }); + }); + + router.post("/improvement_center/settings", (req, res) => { + const access = improvementAccess(req.session.user, config); + if (!access.can_approve) return deniedImprovement(res); + config = saveConfig({ + ...config, + improvement: { + ...config.improvement, + allow_moderators_to_review_responses: req.body.allow_moderators_to_review_responses === "on", + trusted_moderator_reviewers: parseIdList(req.body.trusted_moderator_reviewers), + corrections_enabled: req.body.corrections_enabled === "on" + } + }); + ensureSidebarNavItem(settings); + return improvementFlash(req, res, "success", "Improvement Center settings saved."); + }); + + router.post("/improvement_center/reviews/:id", (req, res) => { + const access = 
improvementAccess(req.session.user, config); + if (!access.allowed) return deniedImprovement(res); + try { + const action = cleanText(req.body.action, 30); + if (action === "flag" && access.can_flag) { + feedbackStore.setStatus(req.params.id, "flagged", req.session.user, req.body.review_notes); + } else if (action === "verify" && access.can_verify) { + feedbackStore.verify(req.params.id, req.session.user, req.body.review_notes); + } else if (action === "approve" && access.can_approve) { + feedbackStore.setStatus(req.params.id, "approved", req.session.user, req.body.review_notes); + } else if (action === "reject" && access.can_approve) { + feedbackStore.setStatus(req.params.id, "rejected", req.session.user, req.body.review_notes); + } else if (action === "edit" && access.can_edit) { + feedbackStore.edit(req.params.id, req.body, req.session.user); + } else if (action === "export" && access.can_export) { + feedbackStore.markExportApproved(req.params.id, req.session.user); + } else if (action === "delete" && access.can_delete) { + feedbackStore.delete(req.params.id); + } else { + return deniedImprovement(res); + } + return improvementFlash(req, res, "success", `Review ${action} completed.`); + } catch (error) { + return improvementFlash(req, res, "error", error.message); + } + }); + + router.post("/improvement_center/reviews/:id/implement", (req, res) => { + const access = improvementAccess(req.session.user, config); + if (!access.can_implement) return deniedImprovement(res); + try { + const review = feedbackStore.get(req.params.id); + if (!review || review.status !== "approved") throw new Error("Approve the review before implementing it."); + const target = PROMOTION_TARGETS.includes(req.body.target) ? 
req.body.target : "correction"; + if (target === "eval_case") { + evalStore.add({ + prompt: review.user_message, + role: req.body.min_role || review.role, + origin: req.body.permission_origin || review.origin, + expected_behavior: req.body.corrected_answer || review.optional_correction, + forbidden_behavior: req.body.forbidden_behavior, + expected_link: req.body.expected_link, + notes: req.body.notes + }, req.session.user); + } else if (target === "training_export") { + feedbackStore.markExportApproved(review.id, req.session.user); + } else { + correctionStore.createFromFeedback(review, { + ...req.body, + target, + explicitly_safe: req.body.explicitly_safe === "on", + enabled: req.body.enabled === "on" + }, req.session.user); + } + return improvementFlash(req, res, "success", "Approved feedback was promoted. Save Corrections before it becomes active."); + } catch (error) { + return improvementFlash(req, res, "error", error.message); + } + }); + + router.post("/improvement_center/corrections/save", (req, res) => { + const access = improvementAccess(req.session.user, config); + if (!access.can_implement) return deniedImprovement(res); + const result = correctionStore.saveCorrections(req.session.user); + return improvementFlash(req, res, "success", `Corrections saved. 
${result.active} of ${result.total} are active.`); + }); + + router.post("/improvement_center/corrections/:id", (req, res) => { + const access = improvementAccess(req.session.user, config); + if (!access.allowed) return deniedImprovement(res); + try { + const action = cleanText(req.body.action, 30); + if (action === "verify" && access.can_verify) { + correctionStore.verify(req.params.id, req.session.user); + } else if (action === "edit" && access.can_edit) { + if (req.body.expected_link && !isVerifiedImprovementLink(req.body.expected_link)) { + throw new Error("Internal correction links must match a verified Lumi route."); + } + correctionStore.update(req.params.id, { + ...req.body, + explicitly_safe: req.body.explicitly_safe === "on", + enabled: req.body.enabled === "on" + }); + } else if (action === "toggle" && access.can_edit) { + correctionStore.setEnabled(req.params.id, req.body.enabled === "on"); + } else if (action === "delete" && access.can_delete) { + correctionStore.delete(req.params.id); + } else { + return deniedImprovement(res); + } + return improvementFlash(req, res, "success", `Correction ${action} completed. 
Save Corrections to activate changes.`); + } catch (error) { + return improvementFlash(req, res, "error", error.message); + } + }); + + router.post("/improvement_center/evals", (req, res) => { + const access = improvementAccess(req.session.user, config); + if (!access.can_run_evals) return deniedImprovement(res); + try { + if (req.body.expected_link && !isVerifiedImprovementLink(req.body.expected_link)) { + throw new Error("Expected links must match a verified Lumi route."); + } + evalStore.add(req.body, req.session.user); + return improvementFlash(req, res, "success", "Eval case added."); + } catch (error) { + return improvementFlash(req, res, "error", error.message); + } + }); + + router.post("/improvement_center/evals/:id/delete", (req, res) => { + const access = improvementAccess(req.session.user, config); + if (!access.can_run_evals) return deniedImprovement(res); + evalStore.delete(req.params.id); + return improvementFlash(req, res, "success", "Eval case deleted."); + }); + + router.post("/improvement_center/evals/run", async (req, res) => { + const access = improvementAccess(req.session.user, config); + if (!access.can_run_evals) return deniedImprovement(res); + try { + const results = await evalStore.runAll({ provider, actor: req.session.user }); + return improvementFlash(req, res, "success", `Eval run completed with ${results.length} result(s).`); + } catch (error) { + return improvementFlash(req, res, "error", error.message); + } + }); + + router.post("/improvement_center/exports/:format", (req, res) => { + const access = improvementAccess(req.session.user, config); + if (!access.can_export) return deniedImprovement(res); + try { + const output = trainingExporter.export(req.params.format); + return res.download(output.file, output.filename); + } catch (error) { + return improvementFlash(req, res, "error", error.message); + } + }); + web.mount(`/plugins/${PLUGIN_ID}`, router, { label: "Lumi AI", role: "admin", section: "plugins" }); + web.addNavItem({ + 
label: "AI Improvement Center", + path: `/plugins/${PLUGIN_ID}/improvement_center`, + role: "mod", + section: "moderation", + canAccess: (user) => improvementAccess(user, config).allowed + }); let removeAssistantPanel = () => {}; if (typeof web.addAssistantPanel === "function") { removeAssistantPanel = web.addAssistantPanel({ @@ -728,15 +1223,21 @@ module.exports = { }); writeCommandsManifest(plugin?.dir || __dirname, config); + if (config.enabled) { + setImmediate(() => ensureGateRuntime().catch((error) => + console.error("Lumi AI gate runtime start failed", error) + )); + } const state = getRuntimeState(); if (shouldAutoResume(config, state)) { - setImmediate(() => runtime.start({ resume: true }).catch((error) => console.error("Lumi AI runtime resume failed", error))); + setImmediate(() => startRuntimes({ resume: true }).catch((error) => console.error("Lumi AI runtime resume failed", error))); } return async () => { + clearInterval(gateMonitor); removeAssistantPanel(); commandRouter?.clearCommands?.(PLUGIN_ID); - await runtime.stop({ manual: false, reason: "bot_shutdown" }); + await stopRuntimes({ manual: false, reason: "bot_shutdown" }); if (global.lumiFrameworks?.ai === api) delete global.lumiFrameworks.ai; if (global.lumiFrameworks?.lumi_ai === api) delete global.lumiFrameworks.lumi_ai; }; @@ -755,6 +1256,10 @@ function boundedInt(value, min, max, fallback) { const number = Number.parseInt(value, 10); return Number.isFinite(number) ? Math.min(max, Math.max(min, number)) : fallback; } +function boundedNumber(value, min, max, fallback) { + const number = Number(value); + return Number.isFinite(number) ? 
Math.min(max, Math.max(min, number)) : fallback; +} function cleanText(value, max) { return String(value || "").trim().slice(0, max); } @@ -762,9 +1267,39 @@ function flash(req, res, type, message) { req.session.flash = { type, message }; return res.redirect(`/plugins/${PLUGIN_ID}`); } +function improvementFlash(req, res, type, message) { + req.session.flash = { type, message }; + return res.redirect(`/plugins/${PLUGIN_ID}/improvement_center`); +} function denied(res) { return res.status(403).render("error", { title: "Access denied", message: "Administrator access is required." }); } +function deniedImprovement(res) { + return res.status(403).render("error", { + title: "Access denied", + message: "Improvement Center access is not enabled for this account." + }); +} +function parseIdList(value) { + return [...new Set(String(value || "") + .split(/[\s,;]+/) + .map((entry) => entry.trim()) + .filter(Boolean))] + .slice(0, 250); +} +function isVerifiedImprovementLink(value) { + const cleaned = cleanText(value, 2000).replace(/^(?:GET|POST|PUT|PATCH|DELETE|HEAD|OPTIONS)\s+/i, ""); + if (!cleaned) return true; + let pathname; + try { + const url = new URL(cleaned, "https://lumi.invalid"); + if (!["http:", "https:"].includes(url.protocol)) return false; + pathname = url.pathname; + } catch { + return false; + } + return repoIndexer.verifiedRoutePaths().includes(pathname); +} function formatDuration(ms) { if (!ms) return "0 ms"; return ms < 1000 ? 
`${ms} ms` : `${(ms / 1000).toFixed(1)} s`; @@ -800,8 +1335,11 @@ function ensureSidebarNavItem(settings) { } if (!structure?.enabled || !Array.isArray(structure.sections)) return; const navId = "plugins_lumi_ai"; + const improvementNavId = "plugins_lumi_ai_improvement_center"; for (const section of structure.sections) { - if (Array.isArray(section.items)) section.items = section.items.filter((item) => item !== navId); + if (Array.isArray(section.items)) { + section.items = section.items.filter((item) => ![navId, improvementNavId].includes(item)); + } } let plugins = structure.sections.find((section) => section.id === "plugins"); if (!plugins) { @@ -810,6 +1348,13 @@ function ensureSidebarNavItem(settings) { } plugins.items = Array.isArray(plugins.items) ? plugins.items : []; plugins.items.push(navId); + let moderation = structure.sections.find((section) => section.id === "moderation"); + if (!moderation) { + moderation = { id: "moderation", label: "Mod", icon: "shield", items: [] }; + structure.sections.push(moderation); + } + moderation.items = Array.isArray(moderation.items) ? 
moderation.items : []; + moderation.items.push(improvementNavId); settings.setSetting("nav_structure", structure); } @@ -1019,7 +1564,7 @@ function finalizeAssistantResult(result, { role, config, baseUrl = "", maxLength verifiedRoutes: repoIndexer.verifiedRoutePaths(), role, allowModeratorCodeHelp: config.support_scope?.allow_moderator_code_help === true, - maxLength: maxLength || config.support_scope?.max_answer_length || 4000 + maxLength }); const output = { ...result, ...formatted }; if (role !== "admin") { diff --git a/plugins/lumi_ai/plugin.json b/plugins/lumi_ai/plugin.json index 20ed2f9..677de7a 100644 --- a/plugins/lumi_ai/plugin.json +++ b/plugins/lumi_ai/plugin.json @@ -1,7 +1,7 @@ { "id": "lumi_ai", "name": "Lumi AI", - "version": "0.4.6", + "version": "0.6.0", "description": "Managed local AI provider and scoped WebUI assistant for Lumi.", "main": "index.js" } diff --git a/plugins/lumi_ai/public/assistant.css b/plugins/lumi_ai/public/assistant.css index e2f5d4d..27b2839 100644 --- a/plugins/lumi_ai/public/assistant.css +++ b/plugins/lumi_ai/public/assistant.css @@ -6,6 +6,7 @@ .lumi-ai-pill-label { flex: 1; text-align: left; font-weight: 700; } .lumi-ai-state { width: 8px; height: 8px; border-radius: 50%; background: #8b949e; box-shadow: 0 0 0 3px color-mix(in srgb, #8b949e 18%, transparent); } .lumi-ai-state.ready { background: #2ea043; box-shadow: 0 0 0 3px color-mix(in srgb, #2ea043 18%, transparent); } +.lumi-ai-state.warming { background: #d29922; box-shadow: 0 0 0 3px color-mix(in srgb, #d29922 18%, transparent); } .lumi-ai-state.error { background: #d73a49; box-shadow: 0 0 0 3px color-mix(in srgb, #d73a49 18%, transparent); } .lumi-ai-panel { position: fixed; z-index: 1; left: calc(var(--sidebar-width, 260px) + 14px); right: 14px; top: var(--lumi-ai-top, calc(100vh - 16.666vh - 14px)); height: max(180px, 16.666vh); min-height: 180px; max-height: calc(100vh - 16px); display: grid; grid-template-rows: 8px auto 1fr auto auto; overflow: hidden; 
border: 1px solid var(--border); border-radius: 8px; background: var(--card); box-shadow: 0 18px 55px rgba(0,0,0,.22); opacity: 0; transform: translateY(100%); pointer-events: none; transition: transform 0.25s ease-in-out, opacity 0.25s ease-in-out; } .lumi-ai-panel.open { opacity: 1; transform: translateY(0); pointer-events: auto; } @@ -37,11 +38,25 @@ .lumi-ai-message.user { align-self: flex-end; background: var(--sea); color: white; } .lumi-ai-message.error { border-color: var(--rose); color: var(--rose); } .lumi-ai-message.pending { display: flex; align-items: center; gap: 9px; color: var(--ink-soft); } +.lumi-ai-pending-progress { min-width: 0; } +.lumi-ai-timeout-controls { display: flex; flex-wrap: wrap; gap: 6px; margin-top: 8px; } +.lumi-ai-timeout-controls[hidden] { display: none; } +.lumi-ai-timeout-controls button { padding: 5px 8px; border: 1px solid var(--border); border-radius: 5px; background: var(--surface-2); color: var(--ink); cursor: pointer; font-weight: 700; } +.lumi-ai-timeout-controls button:disabled { opacity: .55; cursor: wait; } +.lumi-ai-timeout-details { margin-top: 7px; color: var(--ink-soft); font-size: 11px; line-height: 1.45; } +.lumi-ai-timeout-details[hidden] { display: none; } .lumi-ai-spinner { width: 14px; height: 14px; flex: 0 0 auto; border: 2px solid color-mix(in srgb, var(--sea) 25%, transparent); border-top-color: var(--sea); border-radius: 50%; animation: lumi-ai-spin .8s linear infinite; } .lumi-ai-retry { margin-top: 8px; padding: 5px 9px; border: 1px solid var(--rose); border-radius: 5px; background: transparent; color: inherit; cursor: pointer; font-weight: 700; } +.lumi-ai-retry:disabled { opacity: .65; cursor: wait; } .lumi-ai-confirm { display: flex; gap: 8px; margin-top: 8px; } .lumi-ai-confirm button { padding: 5px 9px; border-radius: 5px; border: 1px solid var(--border); cursor: pointer; } +.lumi-ai-feedback { display: flex; flex-wrap: wrap; gap: 6px; margin-top: 9px; padding-top: 8px; border-top: 1px solid 
var(--border); color: var(--ink-soft); font-size: 11px; } +.lumi-ai-feedback select, .lumi-ai-feedback input, .lumi-ai-feedback button { min-height: 29px; border: 1px solid var(--border); border-radius: 5px; background: var(--surface-2); color: var(--ink); padding: 4px 7px; font: inherit; } +.lumi-ai-feedback input { flex: 1 1 180px; } +.lumi-ai-feedback button { cursor: pointer; font-weight: 700; } +.lumi-ai-feedback button:disabled { opacity: .6; cursor: wait; } .lumi-ai-compose { display: grid; grid-template-columns: 1fr 40px; gap: 8px; padding: 10px 12px; border-top: 1px solid var(--border); } +.lumi-ai-cooldown { grid-column: 1 / -1; color: var(--rose); font-size: 12px; font-weight: 700; } .lumi-ai-compose textarea { width: 100%; min-height: 40px; max-height: 96px; resize: vertical; border: 1px solid var(--border); border-radius: 6px; background: var(--surface-2); color: var(--ink); padding: 8px; } .lumi-ai-compose button { display: grid; place-items: center; border: 0; border-radius: 6px; background: var(--sea); color: white; cursor: pointer; } .lumi-ai-compose button:disabled { opacity: .55; cursor: wait; } diff --git a/plugins/lumi_ai/public/assistant.js b/plugins/lumi_ai/public/assistant.js index 50a4542..d2da01a 100644 --- a/plugins/lumi_ai/public/assistant.js +++ b/plugins/lumi_ai/public/assistant.js @@ -19,6 +19,7 @@ const form = root.querySelector("[data-lumi-ai-form]"); const input = form?.querySelector("textarea"); const submit = form?.querySelector("[data-lumi-ai-submit]"); + const cooldown = form?.querySelector("[data-lumi-ai-cooldown]"); if (!endpoint || !panel || !toggle || !close || !clear || !resizeHandle || !state || !status || !messages || !form || !input || !submit) return; const listeners = new AbortController(); @@ -33,6 +34,9 @@ let conversation = loadJson(storageKey, []); let panelState = loadJson(stateKey, {}); let statusTimer = null; + let cooldownTimer = null; + let cooldownUntil = 0; + let requestInFlight = false; const trackedFetch 
= async (url, options = {}) => { const controller = new AbortController(); @@ -86,7 +90,7 @@ } }; - const addMessage = (text, type, confirmation = null, links = [], persist = true) => { + const addMessage = (text, type, confirmation = null, links = [], persist = true, feedbackContext = null) => { const item = document.createElement("div"); item.className = `lumi-ai-message ${type}`; if (type === "assistant") renderMarkdown(item, text); @@ -98,6 +102,7 @@ } appendLinks(item, links); if (confirmation) appendConfirmation(item, confirmation); + if (type === "assistant" && feedbackContext) appendFeedback(item, feedbackContext); messages.append(item); messages.scrollTop = messages.scrollHeight; if (persist && ["user", "assistant"].includes(type)) { @@ -115,21 +120,115 @@ spinner.setAttribute("aria-hidden", "true"); const label = document.createElement("span"); label.textContent = "Queued for Lumi Assistant..."; - item.append(spinner, label); + const progress = document.createElement("div"); + progress.className = "lumi-ai-pending-progress"; + progress.append(label); + const controls = document.createElement("div"); + controls.className = "lumi-ai-timeout-controls"; + controls.hidden = true; + const details = document.createElement("div"); + details.className = "lumi-ai-timeout-details"; + details.hidden = true; + const buttons = {}; + for (const [key, text] of [ + ["continue", "Continue waiting"], + ["cancel", "Cancel"], + ["details", "Details"] + ]) { + const button = document.createElement("button"); + button.type = "button"; + button.textContent = text; + button.dataset.timeoutAction = key; + buttons[key] = button; + controls.append(button); + } + progress.append(controls, details); + item.append(spinner, progress); messages.append(item); messages.scrollTop = messages.scrollHeight; - const processingTimer = window.setTimeout(() => { - if (item.isConnected) label.textContent = "Lumi Assistant is processing..."; - }, 350); + let latestJob = null; return { + 
setStage(stage, job = null) { + latestJob = job || latestJob; + const labels = { + queued: "Queued for Lumi Assistant...", + deterministic: "Checking verified answers...", + gating: "Routing with the lightweight gate...", + gate: "Routing with the lightweight gate...", + main_model_loading: "Loading the main model...", + prompt_eval: "Evaluating the prompt...", + generating: "Main model is generating...", + formatting: "Formatting the reply...", + done: "Reply complete.", + cancelled: "Request cancelled." + }; + const baseLabel = labels[stage] || "Lumi Assistant is processing..."; + const elapsed = latestJob?.elapsed_ms ? ` · ${formatElapsed(latestJob.elapsed_ms)}` : ""; + const budget = Number(latestJob?.details?.max_output_tokens_used || latestJob?.details?.max_output_tokens) || 0; + const budgetText = budget && ["prompt_eval", "generating"].includes(stage) + ? ` · budget ${budget} tokens` + : ""; + label.textContent = `${baseLabel}${elapsed}${budgetText}`; + if (!details.hidden && latestJob) this.updateDetails(latestJob); + }, + showSoftTimeout(actions) { + controls.hidden = false; + buttons.continue.onclick = actions.continueWaiting; + buttons.cancel.onclick = actions.cancel; + buttons.details.onclick = () => { + details.hidden = !details.hidden; + if (!details.hidden && latestJob) this.updateDetails(latestJob); + }; + updateCooldown(); + }, + hideSoftTimeout() { + controls.hidden = true; + details.hidden = true; + }, + updateDetails(job) { + latestJob = job; + const jobDetails = job.details || {}; + const generated = Number(jobDetails.generated_tokens) || 0; + details.textContent = [ + `Stage: ${job.stage || "unknown"}`, + `Elapsed: ${formatElapsed(job.elapsed_ms)}`, + `Generated tokens: ${generated || "not reported"}`, + `Job alive: ${job.still_running ? 
"yes" : "no"}` + ].join(" | "); + }, remove() { - window.clearTimeout(processingTimer); item.remove(); } }; }; - const addError = (text, retry = null) => { + const cooldownSeconds = () => Math.max(0, Math.ceil((cooldownUntil - Date.now()) / 1000)); + const updateCooldown = () => { + const seconds = cooldownSeconds(); + const active = seconds > 0; + submit.disabled = requestInFlight || active; + submit.title = active ? `Retry available in ${seconds}s` : "Send"; + if (cooldown) { + cooldown.hidden = !active; + cooldown.textContent = active ? `Retry available in ${seconds}s` : ""; + } + for (const button of messages.querySelectorAll(".lumi-ai-retry[data-cooldown]")) { + button.disabled = active; + button.textContent = active ? `Retry in ${seconds}s` : "Retry"; + } + if (!active && cooldownTimer) { + window.clearInterval(cooldownTimer); + cooldownTimer = null; + } + }; + const beginCooldown = (seconds) => { + const duration = Math.max(1, Number(seconds) || 1); + cooldownUntil = Math.max(cooldownUntil, Date.now() + duration * 1000); + if (!cooldownTimer) cooldownTimer = window.setInterval(updateCooldown, 1000); + updateCooldown(); + }; + + const addError = (text, retry = null, retryAfterSeconds = 0) => { const item = document.createElement("div"); item.className = "lumi-ai-message assistant error"; item.setAttribute("role", "alert"); @@ -141,11 +240,14 @@ button.type = "button"; button.className = "lumi-ai-retry"; button.textContent = "Retry"; + if (retryAfterSeconds > 0) button.dataset.cooldown = "true"; button.addEventListener("click", () => { + if (cooldownSeconds() > 0) return; item.remove(); retry(); }, { once: true, signal: listeners.signal }); item.append(button); + updateCooldown(); } messages.append(item); messages.scrollTop = messages.scrollHeight; @@ -179,6 +281,60 @@ item.append(actions); }; + const appendFeedback = (item, context) => { + const controls = document.createElement("form"); + controls.className = "lumi-ai-feedback"; + 
controls.setAttribute("aria-label", "Rate this Lumi Assistant reply"); + const select = document.createElement("select"); + select.setAttribute("aria-label", "Feedback tag"); + for (const tag of [ + "good", + "bad", + "wrong_link", + "hallucinated", + "too_generic", + "unsafe", + "should_clarify", + "bad_code", + "wrong_scope" + ]) { + const option = document.createElement("option"); + option.value = tag; + option.textContent = tag.replaceAll("_", " "); + select.append(option); + } + const correction = document.createElement("input"); + correction.maxLength = 16000; + correction.placeholder = "Optional correction"; + correction.setAttribute("aria-label", "Optional correction"); + const submitFeedback = document.createElement("button"); + submitFeedback.type = "submit"; + submitFeedback.textContent = "Send feedback"; + controls.append(select, correction, submitFeedback); + controls.addEventListener("submit", async (event) => { + event.preventDefault(); + submitFeedback.disabled = true; + try { + const response = await trackedFetch(`${endpoint}/assistant/feedback`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + ...context, + feedback_tag: select.value, + optional_correction: correction.value.trim() + }) + }); + const data = await readResponseJson(response); + if (!response.ok) throw new Error(data.error || "Feedback could not be saved."); + controls.replaceChildren(document.createTextNode("Feedback saved.")); + } catch (error) { + submitFeedback.disabled = false; + submitFeedback.textContent = error.message || "Try again"; + } + }, { signal: listeners.signal }); + item.append(controls); + }; + const restoreConversation = () => { messages.replaceChildren(); for (const entry of conversation.slice(-HISTORY_LIMIT)) { @@ -201,8 +357,14 @@ const response = await trackedFetch(`${endpoint}/api/status`); const data = await response.json(); const ready = response.ok && data.enabled && data.runtime?.healthy; - state.className = 
`lumi-ai-state ${ready ? "ready" : "error"}`; - status.textContent = ready ? `${data.model_id} ready` : "Runtime unavailable"; + const cold = response.ok && data.enabled && data.runtime?.runtime_installed && + data.runtime?.model_downloaded && data.runtime?.state === "stopped"; + state.className = `lumi-ai-state ${ready ? "ready" : cold ? "warming" : "error"}`; + status.textContent = ready + ? `Main ready · gate ${data.gate?.healthy ? "ready" : "fallback"}` + : cold + ? `Main loads on request · gate ${data.gate?.healthy ? "ready" : "starting"}` + : "Runtime unavailable"; } catch (error) { if (error.name !== "AbortError") { state.className = "lumi-ai-state error"; @@ -239,6 +401,7 @@ const sendMessage = async (message, history, addUserMessage = true) => { if (addUserMessage) addMessage(message, "user"); + requestInFlight = true; input.disabled = true; submit.disabled = true; messages.setAttribute("aria-busy", "true"); @@ -253,29 +416,98 @@ if (!response.ok) { const error = new Error(data.error || `Request failed (${response.status}).`); error.status = response.status; + error.retryAfterSeconds = Number(data.retry_after_seconds) || 0; throw error; } - addMessage(data.text, "assistant", data.confirmation, data.links); + const result = response.status === 202 && data.status_url + ? await pollAssistantJob(data, pending) + : data; + addMessage(result.text, "assistant", result.confirmation, result.links, true, result.feedback_context); } catch (error) { - if (error.name !== "AbortError") { + if (error.name === "AbortError") { + if (root.isConnected) { + addError("Assistant request was cancelled.", () => sendMessage(message, history, false)); + } + } else { const retrySafe = ![400, 401, 403].includes(error.status); - addError(error.message, retrySafe ? () => sendMessage(message, history, false) : null); + if (error.status === 429 && error.retryAfterSeconds > 0) { + beginCooldown(error.retryAfterSeconds); + } + addError( + error.message, + retrySafe ? 
() => sendMessage(message, history, false) : null, + error.retryAfterSeconds + ); } } finally { pending.remove(); + requestInFlight = false; if (root.isConnected) { input.disabled = false; - submit.disabled = false; + updateCooldown(); messages.setAttribute("aria-busy", "false"); input.focus(); } } }; + const pollAssistantJob = async (jobRequest, pending) => { + const softTimeoutMs = Math.max(5000, Number(jobRequest.ui_soft_timeout_ms) || 45000); + let nextSoftTimeoutAt = Date.now() + softTimeoutMs; + let softTimeoutReported = false; + let requestedAction = null; + while (root.isConnected) { + if (requestedAction === "continue") { + pending.hideSoftTimeout(); + nextSoftTimeoutAt = Date.now() + softTimeoutMs; + requestedAction = null; + } else if (requestedAction === "cancel") { + requestedAction = null; + const cancelResponse = await trackedFetch(jobRequest.cancel_url, { method: "POST" }); + const cancelled = await readResponseJson(cancelResponse); + if (!cancelResponse.ok) { + const error = new Error(cancelled.error || "Could not cancel the running request."); + error.status = cancelResponse.status; + throw error; + } + } + const response = await trackedFetch(jobRequest.status_url, { cache: "no-store" }); + const job = await readResponseJson(response); + if (!response.ok) { + const error = new Error(job.error || `Request status failed (${response.status}).`); + error.status = response.status; + throw error; + } + pending.setStage(job.stage, job); + pending.updateDetails(job); + if (job.state === "complete") return job.result || {}; + if (["error", "cancelled"].includes(job.state)) { + const error = new Error(job.error || "Lumi Assistant could not complete the request."); + if (job.state === "cancelled") error.name = "AbortError"; + error.status = job.retry_after_seconds ? 
429 : 503; + error.retryAfterSeconds = Number(job.retry_after_seconds) || 0; + throw error; + } + if (Date.now() >= nextSoftTimeoutAt) { + pending.showSoftTimeout({ + continueWaiting: () => { requestedAction = "continue"; }, + cancel: () => { requestedAction = "cancel"; } + }); + if (!softTimeoutReported && jobRequest.soft_timeout_url) { + softTimeoutReported = true; + trackedFetch(jobRequest.soft_timeout_url, { method: "POST" }).catch(() => {}); + } + nextSoftTimeoutAt = Number.POSITIVE_INFINITY; + } + await new Promise((resolve) => window.setTimeout(resolve, 750)); + } + throw Object.assign(new Error("Assistant panel closed."), { name: "AbortError" }); + }; + form.addEventListener("submit", async (event) => { event.preventDefault(); const message = input.value.trim(); - if (!message || input.disabled) return; + if (!message || input.disabled || cooldownSeconds() > 0) return; const history = conversation.slice(-REQUEST_HISTORY_LIMIT).map((entry) => ({ role: entry.role, content: entry.content @@ -313,6 +545,7 @@ for (const request of requests) request.abort(); requests.clear(); if (statusTimer) window.clearInterval(statusTimer); + if (cooldownTimer) window.clearInterval(cooldownTimer); overlayRoot.remove(); instances.delete(root); } @@ -395,6 +628,12 @@ flushList(); } + function formatElapsed(milliseconds) { + const seconds = Math.max(0, Math.floor((Number(milliseconds) || 0) / 1000)); + const minutes = Math.floor(seconds / 60); + return minutes ? 
`${minutes}m ${seconds % 60}s` : `${seconds}s`; + } + function appendInlineMarkdown(parent, value) { const pattern = /(]*\bhref\s*=\s*(["'])(.*?)\2[^>]*>(.*?)<\/a>|`[^`\n]+`|\*\*[^*]+\*\*|_[^_\n]+_|\[[^\]]+\]\([^)]+\))/gi; let offset = 0; diff --git a/plugins/lumi_ai/public/improvement-center.css b/plugins/lumi_ai/public/improvement-center.css new file mode 100644 index 0000000..9a3240c --- /dev/null +++ b/plugins/lumi_ai/public/improvement-center.css @@ -0,0 +1,21 @@ +.improvement-titlebar { align-items: center; } +.improvement-filters, .improvement-actions { display: flex; flex-wrap: wrap; align-items: center; gap: 7px; } +.improvement-actions form { margin: 0; } +.improvement-list { display: grid; gap: 12px; } +.improvement-card { padding: 13px; border: 1px solid var(--border); border-radius: 8px; background: var(--card); } +.improvement-card > header { display: flex; flex-wrap: wrap; justify-content: space-between; gap: 8px; margin-bottom: 10px; color: var(--ink-soft); font-size: 12px; } +.improvement-card > header strong { color: var(--ink); font-size: 14px; } +.improvement-pair { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 10px; margin-bottom: 10px; } +.improvement-pair > div, .improvement-correction { min-width: 0; padding: 9px; border-radius: 6px; background: var(--surface-2); } +.improvement-pair span { display: block; margin-bottom: 5px; color: var(--ink-soft); font-size: 11px; font-weight: 700; text-transform: uppercase; } +.improvement-card pre, .table pre { max-height: 240px; margin: 0; overflow: auto; white-space: pre-wrap; overflow-wrap: anywhere; color: var(--ink); font: inherit; } +.improvement-correction { margin-bottom: 10px; border-left: 3px solid var(--sea); } +.improvement-correction strong { display: block; margin-bottom: 5px; } +.improvement-dialog { width: min(760px, calc(100vw - 28px)); max-height: calc(100vh - 28px); padding: 18px; overflow: auto; border: 1px solid var(--border); border-radius: 9px; background: 
var(--card); color: var(--ink); box-shadow: 0 20px 60px rgba(0, 0, 0, .35); } +.improvement-dialog::backdrop { background: rgba(0, 0, 0, .55); } +.improvement-dialog .ai-form { margin: 0; } +.improvement-card .button, .improvement-actions .button { white-space: nowrap; } +@media (max-width: 760px) { + .improvement-pair { grid-template-columns: 1fr; } + .improvement-titlebar { align-items: flex-start; } +} diff --git a/plugins/lumi_ai/public/improvement-center.js b/plugins/lumi_ai/public/improvement-center.js new file mode 100644 index 0000000..902519d --- /dev/null +++ b/plugins/lumi_ai/public/improvement-center.js @@ -0,0 +1,17 @@ +(() => { + document.addEventListener("click", (event) => { + const opener = event.target.closest("[data-open-dialog]"); + if (opener) { + document.getElementById(opener.dataset.openDialog)?.showModal(); + return; + } + const closer = event.target.closest("[data-close-dialog]"); + if (closer) closer.closest("dialog")?.close(); + }); + + document.addEventListener("submit", (event) => { + const form = event.target.closest("[data-improvement-confirm]"); + if (!form) return; + if (!window.confirm(form.dataset.improvementConfirm || "Continue?")) event.preventDefault(); + }); +})(); diff --git a/plugins/lumi_ai/public/settings.js b/plugins/lumi_ai/public/settings.js index f647f4f..20c3f54 100644 --- a/plugins/lumi_ai/public/settings.js +++ b/plugins/lumi_ai/public/settings.js @@ -16,8 +16,8 @@ const data = await response.json(); if (!response.ok) throw new Error(data.error || "Runtime action failed."); if (data.state) state.textContent = data.state; - if (["self-test", "verify-runtime", "verify-model"].includes(button.dataset.runtimeAction)) { - const labels = { "self-test": "Runtime self-test passed.", "verify-runtime": "Runtime installation verified.", "verify-model": "Model verification passed." 
}; + if (["self-test", "verify-runtime", "verify-model", "verify-gate-model"].includes(button.dataset.runtimeAction)) { + const labels = { "self-test": "Runtime self-test passed.", "verify-runtime": "Runtime installation verified.", "verify-model": "Model verification passed.", "verify-gate-model": "Gate model verification passed." }; window.alert(labels[button.dataset.runtimeAction]); } } catch (error) { @@ -80,7 +80,9 @@ const limit = gpuControl.querySelector("[data-gpu-limit]"); const backend = gpuControl.querySelector("[data-gpu-backend]"); const memory = gpuControl.querySelector("[data-gpu-memory]"); - const vram = gpuControl.querySelector("[data-gpu-vram]"); + const totalVram = gpuControl.querySelector("[data-gpu-total-vram]"); + const freeVram = gpuControl.querySelector("[data-gpu-free-vram]"); + const externalVram = gpuControl.querySelector("[data-gpu-external-vram]"); const warning = gpuControl.querySelector("[data-gpu-warning]"); let maximum = Number.parseInt(limit.textContent.match(/\d+/)?.[0], 10) || 0; let capacityTimer = null; @@ -110,8 +112,10 @@ actualLabel.textContent = `${actual}%`; backend.textContent = String(data.backend || "cpu").toUpperCase(); memory.dataset.fullOffloadMb = String(Number(data.estimated_full_offload_mb) || 0); - memory.textContent = formatBytes((Number(data.estimated_full_offload_mb) || 0) * actual / 100); - vram.textContent = formatBytes(data.available_vram_mb); + memory.textContent = formatBytes(data.managed_model_vram_mb); + totalVram.textContent = formatBytes(data.total_vram_mb); + freeVram.textContent = formatBytes(data.free_vram_mb); + externalVram.textContent = formatBytes(data.external_vram_estimate_mb); warning.hidden = !data.warning; warning.textContent = data.warning || ""; }; diff --git a/plugins/lumi_ai/tests/verify.js b/plugins/lumi_ai/tests/verify.js index 9183989..aad34d2 100644 --- a/plugins/lumi_ai/tests/verify.js +++ b/plugins/lumi_ai/tests/verify.js @@ -5,9 +5,11 @@ const { canUse } = 
require("../backend/permissions"); const { canUseAssistant } = require("../backend/assistant_permissions"); const { ToolRegistry } = require("../backend/tool_router"); const { RequestQueue } = require("../backend/queue_manager"); -const { RuntimeManager, runCaptured, buildRuntimeArgs } = require("../backend/runtime_manager"); +const { RuntimeManager, combinedResourceEstimate, runCaptured, buildRuntimeArgs } = require("../backend/runtime_manager"); const { getRuntimeState } = require("../backend/config_manager"); -const { AiProvider, normalizeHistory } = require("../backend/ai_provider"); +const { AiProvider, normalizeHistory, normalizeInferenceDiagnostics, resolveOutputBudget } = require("../backend/ai_provider"); +const { GateProvider, similarity, stripForcePrefix, isSensitiveRequest, classifyRequestType, withTimeout } = require("../backend/gate_provider"); +const { AssistantRequestJobs } = require("../backend/request_jobs"); const { shouldAutoResume } = require("../index"); const { normalizeExitCode, classifyLaunchError } = require("../backend/error_codes"); const { redact } = require("../backend/diagnostics"); @@ -17,19 +19,25 @@ const { buildVisibilityDiagnostics, CONDITION_KEYS } = require("../backend/assis const { buildPrompt } = require("../backend/prompt_builder"); const { HARD_RULES, normalizeScope } = require("../backend/scope_manager"); const repoIndexer = require("../backend/repo_indexer"); -const { selectRuntimeTarget, calculateGpuCapacity, estimateAllocation } = require("../backend/hardware"); +const { selectRuntimeTarget, calculateGpuCapacity, estimateAllocation, performanceTuningHints } = require("../backend/hardware"); const modelManifest = require("../models_manifest.json"); const runtimeManifest = require("../runtime_manifest.json"); const storage = require("../backend/storage"); const { formatBytes, bytesFromMb, sanityCheckSize } = require("../backend/size_utils"); -const { paginateRows } = require("../backend/metrics"); +const { paginateRows, 
summarizeMetrics, isValidTiming } = require("../backend/metrics"); const { AiAccessControl } = require("../backend/access_control"); const { AiRateLimiter, mergeLimits } = require("../backend/rate_limits"); const { buildOriginContext, formatPlatformReply } = require("../backend/commands"); const { AssistantPanelDiagnostics } = require("../backend/assistant_panel_diagnostics"); const { formatAssistantResponse, normalizeLink, normalizeCodeFences } = require("../backend/response_formatter"); +const { FeedbackStore, FEEDBACK_TAGS, improvementAccess } = require("../backend/feedback"); +const { CorrectionStore } = require("../backend/corrections"); +const { EvalStore, evaluateCase } = require("../backend/evals"); +const { TrainingExporter, approvedExamples } = require("../backend/training_export"); const { registerAssistantCommands, authorizeAiRequest, searchKnownUsers, finalizeAssistantResult } = require("../index"); const { EventEmitter } = require("events"); +const os = require("os"); +const path = require("path"); async function run() { ensureDataDirs(); @@ -51,16 +59,58 @@ async function run() { const captured = await runCaptured(process.execPath, ["-e", "console.log('llama server usage')"], process.cwd(), 3000); assert.equal(captured.code, 0); assert.match(captured.stdout, /llama server usage/); + const timeoutRuntime = new RuntimeManager({ + getConfig: () => ({ hard_generation_timeout_ms: 30000 }), + getModel: () => null, + runtimeManifest: {} + }); + timeoutRuntime.port = 12345; + const originalFetch = global.fetch; + const originalSetTimeout = global.setTimeout; + const originalClearTimeout = global.clearTimeout; + try { + global.setTimeout = (callback) => { + const timer = { unref() {} }; + queueMicrotask(callback); + return timer; + }; + global.clearTimeout = () => {}; + global.fetch = (_url, options) => new Promise((_resolve, reject) => { + options.signal.addEventListener("abort", () => { + reject(Object.assign(new Error("aborted"), { name: "AbortError" })); 
+ }, { once: true }); + }); + await assert.rejects( + timeoutRuntime.infer([{ role: "user", content: "slow" }], 100), + (error) => error.code === "HARD_GENERATION_TIMEOUT" + ); + } finally { + global.fetch = originalFetch; + global.setTimeout = originalSetTimeout; + global.clearTimeout = originalClearTimeout; + } assert.equal(formatBytes(2497280960), "2.33 GB"); assert.equal(formatBytes(38407211), "36.6 MB"); assert.equal(bytesFromMb(512), 536870912); assert.equal(sanityCheckSize("runtime", 40 * 1024 * 1024, 1024 * 1024 * 1024).valid, true); assert.equal(sanityCheckSize("runtime", 10 * 1024 ** 4, 1024 * 1024 * 1024).valid, false); + assert.deepEqual(repoIndexer.verifiedRoutePaths(null), []); const page = paginateRows(Array.from({ length: 60 }, (_, index) => index + 1), 2, 25); assert.equal(page.entries.length, 25); assert.equal(page.entries[0], 35); assert.equal(page.entries[24], 11); assert.equal(page.pages, 3); + const sanitizedMetrics = summarizeMetrics({ + durations: [-500, "invalid", Number.POSITIVE_INFINITY, 100, 200], + stage_totals: { prompt_eval_ms: 300, generation_ms: -40 }, + stage_counts: { prompt_eval_ms: 1, generation_ms: 1 } + }); + assert.equal(sanitizedMetrics.average_response_ms, 150); + assert.equal(sanitizedMetrics.median_response_ms, 200); + assert.equal(sanitizedMetrics.average_stage_ms.prompt_eval_ms, 300); + assert.equal(sanitizedMetrics.average_stage_ms.generation_ms, undefined); + assert.equal(isValidTiming(-1), false); + assert.equal(isValidTiming(0), true); const panelTemplate = require("path").join(PLUGIN_ROOT, "views", "assistant-panel.ejs"); const panelDiagnostic = new AssistantPanelDiagnostics(panelTemplate); @@ -75,6 +125,127 @@ async function run() { for (const group of ["Platform commands", "Rate limits", "User AI access", "Assistant identity and scope"]) { assert(settingsTemplate.includes(group)); } + assert(settingsTemplate.includes("Improvement Center")); + const improvementTemplate = fs.readFileSync(path.join(PLUGIN_ROOT, 
"views", "improvement-center.ejs"), "utf8"); + for (const control of ["Review queue", "Save Corrections", "Run all evals", "Export instruction JSONL", "Export DPO JSONL"]) { + assert(improvementTemplate.includes(control)); + } + const assistantFeedbackScript = fs.readFileSync(path.join(PLUGIN_ROOT, "public", "assistant.js"), "utf8"); + for (const tag of FEEDBACK_TAGS) assert(assistantFeedbackScript.includes(`"${tag}"`)); + assert(assistantFeedbackScript.includes("/assistant/feedback")); + + const improvementConfig = { + improvement: { + allow_moderators_to_review_responses: true, + trusted_moderator_reviewers: ["trusted-mod"], + corrections_enabled: true + } + }; + assert.equal(improvementAccess({ id: "admin", isAdmin: true }, improvementConfig).can_implement, true); + assert.equal(improvementAccess({ id: "trusted-mod", isMod: true }, improvementConfig).can_verify, true); + assert.equal(improvementAccess({ id: "trusted-mod", isMod: true }, improvementConfig).can_implement, false); + assert.equal(improvementAccess({ id: "other-mod", isMod: true }, { + improvement: { allow_moderators_to_review_responses: false, trusted_moderator_reviewers: [] } + }).allowed, false); + + const improvementTemp = fs.mkdtempSync(path.join(os.tmpdir(), "lumi-ai-improvement-")); + const feedbackStore = new FeedbackStore({ file: path.join(improvementTemp, "feedback.json") }); + const review = feedbackStore.capture({ + user_message: "Where is the verified page?", + assistant_answer: "An invented answer.", + route_used: "main_llm", + role: "admin", + origin: "webui", + platform: "webui", + model: "test-model", + feedback_tag: "wrong_link", + optional_correction: "Use /admin/settings." 
+ }, { id: "reporter" }); + assert.deepEqual( + Object.keys(review).filter((key) => /prompt|context|reasoning/i.test(key)), + [] + ); + feedbackStore.verify(review.id, { id: "trusted-mod" }); + assert.equal(feedbackStore.get(review.id).status, "verified"); + feedbackStore.setStatus(review.id, "approved", { id: "admin" }); + const correctionConfig = structuredClone(improvementConfig); + const correctionStore = new CorrectionStore({ + getConfig: () => correctionConfig, + verifyLink: (value) => value === "/admin/settings", + file: path.join(improvementTemp, "corrections.json") + }); + assert.throws(() => correctionStore.createFromFeedback(feedbackStore.get(review.id), { + target: "predefined_answer", + corrected_answer: "Open /admin/invented.", + min_role: "admin", + explicitly_safe: true + }, { id: "admin" }), /verified Lumi route/); + const correction = correctionStore.createFromFeedback(feedbackStore.get(review.id), { + target: "predefined_answer", + corrected_answer: "Open /admin/settings.", + expected_link: "/admin/settings", + min_role: "admin", + permission_origin: "webui", + permission_platform: "webui", + explicitly_safe: true, + enabled: true + }, { id: "admin" }); + assert.equal(correction.active, false); + assert.equal(correctionStore.match({ + message: review.user_message, + role: "admin", + origin: "webui", + platform: "webui" + }).length, 0); + correctionStore.saveCorrections({ id: "admin" }); + assert.equal(correctionStore.match({ + message: review.user_message, + role: "user", + origin: "webui", + platform: "webui" + }).length, 0); + assert.equal(correctionStore.findPredefined({ + message: review.user_message, + role: "admin", + origin: "webui", + platform: "webui" + }).id, correction.id); + correctionConfig.improvement.corrections_enabled = false; + assert.equal(correctionStore.match({ + message: review.user_message, + role: "admin", + origin: "webui", + platform: "webui" + }).length, 0); + correctionConfig.improvement.corrections_enabled = true; + + 
const evalStore = new EvalStore({ + casesFile: path.join(improvementTemp, "eval-cases.json"), + resultsFile: path.join(improvementTemp, "eval-results.json") + }); + const evalCase = evalStore.add({ + prompt: "Where are settings?", + role: "mod", + origin: "webui", + expected_behavior: "settings", + forbidden_behavior: "invented", + expected_link: "/admin/settings", + notes: "Navigation regression" + }, { id: "admin" }); + assert.equal(evalCase.expected_link, "/admin/settings"); + assert.equal(evaluateCase(evalCase, "Use settings at /admin/settings.", []).status, "pass"); + feedbackStore.markExportApproved(review.id, { id: "admin" }); + assert.equal(approvedExamples(feedbackStore.all(), correctionStore.all()).length, 1); + const exporter = new TrainingExporter({ + feedback: feedbackStore, + corrections: correctionStore, + outputDir: improvementTemp + }); + const instructionExport = exporter.export("instruction"); + const dpoExport = exporter.export("dpo"); + assert.match(fs.readFileSync(instructionExport.file, "utf8"), /"instruction"/); + assert.match(fs.readFileSync(dpoExport.file, "utf8"), /"preferred_answer"/); + fs.rmSync(improvementTemp, { recursive: true, force: true }); assert.deepEqual( modelManifest.models.map((model) => model.tier), @@ -143,6 +314,37 @@ async function run() { assert.equal(managedAllocation.gpu_allocation_intent_percent, 90); assert(managedAllocation.gpu_allocation_actual_percent > unmanagedAllocation.gpu_allocation_actual_percent); assert.equal(managedAllocation.managed_gpu_memory_mb, 4000); + assert.equal(managedAllocation.managed_model_vram_mb, 4000); + assert.equal( + managedAllocation.external_vram_estimate_mb, + 8192 - 1800 - 4000 + ); + const normalLoadedModel = estimateAllocation({ + model: { size: 4 * 1024 ** 3, gpu_layers: 36, default_context: 4096 }, + contextSize: 4096, + gpu: { ...testGpu, model: "NVIDIA GeForce GTX 1060 6GB", vram_mb: 6144, available_vram_mb: 1200 }, + backend: "vulkan", + intentPercent: 100, + managedUsageMb: 
4500 + }); + assert.equal(normalLoadedModel.gpu_allocation_actual_percent, 100); + assert.equal(normalLoadedModel.external_vram_estimate_mb, 444); + const externalPressure = estimateAllocation({ + model: { size: 4 * 1024 ** 3, gpu_layers: 36, default_context: 4096 }, + contextSize: 4096, + gpu: { ...testGpu, model: "NVIDIA GeForce GTX 1060 6GB", vram_mb: 6144, available_vram_mb: 500 }, + backend: "vulkan", + intentPercent: 100, + managedUsageMb: 3000 + }); + assert(externalPressure.gpu_allocation_actual_percent < 100); + assert.equal(externalPressure.gpu_allocation_clamped_reason, "external_vram_pressure"); + assert(performanceTuningHints({ + model: { size: 4 * 1024 ** 3 }, + config: { max_output_tokens: 4096, context_size: 16384, concurrency: 2 }, + gpu: { model: "NVIDIA GeForce GTX 1060 6GB", vram_mb: 6144 }, + allocation: normalLoadedModel + }).some((hint) => hint.includes("normal managed-model allocation"))); const launchArgs = buildRuntimeArgs({ port: 1234, modelPath: "model.gguf", @@ -151,6 +353,8 @@ async function run() { acceleration: allocation }); assert.deepEqual(launchArgs.slice(-2), ["-ngl", String(allocation.gpu_layers)]); + assert(launchArgs.includes("-b")); + assert(launchArgs.includes("-ub")); assert.equal(buildRuntimeArgs({ port: 1234, modelPath: "model.gguf", @@ -259,6 +463,12 @@ async function run() { role: "admin" }); assert.equal(safeExternalReply.text, "[External help](https://example.com/help)"); + const styleOnlyLength = formatAssistantResponse({ + text: "x".repeat(5000), + verifiedRoutes: [], + role: "admin" + }); + assert.equal(styleOnlyLength.delivered_length, 5000); const punctuatedRouteReply = formatAssistantResponse({ text: "Open GET /admin/twitch-wizard.", baseUrl: "https://lumi.example", @@ -339,12 +549,15 @@ async function run() { model: availabilityModel, runtimeHealth: healthyRuntime }).reason_code, "role_forbidden"); - assert.equal(evaluateAssistantAvailability({ + const coldAvailability = evaluateAssistantAvailability({ user: { 
id: "a", isAdmin: true }, config: { enabled: true, assistant_visibility: visibility }, model: availabilityModel, runtimeHealth: { ...healthyRuntime, state: "stopped", healthy: false } - }).reason_code, "runtime_stopped"); + }); + assert.equal(coldAvailability.available, true); + assert.equal(coldAvailability.status, "cold_start"); + assert.equal(coldAvailability.reason_code, null); assert.equal(evaluateAssistantAvailability({ user: { id: "a", isAdmin: true }, config: { enabled: true, assistant_visibility: visibility }, @@ -533,6 +746,386 @@ async function run() { assert.match(identity.text, /Lumi Assistant/); assert.equal(identity.route_used, "llm"); assert(providerAudit.some((entry) => entry.route_used === "llm")); + + const gateAudit = []; + const cachedAnswers = new Map(); + const gateCacheReads = []; + const gateConfig = { + gate: { + high_confidence_threshold: 0.88, + main_llm_threshold: 0.72, + predefined_enabled: true, + cache_ttl_seconds: 3600, + repeat_force_window_seconds: 90, + similarity_threshold: 0.8, + force_prefix: "force ai:", + timeout_ms: 3000 + }, + instructions: { out_of_scope_response: "Outside scope." }, + commands: { unavailable_message: "Unavailable." } + }; + const gateProvider = new GateProvider({ + getConfig: () => gateConfig, + runtime: { + status: () => ({ state: "running" }), + infer: async (_messages) => ({ + choices: [{ + message: { + content: JSON.stringify({ + route: "predefined_answer", + confidence: 0.96, + reason_code: "verified_navigation" + }) + } + }] + }) + }, + lookupRepo: (message) => /twitch/i.test(message) ? 
{ + type: "route", + text: "Twitch Configuration Wizard is available in Lumi's WebUI.", + links: [{ href: "/admin/twitch-wizard", label: "Twitch Configuration Wizard" }], + source: { confidence: "high" } + } : null, + cache: { + get(input) { + gateCacheReads.push(input.message); + return cachedAnswers.get(`${input.role}:${input.platform}:${input.message}`) || null; + }, + set(input, answer) { + cachedAnswers.set(`${input.role}:${input.platform}:${input.message}`, answer); + return answer; + } + }, + metrics: { record: (entry) => gateAudit.push(entry) } + }); + const knownGate = await gateProvider.route({ + message: "Where are Twitch settings?", + user: { id: "gate-user-1" }, + role: "user", + scope: "assistant", + originContext: { platform: "webui" } + }); + assert.equal(knownGate.route, "predefined_answer"); + assert.equal(knownGate.reason_code, "exact_verified_route"); + const reviewedGateProvider = new GateProvider({ + getConfig: () => gateConfig, + runtime: { status: () => ({ state: "stopped" }) }, + lookupCorrection: () => ({ + id: "approved-correction", + target: "route_alias", + score: 0.94, + corrected_answer: "Open the approved settings page.", + expected_link: "/admin/settings", + route_alias: "Approved settings" + }), + metrics: { record() {} } + }); + const reviewedGate = await reviewedGateProvider.route({ + message: "Where is the approved settings page?", + user: { id: "gate-reviewed" }, + role: "admin", + scope: "assistant", + originContext: { origin: "webui", platform: "webui" } + }); + assert.equal(reviewedGate.route, "predefined_answer"); + assert.equal(reviewedGate.answer.links[0].href, "/admin/settings"); + const cachedGate = await gateProvider.route({ + message: "Where are Twitch settings?", + user: { id: "gate-user-2" }, + role: "user", + scope: "assistant", + originContext: { platform: "webui" } + }); + assert.equal(cachedGate.route, "cached_answer"); + const repeatedGate = await gateProvider.route({ + message: "Where can I find the Twitch 
settings?", + user: { id: "gate-user-1" }, + role: "user", + scope: "assistant", + originContext: { platform: "webui" } + }); + assert.equal(repeatedGate.route, "main_llm"); + assert.equal(repeatedGate.reason_code, "repeat_prompt_force"); + const forcedGate = await gateProvider.route({ + message: "force ai: Where are Twitch settings?", + user: { id: "gate-user-3" }, + role: "user", + scope: "assistant", + originContext: { platform: "webui" } + }); + assert.equal(forcedGate.route, "main_llm"); + assert.equal(forcedGate.reason_code, "explicit_force_prefix"); + assert.equal(forcedGate.message, "Where are Twitch settings?"); + const sensitiveGate = await gateProvider.route({ + message: "Delete this user's economy balance", + user: { id: "gate-user-4" }, + role: "admin", + scope: "assistant", + originContext: { platform: "webui" } + }); + assert.equal(sensitiveGate.route, "main_llm"); + assert.equal(sensitiveGate.reason_code, "sensitive_or_user_specific"); + assert.equal(gateCacheReads.includes("Delete this user's economy balance"), false); + const uncertainGate = new GateProvider({ + getConfig: () => gateConfig, + runtime: { + status: () => ({ state: "running" }), + infer: async () => ({ + choices: [{ + message: { + content: JSON.stringify({ + route: "main_llm", + confidence: 0.51, + reason_code: "uncertain_request" + }) + } + }] + }) + }, + lookupRepo: () => null, + cache: { get: () => null, set: () => null }, + metrics: { record: (entry) => gateAudit.push(entry) } + }); + const uncertainDecision = await uncertainGate.route({ + message: "Is Lumi online?", + user: { id: "gate-user-5" }, + role: "user", + scope: "assistant", + originContext: { platform: "webui" } + }); + assert.equal(uncertainDecision.route, "main_llm"); + assert.equal(uncertainDecision.reason_code, "low_confidence"); + const timeoutGate = new GateProvider({ + getConfig: () => gateConfig, + runtime: { + status: () => ({ state: "running" }), + infer: async () => { + throw Object.assign(new Error("Gate 
inference timed out."), { name: "TimeoutError" }); + } + }, + lookupRepo: () => null, + cache: { get: () => null, set: () => null }, + metrics: { record: (entry) => gateAudit.push(entry) } + }); + const timeoutDecision = await timeoutGate.route({ + message: "Is the bot available?", + user: { id: "gate-user-6" }, + role: "user", + scope: "assistant", + originContext: { platform: "webui" } + }); + assert.equal(timeoutDecision.route, "main_llm"); + assert.equal(timeoutDecision.reason_code, "gate_timeout_escalated"); + const timeoutStarted = Date.now(); + await assert.rejects( + () => withTimeout(new Promise(() => {}), 25), + /Gate timed out/ + ); + assert(Date.now() - timeoutStarted < 250); + let complexGateCalls = 0; + const complexGate = new GateProvider({ + getConfig: () => gateConfig, + runtime: { + status: () => ({ state: "running" }), + infer: async () => { complexGateCalls += 1; } + }, + lookupRepo: () => null, + cache: { get: () => null, set: () => null }, + metrics: { record: (entry) => gateAudit.push(entry) } + }); + const complexDecision = await complexGate.route({ + message: "Troubleshoot this JavaScript error and explain the multi-step fix.", + user: { id: "gate-user-7" }, + role: "admin", + scope: "assistant", + originContext: { platform: "webui" } + }); + assert.equal(complexDecision.route, "main_llm"); + assert.equal(complexDecision.reason_code, "deterministic_complexity_escalation"); + assert.equal(complexGateCalls, 0); + assert(gateAudit.some((entry) => + entry.kind === "gate_decision" && + typeof entry.confidence === "number" && + entry.reason_code + )); + assert(similarity("where are twitch settings", "where can I find twitch settings") >= 0.8); + assert.equal(stripForcePrefix("force ai: hello", "force ai:").forced, true); + assert.equal(isSensitiveRequest("Show my balance"), true); + assert.equal(classifyRequestType("Where are Twitch settings?"), "navigation_help"); + assert.equal(classifyRequestType("Write a Lumi custom command in JavaScript"), 
"code_custom_command"); + assert.equal(classifyRequestType("Please provide a detailed long explanation"), "explicit_long"); + assert.equal(classifyRequestType("Debug the runtime metrics", { role: "admin" }), "admin_debug"); + const outputBudgets = { + navigation_help: 256, + simple_answer: 512, + code_custom_command: 896, + admin_debug: 1280, + explicit_long: 2048 + }; + assert.equal(resolveOutputBudget({ + config: { output_budgets: outputBudgets }, + requestClass: classifyRequestType("Where is the settings page?") + }), 256); + assert.equal(resolveOutputBudget({ + config: { output_budgets: outputBudgets }, + requestClass: classifyRequestType("Write a custom command") + }), 896); + assert.equal(resolveOutputBudget({ + config: { output_budgets: outputBudgets }, + requestClass: classifyRequestType("Please provide a comprehensive long guide") + }), 2048); + + const combined = combinedResourceEstimate({ + main: { estimated_cpu_memory_mb: 4000, estimated_gpu_memory_mb: 2000 }, + gate: { estimated_cpu_memory_mb: 600, estimated_gpu_memory_mb: 0 }, + hardware: { total_ram_mb: 5000, gpu: { vram_mb: 1800 } } + }); + assert.equal(combined.total_cpu_memory_mb, 4600); + assert.equal(combined.total_gpu_memory_mb, 2000); + assert.equal(combined.exceeds_host_capacity, true); + + const stageAudit = []; + const stages = []; + const timingProvider = new AiProvider({ + getConfig: () => ({ + selected_model_id: "test", + support_scope: normalizeScope(), + instructions: {}, + logging: {}, + internal_generation_char_budget: 2000, + output_budgets: outputBudgets + }), + runtime: { + infer: async () => ({ + choices: [{ message: { content: "Main answer" }, finish_reason: "stop" }], + usage: { prompt_tokens: 80, completion_tokens: 20 }, + timings: { prompt_ms: 400, predicted_ms: 1000, prompt_per_second: 200, predicted_per_second: 20 } + }) + }, + gate: { + route: async ({ onStage }) => { + onStage("deterministic"); + onStage("gating"); + return { + route: "main_llm", + confidence: 0.4, + 
reason_code: "low_confidence", + message: "Need help", + deterministic_ms: 2, + gate_ms: 3 + }; + } + }, + queue: new RequestQueue(() => ({ + concurrency: 1, + max_queue_length: 5, + per_user_requests_per_minute: 20 + })), + tools: registry, + metrics: { record: (entry) => stageAudit.push(entry) }, + getContext: () => [] + }); + const timedResult = await timingProvider.generate({ + message: "Need help", + user: { id: "timing-user" }, + sessionId: "timing-session", + onStage: (stage) => stages.push(stage) + }); + assert.equal(timedResult.route_used, "main_llm"); + assert.equal(timedResult.stage_timings.deterministic_ms, 2); + assert.equal(timedResult.stage_timings.gate_ms, 3); + assert.equal(timedResult.route_class, "simple_answer"); + assert.equal(timedResult.max_output_tokens_used, 512); + assert(Number.isFinite(timedResult.stage_timings.main_queue_ms)); + assert(Number.isFinite(timedResult.stage_timings.main_generate_ms)); + assert(Number.isFinite(timedResult.stage_timings.total_ms)); + assert(stages.includes("queued")); + assert(stages.includes("prompt_eval")); + assert(stages.includes("generating")); + assert(stageAudit.some((entry) => + entry.kind === "request" && + Number.isFinite(entry.deterministic_ms) && + Number.isFinite(entry.main_generate_ms) + )); + assert(stageAudit.some((entry) => + entry.kind === "request" && + entry.route_class === "simple_answer" && + entry.max_output_tokens_used === 512 && + entry.prompt_tokens === 80 && + entry.generated_tokens === 20 && + entry.prompt_tps === 200 && + entry.generation_tps === 20 + )); + + const requestJobs = new AssistantRequestJobs({ ttlMs: 60000, maxJobs: 5 }); + const job = requestJobs.create({ + userId: "job-user", + execute: async (update) => { + update("gate"); + await new Promise((resolve) => setTimeout(resolve, 25)); + update("generating"); + return { text: "complete" }; + } + }); + assert.equal(job.state, "queued"); + await new Promise((resolve) => setTimeout(resolve, 50)); + const completedJob = 
requestJobs.get(job.id, "job-user"); + assert.equal(completedJob.state, "complete"); + assert.equal(completedJob.result.text, "complete"); + assert.equal(completedJob.stage, "done"); + assert.equal(requestJobs.get(job.id, "different-user"), null); + const softTimedJob = requestJobs.create({ + userId: "soft-user", + execute: async (update) => { + update("generating", { generated_tokens: 12 }); + await new Promise((resolve) => setTimeout(resolve, 30)); + return { text: "waited" }; + } + }); + await new Promise((resolve) => setTimeout(resolve, 5)); + const softState = requestJobs.markSoftTimeout(softTimedJob.id, "soft-user"); + assert.equal(softState.still_running, true); + assert.equal(softState.details.frontend_soft_timeout, true); + await new Promise((resolve) => setTimeout(resolve, 40)); + assert.equal(requestJobs.get(softTimedJob.id, "soft-user").result.text, "waited"); + let cancellationObserved = false; + const cancellableJob = requestJobs.create({ + userId: "cancel-user", + execute: async (update, signal) => { + update("generating"); + await new Promise((resolve, reject) => { + const timer = setTimeout(resolve, 500); + signal.addEventListener("abort", () => { + clearTimeout(timer); + cancellationObserved = true; + reject(Object.assign(new Error("cancelled"), { name: "AbortError", code: "REQUEST_CANCELLED" })); + }, { once: true }); + }); + return { text: "should not complete" }; + } + }); + await new Promise((resolve) => setTimeout(resolve, 5)); + assert.equal(requestJobs.cancel(cancellableJob.id, "cancel-user").state, "cancelled"); + await new Promise((resolve) => setTimeout(resolve, 10)); + assert.equal(cancellationObserved, true); + assert.equal(requestJobs.get(cancellableJob.id, "cancel-user").state, "cancelled"); + const inferenceDiagnostics = normalizeInferenceDiagnostics({ + usage: { prompt_tokens: 120, completion_tokens: 48 }, + timings: { + prompt_ms: 600, + predicted_ms: 2400, + prompt_per_second: 200, + predicted_per_second: 20 + } + }, 3000); + 
assert.deepEqual(inferenceDiagnostics, { + prompt_tokens: 120, + generated_tokens: 48, + prompt_eval_ms: 600, + generation_ms: 2400, + prompt_tps: 200, + generation_tps: 20 + }); let assembledPrompt = ""; let assembledMessages = []; let generatedTokenBudget = 0; @@ -542,7 +1135,8 @@ async function run() { selected_model_id: "test", support_scope: normalizeScope({ max_answer_length: 100 }), instructions: {}, - logging: {} + logging: {}, + output_budgets: outputBudgets }), runtime: { infer: async (messages, maxTokens) => { @@ -558,7 +1152,7 @@ async function run() { getContext: () => [longContext] }); await promptProvider.generate({ - message: "test", + message: "Write a Lumi custom command in JavaScript that greets the user", user: { id: "u1" }, sessionId: "s1", history: [{ role: "user", content: "Earlier question" }, { role: "assistant", content: "Earlier answer" }] @@ -571,7 +1165,9 @@ async function run() { assert(assembledPrompt.includes("Discord community server")); assert.equal(assembledMessages[1].content, "Earlier question"); assert.equal(assembledMessages[2].content, "Earlier answer"); - assert(generatedTokenBudget > 1000); + assert.equal(generatedTokenBudget, 896); + assert(assembledPrompt.includes("Request class: code_custom_command")); + assert(assembledPrompt.includes("Put one complete runnable code block first.")); const ambiguousProvider = new AiProvider({ getConfig: () => ({ selected_model_id: "test", request_timeout_ms: 1000, logging: {}, support_scope: normalizeScope(), instructions: { out_of_scope_response: "OUT" } }), runtime: { infer: async () => ({ choices: [{ message: { content: "Open the relevant Lumi settings page." 
}, finish_reason: "stop" }] }) }, @@ -772,6 +1368,7 @@ async function run() { const assistantScript = fs.readFileSync(require("path").join(PLUGIN_ROOT, "public", "assistant.js"), "utf8"); const assistantStyles = fs.readFileSync(require("path").join(PLUGIN_ROOT, "public", "assistant.css"), "utf8"); const assistantPanel = fs.readFileSync(require("path").join(PLUGIN_ROOT, "views", "assistant-panel.ejs"), "utf8"); + const pluginIndex = fs.readFileSync(require("path").join(PLUGIN_ROOT, "index.js"), "utf8"); assert(accessSettingsTemplate.includes("data-user-search")); assert(accessSettingsTemplate.includes("data-timeout-field hidden")); assert(settingsScript.includes('action.value === "timeout"')); @@ -783,17 +1380,49 @@ async function run() { assert(assistantScript.includes("Queued for Lumi Assistant")); assert(assistantScript.includes("Lumi Assistant is processing")); assert(assistantScript.includes("lumi-ai-retry")); + assert(assistantScript.includes("beginCooldown(error.retryAfterSeconds)")); + assert(assistantScript.includes("Retry available in ${seconds}s")); + assert(assistantScript.includes("cooldownSeconds() > 0")); + assert(assistantScript.includes("pollAssistantJob")); + assert(assistantScript.includes("Main model is generating")); + assert(assistantScript.includes("Loading the main model")); + for (const label of ["Continue waiting", "Retry", "Cancel", "Details"]) { + assert(assistantScript.includes(label)); + } + const liveTimeoutControls = assistantScript.match(/for \(const \[key, text\] of \[([\s\S]*?)\]\) \{/); + assert(liveTimeoutControls); + assert.equal(liveTimeoutControls[1].includes('"Retry"'), false); + assert(assistantScript.includes('addError("Assistant request was cancelled."')); + assert(assistantScript.includes("budget ${budget} tokens")); + assert(assistantScript.includes("soft_timeout_url")); + assert(assistantScript.includes("job.still_running")); + assert(assistantStyles.includes(".lumi-ai-timeout-controls")); + 
assert(pluginIndex.includes("res.status(202).json")); + assert(pluginIndex.includes('router.get("/assistant/jobs/:id"')); + assert(pluginIndex.includes('router.post("/assistant/jobs/:id/cancel"')); + assert(pluginIndex.includes('router.post("/assistant/jobs/:id/soft-timeout"')); + assert(accessSettingsTemplate.includes("UI soft timeout")); + assert(accessSettingsTemplate.includes("Hard generation timeout")); + assert(accessSettingsTemplate.includes("Managed model VRAM")); + assert(accessSettingsTemplate.includes("External VRAM estimate")); + assert(accessSettingsTemplate.includes("Current and recent assistant jobs")); + assert(accessSettingsTemplate.includes("Code/custom command tokens")); + assert(accessSettingsTemplate.includes("Explicit long-answer tokens")); + assert(accessSettingsTemplate.includes("Next slow requests")); assert(assistantScript.includes('messages.addEventListener("wheel"')); assert(assistantStyles.includes("z-index: 60")); assert(assistantStyles.includes("overscroll-behavior: contain")); assert(assistantStyles.includes(".modal-backdrop.is-open { z-index: 200; }")); assert(assistantStyles.includes("cursor: ns-resize")); assert(assistantPanel.includes("data-lumi-ai-clear")); + assert(assistantPanel.includes("data-lumi-ai-cooldown")); assert(assistantPanel.includes("AI can make mistakes. Verify important info.")); assert(assistantPanel.includes("do not represent Jenni, OokamiKunTV, admins, moderators, or the community")); const statePath = resolveData("config", "runtime_state.json"); - const originalState = fs.readFileSync(statePath, "utf8"); + const hadRuntimeState = fs.existsSync(statePath); + const originalState = hadRuntimeState ? 
fs.readFileSync(statePath, "utf8") : null; + getRuntimeState(); try { const runtime = new RuntimeManager({ getConfig: () => ({}), getModel: () => null, runtimeManifest: {} }); runtime.child = fakeChild(); @@ -806,7 +1435,8 @@ async function run() { assert.equal(shouldAutoResume({ enabled: true }, getRuntimeState()), false); assert.equal(shouldAutoResume({ enabled: true }, { desired_state: "running", last_manual_stop: false, last_crashed: true }), false); } finally { - fs.writeFileSync(statePath, originalState); + if (hadRuntimeState) fs.writeFileSync(statePath, originalState); + else fs.rmSync(statePath, { force: true }); } console.log("Lumi AI verification passed."); diff --git a/plugins/lumi_ai/views/assistant-panel.ejs b/plugins/lumi_ai/views/assistant-panel.ejs index 84b07e6..505f36e 100644 --- a/plugins/lumi_ai/views/assistant-panel.ejs +++ b/plugins/lumi_ai/views/assistant-panel.ejs @@ -18,6 +18,7 @@
+ +
+ +<% } %> + +
+
+

Review queue

Feedback records contain the user message, assistant answer, delivery metadata, tag, and optional correction only.

+
+ <% ["", "pending", "flagged", "verified", "approved", "rejected"].forEach((status) => { %> + <%= status || "All" %> + <% }) %> +
+
+
+ <% reviews.entries.forEach((review) => { %> +
+
+
<%= review.feedback_tag %> <%= review.status %>
+ <%= formatDate(review.timestamp) %> · <%= review.role %> · <%= review.platform %> · <%= review.route_used || "unknown route" %> +
+
+
User message
<%= review.user_message %>
+
Assistant answer
<%= review.assistant_answer %>
+
+ <% if (review.optional_correction) { %>
Suggested correction
<%= review.optional_correction %>
<% } %> + <% if (review.review_notes) { %>

Review notes: <%= review.review_notes %>

<% } %> +
+ <% if (access.can_flag) { %> +
+ + +
+ <% } %> + <% if (access.can_verify && !["approved", "rejected"].includes(review.status)) { %> +
+ + +
+ <% } %> + <% if (access.can_approve) { %> +
+ + +
+
+ + +
+ + <% if (review.status === "approved") { %><% } %> +
+ + +
+
+ + +
+ <% } %> +
+
+ + <% if (access.can_edit) { %> + +
+ +
+
+
+
+
+
+ <% } %> + + <% if (access.can_implement && review.status === "approved") { %> + +
+
+
+
+
" />
+
" />
+
+
+
+
+
+
+
+
+
+ <% } %> + <% }) %> + <% if (!reviews.entries.length) { %>
No feedback matches this filter.
<% } %> +
+ +
+ +
+
+

Correction bank

Edits and toggles are staged. Save Corrections is required before they become active.

+ <% if (access.can_implement) { %>
<% } %> +
+
+ + + + <% corrections.entries.forEach((entry) => { %> + + + + + + + + <% if (access.can_edit) { %> + +
+ +
+
+
+
+
+
+
+
+
+ +
+ <% } %> + <% }) %> + <% if (!corrections.entries.length) { %><% } %> + +
TargetPrompt / answerPermissionStateActions
<%= entry.target.replaceAll("_", " ") %>
<%= entry.prompt.slice(0, 100) %>
<%= entry.corrected_answer %>
<%= entry.min_role %> · <%= entry.permission_scope.origin %>/<%= entry.permission_scope.platform %><%= entry.active ? "active" : entry.enabled ? "staged" : "disabled" %> + <% if (access.can_verify) { %>
<% } %> + <% if (access.can_edit) { %>
" />
<% } %> +
No corrections have been promoted.
+
+ +
+ +
+
+

Evals

Stored cases can be run manually against the current Lumi AI configuration.

+ <% if (access.can_run_evals) { %>
<% } %> +
+ <% if (access.can_run_evals) { %> +
Add eval case +
+
+
+
+
+
+
+
+
+
+
+ <% } %> +
+ <% evalCases.entries.forEach((entry) => { %><% }) %> + <% if (!evalCases.entries.length) { %><% } %> +
PromptRole / originExpectedForbiddenExpected linkActions
<%= entry.prompt %><%= entry.role %> / <%= entry.origin %><%= entry.expected_behavior || "-" %><%= entry.forbidden_behavior || "-" %><%= entry.expected_link || "-" %><% if (access.can_run_evals) { %>
<% } %>
No eval cases.
+ +
Recent eval results +
+ <% evalResults.forEach((result) => { %><% }) %> + <% if (!evalResults.length) { %><% } %> +
TimeCaseResultNotes
<%= formatDate(result.run_at) %><%= result.case_id %><%= result.status %><%= result.notes || "-" %>
No eval results.
+
+
+ +<% if (access.can_export) { %> +
+

Training exports

Manual JSONL exports include approved examples only. Lumi does not start training.

+
+
+
+
+
+<% } %> + + +<%- include("../../../src/web/views/partials/layout-bottom") %> diff --git a/plugins/lumi_ai/views/settings.ejs b/plugins/lumi_ai/views/settings.ejs index 877b062..50b6c02 100644 --- a/plugins/lumi_ai/views/settings.ejs +++ b/plugins/lumi_ai/views/settings.ejs @@ -9,6 +9,7 @@
<%= runtimeStatus.healthy ? "Runtime ready" : "Runtime offline" %>
+ Improvement Center