<%- include("../../../src/web/views/partials/layout-top", { title }) %>
<%
/**
 * Build the text for a preset list.
 *
 * `current` is coerced with Number(). When the coerced value is finite and
 * no option carries exactly that value, an "Unsupported current value"
 * entry is placed ahead of the regular option labels.
 *
 * @param {Array<{value: number, label: string}>} options - Preset entries.
 * @param {*} current - Currently configured value; coerced to a number.
 * @returns {string} Concatenated entries, each followed by a single space.
 */
const renderPresetOptions = (options, current) => {
  const numericCurrent = Number(current);
  const isListed = options.some(({ value }) => value === numericCurrent);
  const needsUnsupportedEntry = Number.isFinite(numericCurrent) && !isListed;

  const unsupportedEntry = needsUnsupportedEntry
    ? `Unsupported current value (${numericCurrent}) `
    : "";
  const labelEntries = options.map(({ label }) => `${label} `);

  return unsupportedEntry + labelEntries.join("");
};
%>
<%# Page header and section navigation labels. The status text is chosen solely from runtimeStatus.healthy. -%>
Lumi AI
Managed local inference, assistant access, and guarded plugin tools.
<%= runtimeStatus.healthy ? "Runtime ready" : "Runtime offline" %>
Tools
Improvement Center
Overview
Models
Runtime
Storage
Assistant
Repo index
Metrics
Logs
<%# Overview: model selection, gate status, host hardware and resource estimates. -%>
<%# Model labels are looked up in `models` by id; when no entry matches (or its label is falsy) the raw id is printed instead. -%>
<%# Values held in MB are multiplied by 1048576 before formatBytes; presumably that helper expects bytes (confirm against its definition). -%>
Overview Current installation and host capacity.
Provider llama.cpp
Selected model <%= models.find((model) => model.id === config.selected_model_id)?.label || config.selected_model_id %>
Gate model <%= models.find((model) => model.id === config.gate.model_id)?.label || config.gate.model_id %>
Gate status <%= gateStatus.healthy ? "Ready" : gateStatus.state %>
RAM <%= Math.round(hardware.total_ram_mb / 1024) %> GB
Free disk <%= formatBytes(hardware.free_disk_mb * 1048576) %>
CPU threads <%= hardware.cpu_threads %>
GPU <%= hardware.gpu.present ? hardware.gpu.model : "Not detected" %>
VRAM <%= hardware.gpu.vram_mb ? `${Math.round(hardware.gpu.vram_mb / 1024)} GB` : "Unavailable" %>
Compute API <%= hardware.gpu.compute_api?.length ? hardware.gpu.compute_api.map((api) => api.toUpperCase()).join(", ") : "CPU only" %>
GPU driver <%= hardware.gpu.driver || "Unavailable" %>
Installed backend <%= String(runtimeStatus.runtime_backend || "cpu").toUpperCase() %>
Recommended backend <%= String(hardware.runtime_selection.backend || "cpu").toUpperCase() %>
Total AI RAM estimate <%= formatBytes(resourceEstimate.total_cpu_memory_mb * 1048576) %>
Total AI VRAM estimate <%= formatBytes(resourceEstimate.total_gpu_memory_mb * 1048576) %>
<% resourceEstimate.warnings.forEach((warning) => { %><%= warning %>
<% }) %>
<% sizeDiagnostics.forEach((diagnostic) => { %>
<%= diagnostic.message %>
<% }) %>
<%# Models: one entry per item in `models`. -%>
<%# Status precedence: "Installed" when model.downloaded, else "Available" when model.compatible, otherwise "Exceeds host". -%>
<%# The trailing if/else on model.downloaded selects the per-state content for each entry. -%>
Models Pinned GGUF files downloaded directly from Hugging Face and verified by SHA-256.
<% models.forEach((model) => { %>
<%= model.label %>
<%= formatBytes(model.size) %> · <%= model.ram_gb %> GB recommended RAM · <%= model.repo %>
<%= model.downloaded ? "Installed" : model.compatible ? "Available" : "Exceeds host" %>
<% if (model.downloaded) { %>
<% } else { %>
<% } %>
<% }) %>
<%# Runtime controls. The primary button comes from the shared state-button partial. -%>
<%# Its default state is "running" only when runtimeStatus.state is exactly "running"; every other process state starts the button at "idle". -%>
Runtime Official llama.cpp release, bound to localhost and stored inside this plugin.
<%- include("../../../src/web/views/partials/state-button", {
type: "button",
attrs: "data-runtime-primary",
loadingState: "starting",
successState: "running",
errorState: "error",
defaultState: runtimeStatus.state === "running" ? "running" : "idle",
states: [
{ id: "idle", text: "Start" },
{ id: "starting", text: "Starting", spinner: true },
{ id: "running", text: "Restart" },
{ id: "restarting", text: "Restarting", spinner: true },
{ id: "error", text: "Retry" }
]
}) %>
Run self-test
Verify runtime
Verify model
Verify gate model
Stop
<%# Runtime status fields. Missing or falsy numeric values fall back to 0 through ||, and a missing PID prints "None". -%>
<%# Self-test shows "Passed" when last_self_test.success is truthy, "Failed" when a result object exists without success, and "Not run" when there is no result. -%>
<%# VRAM figures held in MB are multiplied by 1048576 before being passed to formatBytes. -%>
Installed <%= runtimeStatus.runtime_installed ? "Yes" : "No" %>
Process <%= runtimeStatus.state %>
Health <%= runtimeStatus.healthy ? "Healthy" : "Unavailable" %>
PID <%= runtimeStatus.pid || "None" %>
Last stop <%= runtimeState.last_stop_reason %>
Platform <%= hardware.platform %>-<%= hardware.architecture %>
Self-test <%= runtimeStatus.last_self_test?.success ? "Passed" : runtimeStatus.last_self_test ? "Failed" : "Not run" %>
Runtime folder <%= formatBytes(runtimeFolderSize) %>
Runtime archive <%= runtimeTarget ? formatBytes(runtimeTarget.size) : "Unavailable" %>
Model installed <%= formatBytes(modelFileSize) %>
Model download <%= formatBytes(models.find((model) => model.id === config.selected_model_id)?.size || 0) %>
Backend <%= String(runtimeStatus.runtime_backend || "cpu").toUpperCase() %>
GPU intent <%= runtimeStatus.gpu_allocation_intent_percent || 0 %>%
GPU actual <%= runtimeStatus.gpu_allocation_actual_percent || 0 %>%
GPU safe maximum <%= runtimeStatus.gpu_allocation_max_safe_percent || 0 %>%
GPU layers <%= runtimeStatus.gpu_layers || 0 %>
Total VRAM <%= formatBytes((runtimeStatus.total_vram_mb || 0) * 1048576) %>
Free VRAM <%= formatBytes((runtimeStatus.free_vram_mb || 0) * 1048576) %>
Managed model VRAM <%= formatBytes((runtimeStatus.managed_model_vram_mb || 0) * 1048576) %>
External VRAM estimate <%= formatBytes((runtimeStatus.external_vram_estimate_mb || 0) * 1048576) %>
<%# Gate summary, managed runtime build details, and conditional notices. -%>
<%# Each notice (gate error, runtime error, acceleration warning, CPU fallback reason) is printed only when its source value is truthy. -%>
<%# NOTE(review): the release label falls back to the literal "b9592" when runtimeManifest has no version; confirm this default is kept in step with the managed release. -%>
Lightweight gate: <%= gateStatus.healthy ? "Ready" : gateStatus.state %>
<%= gateStatus.model_id || config.gate.model_id %> ·
CPU <%= formatBytes((gateStatus.estimated_cpu_memory_mb || 0) * 1048576) %> ·
VRAM <%= formatBytes((gateStatus.estimated_gpu_memory_mb || 0) * 1048576) %>
<% if (gateStatus.last_error) { %>
<%= gateStatus.last_error %>
<% } %>
<% if (runtimeTarget) { %>
Managed <%= String(runtimeTarget.backend || "cpu").toUpperCase() %> release <%= runtimeManifest?.version || "b9592" %>
<%= runtimeTarget.filename %> · <%= formatBytes(runtimeTarget.size) %>
<% } else { %>
No managed runtime build is available for this OS and architecture.
<% } %>
<% if (runtimeStatus.last_error) { %>
<%= runtimeStatus.last_error %>
<% } %>
<% if (runtimeStatus.acceleration_warning) { %>
<%= runtimeStatus.acceleration_warning %>
<% } %>
<% tuningHints.forEach((hint) => { %>
<%= hint %>
<% }) %>
<% if (hardware.runtime_selection.fallback_to_cpu) { %>
<%= hardware.runtime_selection.reason %>
<% } %>
<%# Runtime diagnostics: shows latestDiagnostic when one exists, otherwise an empty-state message. -%>
<%# Remediation steps are listed only when the array is non-empty. The raw diagnostic is serialised as 2-space-indented JSON and emitted through the escaping output tag. -%>
<%# The two path notices are driven by the hardware.network_path_warning and hardware.long_path_warning flags. -%>
Runtime diagnostics Latest plugin-local runtime failure and remediation details.
Download diagnostics
<% if (latestDiagnostic) { %>
<%= latestDiagnostic.code %>: <%= latestDiagnostic.message %>
<%= latestDiagnostic.category %> / <%= latestDiagnostic.severity %>
<% if (latestDiagnostic.remediation_steps?.length) { %>
<% latestDiagnostic.remediation_steps.forEach((step) => { %><%= step %> <% }) %>
<% } %>
Raw diagnostic details
<%= JSON.stringify(latestDiagnostic, null, 2) %>
<% } else { %>
No runtime diagnostic has been recorded.
<% } %>
<% if (hardware.network_path_warning) { %>
The plugin path may be a mapped or network-like location. A local disk path is more reliable for native runtime DLL loading.
<% } %>
<% if (hardware.long_path_warning) { %>The plugin path is unusually long for Windows native loading. Consider a shorter local installation path.
<% } %>
<%# Storage usage: the overall total followed by one line per entry of storageUsage.categories. -%>
Storage cleanup Plugin-local files only. Selected models and active runtimes are protected.
<%= formatBytes(storageUsage.total) %> total
<% Object.entries(storageUsage.categories).forEach(([category, bytes]) => { %>
<%# A global pattern is used so every underscore in a multi-word category key becomes a space, not just the first one. -%>
<%= category.replace(/_/g, " ") %> <%= formatBytes(bytes) %>
<% }) %>
<%# Repository index status. Status reads "Missing" when the index is not present, "Stale" when present and stale, otherwise "Ready". -%>
<%# The commit hash is cut to its first 12 characters; a missing timestamp or commit prints "Never" / "Unavailable". -%>
Repository support index Local Lumi routes, settings pages, plugin manifests, commands, and documentation.
Refresh local
Refresh public
Status <%= repoIndexStatus.present ? repoIndexStatus.stale ? "Stale" : "Ready" : "Missing" %>
Last indexed <%= repoIndexStatus.indexed_at ? formatDate(repoIndexStatus.indexed_at) : "Never" %>
Commit <%= repoIndexStatus.commit ? repoIndexStatus.commit.slice(0, 12) : "Unavailable" %>
Routes <%= repoIndexStatus.route_count %>
Plugins <%= repoIndexStatus.plugin_count %>
Commands <%= repoIndexStatus.command_count %>
<%# Metrics: summary counters followed by three row listings (assistant jobs, slow requests, request history). -%>
<%# Each listing prints one row per entry and a fallback message when its list is empty. -%>
<%# Fields joined with || fall back on any falsy value (including 0 and ""), while fields joined with ?? fall back only on null or undefined. -%>
<%# NOTE(review): entry.timestamp is printed as-is in the slow-request and history rows, whereas job rows pass created_at through formatDate; confirm the difference is intended. -%>
Metrics Plugin-local operational counters and recent requests.
Requests <%= metrics.total_requests %>
Successful <%= metrics.successful %>
Failed <%= metrics.failed %>
Refused <%= metrics.refusals %>
Gate decisions <%= metrics.gate_decisions || 0 %>
Average <%= formatDuration(metrics.average_response_ms) %>
Median <%= formatDuration(metrics.median_response_ms) %>
Avg gate <%= formatDuration(metrics.average_stage_ms?.gate_ms || 0) %>
Avg main generation <%= formatDuration(metrics.average_stage_ms?.main_generate_ms || 0) %>
Current and recent assistant jobs
Created State / stage Class / budget Elapsed Gate Queue Prompt eval Generation Tokens Speed Runtime UI timeout
<% jobDiagnostics.forEach((job) => { %><%= formatDate(job.created_at) %> <%= job.state %> / <%= job.stage %> <%= job.details.route_class || "-" %> / <%= job.details.max_output_tokens_used || job.details.max_output_tokens || "-" %> <%= formatDuration(job.elapsed_ms) %> <%= formatDuration(job.details.gate_ms) %> <%= formatDuration(job.details.queue_ms) %> <%= formatDuration(job.details.prompt_eval_ms) %> <%= formatDuration(job.details.generation_ms) %> <%= job.details.prompt_tokens || 0 %> / <%= job.details.generated_tokens || 0 %> <%= job.details.prompt_tps || 0 %> / <%= job.details.generation_tps || 0 %> tok/s <%= job.details.backend || "-" %>, <%= job.details.gpu_layers || 0 %> layers, ctx <%= job.details.context_size || "-" %> <%= job.frontend_soft_timeout_at ? (job.still_running ? "Still running" : "Recorded") : "No" %> <% }) %>
<% if (!jobDiagnostics.length) { %>No assistant jobs recorded since this plugin process started. <% } %>
Recent slow and 504-risk requests
Time Route / class Reason / budget Gate Queue Prompt eval Generation Tokens Speed Total Risk
<% slowRequestsPage.entries.forEach((entry) => { %><%= entry.timestamp %> <%= entry.route_used || "-" %> / <%= entry.route_class || "-" %> <%= entry.reason_code || "-" %> / max <%= entry.max_output_tokens_used || "-" %> <%= formatDuration(entry.gate_ms) %> <%= formatDuration(entry.queue_ms) %> <%= formatDuration(entry.prompt_eval_ms) %> <%= formatDuration(entry.generation_ms) %> <%= entry.prompt_tokens || 0 %> / <%= entry.generated_tokens || 0 %> <%= entry.prompt_tps || 0 %> / <%= entry.generation_tps || 0 %> tok/s <%= formatDuration(entry.total_ms) %> <%= entry.frontend_soft_timeout ? "UI waited" : entry.risk_504 ? "504 risk" : "Slow" %> <% }) %>
<% if (!slowRequestsPage.entries.length) { %>No requests over 30 seconds. <% } %>
Time Kind Status Route Confidence / reason Role Generated / final / delivered Duration
<% history.forEach((entry) => { %><%= entry.timestamp %> <%= entry.kind %> <%= entry.status %> <%= entry.route_used || "-" %> <%= entry.confidence ?? entry.gate_confidence ?? "-" %> / <%= entry.reason_code || entry.gate_reason_code || "-" %> <%= entry.role || "-" %> <%= entry.internal_generated_length ?? "-" %> / <%= entry.final_reply_length ?? entry.original_final_length ?? "-" %> / <%= entry.delivered_length ?? "-" %> <%= formatDuration(entry.duration_ms) %> <% }) %>
<% if (!history.length) { %>No requests recorded. <% } %>
<%# Runtime logs: one row per entry in logFiles (name, formatted size, formatted modification date) plus the row action labels. -%>
<%# An empty-state message is printed when logFiles has no entries. -%>
Runtime logs Open a tail view without loading entire large files.
Filename Size Modified Actions
<% logFiles.forEach((file) => { %>
<%= file.name %>
<%= formatBytes(file.size) %>
<%= formatDate(file.modified_at) %>
View
Download
Delete
<% }) %>
<% if (!logFiles.length) { %>No runtime logs found. <% } %>
Privacy and troubleshooting Local inference remains on this host.
Models are downloaded from pinned Hugging Face revisions. The managed runtime is downloaded from the official llama.cpp release and verified by SHA-256. No cloud inference is used. Prompt and response logging are off by default.
If startup fails, confirm that the runtime and selected model show as installed, the plugin directory is writable, and enough RAM and disk are available. Runtime logs are stored under plugins/lumi_ai/data/logs/.
<%- include("tool-modal") %>
<%- include("../../../src/web/views/partials/layout-bottom") %>