Lumi/plugins/lumi_ai/backend/hardware.js
2026-06-12 19:27:43 +02:00

392 lines
14 KiB
JavaScript

const os = require("os");
const fs = require("fs");
const path = require("path");
const { spawnSync } = require("child_process");
const { PLUGIN_DATA, PLUGIN_ROOT } = require("./paths");
// GPU compute backends this module recognises. createGpu() drops any
// compute API that is not in this list, and the list is exported for callers.
const SUPPORTED_BACKENDS = ["cuda", "vulkan", "metal", "sycl"];
/**
 * Builds a snapshot of the host: CPU threads, RAM, free disk space, detected
 * GPUs, the runtime target chosen for the primary GPU, and the model tier
 * recommended for this machine.
 *
 * @param {Iterable<object>} [models] Model catalogue. Each entry is read for
 *   `ram_gb`, `size` (bytes) and `tier`. A missing or non-iterable catalogue is
 *   treated as empty instead of throwing.
 * @param {object|null} [runtimeManifest=null] Forwarded to selectRuntimeTarget().
 * @returns {object} Hardware report; `recommended_tier` falls back to "tiny"
 *   when no catalogue entry fits into total RAM and free disk space.
 */
function detectHardware(models, runtimeManifest = null) {
  const freeDisk = getFreeDisk();
  const totalRamMb = Math.floor(os.totalmem() / 1048576);
  const availableRamMb = Math.floor(os.freemem() / 1048576);
  const gpus = detectGpus();
  const gpu = choosePrimaryGpu(gpus);
  const runtimeSelection = selectRuntimeTarget(runtimeManifest, gpu);
  const writable = testWritable();

  // Hardware detection must still produce a report when the caller has no
  // catalogue yet, so anything that cannot be iterated counts as "no models".
  const catalogue = typeof models?.[Symbol.iterator] === "function" ? [...models] : [];
  const recommendation = catalogue
    // Entries with missing numbers compare as NaN and are filtered out.
    .filter((model) => model?.ram_gb * 1024 <= totalRamMb && model?.size / 1048576 <= freeDisk)
    // Largest RAM requirement that still fits wins.
    .sort((a, b) => b.ram_gb - a.ram_gb)[0]?.tier || "tiny";

  return {
    platform: os.platform(),
    architecture: os.arch(),
    cpu_threads: os.cpus().length,
    total_ram_mb: totalRamMb,
    available_ram_mb: availableRamMb,
    free_disk_mb: freeDisk,
    gpu,
    gpus,
    runtime_selection: runtimeSelection,
    subprocess_allowed: true,
    plugin_writable: writable,
    recommended_tier: recommendation,
    plugin_path: PLUGIN_ROOT,
    path_length: PLUGIN_ROOT.length,
    // Only flagged on Windows; 220 is the threshold used by this module.
    long_path_warning: os.platform() === "win32" && PLUGIN_ROOT.length > 220,
    // A leading double backslash marks a UNC-style path.
    network_path_warning: os.platform() === "win32" && PLUGIN_ROOT.startsWith("\\\\")
  };
}
/**
 * Reports the free space on the volume holding PLUGIN_DATA.
 *
 * @returns {number} Whole MiB available (bavail * bsize), or 0 when
 *   fs.statfsSync is not provided by this Node.js version or the call fails.
 */
function getFreeDisk() {
  let freeMb = 0;
  try {
    if (typeof fs.statfsSync === "function") {
      const volume = fs.statfsSync(PLUGIN_DATA);
      const freeBytes = Number(volume.bavail) * Number(volume.bsize);
      freeMb = Math.floor(freeBytes / 1048576);
    }
  } catch {
    // Best effort: an unreadable or missing directory counts as "no space".
    freeMb = 0;
  }
  return freeMb;
}
/**
 * Lists the GPUs of the host.
 *
 * On macOS the result of detectMacGpus() is returned as-is. Elsewhere the
 * nvidia-smi listing is combined with the OS-level listing (Windows or Linux
 * helper), duplicates are removed, and adapters without a model name or whose
 * name marks them as virtual/remote displays are dropped.
 *
 * @param {string} [platform=os.platform()] Platform identifier to detect for.
 * @returns {object[]} GPU descriptors as produced by createGpu().
 */
function detectGpus(platform = os.platform()) {
  if (platform === "darwin") return detectMacGpus();
  const nvidia = detectNvidiaGpus();
  const system = platform === "win32" ? detectWindowsGpus() : detectLinuxGpus();
  const merged = [...nvidia];
  const seenNames = new Set(nvidia.map((gpu) => normalizeName(gpu.model)));
  for (const candidate of system) {
    // The OS description of an NVIDIA adapter (notably the lspci device string)
    // rarely equals the nvidia-smi product name, so name matching alone lists
    // the same card twice. When nvidia-smi answered, its entries are kept.
    if (nvidia.length > 0 && candidate.vendor === "NVIDIA") continue;
    const key = normalizeName(candidate.model);
    if (seenNames.has(key)) continue;
    seenNames.add(key);
    merged.push(candidate);
  }
  return merged.filter((gpu) => gpu.model && !/virtual|display hub|remote display/i.test(gpu.model));
}
/**
 * Queries nvidia-smi for name, total memory, free memory and driver version
 * (CSV, no header, no units) and turns every non-empty output row into a GPU
 * descriptor tagged as NVIDIA with the "cuda" and "vulkan" compute APIs.
 *
 * @returns {object[]} One descriptor per row, or [] when nvidia-smi did not
 *   exit successfully.
 */
function detectNvidiaGpus() {
  const query = capture("nvidia-smi", [
    "--query-gpu=name,memory.total,memory.free,driver_version",
    "--format=csv,noheader,nounits"
  ]);
  if (!query.ok) return [];

  const rows = query.stdout.split(/\r?\n/).filter(Boolean);
  return rows.map((row) => {
    // Columns arrive in the order requested by --query-gpu.
    const columns = row.split(",").map((column) => column.trim());
    const [model, total, free, driver] = columns;
    return createGpu({
      vendor: "NVIDIA",
      model,
      driver,
      vram_mb: numberOrNull(total),
      available_vram_mb: numberOrNull(free),
      compute_api: ["cuda", "vulkan"]
    });
  });
}
/**
 * Lists display adapters on Windows by asking PowerShell for the
 * Win32_VideoController instances as compact JSON.
 *
 * @returns {object[]} One descriptor per adapter; [] when PowerShell fails,
 *   prints nothing, or its output cannot be processed.
 */
function detectWindowsGpus() {
  const pipeline = [
    "Get-CimInstance Win32_VideoController |",
    "Select-Object Name,AdapterRAM,DriverVersion,PNPDeviceID |",
    "ConvertTo-Json -Compress"
  ];
  const { ok, stdout } = capture("powershell", ["-NoProfile", "-Command", pipeline.join(" ")], 5000);
  if (!ok || !stdout.trim()) return [];

  try {
    const payload = JSON.parse(stdout);
    // ConvertTo-Json yields a bare object when there is exactly one adapter.
    const adapters = Array.isArray(payload) ? payload : [payload];
    return adapters.map((adapter) => {
      const vendor = vendorFromName(adapter.Name, adapter.PNPDeviceID);
      let vramMb = null;
      if (adapter.AdapterRAM) {
        // AdapterRAM is divided by 1048576 to express it in MiB.
        vramMb = Math.round(Number(adapter.AdapterRAM) / 1048576);
      }
      return createGpu({
        vendor,
        model: adapter.Name,
        driver: adapter.DriverVersion,
        vram_mb: vramMb,
        compute_api: computeApis(vendor, "win32")
      });
    });
  } catch {
    return [];
  }
}
/**
 * Lists display adapters on Linux from `lspci -nn`, keeping the lines whose
 * device class is "VGA compatible controller" or "3D controller".
 *
 * `lspci -nn` prints the numeric class code between the class name and the
 * colon (e.g. "VGA compatible controller [0300]: Intel ..."). The prefix that
 * is stripped to obtain the model therefore has to allow that optional
 * bracketed code; otherwise the whole line, bus address included, would be
 * reported as the model name.
 *
 * @returns {object[]} One descriptor per matching line (driver and VRAM are
 *   unknown here and passed as null); [] when lspci did not exit successfully.
 */
function detectLinuxGpus() {
  const result = capture("lspci", ["-nn"], 3000);
  if (!result.ok) return [];
  // A successful `vulkaninfo --summary` is taken as proof that Vulkan works.
  const vulkan = capture("vulkaninfo", ["--summary"], 3000).ok;
  const gpuClass = /(VGA compatible controller|3D controller)/i;
  const classPrefix = /^.*?(?:VGA compatible controller|3D controller)(?:\s*\[[0-9a-f]{4}\])?:\s*/i;
  return result.stdout.split(/\r?\n/)
    .filter((line) => gpuClass.test(line))
    .map((line) => {
      const model = line.replace(classPrefix, "").trim();
      // The full line is passed as well so vendor hints outside the model
      // text can still be matched.
      const vendor = vendorFromName(model, line);
      const apis = computeApis(vendor, "linux");
      if (vulkan && !apis.includes("vulkan")) apis.push("vulkan");
      return createGpu({ vendor, model, driver: null, vram_mb: null, compute_api: apis });
    });
}
/**
 * Lists GPUs on macOS from the JSON output of
 * `system_profiler SPDisplaysDataType -json`.
 *
 * Every entry is reported with the "metal" compute API and the OS release
 * string as its driver value.
 *
 * @returns {object[]} One descriptor per reported display adapter; [] when the
 *   command fails or its output cannot be processed.
 */
function detectMacGpus() {
  const profile = capture("system_profiler", ["SPDisplaysDataType", "-json"], 5000);
  if (!profile.ok) return [];

  try {
    const report = JSON.parse(profile.stdout);
    const adapters = report.SPDisplaysDataType || [];
    return adapters.map((adapter) => {
      const chipName = adapter.sppci_model;
      let vendor;
      if (/apple/i.test(chipName || "")) {
        vendor = "Apple";
      } else {
        vendor = vendorFromName(chipName);
      }
      // Dedicated VRAM is preferred; the shared figure is the fallback.
      const memoryLabel = adapter.spdisplays_vram || adapter.spdisplays_vram_shared;
      return createGpu({
        vendor,
        model: chipName,
        driver: os.release(),
        vram_mb: parseMemoryMb(memoryLabel),
        compute_api: ["metal"]
      });
    });
  } catch {
    return [];
  }
}
/**
 * Normalises raw adapter details into the GPU descriptor shape used by this
 * module.
 *
 * @param {object} details
 * @param {string} [details.vendor] Vendor label; "Unknown" when empty.
 * @param {string} [details.model] Adapter name, exposed as both `model` and `name`.
 * @param {string} [details.driver] Driver version, or null when empty.
 * @param {*} [details.vram_mb] Total VRAM; non-positive or non-numeric becomes null.
 * @param {*} [details.available_vram_mb] Free VRAM; same normalisation.
 * @param {string[]} [details.compute_api] Candidate compute APIs.
 * @returns {object} Descriptor with `present: true`; `supported_runtime` is
 *   true when at least one candidate API is in SUPPORTED_BACKENDS.
 */
function createGpu({ vendor, model, driver, vram_mb, available_vram_mb, compute_api }) {
  const candidates = compute_api || [];
  const recognised = candidates.filter((api) => SUPPORTED_BACKENDS.includes(api));
  // A Set drops repeated entries while keeping first-seen order.
  const apis = Array.from(new Set(recognised));
  const label = model || null;

  return {
    present: true,
    vendor: vendor || "Unknown",
    model: label,
    name: label,
    driver: driver || null,
    vram_mb: numberOrNull(vram_mb),
    available_vram_mb: numberOrNull(available_vram_mb),
    compute_api: apis,
    supported_runtime: apis.length > 0
  };
}
/**
 * Picks the GPU that the rest of the module treats as primary.
 *
 * Adapters with a supported runtime rank before those without; within the
 * same group the larger `vram_mb` wins (missing VRAM counts as 0). The input
 * list is not reordered.
 *
 * @param {object[]} gpus Descriptors to choose from.
 * @returns {object} The best-ranked descriptor, or a placeholder with
 *   `present: false` when the list is empty.
 */
function choosePrimaryGpu(gpus) {
  if (gpus.length) {
    const byPreference = (left, right) => {
      const supportDelta = Number(right.supported_runtime) - Number(left.supported_runtime);
      const vramDelta = (right.vram_mb || 0) - (left.vram_mb || 0);
      // VRAM only breaks ties between equally supported adapters.
      return supportDelta || vramDelta;
    };
    const ranked = [...gpus];
    ranked.sort(byPreference);
    return ranked[0];
  }

  return {
    present: false,
    vendor: null,
    model: null,
    name: null,
    driver: null,
    vram_mb: null,
    available_vram_mb: null,
    compute_api: [],
    supported_runtime: false
  };
}
/**
 * Chooses between the accelerated and the CPU runtime target that the manifest
 * lists for the `<platform>-<architecture>` key.
 *
 * The accelerated target is used when a GPU is present and the target's
 * backend is "metal" or one of the GPU's compute APIs.
 *
 * @param {object|null} runtimeManifest Manifest with optional `targets` and
 *   `accelerated_targets` maps.
 * @param {object|null} gpu Primary GPU descriptor.
 * @param {string} [platform=os.platform()]
 * @param {string} [architecture=os.arch()]
 * @returns {object} Selection with `backend`, `accelerated`, `fallback_to_cpu`
 *   and `target`; the CPU selection also carries a `reason`.
 */
function selectRuntimeTarget(runtimeManifest, gpu, platform = os.platform(), architecture = os.arch()) {
  const key = `${platform}-${architecture}`;
  const cpuTarget = runtimeManifest?.targets?.[key] || null;
  const gpuTarget = runtimeManifest?.accelerated_targets?.[key] || null;
  const backend = gpuTarget?.backend;

  if (gpu?.present && gpuTarget) {
    const backendMatches = backend === "metal" || gpu.compute_api?.includes(backend);
    if (backendMatches) {
      return { backend, accelerated: true, fallback_to_cpu: false, target: gpuTarget };
    }
  }

  const hasGpu = Boolean(gpu?.present);
  let reason = "No supported GPU detected.";
  if (gpu?.present) {
    reason = "No compatible managed GPU runtime is available.";
  }
  return {
    backend: "cpu",
    accelerated: false,
    fallback_to_cpu: hasGpu,
    reason,
    target: cpuTarget
  };
}
/**
 * Estimates how much of a model may be offloaded to the GPU without exceeding
 * a safe VRAM budget.
 *
 * The budget is 90 % of total VRAM minus the VRAM that appears to be used by
 * something other than the managed model. When the GPU descriptor carries no
 * free-VRAM observation (`available_vram_mb` is null, undefined or empty, as
 * is the case for adapters that report no free memory), 90 % of the total is
 * assumed to be free. Coercing the missing value with Number() would turn
 * null into 0 and wrongly report the card as completely full.
 *
 * @param {object} options
 * @param {object} [options.model] Model entry; `size` is read in bytes.
 * @param {number} [options.contextSize] Forwarded to estimateContextMemoryMb().
 * @param {object} [options.gpu] GPU descriptor.
 * @param {string} [options.backend] Backend of the installed runtime; a falsy
 *   value means "cpu".
 * @param {number} [options.managedUsageMb=0] VRAM currently held by the
 *   managed model, which is not counted as external usage.
 * @returns {object} Capacity figures (all in MiB), `max_percent` in 0..100,
 *   and a `warning` string or null.
 */
function calculateGpuCapacity({ model, contextSize, gpu, backend, managedUsageMb = 0 }) {
  const requestedBackend = backend || "cpu";
  const supported = Boolean(
    model &&
    gpu?.present &&
    gpu.supported_runtime &&
    requestedBackend !== "cpu" &&
    (requestedBackend === "metal" || gpu.compute_api?.includes(requestedBackend))
  );
  const modelMb = model ? model.size / 1048576 : 0;
  const contextMb = estimateContextMemoryMb(model, contextSize);
  // Runtime overhead: 8 % of the model, but never less than 256 MiB.
  const overheadMb = supported ? Math.max(256, modelMb * 0.08) : 0;
  const fullOffloadMb = Math.ceil(modelMb + contextMb + overheadMb);
  const totalVramMb = Math.max(0, Number(gpu?.vram_mb) || 0);

  // null / undefined / "" mean "no observation"; Number() alone would map
  // null and "" to a finite 0 and defeat the fallback below.
  const reportedFree = gpu?.available_vram_mb;
  const hasObservedFree =
    reportedFree != null &&
    reportedFree !== "" &&
    Number.isFinite(Number(reportedFree));
  const observedAvailableVramMb = Math.max(0, Math.floor(
    hasObservedFree ? Number(reportedFree) : totalVramMb * 0.9
  ));
  const totalUsableVramMb = Math.floor((totalVramMb || observedAvailableVramMb) * 0.9);
  const managedModelVramMb = Math.max(0, Number(managedUsageMb) || 0);
  const observedUsedVramMb = Math.max(0, totalVramMb - observedAvailableVramMb);
  const externalUsageMb = Math.max(
    0,
    observedUsedVramMb - managedModelVramMb
  );
  // The managed model's live allocation remains part of its own usable budget.
  const safeAvailableVramMb = Math.max(0, totalUsableVramMb - externalUsageMb);
  const maxPercent = supported && fullOffloadMb > 0
    ? Math.max(0, Math.min(100, Math.floor((safeAvailableVramMb / fullOffloadMb) * 100)))
    : 0;
  return {
    supported,
    backend: supported ? requestedBackend : "cpu",
    max_percent: maxPercent,
    total_vram_mb: totalVramMb,
    free_vram_mb: observedAvailableVramMb,
    managed_model_vram_mb: managedModelVramMb,
    external_vram_estimate_mb: externalUsageMb,
    available_vram_mb: observedAvailableVramMb,
    safe_available_vram_mb: safeAvailableVramMb,
    managed_gpu_memory_mb: managedModelVramMb,
    external_gpu_memory_mb: externalUsageMb,
    estimated_full_offload_mb: fullOffloadMb,
    estimated_context_mb: contextMb,
    warning: !gpu?.present
      ? "No supported GPU detected. CPU fallback will be used."
      : !supported
        ? "The detected GPU does not match the installed runtime backend. CPU fallback will be used."
        : externalUsageMb > 0 && maxPercent < 100
          ? "External VRAM usage limits maximum GPU acceleration."
          : maxPercent < 100
            ? "The selected model and context exceed the GPU's safe VRAM budget."
            : null
  };
}
/**
 * Combines the GPU capacity estimate with the percentage of GPU offload the
 * user asked for.
 *
 * The requested percentage (`intentPercent`, falling back to
 * `workloadPercent`) is limited to 0..100 and then capped at the capacity's
 * `max_percent`. When the cap applies, the reason and a matching warning are
 * reported.
 *
 * @param {object} options
 * @param {object} [options.model] Model entry; `gpu_layers` and `size` are read.
 * @param {number} [options.contextSize]
 * @param {object} [options.gpu]
 * @param {string} [options.backend]
 * @param {number} [options.intentPercent]
 * @param {number} [options.workloadPercent] Used when intentPercent is null/undefined.
 * @param {number} [options.managedUsageMb=0]
 * @returns {object} All capacity fields plus the allocation fields.
 */
function estimateAllocation({ model, contextSize, gpu, backend, intentPercent, workloadPercent, managedUsageMb = 0 }) {
  const capacity = calculateGpuCapacity({ model, contextSize, gpu, backend, managedUsageMb });

  const requested = Number(intentPercent ?? workloadPercent) || 0;
  const intent = Math.min(100, Math.max(0, requested));
  const actual = Math.min(intent, capacity.max_percent);

  // Any non-zero allocation offloads at least one layer.
  let gpuLayers = 0;
  if (actual > 0) {
    gpuLayers = Math.max(1, Math.round((model?.gpu_layers || 0) * actual / 100));
  }

  let clampedReason = null;
  if (actual < intent) {
    if (!capacity.supported) {
      clampedReason = "gpu_backend_unavailable";
    } else if (capacity.external_vram_estimate_mb > 0) {
      clampedReason = "external_vram_pressure";
    } else {
      clampedReason = "model_vram_capacity";
    }
  }

  let warning;
  switch (clampedReason) {
    case "external_vram_pressure":
      warning = `Actual GPU allocation is limited to ${actual}% because other processes are using VRAM. Your ${intent}% intent is preserved.`;
      break;
    case "model_vram_capacity":
      warning = `Actual GPU allocation is limited to ${actual}% because the selected model and context exceed the safe VRAM budget.`;
      break;
    case "gpu_backend_unavailable":
      warning = `GPU acceleration intent is ${intent}%, but the installed runtime cannot use the detected GPU.`;
      break;
    default:
      // Not clamped: keep whatever the capacity estimate reported.
      warning = capacity.warning;
  }

  return {
    ...capacity,
    gpu_allocation_intent_percent: intent,
    gpu_allocation_actual_percent: actual,
    gpu_allocation_max_safe_percent: capacity.max_percent,
    gpu_allocation_clamped_reason: clampedReason,
    intended_gpu_allocation: intent,
    actual_gpu_allocation: actual,
    workload_percent: actual,
    gpu_layers: gpuLayers,
    estimated_gpu_memory_mb: Math.ceil(capacity.estimated_full_offload_mb * actual / 100),
    estimated_cpu_memory_mb: Math.ceil((model?.size || 0) / 1048576 * (1 - actual / 100)),
    warning
  };
}
/**
 * Produces human-readable tuning suggestions from the current configuration,
 * GPU, allocation result and recent generation speed.
 *
 * @param {object} options
 * @param {object} [options.model] Model entry; `size` is read in bytes.
 * @param {object} [options.config] Reads `max_output_tokens`, `output_budgets`,
 *   `context_size` and `concurrency`.
 * @param {object} [options.gpu] GPU descriptor; `model` and `vram_mb` are read.
 * @param {object} [options.allocation] Result of estimateAllocation().
 * @param {number|string} [options.generationTps=0] Recent tokens per second.
 *   Coerced with Number() like every other numeric input here, so a numeric
 *   string no longer throws when it is formatted.
 * @returns {string[]} Hints in a fixed order; empty when nothing applies.
 */
function performanceTuningHints({ model, config, gpu, allocation, generationTps = 0 }) {
  const hints = [];
  // The largest of the global limit and every per-class output budget.
  const maxOutput = Math.max(
    Number(config?.max_output_tokens) || 0,
    ...Object.values(config?.output_budgets || {}).map((value) => Number(value) || 0)
  );
  const context = Number(config?.context_size) || 0;
  const concurrency = Number(config?.concurrency) || 1;
  const tokensPerSecond = Number(generationTps) || 0;

  if (maxOutput > 2048) hints.push("Lower maximum output tokens to reduce long generation time.");
  if (context > 8192) hints.push("Lower context size if prompt evaluation is slow.");
  if (allocation?.gpu_allocation_actual_percent < allocation?.gpu_allocation_intent_percent) {
    hints.push("Review GPU layers/offload and external VRAM use before increasing allocation.");
  }
  if (concurrency > 1) hints.push("Reduce concurrent AI jobs when generation throughput is low.");
  if (/GTX\s*1060/i.test(String(gpu?.model || "")) && Number(gpu?.vram_mb) <= 6144) {
    hints.push("On a GTX 1060 6GB, a full VRAM graph can be normal managed-model allocation rather than external pressure.");
  }
  // 3.5 GiB and larger.
  if ((model?.size || 0) >= 3.5 * 1024 ** 3) {
    hints.push("This model may generate slowly on older GPUs even when its VRAM allocation is expected.");
  }
  if (tokensPerSecond > 0 && tokensPerSecond < 8) {
    hints.push(`Recent generation speed is ${tokensPerSecond.toFixed(1)} tok/s. Lower class output budgets, reduce context, improve GPU offload, or reduce concurrency.`);
  }
  return hints;
}
/**
 * Estimates the memory needed for the context window.
 *
 * The estimate is 192 per 4096 tokens, multiplied by `gpu_layers / 24`
 * (never below 1) and rounded up. The context is `contextSize`, else the
 * model's `default_context`, else 4096, and is never taken as less than 512.
 *
 * @param {object} [model] Model entry; `default_context` and `gpu_layers` are read.
 * @param {number} [contextSize] Requested context length in tokens.
 * @returns {number} Estimated size (callers treat it as MiB).
 */
function estimateContextMemoryMb(model, contextSize) {
  const requested = Number(contextSize) || model?.default_context || 4096;
  const context = Math.max(512, requested);
  const layers = model?.gpu_layers || 24;
  const scale = Math.max(1, layers / 24);
  return Math.ceil((context / 4096) * 192 * scale);
}
/**
 * Maps a GPU vendor and platform to the compute APIs assumed for it.
 *
 * @param {string} vendor Vendor label as produced by vendorFromName().
 * @param {string} platform Platform identifier; "darwin" always yields Metal.
 * @returns {string[]} A fresh array of API names.
 */
function computeApis(vendor, platform) {
  if (platform === "darwin") {
    return ["metal"];
  }
  switch (vendor) {
    case "NVIDIA":
      return ["cuda", "vulkan"];
    case "Intel":
      return ["sycl", "vulkan"];
    default:
      // AMD and unrecognised vendors are assumed to offer Vulkan only.
      return ["vulkan"];
  }
}
/**
 * Derives the GPU vendor from an adapter name and an optional device id.
 *
 * Both strings are searched case-insensitively; the first matching rule wins,
 * in the order NVIDIA, AMD, Intel, Apple.
 *
 * @param {string} [name=""] Adapter name.
 * @param {string} [deviceId=""] Extra text to search, e.g. a device id string.
 * @returns {string} "NVIDIA", "AMD", "Intel", "Apple" or "Unknown".
 */
function vendorFromName(name = "", deviceId = "") {
  const haystack = `${name} ${deviceId}`;
  const rules = [
    [/nvidia|VEN_10DE/i, "NVIDIA"],
    [/amd|radeon|advanced micro devices|VEN_1002/i, "AMD"],
    [/intel|VEN_8086/i, "Intel"],
    [/apple/i, "Apple"]
  ];
  for (const [pattern, vendor] of rules) {
    if (pattern.test(haystack)) return vendor;
  }
  return "Unknown";
}
/**
 * Runs a command synchronously without a shell and collects its output.
 *
 * @param {string} command Executable to run.
 * @param {string[]} args Arguments passed to it.
 * @param {number} [timeout=3000] Timeout handed to spawnSync, in milliseconds.
 * @returns {{ok: boolean, stdout: string, stderr: string}} `ok` is true only
 *   for exit status 0; missing output streams are reported as empty strings.
 *   An exception from spawnSync is reported as a failed, empty result.
 */
function capture(command, args, timeout = 3000) {
  let child;
  try {
    child = spawnSync(command, args, {
      encoding: "utf8",
      timeout,
      windowsHide: true,
      shell: false
    });
  } catch {
    return { ok: false, stdout: "", stderr: "" };
  }
  const { status, stdout, stderr } = child;
  return {
    ok: status === 0,
    stdout: stdout || "",
    stderr: stderr || ""
  };
}
/**
 * Extracts a memory size from text such as "8 GB" or "1536 MB".
 *
 * @param {*} value Text to search; falsy values are treated as empty.
 * @returns {number|null} The size rounded to whole MB (GB values are
 *   multiplied by 1024), or null when no "<number> GB|MB" token is found.
 */
function parseMemoryMb(value) {
  const text = String(value || "");
  const found = text.match(/([\d.]+)\s*(GB|MB)/i);
  if (!found) return null;

  const [, digits, unit] = found;
  const factor = unit.toUpperCase() === "GB" ? 1024 : 1;
  return Math.round(Number(digits) * factor);
}
/**
 * Converts a value to a positive finite number.
 *
 * @param {*} value Anything Number() accepts.
 * @returns {number|null} The number, or null when it is NaN, infinite, zero
 *   or negative.
 */
function numberOrNull(value) {
  const parsed = Number(value);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return null;
  }
  return parsed;
}
/**
 * Reduces a name to lower-case ASCII letters and digits so that names can be
 * compared regardless of case, spacing and punctuation.
 *
 * @param {*} value Name to normalise; falsy values become "".
 * @returns {string} The normalised name.
 */
function normalizeName(value) {
  const lowered = String(value || "").toLowerCase();
  return lowered.replace(/[^a-z0-9]+/g, "");
}
/**
 * Checks whether the plugin data directory accepts writes by creating and then
 * deleting a ".write-test" file inside PLUGIN_DATA.
 *
 * @returns {boolean} true when both the write and the delete succeed, false
 *   when either step throws.
 */
function testWritable() {
  let writable = false;
  try {
    const probe = path.join(PLUGIN_DATA, ".write-test");
    fs.writeFileSync(probe, "ok");
    fs.unlinkSync(probe);
    writable = true;
  } catch {
    writable = false;
  }
  return writable;
}
// Public API of this module. Every function not listed here (the per-platform
// detectors, parsing helpers, capture, testWritable, ...) is not exported.
module.exports = {
SUPPORTED_BACKENDS,
detectHardware,
detectGpus,
selectRuntimeTarget,
calculateGpuCapacity,
estimateAllocation,
performanceTuningHints
};