// Listing header: 392 lines, 14 KiB, JavaScript
const os = require("os");
|
|
const fs = require("fs");
|
|
const path = require("path");
|
|
const { spawnSync } = require("child_process");
|
|
const { PLUGIN_DATA, PLUGIN_ROOT } = require("./paths");
|
|
|
|
// Compute backends this module recognises; createGpu() drops any API name that is not listed here.
const SUPPORTED_BACKENDS = ["cuda", "vulkan", "metal", "sycl"];
|
|
|
|
/**
 * Build a snapshot of the host: OS, CPU, RAM, free disk, GPUs, the runtime
 * target chosen for the primary GPU, and a recommended model tier.
 *
 * The recommended tier is the `tier` of the model with the largest `ram_gb`
 * whose `ram_gb` (converted to MiB) fits in total RAM and whose `size`
 * (divided by 1048576, i.e. treated as bytes) fits in the free disk figure.
 * When no model qualifies, or the winner has no tier, "tiny" is used.
 *
 * @param {Iterable<object>} models - Model descriptors exposing `ram_gb`, `size` and `tier`.
 * @param {object|null} [runtimeManifest=null] - Passed through to selectRuntimeTarget().
 * @returns {object} Hardware report with snake_case keys.
 */
function detectHardware(models, runtimeManifest = null) {
  const MIB = 1048576;

  // Probes run in a fixed order; several of them spawn processes or touch the disk.
  const freeDisk = getFreeDisk();
  const totalRamMb = Math.floor(os.totalmem() / MIB);
  const availableRamMb = Math.floor(os.freemem() / MIB);
  const gpus = detectGpus();
  const gpu = choosePrimaryGpu(gpus);
  const runtimeSelection = selectRuntimeTarget(runtimeManifest, gpu);
  const writable = testWritable();

  const eligible = [...models].filter(
    (model) => model.ram_gb * 1024 <= totalRamMb && model.size / MIB <= freeDisk
  );
  eligible.sort((a, b) => b.ram_gb - a.ram_gb);
  const recommendation = eligible[0]?.tier || "tiny";

  const platform = os.platform();
  const onWindows = platform === "win32";

  return {
    platform,
    architecture: os.arch(),
    cpu_threads: os.cpus().length,
    total_ram_mb: totalRamMb,
    available_ram_mb: availableRamMb,
    free_disk_mb: freeDisk,
    gpu,
    gpus,
    runtime_selection: runtimeSelection,
    subprocess_allowed: true,
    plugin_writable: writable,
    recommended_tier: recommendation,
    plugin_path: PLUGIN_ROOT,
    path_length: PLUGIN_ROOT.length,
    // Windows-only warnings: very long install paths and UNC (\\server\share) locations.
    long_path_warning: onWindows && PLUGIN_ROOT.length > 220,
    network_path_warning: onWindows && PLUGIN_ROOT.startsWith("\\\\")
  };
}
|
|
|
|
/**
 * Report the free space, in whole MiB, of the filesystem holding PLUGIN_DATA.
 *
 * Best effort: returns 0 when `fs.statfsSync` is not provided by the running
 * Node.js build or when the call fails (for example, the directory is missing).
 *
 * @returns {number} Free space in MiB, or 0 when it cannot be determined.
 */
function getFreeDisk() {
  if (typeof fs.statfsSync !== "function") return 0;
  try {
    const { bavail, bsize } = fs.statfsSync(PLUGIN_DATA);
    // Blocks available to unprivileged users multiplied by the block size gives bytes.
    return Math.floor((Number(bavail) * Number(bsize)) / 1048576);
  } catch {
    return 0;
  }
}
|
|
|
|
/**
 * Enumerate the GPUs visible on this machine.
 *
 * On macOS only detectMacGpus() is consulted. Elsewhere the nvidia-smi result
 * is listed first and then extended with adapters reported by the OS-level
 * query (PowerShell on Windows, lspci otherwise). Adapters without a model
 * name, and virtual / display-hub / remote-display adapters, are dropped.
 *
 * @param {string} [platform=os.platform()] - Platform identifier such as "win32", "linux" or "darwin".
 * @returns {object[]} GPU descriptors as produced by createGpu().
 */
function detectGpus(platform = os.platform()) {
  if (platform === "darwin") return detectMacGpus();

  const nvidia = detectNvidiaGpus();
  const system = platform === "win32" ? detectWindowsGpus() : detectLinuxGpus();

  const merged = [...nvidia];
  const seen = new Set(nvidia.map((gpu) => normalizeName(gpu.model)));

  for (const candidate of system) {
    // nvidia-smi already listed every NVIDIA adapter it can drive, with VRAM figures.
    // The OS-level names (e.g. lspci's "NVIDIA Corporation GP106 [GeForce ...]") rarely
    // normalise to the same string, so comparing names alone would list those cards twice.
    if (nvidia.length > 0 && candidate.vendor === "NVIDIA") continue;

    const key = normalizeName(candidate.model);
    if (seen.has(key)) continue;
    seen.add(key);
    merged.push(candidate);
  }

  return merged.filter((gpu) => gpu.model && !/virtual|display hub|remote display/i.test(gpu.model));
}
|
|
|
|
/**
 * Query `nvidia-smi` for NVIDIA adapters.
 *
 * Each non-empty CSV row is read as name, total memory, free memory and
 * driver version. Every adapter is reported with the "cuda" and "vulkan" APIs.
 *
 * @returns {object[]} GPU descriptors, or an empty array when the command does not exit with status 0.
 */
function detectNvidiaGpus() {
  const query = "--query-gpu=name,memory.total,memory.free,driver_version";
  const { ok, stdout } = capture("nvidia-smi", [query, "--format=csv,noheader,nounits"]);
  if (!ok) return [];

  const gpus = [];
  for (const row of stdout.split(/\r?\n/)) {
    if (!row) continue;
    const [model, total, free, driver] = row.split(",").map((cell) => cell.trim());
    gpus.push(createGpu({
      vendor: "NVIDIA",
      model,
      driver,
      vram_mb: numberOrNull(total),
      available_vram_mb: numberOrNull(free),
      compute_api: ["cuda", "vulkan"]
    }));
  }
  return gpus;
}
|
|
|
|
/**
 * List display adapters on Windows through PowerShell's Win32_VideoController query.
 *
 * ConvertTo-Json emits a bare object for a single adapter and an array for
 * several, so the parsed value is normalised to an array before mapping.
 *
 * NOTE(review): AdapterRAM is documented as a 32-bit field, so adapters with
 * 4 GiB of memory or more may be under-reported here. Confirm before relying
 * on `vram_mb` from this path.
 *
 * @returns {object[]} GPU descriptors, or an empty array when the command fails,
 *   prints nothing, or its output cannot be parsed.
 */
function detectWindowsGpus() {
  const script =
    "Get-CimInstance Win32_VideoController | Select-Object Name,AdapterRAM,DriverVersion,PNPDeviceID | ConvertTo-Json -Compress";
  const result = capture("powershell", ["-NoProfile", "-Command", script], 5000);
  if (!result.ok || !result.stdout.trim()) return [];

  const toGpu = ({ Name, AdapterRAM, DriverVersion, PNPDeviceID }) => {
    const vendor = vendorFromName(Name, PNPDeviceID);
    const vramMb = AdapterRAM ? Math.round(Number(AdapterRAM) / 1048576) : null;
    return createGpu({
      vendor,
      model: Name,
      driver: DriverVersion,
      vram_mb: vramMb,
      compute_api: computeApis(vendor, "win32")
    });
  };

  try {
    const parsed = JSON.parse(result.stdout);
    const adapters = Array.isArray(parsed) ? parsed : [parsed];
    return adapters.map(toGpu);
  } catch {
    // Malformed JSON or an unexpected entry shape: report no adapters.
    return [];
  }
}
|
|
|
|
/**
 * List display adapters on Linux by parsing `lspci -nn`.
 *
 * Only "VGA compatible controller" and "3D controller" lines are considered.
 * With `-nn`, lspci prints the numeric class code between the class name and
 * the colon ("VGA compatible controller [0300]: ..."), so the prefix pattern
 * accepts that optional bracketed code. Without it the prefix is never
 * stripped and the model name ends up being the whole raw line.
 *
 * A successful `vulkaninfo --summary` adds "vulkan" to any adapter whose API
 * list lacks it.
 *
 * @returns {object[]} GPU descriptors (driver and VRAM are unknown on this path),
 *   or an empty array when lspci does not exit with status 0.
 */
function detectLinuxGpus() {
  const result = capture("lspci", ["-nn"], 3000);
  if (!result.ok) return [];

  const vulkan = capture("vulkaninfo", ["--summary"], 3000).ok;
  const gpuClass = /(VGA compatible controller|3D controller)/i;
  // "<slot> <class name>[ [cccc]]: " - everything up to and including the class label.
  const linePrefix = /^.*?(?:VGA compatible controller|3D controller)(?:\s*\[[0-9a-f]{4}\])?:\s*/i;

  return result.stdout.split(/\r?\n/)
    .filter((line) => gpuClass.test(line))
    .map((line) => {
      const model = line.replace(linePrefix, "").trim();
      // The full line is passed as well so vendor detection can see everything lspci printed.
      const vendor = vendorFromName(model, line);
      const apis = computeApis(vendor, "linux");
      if (vulkan && !apis.includes("vulkan")) apis.push("vulkan");
      return createGpu({ vendor, model, driver: null, vram_mb: null, compute_api: apis });
    });
}
|
|
|
|
/**
 * List display adapters on macOS from `system_profiler SPDisplaysDataType -json`.
 *
 * Every adapter is reported with the "metal" API; the OS release string is
 * used as the driver value. VRAM is parsed from `spdisplays_vram`, falling
 * back to `spdisplays_vram_shared`.
 *
 * @returns {object[]} GPU descriptors, or an empty array when the command
 *   fails or its output cannot be parsed.
 */
function detectMacGpus() {
  const { ok, stdout } = capture("system_profiler", ["SPDisplaysDataType", "-json"], 5000);
  if (!ok) return [];

  const describe = (item) => {
    const model = item.sppci_model;
    const vendor = /apple/i.test(model || "") ? "Apple" : vendorFromName(model);
    return createGpu({
      vendor,
      model,
      driver: os.release(),
      vram_mb: parseMemoryMb(item.spdisplays_vram || item.spdisplays_vram_shared),
      compute_api: ["metal"]
    });
  };

  try {
    const report = JSON.parse(stdout);
    return (report.SPDisplaysDataType || []).map(describe);
  } catch {
    return [];
  }
}
|
|
|
|
/**
 * Normalise raw adapter fields into the GPU descriptor shape used by this module.
 *
 * `compute_api` is reduced to the entries found in SUPPORTED_BACKENDS, with
 * duplicates removed and first-seen order kept. `supported_runtime` is true
 * when at least one such API remains. `name` mirrors `model`.
 *
 * @param {object} fields
 * @param {string} [fields.vendor] - Vendor label; "Unknown" when falsy.
 * @param {string} [fields.model] - Adapter name; null when falsy.
 * @param {string} [fields.driver] - Driver version; null when falsy.
 * @param {*} [fields.vram_mb] - Total VRAM; kept only when it is a positive finite number.
 * @param {*} [fields.available_vram_mb] - Free VRAM; same rule as vram_mb.
 * @param {string[]} [fields.compute_api] - Candidate compute APIs.
 * @returns {object} GPU descriptor with `present: true`.
 */
function createGpu({ vendor, model, driver, vram_mb, available_vram_mb, compute_api }) {
  const requested = compute_api || [];
  const recognised = requested.filter((api) => SUPPORTED_BACKENDS.includes(api));
  const apis = Array.from(new Set(recognised));
  const label = model || null;

  return {
    present: true,
    vendor: vendor || "Unknown",
    model: label,
    name: label,
    driver: driver || null,
    vram_mb: numberOrNull(vram_mb),
    available_vram_mb: numberOrNull(available_vram_mb),
    compute_api: apis,
    supported_runtime: apis.length > 0
  };
}
|
|
|
|
/**
 * Pick the GPU that should be treated as the primary one.
 *
 * Adapters with `supported_runtime` set win over those without; ties are
 * broken by the larger `vram_mb` (unknown VRAM counts as 0). Remaining ties
 * keep the input order. The input array is not modified.
 *
 * A missing, non-array or empty list (and a list holding only nullish
 * entries) yields a placeholder descriptor with `present: false` instead of
 * throwing.
 *
 * @param {object[]|null|undefined} gpus - Descriptors as produced by createGpu().
 * @returns {object} The chosen descriptor, or the "no GPU" placeholder.
 */
function choosePrimaryGpu(gpus) {
  const candidates = Array.isArray(gpus) ? gpus.filter(Boolean) : [];
  if (candidates.length === 0) {
    return {
      present: false,
      vendor: null,
      model: null,
      name: null,
      driver: null,
      vram_mb: null,
      available_vram_mb: null,
      compute_api: [],
      supported_runtime: false
    };
  }

  const byPreference = (a, b) => {
    const support = Number(b.supported_runtime) - Number(a.supported_runtime);
    if (support) return support;
    return (b.vram_mb || 0) - (a.vram_mb || 0);
  };
  // filter() already produced a copy, so sorting does not touch the caller's array.
  return candidates.sort(byPreference)[0];
}
|
|
|
|
/**
 * Choose between the accelerated and the CPU runtime target for this host.
 *
 * Targets are looked up under the key "<platform>-<architecture>" in the
 * manifest's `accelerated_targets` and `targets` maps. The accelerated target
 * is used when a GPU is present and either the target's backend is "metal" or
 * the GPU lists that backend in `compute_api`. Otherwise the CPU target is
 * returned together with a human-readable reason.
 *
 * @param {object|null} runtimeManifest - Manifest with optional `targets` / `accelerated_targets`.
 * @param {object|null} gpu - Primary GPU descriptor.
 * @param {string} [platform=os.platform()]
 * @param {string} [architecture=os.arch()]
 * @returns {object} Selection with `backend`, `accelerated`, `fallback_to_cpu`, `target`
 *   and, for the CPU path, `reason`.
 */
function selectRuntimeTarget(runtimeManifest, gpu, platform = os.platform(), architecture = os.arch()) {
  const key = `${platform}-${architecture}`;
  const cpuTarget = runtimeManifest?.targets?.[key] || null;
  const gpuTarget = runtimeManifest?.accelerated_targets?.[key] || null;
  const backend = gpuTarget?.backend;
  const gpuPresent = Boolean(gpu?.present);

  const backendUsable = () => backend === "metal" || Boolean(gpu.compute_api?.includes(backend));

  if (gpuPresent && gpuTarget !== null && backendUsable()) {
    return { backend, accelerated: true, fallback_to_cpu: false, target: gpuTarget };
  }

  const reason = gpuPresent
    ? "No compatible managed GPU runtime is available."
    : "No supported GPU detected.";

  return {
    backend: "cpu",
    accelerated: false,
    fallback_to_cpu: gpuPresent,
    reason,
    target: cpuTarget
  };
}
|
|
|
|
/**
 * Estimate how much of a model can be offloaded to the GPU.
 *
 * The full-offload footprint is model size + context estimate + an overhead
 * of max(256 MiB, 8% of the model), the overhead being counted only when the
 * backend is usable. The safe budget is 90% of total VRAM (or of the observed
 * free VRAM when the total is unknown) minus the VRAM attributed to other
 * processes, i.e. observed usage not accounted for by `managedUsageMb`.
 *
 * A free-VRAM reading of `null`/`undefined` means "not observed": 90% of the
 * total is assumed free in that case. (`Number(null)` is 0, so the reading
 * must be checked for null explicitly; otherwise an unknown value is taken
 * for "0 MB free" and the whole card is reported as externally occupied.)
 *
 * @param {object} options
 * @param {object} [options.model] - Model descriptor; `size` is divided by 1048576 (treated as bytes).
 * @param {number} [options.contextSize] - Context length forwarded to estimateContextMemoryMb().
 * @param {object} [options.gpu] - GPU descriptor (`present`, `supported_runtime`, `compute_api`,
 *   `vram_mb`, `available_vram_mb`).
 * @param {string} [options.backend] - Installed runtime backend; falsy means "cpu".
 * @param {number} [options.managedUsageMb=0] - VRAM currently held by the managed model.
 * @returns {object} Capacity report; `max_percent` is an integer from 0 to 100 and
 *   `warning` is a message or null.
 */
function calculateGpuCapacity({ model, contextSize, gpu, backend, managedUsageMb = 0 }) {
  const requestedBackend = backend || "cpu";
  const supported = Boolean(
    model &&
    gpu?.present &&
    gpu.supported_runtime &&
    requestedBackend !== "cpu" &&
    (requestedBackend === "metal" || gpu.compute_api?.includes(requestedBackend))
  );

  const modelMb = model ? model.size / 1048576 : 0;
  const contextMb = estimateContextMemoryMb(model, contextSize);
  const overheadMb = supported ? Math.max(256, modelMb * 0.08) : 0;
  const fullOffloadMb = Math.ceil(modelMb + contextMb + overheadMb);

  const totalVramMb = Math.max(0, Number(gpu?.vram_mb) || 0);
  const reportedFreeMb = gpu?.available_vram_mb;
  // null/undefined mean "no reading"; only a real finite number counts as observed.
  const hasObservedFree = reportedFreeMb != null && Number.isFinite(Number(reportedFreeMb));
  const observedAvailableVramMb = Math.max(0, Math.floor(
    hasObservedFree ? Number(reportedFreeMb) : totalVramMb * 0.9
  ));

  const totalUsableVramMb = Math.floor((totalVramMb || observedAvailableVramMb) * 0.9);
  const managedModelVramMb = Math.max(0, Number(managedUsageMb) || 0);
  const observedUsedVramMb = Math.max(0, totalVramMb - observedAvailableVramMb);
  const externalUsageMb = Math.max(0, observedUsedVramMb - managedModelVramMb);

  // The managed model's live allocation remains part of its own usable budget.
  const safeAvailableVramMb = Math.max(0, totalUsableVramMb - externalUsageMb);

  const maxPercent = supported && fullOffloadMb > 0
    ? Math.max(0, Math.min(100, Math.floor((safeAvailableVramMb / fullOffloadMb) * 100)))
    : 0;

  let warning = null;
  if (!gpu?.present) {
    warning = "No supported GPU detected. CPU fallback will be used.";
  } else if (!supported) {
    warning = "The detected GPU does not match the installed runtime backend. CPU fallback will be used.";
  } else if (externalUsageMb > 0 && maxPercent < 100) {
    warning = "External VRAM usage limits maximum GPU acceleration.";
  } else if (maxPercent < 100) {
    warning = "The selected model and context exceed the GPU's safe VRAM budget.";
  }

  return {
    supported,
    backend: supported ? requestedBackend : "cpu",
    max_percent: maxPercent,
    total_vram_mb: totalVramMb,
    free_vram_mb: observedAvailableVramMb,
    managed_model_vram_mb: managedModelVramMb,
    external_vram_estimate_mb: externalUsageMb,
    available_vram_mb: observedAvailableVramMb,
    safe_available_vram_mb: safeAvailableVramMb,
    managed_gpu_memory_mb: managedModelVramMb,
    external_gpu_memory_mb: externalUsageMb,
    estimated_full_offload_mb: fullOffloadMb,
    estimated_context_mb: contextMb,
    warning
  };
}
|
|
|
|
/**
 * Turn a requested GPU allocation percentage into the allocation that will
 * actually be applied, together with memory estimates.
 *
 * The intent (`intentPercent`, falling back to `workloadPercent`) is clamped
 * to 0-100 and then limited by the capacity's `max_percent`. When the intent
 * had to be reduced, `gpu_allocation_clamped_reason` explains why:
 * "external_vram_pressure", "model_vram_capacity" or "gpu_backend_unavailable".
 *
 * The "cannot use the detected GPU" warning is only emitted when a GPU is
 * actually present; with no GPU at all the capacity warning ("No supported
 * GPU detected...") is kept, since there is no detected GPU to refer to.
 *
 * @param {object} options
 * @param {object} [options.model] - Model descriptor (`size`, `gpu_layers`).
 * @param {number} [options.contextSize]
 * @param {object} [options.gpu] - GPU descriptor.
 * @param {string} [options.backend] - Installed runtime backend.
 * @param {number} [options.intentPercent] - Requested GPU share, 0-100.
 * @param {number} [options.workloadPercent] - Used when intentPercent is null/undefined.
 * @param {number} [options.managedUsageMb=0] - VRAM currently held by the managed model.
 * @returns {object} The capacity report extended with allocation fields; `warning` is overridden.
 */
function estimateAllocation({ model, contextSize, gpu, backend, intentPercent, workloadPercent, managedUsageMb = 0 }) {
  const capacity = calculateGpuCapacity({ model, contextSize, gpu, backend, managedUsageMb });
  const intent = Math.max(0, Math.min(100, Number(intentPercent ?? workloadPercent) || 0));
  const actual = Math.min(intent, capacity.max_percent);

  // Any non-zero allocation offloads at least one layer.
  const gpuLayers = actual > 0
    ? Math.max(1, Math.round((model?.gpu_layers || 0) * actual / 100))
    : 0;

  let clampedReason = null;
  if (actual < intent) {
    if (!capacity.supported) {
      clampedReason = "gpu_backend_unavailable";
    } else if (capacity.external_vram_estimate_mb > 0) {
      clampedReason = "external_vram_pressure";
    } else {
      clampedReason = "model_vram_capacity";
    }
  }

  let warning = capacity.warning;
  if (clampedReason === "external_vram_pressure") {
    warning = `Actual GPU allocation is limited to ${actual}% because other processes are using VRAM. Your ${intent}% intent is preserved.`;
  } else if (clampedReason === "model_vram_capacity") {
    warning = `Actual GPU allocation is limited to ${actual}% because the selected model and context exceed the safe VRAM budget.`;
  } else if (clampedReason === "gpu_backend_unavailable" && gpu?.present) {
    warning = `GPU acceleration intent is ${intent}%, but the installed runtime cannot use the detected GPU.`;
  }

  return {
    ...capacity,
    gpu_allocation_intent_percent: intent,
    gpu_allocation_actual_percent: actual,
    gpu_allocation_max_safe_percent: capacity.max_percent,
    gpu_allocation_clamped_reason: clampedReason,
    intended_gpu_allocation: intent,
    actual_gpu_allocation: actual,
    workload_percent: actual,
    gpu_layers: gpuLayers,
    estimated_gpu_memory_mb: Math.ceil(capacity.estimated_full_offload_mb * actual / 100),
    estimated_cpu_memory_mb: Math.ceil((model?.size || 0) / 1048576 * (1 - actual / 100)),
    warning
  };
}
|
|
|
|
/**
 * Produce human-readable tuning suggestions for slow generation.
 *
 * Hints are appended in a fixed order: output budget, context size, clamped
 * GPU allocation, concurrency, GTX 1060 note, large-model note, and measured
 * throughput. All numeric inputs are coerced with Number(), including
 * `generationTps`, so a numeric string no longer throws on `.toFixed`.
 *
 * @param {object} options
 * @param {object} [options.model] - Model descriptor; `size` is compared against 3.5 GiB.
 * @param {object} [options.config] - Reads `max_output_tokens`, `output_budgets`,
 *   `context_size` and `concurrency`.
 * @param {object} [options.gpu] - GPU descriptor (`model`, `vram_mb`).
 * @param {object} [options.allocation] - Result of estimateAllocation().
 * @param {number|string} [options.generationTps=0] - Recent generation speed in tokens per second.
 * @returns {string[]} Zero or more hint sentences.
 */
function performanceTuningHints({ model, config, gpu, allocation, generationTps = 0 }) {
  const hints = [];

  const budgets = Object.values(config?.output_budgets || {}).map((value) => Number(value) || 0);
  const maxOutput = Math.max(Number(config?.max_output_tokens) || 0, ...budgets);
  const context = Number(config?.context_size) || 0;
  const concurrency = Number(config?.concurrency) || 1;
  const tokensPerSecond = Number(generationTps) || 0;

  if (maxOutput > 2048) hints.push("Lower maximum output tokens to reduce long generation time.");
  if (context > 8192) hints.push("Lower context size if prompt evaluation is slow.");
  if (allocation?.gpu_allocation_actual_percent < allocation?.gpu_allocation_intent_percent) {
    hints.push("Review GPU layers/offload and external VRAM use before increasing allocation.");
  }
  if (concurrency > 1) hints.push("Reduce concurrent AI jobs when generation throughput is low.");
  if (/GTX\s*1060/i.test(String(gpu?.model || "")) && Number(gpu?.vram_mb) <= 6144) {
    hints.push("On a GTX 1060 6GB, a full VRAM graph can be normal managed-model allocation rather than external pressure.");
  }
  if ((model?.size || 0) >= 3.5 * 1024 ** 3) {
    hints.push("This model may generate slowly on older GPUs even when its VRAM allocation is expected.");
  }
  if (tokensPerSecond > 0 && tokensPerSecond < 8) {
    hints.push(`Recent generation speed is ${tokensPerSecond.toFixed(1)} tok/s. Lower class output budgets, reduce context, improve GPU offload, or reduce concurrency.`);
  }

  return hints;
}
|
|
|
|
/**
 * Rough estimate, in MiB, of the memory needed for the context.
 *
 * The baseline is 192 MiB per 4096 context tokens, scaled up for models with
 * more than 24 GPU layers. The context length comes from `contextSize`, then
 * `model.default_context`, then 4096, and is never taken below 512.
 *
 * @param {object} [model] - Model descriptor (`default_context`, `gpu_layers`).
 * @param {number} [contextSize] - Requested context length.
 * @returns {number} Estimated MiB, rounded up.
 */
function estimateContextMemoryMb(model, contextSize) {
  const requested = Number(contextSize) || model?.default_context || 4096;
  const tokens = Math.max(512, requested);
  const layers = model?.gpu_layers || 24;
  const layerScale = Math.max(1, layers / 24);
  return Math.ceil((tokens / 4096) * 192 * layerScale);
}
|
|
|
|
/**
 * Guess the compute APIs a GPU can use from its vendor and the host platform.
 *
 * macOS always maps to Metal. Elsewhere NVIDIA gets CUDA and Vulkan, Intel
 * gets SYCL and Vulkan, and everything else (AMD included) gets Vulkan only.
 * A new array is returned on every call, so callers may modify it.
 *
 * @param {string} vendor - Vendor label such as "NVIDIA", "AMD" or "Intel".
 * @param {string} platform - Platform identifier such as "darwin", "win32" or "linux".
 * @returns {string[]} API names in preference order.
 */
function computeApis(vendor, platform) {
  if (platform === "darwin") return ["metal"];

  switch (vendor) {
    case "NVIDIA":
      return ["cuda", "vulkan"];
    case "Intel":
      return ["sycl", "vulkan"];
    case "AMD":
    default:
      return ["vulkan"];
  }
}
|
|
|
|
/**
 * Infer the GPU vendor from an adapter name and an optional device identifier.
 *
 * Both strings are searched case-insensitively for vendor names and for PCI
 * vendor IDs (10de NVIDIA, 1002 AMD/ATI, 8086 Intel). IDs are accepted in the
 * Windows PNP form ("VEN_10DE") and in the bracketed "[vvvv:dddd]" form that
 * `lspci -nn` prints, so Linux adapters whose name carries no vendor word are
 * still classified.
 *
 * @param {string} [name=""] - Adapter name.
 * @param {string} [deviceId=""] - PNP device ID or raw lspci line.
 * @returns {"NVIDIA"|"AMD"|"Intel"|"Apple"|"Unknown"} Vendor label.
 */
function vendorFromName(name = "", deviceId = "") {
  const value = `${name} ${deviceId}`;
  // Checked in order; the first matching vendor wins.
  if (/nvidia|VEN_10DE|\[10de:[0-9a-f]{4}\]/i.test(value)) return "NVIDIA";
  if (/amd|radeon|advanced micro devices|VEN_1002|\[1002:[0-9a-f]{4}\]/i.test(value)) return "AMD";
  if (/intel|VEN_8086|\[8086:[0-9a-f]{4}\]/i.test(value)) return "Intel";
  if (/apple/i.test(value)) return "Apple";
  return "Unknown";
}
|
|
|
|
/**
 * Run a command synchronously without a shell and collect its output.
 *
 * Never throws: any failure is reported as `ok: false`. `ok` is true only
 * when the process exits with status 0.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {number} [timeout=3000] - Timeout in milliseconds handed to spawnSync.
 * @returns {{ok: boolean, stdout: string, stderr: string}} Exit state and captured text.
 */
function capture(command, args, timeout = 3000) {
  const options = { encoding: "utf8", timeout, windowsHide: true, shell: false };
  try {
    const { status, stdout, stderr } = spawnSync(command, args, options);
    return { ok: status === 0, stdout: stdout || "", stderr: stderr || "" };
  } catch {
    return { ok: false, stdout: "", stderr: "" };
  }
}
|
|
|
|
/**
 * Parse a memory size such as "8 GB" or "1536 MB" into MiB.
 *
 * The first "<number> GB" or "<number> MB" occurrence is used (case-insensitive).
 * Returns null when no such size is present or when the numeric part is
 * malformed (for example ".." or "1.2.3"), rather than leaking NaN.
 *
 * @param {*} value - Text to parse; falsy values are treated as empty.
 * @returns {number|null} Size in MiB rounded to the nearest integer, or null.
 */
function parseMemoryMb(value) {
  const match = String(value || "").match(/([\d.]+)\s*(GB|MB)/i);
  if (!match) return null;

  const amount = Number(match[1]);
  // [\d.]+ also accepts strings like ".." that are not numbers.
  if (!Number.isFinite(amount)) return null;

  const unitFactor = match[2].toUpperCase() === "GB" ? 1024 : 1;
  return Math.round(amount * unitFactor);
}
|
|
|
|
/**
 * Coerce a value to a number, keeping it only when it is finite and positive.
 *
 * @param {*} value - Anything accepted by Number().
 * @returns {number|null} The positive finite number, or null for zero,
 *   negative, non-finite or non-numeric input.
 */
function numberOrNull(value) {
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) return null;
  return parsed > 0 ? parsed : null;
}
|
|
|
|
/**
 * Reduce a name to lowercase ASCII letters and digits for loose comparison.
 *
 * @param {*} value - Name to normalise; falsy values become the empty string.
 * @returns {string} Lowercased text with every other character removed.
 */
function normalizeName(value) {
  const lowered = String(value || "").toLowerCase();
  // Splitting on runs of disallowed characters and re-joining removes them.
  return lowered.split(/[^a-z0-9]+/).join("");
}
|
|
|
|
/**
 * Check whether PLUGIN_DATA is writable by creating and then deleting a
 * ".write-test" file inside it.
 *
 * @returns {boolean} True when both the write and the delete succeed; false on any error.
 */
function testWritable() {
  let writable = false;
  try {
    const probe = path.join(PLUGIN_DATA, ".write-test");
    fs.writeFileSync(probe, "ok");
    fs.unlinkSync(probe);
    writable = true;
  } catch {
    // Any failure (missing directory, permissions, ...) means "not writable".
  }
  return writable;
}
|
|
|
|
module.exports = {
|
|
SUPPORTED_BACKENDS,
|
|
detectHardware,
|
|
detectGpus,
|
|
selectRuntimeTarget,
|
|
calculateGpuCapacity,
|
|
estimateAllocation,
|
|
performanceTuningHints
|
|
};
|