// Lumi/plugins/lumi_ai/backend/hardware.js

const os = require("os");
const fs = require("fs");
const path = require("path");
const { spawnSync } = require("child_process");
const { PLUGIN_DATA, PLUGIN_ROOT } = require("./paths");

// GPU compute backends this module recognizes. createGpu drops any compute API
// that is not in this list before deciding whether a GPU has a supported runtime.
const SUPPORTED_BACKENDS = ["cuda", "vulkan", "metal", "sycl"];

/**
 * Builds a one-shot report of the host machine: OS, CPU threads, RAM, free
 * disk, detected GPUs, the runtime target chosen for the primary GPU, and the
 * tier of the most RAM-hungry model that still fits on this machine.
 *
 * @param {Array<{tier: string, ram_gb: number, size: number}>} models
 *   Candidate models. `ram_gb` is compared (in GB) against total RAM and
 *   `size` (in bytes) against free disk space.
 * @param {object|null} [runtimeManifest=null] Forwarded unchanged to
 *   selectRuntimeTarget.
 * @returns {object} The hardware report; `recommended_tier` falls back to
 *   "tiny" when no model fits.
 */
function detectHardware(models, runtimeManifest = null) {
  const BYTES_PER_MB = 1048576;

  const diskFreeMb = getFreeDisk();
  const ramTotalMb = Math.floor(os.totalmem() / BYTES_PER_MB);
  const ramFreeMb = Math.floor(os.freemem() / BYTES_PER_MB);
  const detectedGpus = detectGpus();
  const primaryGpu = choosePrimaryGpu(detectedGpus);
  const runtime = selectRuntimeTarget(runtimeManifest, primaryGpu);
  const canWrite = testWritable();

  // A model qualifies when it fits in total RAM and on the free disk space.
  const fitsThisMachine = (entry) =>
    entry.ram_gb * 1024 <= ramTotalMb && entry.size / BYTES_PER_MB <= diskFreeMb;
  const candidates = [...models].filter(fitsThisMachine);
  candidates.sort((left, right) => right.ram_gb - left.ram_gb);
  const [largestFit] = candidates;
  const recommendedTier = largestFit?.tier || "tiny";

  const hostPlatform = os.platform();
  const onWindows = hostPlatform === "win32";

  return {
    platform: hostPlatform,
    architecture: os.arch(),
    cpu_threads: os.cpus().length,
    total_ram_mb: ramTotalMb,
    available_ram_mb: ramFreeMb,
    free_disk_mb: diskFreeMb,
    gpu: primaryGpu,
    gpus: detectedGpus,
    runtime_selection: runtime,
    subprocess_allowed: true,
    plugin_writable: canWrite,
    recommended_tier: recommendedTier,
    plugin_path: PLUGIN_ROOT,
    path_length: PLUGIN_ROOT.length,
    // Both path warnings are only ever raised on Windows.
    long_path_warning: onWindows && PLUGIN_ROOT.length > 220,
    network_path_warning: onWindows && PLUGIN_ROOT.startsWith("\\\\")
  };
}

/**
 * Reports the free space, in whole megabytes, on the filesystem that holds
 * PLUGIN_DATA.
 *
 * @returns {number} Free megabytes, or 0 when `fs.statfsSync` is not available
 *   or the lookup throws.
 */
function getFreeDisk() {
  let freeMb = 0;
  try {
    const canStat = typeof fs.statfsSync === "function";
    if (canStat) {
      const { bavail, bsize } = fs.statfsSync(PLUGIN_DATA);
      const freeBytes = Number(bavail) * Number(bsize);
      freeMb = Math.floor(freeBytes / 1048576);
    }
  } catch {
    // Best effort: any failure is reported as "no free space known".
    freeMb = 0;
  }
  return freeMb;
}

/**
 * Lists the GPUs found on this machine.
 *
 * On macOS the result of detectMacGpus is returned as is. Elsewhere the
 * nvidia-smi results come first, followed by the platform results
 * (Windows or Linux) whose normalized model name has not been seen yet;
 * entries without a model name and virtual/remote display adapters are
 * removed at the end.
 *
 * @param {string} [platform=os.platform()] Platform identifier to detect for.
 * @returns {object[]} GPU descriptors.
 */
function detectGpus(platform = os.platform()) {
  if (platform === "darwin") {
    return detectMacGpus();
  }

  const fromNvidiaSmi = detectNvidiaGpus();
  const fromSystem = platform === "win32" ? detectWindowsGpus() : detectLinuxGpus();

  // Track normalized names so each system entry is compared against everything kept so far.
  const knownNames = new Set(fromNvidiaSmi.map((entry) => normalizeName(entry.model)));
  const combined = [...fromNvidiaSmi];
  for (const entry of fromSystem) {
    const key = normalizeName(entry.model);
    if (knownNames.has(key)) continue;
    knownNames.add(key);
    combined.push(entry);
  }

  const excludedAdapters = /virtual|display hub|remote display/i;
  return combined.filter((entry) => entry.model && !excludedAdapters.test(entry.model));
}

/**
 * Queries `nvidia-smi` for name, total memory, free memory and driver version
 * and turns every non-empty CSV row into a GPU descriptor.
 *
 * @returns {object[]} One descriptor per row, or an empty array when the
 *   command did not succeed.
 */
function detectNvidiaGpus() {
  const query = capture("nvidia-smi", [
    "--query-gpu=name,memory.total,memory.free,driver_version",
    "--format=csv,noheader,nounits"
  ]);
  if (!query.ok) {
    return [];
  }

  const gpus = [];
  for (const row of query.stdout.split(/\r?\n/)) {
    if (!row) continue;
    // Column order follows the --query-gpu list above.
    const fields = row.split(",").map((field) => field.trim());
    gpus.push(createGpu({
      vendor: "NVIDIA",
      model: fields[0],
      driver: fields[3],
      vram_mb: numberOrNull(fields[1]),
      available_vram_mb: numberOrNull(fields[2]),
      compute_api: ["cuda", "vulkan"]
    }));
  }
  return gpus;
}

/**
 * Lists video controllers on Windows by running a PowerShell
 * `Get-CimInstance Win32_VideoController` pipeline and parsing its JSON output.
 *
 * @returns {object[]} One descriptor per controller, or an empty array when the
 *   command fails, prints nothing, or its output cannot be processed.
 */
function detectWindowsGpus() {
  const pipeline = [
    "Get-CimInstance Win32_VideoController |",
    "Select-Object Name,AdapterRAM,DriverVersion,PNPDeviceID |",
    "ConvertTo-Json -Compress"
  ];
  const response = capture("powershell", ["-NoProfile", "-Command", pipeline.join(" ")], 5000);
  const hasOutput = response.ok && response.stdout.trim();
  if (!hasOutput) {
    return [];
  }

  try {
    const decoded = JSON.parse(response.stdout);
    // The output may be a single object rather than an array; wrap it so both shapes are handled.
    const controllers = Array.isArray(decoded) ? decoded : [decoded];
    const gpus = [];
    for (const controller of controllers) {
      const { Name, AdapterRAM, DriverVersion, PNPDeviceID } = controller;
      const vendor = vendorFromName(Name, PNPDeviceID);
      let vramMb = null;
      if (AdapterRAM) {
        vramMb = Math.round(Number(AdapterRAM) / 1048576);
      }
      gpus.push(createGpu({
        vendor,
        model: Name,
        driver: DriverVersion,
        vram_mb: vramMb,
        compute_api: computeApis(vendor, "win32")
      }));
    }
    return gpus;
  } catch {
    return [];
  }
}

/**
 * Lists display adapters on Linux from `lspci -nn` output.
 *
 * With `-nn`, lspci prints the numeric class code right after the class name
 * (for example `01:00.0 VGA compatible controller [0300]: NVIDIA ...`), so the
 * prefix pattern accepts an optional bracketed code before the colon; without
 * it the slot and class text would stay glued to the model name. Adapters
 * listed under the `Display controller` class are included alongside VGA and
 * 3D controllers.
 *
 * Vulkan is added to an adapter's compute APIs when `vulkaninfo --summary`
 * succeeds and computeApis did not already list it.
 *
 * @returns {object[]} One descriptor per matching adapter (driver and VRAM are
 *   not available from this source), or an empty array when lspci fails.
 */
function detectLinuxGpus() {
  const listing = capture("lspci", ["-nn"], 3000);
  if (!listing.ok) return [];
  const vulkan = capture("vulkaninfo", ["--summary"], 3000).ok;
  // Slot, class name, optional "[xxxx]" class code, then the colon that ends the class column.
  const classPrefix = /^.*?(?:VGA compatible controller|3D controller|Display controller)(?:\s*\[[0-9a-f]{4}\])?:\s*/i;
  return listing.stdout.split(/\r?\n/)
    .filter((line) => classPrefix.test(line))
    .map((line) => {
      const model = line.replace(classPrefix, "").trim();
      const vendor = vendorFromName(model, line);
      const apis = computeApis(vendor, "linux");
      if (vulkan && !apis.includes("vulkan")) apis.push("vulkan");
      return createGpu({ vendor, model, driver: null, vram_mb: null, compute_api: apis });
    });
}

/**
 * Lists GPUs on macOS from `system_profiler SPDisplaysDataType -json`.
 *
 * Every entry is reported with the "metal" compute API and the OS release
 * string as its driver value.
 *
 * @returns {object[]} One descriptor per display entry, or an empty array when
 *   the command fails or its output cannot be processed.
 */
function detectMacGpus() {
  const profile = capture("system_profiler", ["SPDisplaysDataType", "-json"], 5000);
  if (!profile.ok) {
    return [];
  }

  const describe = (entry) => {
    const modelName = entry.sppci_model;
    const isApple = /apple/i.test(modelName || "");
    const vendor = isApple ? "Apple" : vendorFromName(modelName);
    const memoryText = entry.spdisplays_vram || entry.spdisplays_vram_shared;
    return createGpu({
      vendor,
      model: modelName,
      driver: os.release(),
      vram_mb: parseMemoryMb(memoryText),
      compute_api: ["metal"]
    });
  };

  try {
    const parsed = JSON.parse(profile.stdout);
    const entries = parsed.SPDisplaysDataType || [];
    return entries.map((entry) => describe(entry));
  } catch {
    return [];
  }
}

/**
 * Normalizes raw detection fields into the GPU descriptor shape used by this
 * module.
 *
 * @param {object} fields
 * @param {string} [fields.vendor] Vendor label; empty values become "Unknown".
 * @param {string} [fields.model] Model name, also exposed as `name`.
 * @param {string} [fields.driver] Driver version, or null when falsy.
 * @param {*} [fields.vram_mb] Total VRAM; run through numberOrNull.
 * @param {*} [fields.available_vram_mb] Free VRAM; run through numberOrNull.
 * @param {string[]} [fields.compute_api] Candidate compute APIs; entries not in
 *   SUPPORTED_BACKENDS and duplicates are dropped.
 * @returns {object} Descriptor with `present: true`; `supported_runtime` is
 *   true when at least one compute API survived the filtering.
 */
function createGpu({ vendor, model, driver, vram_mb, available_vram_mb, compute_api }) {
  const recognized = (compute_api || []).filter((api) => SUPPORTED_BACKENDS.includes(api));
  // Keep only the first occurrence of each backend, preserving order.
  const uniqueApis = recognized.filter((api, position) => recognized.indexOf(api) === position);
  const modelName = model || null;

  return {
    present: true,
    vendor: vendor ? vendor : "Unknown",
    model: modelName,
    name: modelName,
    driver: driver ? driver : null,
    vram_mb: numberOrNull(vram_mb),
    available_vram_mb: numberOrNull(available_vram_mb),
    compute_api: uniqueApis,
    supported_runtime: uniqueApis.length > 0
  };
}

/**
 * Picks the GPU to treat as primary.
 *
 * GPUs with a supported runtime rank ahead of those without; within each group
 * the one with more total VRAM (missing VRAM counts as 0) ranks first. The
 * input array is not reordered.
 *
 * @param {object[]} gpus GPU descriptors.
 * @returns {object} The top-ranked descriptor from `gpus`, or a placeholder
 *   with `present: false` when the list is empty.
 */
function choosePrimaryGpu(gpus) {
  if (gpus.length === 0) {
    const placeholder = {
      present: false,
      vendor: null,
      model: null,
      name: null,
      driver: null,
      vram_mb: null,
      available_vram_mb: null,
      compute_api: [],
      supported_runtime: false
    };
    return placeholder;
  }

  const byPreference = (left, right) => {
    const supportDelta = Number(right.supported_runtime) - Number(left.supported_runtime);
    if (supportDelta !== 0) {
      return supportDelta;
    }
    const leftVram = left.vram_mb || 0;
    const rightVram = right.vram_mb || 0;
    return rightVram - leftVram;
  };

  // Sort a copy so the caller's array keeps its order.
  const ranked = [...gpus];
  ranked.sort(byPreference);
  return ranked[0];
}

/**
 * Chooses between the accelerated and the CPU runtime target for a
 * `<platform>-<architecture>` key in the runtime manifest.
 *
 * The accelerated target is used when a GPU is present, the manifest has an
 * accelerated entry for the key, and that entry's backend is either "metal" or
 * listed in the GPU's compute APIs. Otherwise the CPU target is returned along
 * with a reason.
 *
 * @param {object|null} runtimeManifest Manifest with optional `targets` and
 *   `accelerated_targets` maps.
 * @param {object|null} gpu GPU descriptor.
 * @param {string} [platform=os.platform()]
 * @param {string} [architecture=os.arch()]
 * @returns {object} Selection with `backend`, `accelerated`, `fallback_to_cpu`,
 *   `target`, and (CPU case only) `reason`.
 */
function selectRuntimeTarget(runtimeManifest, gpu, platform = os.platform(), architecture = os.arch()) {
  const targetKey = `${platform}-${architecture}`;
  const cpuTarget = runtimeManifest?.targets?.[targetKey] || null;
  const gpuTarget = runtimeManifest?.accelerated_targets?.[targetKey] || null;
  const backend = gpuTarget?.backend;

  let usable = false;
  if (gpu?.present && gpuTarget) {
    usable = backend === "metal" || Boolean(gpu.compute_api?.includes(backend));
  }

  if (usable) {
    return {
      backend,
      accelerated: true,
      fallback_to_cpu: false,
      target: gpuTarget
    };
  }

  const gpuPresent = Boolean(gpu?.present);
  const reason = gpuPresent
    ? "No compatible managed GPU runtime is available."
    : "No supported GPU detected.";
  return {
    backend: "cpu",
    accelerated: false,
    fallback_to_cpu: gpuPresent,
    reason,
    target: cpuTarget
  };
}

/**
 * Estimates how much of a model can safely be offloaded to the GPU.
 *
 * The full-offload cost is the model size plus the estimated context memory
 * plus an overhead of max(256 MB, 8% of the model). The safe budget is 90% of
 * total VRAM minus VRAM that appears to be used by anything other than the
 * managed model.
 *
 * @param {object} options
 * @param {{size: number}|null} options.model Model descriptor; `size` is in bytes.
 * @param {number} [options.contextSize] Forwarded to estimateContextMemoryMb.
 * @param {object|null} options.gpu GPU descriptor (`present`, `supported_runtime`,
 *   `compute_api`, `vram_mb`, `available_vram_mb`).
 * @param {string} [options.backend] Requested runtime backend; falsy means "cpu".
 * @param {number} [options.managedUsageMb=0] VRAM currently held by the managed
 *   model, so it is not counted as external usage.
 * @returns {object} Capacity estimate, including `supported`, `backend`,
 *   `max_percent` (0-100) and a `warning` string or null.
 */
function calculateGpuCapacity({ model, contextSize, gpu, backend, managedUsageMb = 0 }) {
  const requestedBackend = backend || "cpu";
  const supported = Boolean(
    model &&
    gpu?.present &&
    gpu.supported_runtime &&
    requestedBackend !== "cpu" &&
    (requestedBackend === "metal" || gpu.compute_api?.includes(requestedBackend))
  );
  const modelMb = model ? model.size / 1048576 : 0;
  const contextMb = estimateContextMemoryMb(model, contextSize);
  const overheadMb = supported ? Math.max(256, modelMb * 0.08) : 0;
  const fullOffloadMb = Math.ceil(modelMb + contextMb + overheadMb);
  const totalVramMb = Math.max(0, Number(gpu?.vram_mb) || 0);
  // GPU descriptors carry `null` when free VRAM was never measured. `Number(null)`
  // is 0, which is finite, so a missing reading must be ruled out explicitly or
  // the card would be treated as completely full.
  const reportedFreeMb = gpu?.available_vram_mb;
  const hasObservedFree = reportedFreeMb != null && Number.isFinite(Number(reportedFreeMb));
  const observedAvailableVramMb = Math.max(0, Math.floor(
    hasObservedFree ? Number(reportedFreeMb) : totalVramMb * 0.9
  ));
  const totalUsableVramMb = Math.floor((totalVramMb || observedAvailableVramMb) * 0.9);
  const managedModelVramMb = Math.max(0, Number(managedUsageMb) || 0);
  const observedUsedVramMb = Math.max(0, totalVramMb - observedAvailableVramMb);
  const externalUsageMb = Math.max(
    0,
    observedUsedVramMb - managedModelVramMb
  );
  // The managed model's live allocation remains part of its own usable budget.
  const safeAvailableVramMb = Math.max(0, totalUsableVramMb - externalUsageMb);
  const maxPercent = supported && fullOffloadMb > 0
    ? Math.max(0, Math.min(100, Math.floor((safeAvailableVramMb / fullOffloadMb) * 100)))
    : 0;
  return {
    supported,
    backend: supported ? requestedBackend : "cpu",
    max_percent: maxPercent,
    total_vram_mb: totalVramMb,
    free_vram_mb: observedAvailableVramMb,
    managed_model_vram_mb: managedModelVramMb,
    external_vram_estimate_mb: externalUsageMb,
    available_vram_mb: observedAvailableVramMb,
    safe_available_vram_mb: safeAvailableVramMb,
    managed_gpu_memory_mb: managedModelVramMb,
    external_gpu_memory_mb: externalUsageMb,
    estimated_full_offload_mb: fullOffloadMb,
    estimated_context_mb: contextMb,
    warning: !gpu?.present
      ? "No supported GPU detected. CPU fallback will be used."
      : !supported
        ? "The detected GPU does not match the installed runtime backend. CPU fallback will be used."
        : externalUsageMb > 0 && maxPercent < 100
          ? "External VRAM usage limits maximum GPU acceleration."
          : maxPercent < 100
            ? "The selected model and context exceed the GPU's safe VRAM budget."
          : null
  };
}

/**
 * Turns a requested GPU allocation percentage into the allocation that will
 * actually be applied, clamped by the capacity from calculateGpuCapacity.
 *
 * @param {object} options
 * @param {object|null} options.model Model descriptor (`size` in bytes, `gpu_layers`).
 * @param {number} [options.contextSize] Forwarded to calculateGpuCapacity.
 * @param {object|null} options.gpu GPU descriptor.
 * @param {string} [options.backend] Requested runtime backend.
 * @param {number} [options.intentPercent] Requested allocation, 0-100.
 * @param {number} [options.workloadPercent] Used when `intentPercent` is null
 *   or undefined.
 * @param {number} [options.managedUsageMb=0] Forwarded to calculateGpuCapacity.
 * @returns {object} The capacity fields plus intent/actual percentages, the
 *   clamp reason (or null), GPU layer count, memory estimates and a warning.
 */
function estimateAllocation({ model, contextSize, gpu, backend, intentPercent, workloadPercent, managedUsageMb = 0 }) {
  const capacity = calculateGpuCapacity({ model, contextSize, gpu, backend, managedUsageMb });

  const requested = Number(intentPercent ?? workloadPercent) || 0;
  const intent = Math.max(0, Math.min(100, requested));
  const actual = Math.min(intent, capacity.max_percent);

  // Any non-zero allocation offloads at least one layer.
  let gpuLayers = 0;
  if (actual > 0) {
    const layerCount = model?.gpu_layers || 0;
    gpuLayers = Math.max(1, Math.round(layerCount * actual / 100));
  }

  let clampedReason = null;
  if (actual < intent) {
    if (!capacity.supported) {
      clampedReason = "gpu_backend_unavailable";
    } else if (capacity.external_vram_estimate_mb > 0) {
      clampedReason = "external_vram_pressure";
    } else {
      clampedReason = "model_vram_capacity";
    }
  }

  let warning = capacity.warning;
  switch (clampedReason) {
    case "external_vram_pressure":
      warning = `Actual GPU allocation is limited to ${actual}% because other processes are using VRAM. Your ${intent}% intent is preserved.`;
      break;
    case "model_vram_capacity":
      warning = `Actual GPU allocation is limited to ${actual}% because the selected model and context exceed the safe VRAM budget.`;
      break;
    case "gpu_backend_unavailable":
      warning = `GPU acceleration intent is ${intent}%, but the installed runtime cannot use the detected GPU.`;
      break;
    default:
      break;
  }

  return {
    ...capacity,
    gpu_allocation_intent_percent: intent,
    gpu_allocation_actual_percent: actual,
    gpu_allocation_max_safe_percent: capacity.max_percent,
    gpu_allocation_clamped_reason: clampedReason,
    intended_gpu_allocation: intent,
    actual_gpu_allocation: actual,
    workload_percent: actual,
    gpu_layers: gpuLayers,
    estimated_gpu_memory_mb: Math.ceil(capacity.estimated_full_offload_mb * actual / 100),
    estimated_cpu_memory_mb: Math.ceil((model?.size || 0) / 1048576 * (1 - actual / 100)),
    warning
  };
}

/**
 * Produces human-readable tuning suggestions from the current configuration,
 * GPU, allocation result and recent generation speed.
 *
 * @param {object} options
 * @param {{size?: number}} [options.model] Model descriptor; `size` in bytes.
 * @param {object} [options.config] Reads `max_output_tokens`, `output_budgets`
 *   (map of numeric budgets), `context_size` and `concurrency`.
 * @param {object} [options.gpu] GPU descriptor (`model`, `vram_mb`).
 * @param {object} [options.allocation] Result carrying
 *   `gpu_allocation_actual_percent` and `gpu_allocation_intent_percent`.
 * @param {number|string} [options.generationTps=0] Recent tokens per second.
 *   Numeric strings are accepted; values that are not positive numbers
 *   produce no speed hint.
 * @returns {string[]} Hints in a fixed order; empty when nothing applies.
 */
function performanceTuningHints({ model, config, gpu, allocation, generationTps = 0 }) {
  const hints = [];
  const maxOutput = Math.max(
    Number(config?.max_output_tokens) || 0,
    ...Object.values(config?.output_budgets || {}).map((value) => Number(value) || 0)
  );
  const context = Number(config?.context_size) || 0;
  const concurrency = Number(config?.concurrency) || 1;
  // Coerce once: a numeric string would pass the comparisons below but has no toFixed().
  const tokensPerSecond = Number(generationTps) || 0;
  if (maxOutput > 2048) hints.push("Lower maximum output tokens to reduce long generation time.");
  if (context > 8192) hints.push("Lower context size if prompt evaluation is slow.");
  if (allocation?.gpu_allocation_actual_percent < allocation?.gpu_allocation_intent_percent) {
    hints.push("Review GPU layers/offload and external VRAM use before increasing allocation.");
  }
  if (concurrency > 1) hints.push("Reduce concurrent AI jobs when generation throughput is low.");
  if (/GTX\s*1060/i.test(String(gpu?.model || "")) && Number(gpu?.vram_mb) <= 6144) {
    hints.push("On a GTX 1060 6GB, a full VRAM graph can be normal managed-model allocation rather than external pressure.");
  }
  if ((model?.size || 0) >= 3.5 * 1024 ** 3) {
    hints.push("This model may generate slowly on older GPUs even when its VRAM allocation is expected.");
  }
  if (tokensPerSecond > 0 && tokensPerSecond < 8) {
    hints.push(`Recent generation speed is ${tokensPerSecond.toFixed(1)} tok/s. Lower class output budgets, reduce context, improve GPU offload, or reduce concurrency.`);
  }
  return hints;
}

/**
 * Estimates context memory in megabytes: 192 MB per 4096 tokens of context,
 * scaled up by `gpu_layers / 24` when the model has more than 24 layers.
 *
 * @param {{default_context?: number, gpu_layers?: number}|null} model
 * @param {number} [contextSize] Requested context; falls back to the model's
 *   `default_context`, then 4096. Values below 512 are raised to 512.
 * @returns {number} Estimate rounded up to a whole megabyte.
 */
function estimateContextMemoryMb(model, contextSize) {
  const requested = Number(contextSize) || model?.default_context || 4096;
  const tokens = Math.max(512, requested);
  const layers = model?.gpu_layers || 24;
  const layerScale = Math.max(1, layers / 24);
  const contextUnits = tokens / 4096;
  return Math.ceil(contextUnits * 192 * layerScale);
}

/**
 * Lists the compute APIs assumed for a GPU vendor on a platform.
 *
 * @param {string} vendor Vendor label such as "NVIDIA", "AMD" or "Intel".
 * @param {string} platform Platform identifier; "darwin" always yields Metal.
 * @returns {string[]} A new array on every call, so callers may modify it.
 */
function computeApis(vendor, platform) {
  if (platform === "darwin") {
    return ["metal"];
  }
  switch (vendor) {
    case "NVIDIA":
      return ["cuda", "vulkan"];
    case "Intel":
      return ["sycl", "vulkan"];
    case "AMD":
    default:
      return ["vulkan"];
  }
}

/**
 * Guesses the GPU vendor from a device name and an optional device identifier.
 *
 * Both strings are searched together, case-insensitively, for vendor names or
 * vendor tokens (`VEN_10DE`, `VEN_1002`, `VEN_8086`). Rules are checked in the
 * order NVIDIA, AMD, Intel, Apple and the first match wins.
 *
 * @param {string} [name=""] Device or model name.
 * @param {string} [deviceId=""] Extra identifier text to search.
 * @returns {string} "NVIDIA", "AMD", "Intel", "Apple" or "Unknown".
 */
function vendorFromName(name = "", deviceId = "") {
  const haystack = `${name} ${deviceId}`;
  const rules = [
    [/nvidia|VEN_10DE/i, "NVIDIA"],
    [/amd|radeon|advanced micro devices|VEN_1002/i, "AMD"],
    [/intel|VEN_8086/i, "Intel"],
    [/apple/i, "Apple"]
  ];
  for (const [pattern, vendor] of rules) {
    if (pattern.test(haystack)) {
      return vendor;
    }
  }
  return "Unknown";
}

/**
 * Runs a command synchronously without a shell and collects its output as
 * UTF-8 text.
 *
 * @param {string} command Executable to run.
 * @param {string[]} args Arguments passed to the executable.
 * @param {number} [timeout=3000] Timeout in milliseconds handed to spawnSync.
 * @returns {{ok: boolean, stdout: string, stderr: string}} `ok` is true only
 *   when the exit status is 0; missing output is reported as an empty string.
 *   If spawnSync itself throws, a failed result with empty output is returned.
 */
function capture(command, args, timeout = 3000) {
  const spawnOptions = {
    encoding: "utf8",
    timeout,
    windowsHide: true,
    shell: false
  };
  try {
    const { status, stdout, stderr } = spawnSync(command, args, spawnOptions);
    const succeeded = status === 0;
    return {
      ok: succeeded,
      stdout: stdout || "",
      stderr: stderr || ""
    };
  } catch {
    return { ok: false, stdout: "", stderr: "" };
  }
}

/**
 * Parses the first "<number> GB" or "<number> MB" amount found in a value and
 * converts it to megabytes (1 GB = 1024 MB). The unit is case-insensitive.
 *
 * @param {*} value Text such as "8 GB" or "1536 MB"; falsy values are treated
 *   as empty text.
 * @returns {number|null} Rounded megabytes, or null when no amount is found.
 */
function parseMemoryMb(value) {
  const text = String(value || "");
  const found = /([\d.]+)\s*(GB|MB)/i.exec(text);
  if (found === null) {
    return null;
  }
  const [, digits, unit] = found;
  const multiplier = unit.toUpperCase() === "GB" ? 1024 : 1;
  return Math.round(Number(digits) * multiplier);
}

/**
 * Converts a value to a number, keeping it only when it is finite and
 * strictly positive.
 *
 * @param {*} value Value to convert with `Number()`.
 * @returns {number|null} The positive finite number, otherwise null.
 */
function numberOrNull(value) {
  const parsed = Number(value);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return null;
  }
  return parsed;
}

/**
 * Reduces a name to its lowercase ASCII letters and digits so that names
 * differing only in case, spacing or punctuation compare equal.
 *
 * @param {*} value Name to normalize; falsy values become an empty string.
 * @returns {string} Lowercased text containing only `a-z` and `0-9`.
 */
function normalizeName(value) {
  const lowered = String(value || "").toLowerCase();
  // Joining the alphanumeric runs is the same as deleting everything else.
  const kept = lowered.match(/[a-z0-9]+/g) || [];
  return kept.join("");
}

/**
 * Checks whether PLUGIN_DATA is writable by creating and then deleting a
 * `.write-test` file inside it.
 *
 * @returns {boolean} True when both the write and the delete succeed; false
 *   when any step throws.
 */
function testWritable() {
  let writable = false;
  try {
    const probePath = path.join(PLUGIN_DATA, ".write-test");
    fs.writeFileSync(probePath, "ok");
    fs.unlinkSync(probePath);
    writable = true;
  } catch {
    writable = false;
  }
  return writable;
}

// Public surface of this module: the backend list, hardware/GPU detection,
// runtime target selection, and the VRAM capacity/allocation estimators.
// The remaining functions in this file are internal helpers.
module.exports = {
  SUPPORTED_BACKENDS,
  detectHardware,
  detectGpus,
  selectRuntimeTarget,
  calculateGpuCapacity,
  estimateAllocation,
  performanceTuningHints
};