Lumi/plugins/lumi_ai/models_manifest.json
2026-06-12 11:54:46 +02:00

215 lines
7.4 KiB
JSON

{
"models": [
{
"id": "smollm2-360m-q8",
"label": "Tiny - SmolLM2 360M Q8",
"display_name": "SmolLM2 360M Instruct",
"model_family": "SmolLM2",
"parameter_count": "360M",
"repo_id": "HuggingFaceTB/SmolLM2-360M-Instruct-GGUF",
"license": "apache-2.0",
"format": "GGUF",
"quantization": "Q8_0",
"min_ram_mb": 1024,
"recommended_ram_mb": 2048,
"min_disk_mb": 400,
"recommended_threads": 2,
"default_context": 2048,
"max_context": 8192,
"supports_chat": true,
"supports_json": false,
"supports_tool_routing": false,
"recommended_use": "Basic intent routing and very short responses.",
"warnings": "Not recommended for complex assistant behavior.",
"repo": "HuggingFaceTB/SmolLM2-360M-Instruct-GGUF",
"revision": "593b5a2e04c8f3e4ee880263f93e0bd2901ad47f",
"filename": "smollm2-360m-instruct-q8_0.gguf",
"size": 386404992,
"sha256": "48ab3034d0dd401fbc721eb1df3217902fee7dab9078992d66431f09b7750201",
"ram_gb": 2,
"gpu_layers": 32,
"tier": "tiny"
},
{
"id": "qwen3-0.6b-q4",
"label": "Small - Qwen3 0.6B Q4_K_M",
"display_name": "Qwen3 0.6B Instruct",
"model_family": "Qwen3",
"parameter_count": "0.6B",
"repo_id": "bartowski/Qwen_Qwen3-0.6B-GGUF",
"license": "apache-2.0",
"format": "GGUF",
"quantization": "Q4_K_M",
"min_ram_mb": 1536,
"recommended_ram_mb": 3072,
"min_disk_mb": 500,
"recommended_threads": 4,
"default_context": 4096,
"max_context": 32768,
"supports_chat": true,
"supports_json": true,
"supports_tool_routing": false,
"recommended_use": "Basic scoped assistant for weak servers.",
"warnings": "Limited reasoning and tool-routing reliability.",
"repo": "bartowski/Qwen_Qwen3-0.6B-GGUF",
"revision": "60b85c0e3d8fe0f6474f406922a26d12aca4550d",
"filename": "Qwen_Qwen3-0.6B-Q4_K_M.gguf",
"size": 484220320,
"sha256": "9acfc1e001311f34b4252001b626f2e466d592a42065f66571bff3790d4e1b14",
"ram_gb": 3,
"gpu_layers": 28,
"tier": "small"
},
{
"id": "qwen3-1.7b-q4",
"label": "Medium - Qwen3 1.7B Q4_K_M",
"display_name": "Qwen3 1.7B Instruct",
"model_family": "Qwen3",
"parameter_count": "1.7B",
"repo_id": "bartowski/Qwen_Qwen3-1.7B-GGUF",
"license": "apache-2.0",
"format": "GGUF",
"quantization": "Q4_K_M",
"min_ram_mb": 3072,
"recommended_ram_mb": 5120,
"min_disk_mb": 1300,
"recommended_threads": 6,
"default_context": 4096,
"max_context": 32768,
"supports_chat": true,
"supports_json": true,
"supports_tool_routing": true,
"recommended_use": "Recommended minimum for useful bot assistant behavior.",
"warnings": "CPU response speed depends heavily on host memory bandwidth.",
"repo": "bartowski/Qwen_Qwen3-1.7B-GGUF",
"revision": "dcb19155b962dbb6389f4691a982043a8e651022",
"filename": "Qwen_Qwen3-1.7B-Q4_K_M.gguf",
"size": 1282439584,
"sha256": "72c5c3cb38fa32d5256e2fe30d03e7a64c6c79e668ad84057e3bd66e250b24fb",
"ram_gb": 5,
"gpu_layers": 28,
"tier": "medium"
},
{
"id": "qwen3-4b-q4",
"label": "Large - Qwen3 4B Q4_K_M",
"display_name": "Qwen3 4B Instruct",
"model_family": "Qwen3",
"parameter_count": "4B",
"repo_id": "bartowski/Qwen_Qwen3-4B-GGUF",
"license": "apache-2.0",
"format": "GGUF",
"quantization": "Q4_K_M",
"min_ram_mb": 5120,
"recommended_ram_mb": 8192,
"min_disk_mb": 2500,
"recommended_threads": 8,
"default_context": 4096,
"max_context": 32768,
"supports_chat": true,
"supports_json": true,
"supports_tool_routing": true,
"recommended_use": "Better style following, tool routing, and reasoning.",
"warnings": "May be slow on CPU-only systems.",
"repo": "bartowski/Qwen_Qwen3-4B-GGUF",
"revision": "cb76885dc66d50759b207c5a48c4e78dfa00c638",
"filename": "Qwen_Qwen3-4B-Q4_K_M.gguf",
"size": 2497280960,
"sha256": "fbe1d5edd4ce802ae3ae7c7e4ab7d09789d697fdac1fc7929f8df4ca3c41bae3",
"ram_gb": 8,
"gpu_layers": 36,
"tier": "large"
},
{
"id": "qwen3-8b-q4",
"label": "General - Qwen3 8B Q4_K_M",
"display_name": "Qwen3 8B Instruct",
"model_family": "Qwen3",
"parameter_count": "8B",
"repo_id": "bartowski/Qwen_Qwen3-8B-GGUF",
"license": "apache-2.0",
"format": "GGUF",
"quantization": "Q4_K_M",
"min_ram_mb": 8192,
"recommended_ram_mb": 12288,
"min_disk_mb": 5100,
"recommended_threads": 10,
"default_context": 4096,
"max_context": 32768,
"supports_chat": true,
"supports_json": true,
"supports_tool_routing": true,
"recommended_use": "More capable general assistant.",
"warnings": "Requires decent RAM and patience on CPU.",
"repo": "bartowski/Qwen_Qwen3-8B-GGUF",
"revision": "0b69f75b7472688e6808490aa2b85efdb81b5ce7",
"filename": "Qwen_Qwen3-8B-Q4_K_M.gguf",
"size": 5027784224,
"sha256": "54fffa050078e984116639c83dfb64b5aa6d4cd474e018b076777c632bbccccd",
"ram_gb": 12,
"gpu_layers": 36,
"tier": "general"
},
{
"id": "qwen3-14b-q4",
"label": "Power - Qwen3 14B Q4_K_M",
"display_name": "Qwen3 14B Instruct",
"model_family": "Qwen3",
"parameter_count": "14B",
"repo_id": "bartowski/Qwen_Qwen3-14B-GGUF",
"license": "apache-2.0",
"format": "GGUF",
"quantization": "Q4_K_M",
"min_ram_mb": 14336,
"recommended_ram_mb": 20480,
"min_disk_mb": 9100,
"recommended_threads": 12,
"default_context": 4096,
"max_context": 32768,
"supports_chat": true,
"supports_json": true,
"supports_tool_routing": true,
"recommended_use": "Serious local assistant tier.",
"warnings": "GPU strongly recommended.",
"repo": "bartowski/Qwen_Qwen3-14B-GGUF",
"revision": "bd080f768a6401c2d5a7fa53a2e50cd8218a9ce2",
"filename": "Qwen_Qwen3-14B-Q4_K_M.gguf",
"size": 9001753632,
"sha256": "915913e22399475dbe6c968ac014d9f1fbe08975e489279aede9d5c7b2c98eb6",
"ram_gb": 20,
"gpu_layers": 40,
"tier": "power"
},
{
"id": "qwen3-30b-a3b-q4",
"label": "Extreme - Qwen3 30B-A3B Q4_K_M",
"display_name": "Qwen3 30B-A3B Instruct",
"model_family": "Qwen3 MoE",
"parameter_count": "30B total / 3B active",
"repo_id": "bartowski/Qwen_Qwen3-30B-A3B-GGUF",
"license": "apache-2.0",
"format": "GGUF",
"quantization": "Q4_K_M",
"min_ram_mb": 24576,
"recommended_ram_mb": 32768,
"min_disk_mb": 18700,
"recommended_threads": 16,
"default_context": 4096,
"max_context": 32768,
"supports_chat": true,
"supports_json": true,
"supports_tool_routing": true,
"recommended_use": "Experimental high-end assistant tier.",
"warnings": "Requires strong hardware and substantial disk space.",
"repo": "bartowski/Qwen_Qwen3-30B-A3B-GGUF",
"revision": "46f17e079cba70b04390bef39b57d2783e9fd015",
"filename": "Qwen_Qwen3-30B-A3B-Q4_K_M.gguf",
"size": 18632184480,
"sha256": "a015794bfb1d69cb03dbb86b185fb2b9b339f757df5f8f9dd9ebdab8f6ed5d32",
"ram_gb": 32,
"gpu_layers": 48,
"tier": "extreme"
}
]
}