{ "models": [ { "id": "smollm2-360m-q8", "label": "Tiny - SmolLM2 360M Q8", "display_name": "SmolLM2 360M Instruct", "model_family": "SmolLM2", "parameter_count": "360M", "repo_id": "HuggingFaceTB/SmolLM2-360M-Instruct-GGUF", "license": "apache-2.0", "format": "GGUF", "quantization": "Q8_0", "min_ram_mb": 1024, "recommended_ram_mb": 2048, "min_disk_mb": 400, "recommended_threads": 2, "default_context": 2048, "max_context": 8192, "supports_chat": true, "supports_json": false, "supports_tool_routing": false, "recommended_use": "Basic intent routing and very short responses.", "warnings": "Not recommended for complex assistant behavior.", "repo": "HuggingFaceTB/SmolLM2-360M-Instruct-GGUF", "revision": "593b5a2e04c8f3e4ee880263f93e0bd2901ad47f", "filename": "smollm2-360m-instruct-q8_0.gguf", "size": 386404992, "sha256": "48ab3034d0dd401fbc721eb1df3217902fee7dab9078992d66431f09b7750201", "ram_gb": 2, "tier": "tiny" }, { "id": "qwen3-0.6b-q4", "label": "Small - Qwen3 0.6B Q4_K_M", "display_name": "Qwen3 0.6B Instruct", "model_family": "Qwen3", "parameter_count": "0.6B", "repo_id": "bartowski/Qwen_Qwen3-0.6B-GGUF", "license": "apache-2.0", "format": "GGUF", "quantization": "Q4_K_M", "min_ram_mb": 1536, "recommended_ram_mb": 3072, "min_disk_mb": 500, "recommended_threads": 4, "default_context": 4096, "max_context": 32768, "supports_chat": true, "supports_json": true, "supports_tool_routing": false, "recommended_use": "Basic scoped assistant for weak servers.", "warnings": "Limited reasoning and tool-routing reliability.", "repo": "bartowski/Qwen_Qwen3-0.6B-GGUF", "revision": "60b85c0e3d8fe0f6474f406922a26d12aca4550d", "filename": "Qwen_Qwen3-0.6B-Q4_K_M.gguf", "size": 484220320, "sha256": "9acfc1e001311f34b4252001b626f2e466d592a42065f66571bff3790d4e1b14", "ram_gb": 3, "tier": "small" }, { "id": "qwen3-1.7b-q4", "label": "Medium - Qwen3 1.7B Q4_K_M", "display_name": "Qwen3 1.7B Instruct", "model_family": "Qwen3", "parameter_count": "1.7B", "repo_id": "bartowski/Qwen_Qwen3-1.7B-GGUF", "license": "apache-2.0", "format": "GGUF", "quantization": "Q4_K_M", "min_ram_mb": 3072, "recommended_ram_mb": 5120, "min_disk_mb": 1300, "recommended_threads": 6, "default_context": 4096, "max_context": 32768, "supports_chat": true, "supports_json": true, "supports_tool_routing": true, "recommended_use": "Recommended minimum for useful bot assistant behavior.", "warnings": "CPU response speed depends heavily on host memory bandwidth.", "repo": "bartowski/Qwen_Qwen3-1.7B-GGUF", "revision": "dcb19155b962dbb6389f4691a982043a8e651022", "filename": "Qwen_Qwen3-1.7B-Q4_K_M.gguf", "size": 1282439584, "sha256": "72c5c3cb38fa32d5256e2fe30d03e7a64c6c79e668ad84057e3bd66e250b24fb", "ram_gb": 5, "tier": "medium" }, { "id": "qwen3-4b-q4", "label": "Large - Qwen3 4B Q4_K_M", "display_name": "Qwen3 4B Instruct", "model_family": "Qwen3", "parameter_count": "4B", "repo_id": "bartowski/Qwen_Qwen3-4B-GGUF", "license": "apache-2.0", "format": "GGUF", "quantization": "Q4_K_M", "min_ram_mb": 5120, "recommended_ram_mb": 8192, "min_disk_mb": 2500, "recommended_threads": 8, "default_context": 4096, "max_context": 32768, "supports_chat": true, "supports_json": true, "supports_tool_routing": true, "recommended_use": "Better style following, tool routing, and reasoning.", "warnings": "May be slow on CPU-only systems.", "repo": "bartowski/Qwen_Qwen3-4B-GGUF", "revision": "cb76885dc66d50759b207c5a48c4e78dfa00c638", "filename": "Qwen_Qwen3-4B-Q4_K_M.gguf", "size": 2497280960, "sha256": "fbe1d5edd4ce802ae3ae7c7e4ab7d09789d697fdac1fc7929f8df4ca3c41bae3", "ram_gb": 8, "tier": "large" }, { "id": "qwen3-8b-q4", "label": "General - Qwen3 8B Q4_K_M", "display_name": "Qwen3 8B Instruct", "model_family": "Qwen3", "parameter_count": "8B", "repo_id": "bartowski/Qwen_Qwen3-8B-GGUF", "license": "apache-2.0", "format": "GGUF", "quantization": "Q4_K_M", "min_ram_mb": 8192, "recommended_ram_mb": 12288, "min_disk_mb": 5100, "recommended_threads": 10, "default_context": 4096, "max_context": 32768, "supports_chat": true, "supports_json": true, "supports_tool_routing": true, "recommended_use": "More capable general assistant.", "warnings": "Requires decent RAM and patience on CPU.", "repo": "bartowski/Qwen_Qwen3-8B-GGUF", "revision": "0b69f75b7472688e6808490aa2b85efdb81b5ce7", "filename": "Qwen_Qwen3-8B-Q4_K_M.gguf", "size": 5027784224, "sha256": "54fffa050078e984116639c83dfb64b5aa6d4cd474e018b076777c632bbccccd", "ram_gb": 12, "tier": "general" }, { "id": "qwen3-14b-q4", "label": "GPU - Qwen3 14B Q4_K_M", "display_name": "Qwen3 14B Instruct", "model_family": "Qwen3", "parameter_count": "14B", "repo_id": "bartowski/Qwen_Qwen3-14B-GGUF", "license": "apache-2.0", "format": "GGUF", "quantization": "Q4_K_M", "min_ram_mb": 14336, "recommended_ram_mb": 20480, "min_disk_mb": 9000, "recommended_threads": 12, "default_context": 4096, "max_context": 32768, "supports_chat": true, "supports_json": true, "supports_tool_routing": true, "recommended_use": "Serious local assistant tier.", "warnings": "GPU strongly recommended.", "repo": "bartowski/Qwen_Qwen3-14B-GGUF", "revision": "bd080f768a6401c2d5a7fa53a2e50cd8218a9ce2", "filename": "Qwen_Qwen3-14B-Q4_K_M.gguf", "size": 9001753632, "sha256": "915913e22399475dbe6c968ac014d9f1fbe08975e489279aede9d5c7b2c98eb6", "ram_gb": 20, "tier": "gpu" }, { "id": "qwen3-30b-a3b-q4", "label": "GPU XL - Qwen3 30B-A3B Q4_K_M", "display_name": "Qwen3 30B-A3B Instruct", "model_family": "Qwen3 MoE", "parameter_count": "30B total / 3B active", "repo_id": "bartowski/Qwen_Qwen3-30B-A3B-GGUF", "license": "apache-2.0", "format": "GGUF", "quantization": "Q4_K_M", "min_ram_mb": 24576, "recommended_ram_mb": 32768, "min_disk_mb": 18700, "recommended_threads": 16, "default_context": 4096, "max_context": 32768, "supports_chat": true, "supports_json": true, "supports_tool_routing": true, "recommended_use": "Experimental high-end assistant tier.", "warnings": "Requires strong hardware and substantial disk space.", "repo": "bartowski/Qwen_Qwen3-30B-A3B-GGUF", "revision": "46f17e079cba70b04390bef39b57d2783e9fd015", "filename": "Qwen_Qwen3-30B-A3B-Q4_K_M.gguf", "size": 18632184480, "sha256": "a015794bfb1d69cb03dbb86b185fb2b9b339f757df5f8f9dd9ebdab8f6ed5d32", "ram_gb": 32, "tier": "gpu_xl" } ] }