Skip to content

Commit dc17c91

Browse files
committed
feat: added qwen3 thinking model support
1 parent: 1ab5012 · commit: dc17c91

File tree

4 files changed

+106
-2
lines changed

4 files changed

+106
-2
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,10 @@ More models will be added soon. Request a model by creating an issue.
178178
- Gemma-2B-IT
179179
- TinyLlama-1.1B-Chat-v0.4
180180
- Phi-3.5-mini-instruct
181+
- Qwen3-0.6B
182+
- Qwen3-1.7B
183+
- Qwen3-4B
184+
- Qwen3-8B
181185
- Qwen2.5-1.5B-Instruct
182186
- DeepSeek-R1-Distill-Qwen-7B
183187
- DeepSeek-R1-Distill-Llama-8B

examples/chat-demo/src/components/ChatInterface.tsx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,10 @@ export default function ChatInterface({ children }: ChatInterfaceProps) {
846846
<option value="qwen2.5-0.5b-instruct">Qwen2.5 0.5B Instruct (278MB)</option>
847847
<option value="qwen2.5-1.5b-instruct">Qwen2.5 1.5B Instruct (868MB)</option>
848848
<option value="qwen2.5-3b-instruct">Qwen2.5 3B Instruct (1.7GB)</option>
849+
<option value="qwen3-0.6b">Qwen3 0.6B (1.4GB)</option>
850+
<option value="qwen3-1.7b">Qwen3 1.7B (2.0GB)</option>
851+
<option value="qwen3-4b">Qwen3 4B (3.4GB)</option>
852+
<option value="qwen3-8b">Qwen3 8B (5.7GB)</option>
849853
<option value="gemma-2b-it">Gemma 2B Instruct (1.44GB)</option>
850854
<option value="phi-3.5-mini-instruct">Phi 3.5 Mini Instruct (2.1GB)</option>
851855
<option value="tinyllama-1.1b-chat-v0.4">TinyLlama 1.1B Chat (800MB)</option>

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
"homepage": "https://github.com/Cloud-Code-AI/BrowserAI#readme",
3838
"dependencies": {
3939
"@huggingface/jinja": "^0.3.3",
40-
"@mlc-ai/web-llm": "^0.2.78",
40+
"@mlc-ai/web-llm": "^0.2.79",
4141
"@types/pdfjs-dist": "^2.10.378",
4242
"mammoth": "^1.9.0",
4343
"onnxruntime-web": "1.21.0-dev.20250206-d981b153d3",

src/config/models/mlc-models.json

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"llama-3.2-1b-instruct": {
33
"engine": "mlc",
4-
"modelName": "Llama-3.2-1b-Instruct",
4+
"modelName": "Llama-3.2-1B-Instruct",
55
"modelType": "text-generation",
66
"repo": "mlc-ai/Llama-3.2-1B-Instruct-{quantization}-MLC",
77
"quantizations": [
@@ -323,5 +323,101 @@
323323
"q0f32": 66.4
324324
}
325325
}
326+
},
327+
"qwen3-0.6b": {
328+
"engine": "mlc",
329+
"modelName": "Qwen3-0.6B",
330+
"modelType": "text-generation",
331+
"repo": "mlc-ai/Qwen3-0.6B-{quantization}-MLC",
332+
"quantizations": [
333+
"q4f16_1",
334+
"q4f32_1",
335+
"q0f16",
336+
"q0f32"
337+
],
338+
"defaultQuantization": "q4f32_1",
339+
"defaultParams": {
340+
"temperature": 0.7,
341+
"maxTokens": 4096
342+
},
343+
"pipeline": "text-generation",
344+
"metadata": {
345+
"context_window_size": 4096,
346+
"estimated_vram_in_mb": {
347+
"q4f16_1": 1403.34,
348+
"q4f32_1": 1924.98,
349+
"q0f16": 2220.38,
350+
"q0f32": 3843.25
351+
}
352+
}
353+
},
354+
"qwen3-1.7b": {
355+
"engine": "mlc",
356+
"modelName": "Qwen3-1.7B",
357+
"modelType": "text-generation",
358+
"repo": "mlc-ai/Qwen3-1.7B-{quantization}-MLC",
359+
"quantizations": [
360+
"q4f16_1",
361+
"q4f32_1"
362+
],
363+
"defaultQuantization": "q4f32_1",
364+
"defaultParams": {
365+
"temperature": 0.7,
366+
"maxTokens": 4096
367+
},
368+
"pipeline": "text-generation",
369+
"metadata": {
370+
"context_window_size": 4096,
371+
"estimated_vram_in_mb": {
372+
"q4f16_1": 2036.66,
373+
"q4f32_1": 2635.44
374+
}
375+
}
376+
},
377+
"qwen3-4b": {
378+
"engine": "mlc",
379+
"modelName": "Qwen3-4B",
380+
"modelType": "text-generation",
381+
"repo": "mlc-ai/Qwen3-4B-{quantization}-MLC",
382+
"quantizations": [
383+
"q4f16_1",
384+
"q4f32_1"
385+
],
386+
"defaultQuantization": "q4f32_1",
387+
"defaultParams": {
388+
"temperature": 0.7,
389+
"maxTokens": 4096
390+
},
391+
"pipeline": "text-generation",
392+
"metadata": {
393+
"context_window_size": 4096,
394+
"estimated_vram_in_mb": {
395+
"q4f16_1": 3431.59,
396+
"q4f32_1": 4327.71
397+
}
398+
}
399+
},
400+
"qwen3-8b": {
401+
"engine": "mlc",
402+
"modelName": "Qwen3-8B",
403+
"modelType": "text-generation",
404+
"repo": "mlc-ai/Qwen3-8B-{quantization}-MLC",
405+
"quantizations": [
406+
"q4f16_1",
407+
"q4f32_1"
408+
],
409+
"defaultQuantization": "q4f32_1",
410+
"defaultParams": {
411+
"temperature": 0.7,
412+
"maxTokens": 4096
413+
},
414+
"pipeline": "text-generation",
415+
"metadata": {
416+
"context_window_size": 4096,
417+
"estimated_vram_in_mb": {
418+
"q4f16_1": 5695.78,
419+
"q4f32_1": 6852.55
420+
}
421+
}
326422
}
327423
}

0 commit comments

Comments (0)