Skip to content

Commit dc17c91

Browse files
committed
feat: added qwen3 thinking model support
1 parent: 1ab5012 · commit: dc17c91

File tree

4 files changed

+106
-2
lines changed

4 files changed

+106
-2
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,10 @@ More models will be added soon. Request a model by creating an issue.
178178
- Gemma-2B-IT
179179
- TinyLlama-1.1B-Chat-v0.4
180180
- Phi-3.5-mini-instruct
181+
- Qwen3-0.6B
182+
- Qwen3-1.7B
183+
- Qwen3-4B
184+
- Qwen3-8B
181185
- Qwen2.5-1.5B-Instruct
182186
- DeepSeek-R1-Distill-Qwen-7B
183187
- DeepSeek-R1-Distill-Llama-8B

examples/chat-demo/src/components/ChatInterface.tsx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,10 @@ export default function ChatInterface({ children }: ChatInterfaceProps) {
846846
<option value="qwen2.5-0.5b-instruct">Qwen2.5 0.5B Instruct (278MB)</option>
847847
<option value="qwen2.5-1.5b-instruct">Qwen2.5 1.5B Instruct (868MB)</option>
848848
<option value="qwen2.5-3b-instruct">Qwen2.5 3B Instruct (1.7GB)</option>
849+
<option value="qwen3-0.6b">Qwen3 0.6B (1.4GB)</option>
850+
<option value="qwen3-1.7b">Qwen3 1.7B (2.0GB)</option>
851+
<option value="qwen3-4b">Qwen3 4B (3.4GB)</option>
852+
<option value="qwen3-8b">Qwen3 8B (5.7GB)</option>
849853
<option value="gemma-2b-it">Gemma 2B Instruct (1.44GB)</option>
850854
<option value="phi-3.5-mini-instruct">Phi 3.5 Mini Instruct (2.1GB)</option>
851855
<option value="tinyllama-1.1b-chat-v0.4">TinyLlama 1.1B Chat (800MB)</option>

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
"homepage": "https://github.com/Cloud-Code-AI/BrowserAI#readme",
3838
"dependencies": {
3939
"@huggingface/jinja": "^0.3.3",
40-
"@mlc-ai/web-llm": "^0.2.78",
40+
"@mlc-ai/web-llm": "^0.2.79",
4141
"@types/pdfjs-dist": "^2.10.378",
4242
"mammoth": "^1.9.0",
4343
"onnxruntime-web": "1.21.0-dev.20250206-d981b153d3",

src/config/models/mlc-models.json

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"llama-3.2-1b-instruct": {
33
"engine": "mlc",
4-
"modelName": "Llama-3.2-1b-Instruct",
4+
"modelName": "Llama-3.2-1B-Instruct",
55
"modelType": "text-generation",
66
"repo": "mlc-ai/Llama-3.2-1B-Instruct-{quantization}-MLC",
77
"quantizations": [
@@ -323,5 +323,101 @@
323323
"q0f32": 66.4
324324
}
325325
}
326+
},
327+
"qwen3-0.6b": {
328+
"engine": "mlc",
329+
"modelName": "Qwen3-0.6B",
330+
"modelType": "text-generation",
331+
"repo": "mlc-ai/Qwen3-0.6B-{quantization}-MLC",
332+
"quantizations": [
333+
"q4f16_1",
334+
"q4f32_1",
335+
"q0f16",
336+
"q0f32"
337+
],
338+
"defaultQuantization": "q4f32_1",
339+
"defaultParams": {
340+
"temperature": 0.7,
341+
"maxTokens": 4096
342+
},
343+
"pipeline": "text-generation",
344+
"metadata": {
345+
"context_window_size": 4096,
346+
"estimated_vram_in_mb": {
347+
"q4f16_1": 1403.34,
348+
"q4f32_1": 1924.98,
349+
"q0f16": 2220.38,
350+
"q0f32": 3843.25
351+
}
352+
}
353+
},
354+
"qwen3-1.7b": {
355+
"engine": "mlc",
356+
"modelName": "Qwen3-1.7B",
357+
"modelType": "text-generation",
358+
"repo": "mlc-ai/Qwen3-1.7B-{quantization}-MLC",
359+
"quantizations": [
360+
"q4f16_1",
361+
"q4f32_1"
362+
],
363+
"defaultQuantization": "q4f32_1",
364+
"defaultParams": {
365+
"temperature": 0.7,
366+
"maxTokens": 4096
367+
},
368+
"pipeline": "text-generation",
369+
"metadata": {
370+
"context_window_size": 4096,
371+
"estimated_vram_in_mb": {
372+
"q4f16_1": 2036.66,
373+
"q4f32_1": 2635.44
374+
}
375+
}
376+
},
377+
"qwen3-4b": {
378+
"engine": "mlc",
379+
"modelName": "Qwen3-4B",
380+
"modelType": "text-generation",
381+
"repo": "mlc-ai/Qwen3-4B-{quantization}-MLC",
382+
"quantizations": [
383+
"q4f16_1",
384+
"q4f32_1"
385+
],
386+
"defaultQuantization": "q4f32_1",
387+
"defaultParams": {
388+
"temperature": 0.7,
389+
"maxTokens": 4096
390+
},
391+
"pipeline": "text-generation",
392+
"metadata": {
393+
"context_window_size": 4096,
394+
"estimated_vram_in_mb": {
395+
"q4f16_1": 3431.59,
396+
"q4f32_1": 4327.71
397+
}
398+
}
399+
},
400+
"qwen3-8b": {
401+
"engine": "mlc",
402+
"modelName": "Qwen3-8B",
403+
"modelType": "text-generation",
404+
"repo": "mlc-ai/Qwen3-8B-{quantization}-MLC",
405+
"quantizations": [
406+
"q4f16_1",
407+
"q4f32_1"
408+
],
409+
"defaultQuantization": "q4f32_1",
410+
"defaultParams": {
411+
"temperature": 0.7,
412+
"maxTokens": 4096
413+
},
414+
"pipeline": "text-generation",
415+
"metadata": {
416+
"context_window_size": 4096,
417+
"estimated_vram_in_mb": {
418+
"q4f16_1": 5695.78,
419+
"q4f32_1": 6852.55
420+
}
421+
}
326422
}
327423
}

0 commit comments

Comments (0)