Commit
·
108c075
1
Parent(s):
fbe26aa
Add weights
Browse files- README.md +8 -0
- config.json +5 -0
- ndarray-cache.json +0 -0
- params_shard_0.bin +3 -0
- params_shard_1.bin +3 -0
- params_shard_10.bin +3 -0
- params_shard_11.bin +3 -0
- params_shard_12.bin +3 -0
- params_shard_13.bin +3 -0
- params_shard_14.bin +3 -0
- params_shard_15.bin +3 -0
- params_shard_16.bin +3 -0
- params_shard_17.bin +3 -0
- params_shard_18.bin +3 -0
- params_shard_19.bin +3 -0
- params_shard_2.bin +3 -0
- params_shard_20.bin +3 -0
- params_shard_21.bin +3 -0
- params_shard_22.bin +3 -0
- params_shard_23.bin +3 -0
- params_shard_24.bin +3 -0
- params_shard_25.bin +3 -0
- params_shard_26.bin +3 -0
- params_shard_27.bin +3 -0
- params_shard_28.bin +3 -0
- params_shard_29.bin +3 -0
- params_shard_3.bin +3 -0
- params_shard_4.bin +3 -0
- params_shard_5.bin +3 -0
- params_shard_6.bin +3 -0
- params_shard_7.bin +3 -0
- params_shard_8.bin +3 -0
- params_shard_9.bin +3 -0
- private-llm-config.json +45 -0
- tokenizer.json +0 -0
- tokenizer_config.json +35 -0
README.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1 |
---
|
|
|
|
|
2 |
license: mit
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
1 |
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
license: mit
|
5 |
+
base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
|
6 |
+
base_model_relation: quantized
|
7 |
+
library_name: mlc-llm
|
8 |
+
pipeline_tag: text-generation
|
9 |
---
|
10 |
+
|
11 |
+
This repository contains a 4-bit [OmniQuant](https://arxiv.org/abs/2308.13137) quantized version of [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) for inference with the [Private LLM](https://privatellm.app) app.
|
config.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"quantization_config": {
|
3 |
+
"bits": 4
|
4 |
+
}
|
5 |
+
}
|
ndarray-cache.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
params_shard_0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a60bad787360d8b80306ba4d6f78398d5737bf1b9b3b843270bc2268a15f5f9e
|
3 |
+
size 116686848
|
params_shard_1.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52352b924e3eacc1e8ddf01236ccb90fbea9c2668a835fe6d2a234edafed4edd
|
3 |
+
size 116686848
|
params_shard_10.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ddd78963ddda4f0f900669c5997656882566ed0490694f8ea2c0c39b95ee950
|
3 |
+
size 24137728
|
params_shard_11.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98972f8b318330e2956d5a69185bb2668dbe661ab0239be6fca9366d5340a599
|
3 |
+
size 24137728
|
params_shard_12.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aea186173c89142c24c32a3ace9bd5c55eb8f1d1f9ffc16dff972dfdb0365c98
|
3 |
+
size 24137728
|
params_shard_13.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f415097b83921aaf26f429026e1b0ebba520eceb3bc591b4d63904db0e81aae5
|
3 |
+
size 24137728
|
params_shard_14.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa3f8ff3c21d66fa57e4ced81ebdc9c3469096cb78f9a5a9b46f99f7c26d05bf
|
3 |
+
size 24137728
|
params_shard_15.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad3dff0dbb0e45f0bab78f1fb27c3083c07b1734fc73d3c8f9ab94be7c831a67
|
3 |
+
size 24137728
|
params_shard_16.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d97a9ead5baa8791337ebf492b43eee2c7e879d18a59e025422d41efce91e41
|
3 |
+
size 24137728
|
params_shard_17.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ec70ed8cef7aef68f847ac4263cf788d3d56a96406b5fdbafc93fcda34dc206
|
3 |
+
size 24137728
|
params_shard_18.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55ee2815ab3bcbefcc82af77cdd115bbc14fb4ed1221da968c0346bb82f1df36
|
3 |
+
size 24137728
|
params_shard_19.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e7ee7977605d261447b45958f56cd99816b34919c07c2b4b04721dd1238e2c9
|
3 |
+
size 24137728
|
params_shard_2.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ad7a9a4504510810a2563f14963d220fa8c8ddcb091c32851b12479a6aa144d
|
3 |
+
size 31433728
|
params_shard_20.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1942307f5577f60f12698577e276c1787f86e0748c18718e70bf8eee6401e383
|
3 |
+
size 24137728
|
params_shard_21.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4811ec0f90fb20162f126dda4ef9bfbdd4f39be958a041b2abbc54c4e322a4c
|
3 |
+
size 24137728
|
params_shard_22.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:452bfd3df249085983f981d7f16001c0b8640d22eed651ab93301a94ae73cbef
|
3 |
+
size 24137728
|
params_shard_23.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00e742c3dc08d1629420721248e914bd6c67da735427a957463f29cf68c90594
|
3 |
+
size 24137728
|
params_shard_24.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9ee26477fcdcae6ffbe1c3c0bc6427e6c35637ead7378f6c28ad65b50ee6b9c
|
3 |
+
size 24137728
|
params_shard_25.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e465a8c446b594c817ef22b4b098d1121b21c00ab38295a47341ddd541d7828e
|
3 |
+
size 24137728
|
params_shard_26.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c401df0bc9ff022a028bc8272de1bc6c0cbf86774a4e6ba8c5f4b965b6cea25
|
3 |
+
size 24137728
|
params_shard_27.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3e6a1e2daaa762cbd61ec823d82fa2c266fb69c796c03d676c058fd8da31aeb
|
3 |
+
size 24137728
|
params_shard_28.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:221a13a69815402c2ee244e629d5a7d9ca69947b7c6bfe7ca40d5979eb21bd0b
|
3 |
+
size 24137728
|
params_shard_29.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a854b7ea016d3a8918eceea50eaef8aa8e1c28902126e0fd7f549352ce2b6bd0
|
3 |
+
size 17041408
|
params_shard_3.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b74982a14f684d051e1796d3fd658d8eca99fbff67cd16369bd09d107ebd6eb
|
3 |
+
size 31234048
|
params_shard_4.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d40ba07eb14ea983ac7ca38d206787fbf812b2be7bbcf6878653bc34c3c3982
|
3 |
+
size 24137728
|
params_shard_5.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56b9cc40d4c43bbcc0ef2a59bd01b695cc2cf3937809a1520acf821333440a2f
|
3 |
+
size 24137728
|
params_shard_6.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20cbb05346d4cd73e4fec49faa56583d474d9f15422a5bde7f05d439eae68792
|
3 |
+
size 24137728
|
params_shard_7.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36d960800014ceb8bdb63b787b84d2079c5241d4727867a989ddb10809e420a1
|
3 |
+
size 24137728
|
params_shard_8.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6c69ff52531c46533d45ae896ddb6bb6eb9f3bd24b69ff52604b05d6509ec6d
|
3 |
+
size 24137728
|
params_shard_9.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5c9086d063f764faea83951fe83a24707d143f34da56888e23ebebec1f17412
|
3 |
+
size 24137728
|
private-llm-config.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model_type": "qwen2",
|
3 |
+
"quantization": "GPTQ-Int4",
|
4 |
+
"model_config": {
|
5 |
+
"hidden_act": "silu",
|
6 |
+
"hidden_size": 1536,
|
7 |
+
"intermediate_size": 8960,
|
8 |
+
"num_attention_heads": 12,
|
9 |
+
"num_hidden_layers": 28,
|
10 |
+
"num_key_value_heads": 2,
|
11 |
+
"rms_norm_eps": 1e-06,
|
12 |
+
"rope_theta": 10000,
|
13 |
+
"vocab_size": 151936,
|
14 |
+
"tie_word_embeddings": false,
|
15 |
+
"context_window_size": 8192,
|
16 |
+
"prefill_chunk_size": 128,
|
17 |
+
"tensor_parallel_shards": 1,
|
18 |
+
"head_dim": 128,
|
19 |
+
"dtype": "float32",
|
20 |
+
"max_batch_size": 80
|
21 |
+
},
|
22 |
+
"vocab_size": 151936,
|
23 |
+
"context_window_size": 8192,
|
24 |
+
"sliding_window_size": -1,
|
25 |
+
"prefill_chunk_size": 128,
|
26 |
+
"attention_sink_size": -1,
|
27 |
+
"tensor_parallel_shards": 1,
|
28 |
+
"mean_gen_len": 512,
|
29 |
+
"max_gen_len": 4096,
|
30 |
+
"shift_fill_factor": 0.3,
|
31 |
+
"temperature": 0.7,
|
32 |
+
"presence_penalty": 0.0,
|
33 |
+
"frequency_penalty": 0.0,
|
34 |
+
"repetition_penalty": 1.2,
|
35 |
+
"top_p": 0.95,
|
36 |
+
"conv_template": "deepseek-r1-qwen",
|
37 |
+
"pad_token_id": 0,
|
38 |
+
"bos_token_id": 151643,
|
39 |
+
"eos_token_id": 151643,
|
40 |
+
"tokenizer_files": [
|
41 |
+
"tokenizer.json",
|
42 |
+
"tokenizer_config.json"
|
43 |
+
],
|
44 |
+
"version": "0.1.0"
|
45 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"bos_token": {
|
5 |
+
"__type": "AddedToken",
|
6 |
+
"content": "<|begin▁of▁sentence|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": true,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false
|
11 |
+
},
|
12 |
+
"clean_up_tokenization_spaces": false,
|
13 |
+
"eos_token": {
|
14 |
+
"__type": "AddedToken",
|
15 |
+
"content": "<|end▁of▁sentence|>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": true,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false
|
20 |
+
},
|
21 |
+
"legacy": true,
|
22 |
+
"model_max_length": 16384,
|
23 |
+
"pad_token": {
|
24 |
+
"__type": "AddedToken",
|
25 |
+
"content": "<|end▁of▁sentence|>",
|
26 |
+
"lstrip": false,
|
27 |
+
"normalized": true,
|
28 |
+
"rstrip": false,
|
29 |
+
"single_word": false
|
30 |
+
},
|
31 |
+
"sp_model_kwargs": {},
|
32 |
+
"unk_token": null,
|
33 |
+
"tokenizer_class": "LlamaTokenizerFast",
|
34 |
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}"
|
35 |
+
}
|