awni committed on
Commit
bee7b46
·
verified ·
1 Parent(s): 7f05a79

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +33 -0
  2. config.json +69 -0
  3. model-00001-of-00088.safetensors +3 -0
  4. model-00002-of-00088.safetensors +3 -0
  5. model-00003-of-00088.safetensors +3 -0
  6. model-00004-of-00088.safetensors +3 -0
  7. model-00005-of-00088.safetensors +3 -0
  8. model-00006-of-00088.safetensors +3 -0
  9. model-00007-of-00088.safetensors +3 -0
  10. model-00008-of-00088.safetensors +3 -0
  11. model-00009-of-00088.safetensors +3 -0
  12. model-00010-of-00088.safetensors +3 -0
  13. model-00011-of-00088.safetensors +3 -0
  14. model-00012-of-00088.safetensors +3 -0
  15. model-00013-of-00088.safetensors +3 -0
  16. model-00014-of-00088.safetensors +3 -0
  17. model-00015-of-00088.safetensors +3 -0
  18. model-00016-of-00088.safetensors +3 -0
  19. model-00017-of-00088.safetensors +3 -0
  20. model-00018-of-00088.safetensors +3 -0
  21. model-00019-of-00088.safetensors +3 -0
  22. model-00020-of-00088.safetensors +3 -0
  23. model-00021-of-00088.safetensors +3 -0
  24. model-00022-of-00088.safetensors +3 -0
  25. model-00023-of-00088.safetensors +3 -0
  26. model-00024-of-00088.safetensors +3 -0
  27. model-00025-of-00088.safetensors +3 -0
  28. model-00026-of-00088.safetensors +3 -0
  29. model-00027-of-00088.safetensors +3 -0
  30. model-00028-of-00088.safetensors +3 -0
  31. model-00029-of-00088.safetensors +3 -0
  32. model-00030-of-00088.safetensors +3 -0
  33. model-00031-of-00088.safetensors +3 -0
  34. model-00032-of-00088.safetensors +3 -0
  35. model-00033-of-00088.safetensors +3 -0
  36. model-00034-of-00088.safetensors +3 -0
  37. model-00035-of-00088.safetensors +3 -0
  38. model-00036-of-00088.safetensors +3 -0
  39. model-00037-of-00088.safetensors +3 -0
  40. model-00038-of-00088.safetensors +3 -0
  41. model-00039-of-00088.safetensors +3 -0
  42. model-00040-of-00088.safetensors +3 -0
  43. model-00041-of-00088.safetensors +3 -0
  44. model-00042-of-00088.safetensors +3 -0
  45. model-00043-of-00088.safetensors +3 -0
  46. model-00044-of-00088.safetensors +3 -0
  47. model-00045-of-00088.safetensors +3 -0
  48. model-00046-of-00088.safetensors +3 -0
  49. model-00047-of-00088.safetensors +3 -0
  50. model-00048-of-00088.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: deepseek-ai/DeepSeek-R1
3
+ tags:
4
+ - mlx
5
+ ---
6
+
7
+ # mlx-community/DeepSeek-R1-4bit
8
+
9
+ The model [mlx-community/DeepSeek-R1-4bit](https://huggingface.co/mlx-community/DeepSeek-R1-4bit) was
10
+ converted to MLX format from [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
11
+ using mlx-lm version **0.21.0**.
12
+
13
+ ## Use with mlx
14
+
15
+ ```bash
16
+ pip install mlx-lm
17
+ ```
18
+
19
+ ```python
20
+ from mlx_lm import load, generate
21
+
22
+ model, tokenizer = load("mlx-community/DeepSeek-R1-4bit")
23
+
24
+ prompt = "hello"
25
+
26
+ if tokenizer.chat_template is not None:
27
+     messages = [{"role": "user", "content": prompt}]
28
+     prompt = tokenizer.apply_chat_template(
29
+         messages, add_generation_prompt=True
30
+     )
31
+
32
+ response = generate(model, tokenizer, prompt=prompt, verbose=True)
33
+ ```
config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DeepseekV3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_deepseek.DeepseekV3Config",
9
+ "AutoModel": "modeling_deepseek.DeepseekV3Model",
10
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV3ForCausalLM"
11
+ },
12
+ "aux_loss_alpha": 0.001,
13
+ "bos_token_id": 0,
14
+ "eos_token_id": 1,
15
+ "ep_size": 1,
16
+ "first_k_dense_replace": 3,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 7168,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 18432,
21
+ "kv_lora_rank": 512,
22
+ "max_position_embeddings": 163840,
23
+ "model_type": "deepseek_v3",
24
+ "moe_intermediate_size": 2048,
25
+ "moe_layer_freq": 1,
26
+ "n_group": 8,
27
+ "n_routed_experts": 256,
28
+ "n_shared_experts": 1,
29
+ "norm_topk_prob": true,
30
+ "num_attention_heads": 128,
31
+ "num_experts_per_tok": 8,
32
+ "num_hidden_layers": 61,
33
+ "num_key_value_heads": 128,
34
+ "num_nextn_predict_layers": 1,
35
+ "pretraining_tp": 1,
36
+ "q_lora_rank": 1536,
37
+ "qk_nope_head_dim": 128,
38
+ "qk_rope_head_dim": 64,
39
+ "quantization": {
40
+ "group_size": 32,
41
+ "bits": 4
42
+ },
43
+ "quantization_config": {
44
+ "group_size": 32,
45
+ "bits": 4
46
+ },
47
+ "rms_norm_eps": 1e-06,
48
+ "rope_scaling": {
49
+ "beta_fast": 32,
50
+ "beta_slow": 1,
51
+ "factor": 40,
52
+ "mscale": 1.0,
53
+ "mscale_all_dim": 1.0,
54
+ "original_max_position_embeddings": 4096,
55
+ "type": "yarn"
56
+ },
57
+ "rope_theta": 10000,
58
+ "routed_scaling_factor": 2.5,
59
+ "scoring_func": "sigmoid",
60
+ "seq_aux": true,
61
+ "tie_word_embeddings": false,
62
+ "topk_group": 4,
63
+ "topk_method": "noaux_tc",
64
+ "torch_dtype": "bfloat16",
65
+ "transformers_version": "4.46.3",
66
+ "use_cache": true,
67
+ "v_head_dim": 128,
68
+ "vocab_size": 129280
69
+ }
model-00001-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e1a9ad1ae3dc9a82c5680e4df8419cbd0c716e2b4efbb25db389da9a301d6e5
3
+ size 4139040883
model-00002-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:212d45f261464422936665176ce84df517a68683e7a09d88742d19eb1e1b678f
3
+ size 4845794023
model-00003-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c330182e4b0b999e14f88ec95a8d0e9be94baac5b808cc379713f185645b8fd
3
+ size 4697621266
model-00004-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9b95f158884ea9b00b3114639d94a293f22c5829a9036dca38e3575cb639aff
3
+ size 4845794093
model-00005-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7fa0425c84328012b8e1b818e928e93f5f41c7968b40015a250be381a8db3f6
3
+ size 4845794031
model-00006-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59ee6280c1bb7b50c395fd88054b0d87068f9a8d32b7eee26ad5a706a634ee9c
3
+ size 4697621262
model-00007-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d97f974564c367594f0ea1bb438b1a13f75470149ab55dbd3944bf59ff51b4dc
3
+ size 4845794091
model-00008-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:712c76c4175044191f965d5d523eb96030c643eaff28554967e7033efc193799
3
+ size 4845794003
model-00009-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:835ea13d926bc247ac70c15e891144b04823c0b5f0e21f0acba499a5673f5182
3
+ size 4697621266
model-00010-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfa2b036c4713302b85c2b21308e01f5c85a2e58fdac6bc7a3b13b6ccaab5885
3
+ size 4845794025
model-00011-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5fee492133ad0f4a1d60bec513bcd2fd792beda239057e433f037913e83ca8e
3
+ size 4845794032
model-00012-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3fc27225c7a6b1815fd40559b01e9cf7324ae8f12d9bb48723ace95254ed912
3
+ size 4697621272
model-00013-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2868c8f1dd2ce51b9de18e638ba6c312cf7155c9bf1adac72db0d5aeddbda557
3
+ size 4845794103
model-00014-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00a09c7a00c8539a654eae40e60da4ba8deaac79109659eb52decd79b2392b5f
3
+ size 4845794035
model-00015-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b594393f5ff4047248c7ab48260a061d04bb77223b0c6b95037da94a03385b04
3
+ size 4697621272
model-00016-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:282c73e9bc1da16fd2e1ec3b1bcd4ecf782d4b9d96305da063d0d08bc5d7692f
3
+ size 4845794097
model-00017-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b9b3d4b28c034bfd033267f96f4a80bfcca330d081b699fb82b627be392baed
3
+ size 4845794053
model-00018-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2ada8ced123a450b641a6fdfa0bd8237724d0456de93f262835257544d670f1
3
+ size 4697621268
model-00019-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8e984344a80029ab2e99fcaccfc86266e605dcb6219b65cb0eccf41fdade2f6
3
+ size 4845794105
model-00020-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a08d3e1cef5edd2b597cd3efaec7ee569dc0de916aa1b46fcfa9a0c543e5f5d
3
+ size 4845794065
model-00021-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41c3fff651eb02748125e2e2748a7db01a2524013640a2e60b2ac49902411d2b
3
+ size 4697621272
model-00022-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28b06c7b553fc2542a0ea4fa3b0f9eb43b701fda69f51ce7c7b50f6bca42019c
3
+ size 4845794083
model-00023-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d24e642b37e0eb080da776a9247a460f7075493cb7d9223db4930404653d030
3
+ size 4845794047
model-00024-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b4898b9302a8e15acdbcd5c2340a24dbafea096a4e701b040117c5f3959976
3
+ size 4697621274
model-00025-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:982c38a7c155583671ad8a97a925be54cf75d66fa3973f70ecfe5c93ef3870d8
3
+ size 4845794127
model-00026-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d6274f9a44f06275d73991a7afb552c6eb24588d8e4c93a4d19e6a5c6494983
3
+ size 4845794049
model-00027-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1779352d6a7f313473cc9a19b8e8d7eeecee843dd08f329bd10421bf65c7a12
3
+ size 4697621268
model-00028-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d71b175b8c3b27ebbe4410475fd9642e24349ed096051c26a9f50b3d81981df
3
+ size 4845794095
model-00029-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ca477f933ccef4897adb044a298d244ed25fa4ff2f362919d220b9446dac4f3
3
+ size 4845794015
model-00030-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c2183b03e2043f8453dd4cf8de4202862415aa2b89323f9c887b8cbb8c540c3
3
+ size 4697621270
model-00031-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29caa25c6808fd43f4972e42278cbed7427425cff7a5911757f7d555b8fd5b74
3
+ size 4845794129
model-00032-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c9920044ccf8437b46ff827b6cb0b1d281329eff0d54c319b07d6bff53d118d
3
+ size 4845794079
model-00033-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75cbad40518282888d8378222704f410c874391c1e012c69ce8fb81321252495
3
+ size 4697621266
model-00034-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74a9e1cbc19a7bd07a980a63471296fe201b3e7487278b5147cad5ad87a5c49d
3
+ size 4845794083
model-00035-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32dec2fc27150baaaa13b33c0cca40eeed32323de3e229a25519f1ecdabd9a5b
3
+ size 4845794057
model-00036-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfaead94a1faf27c4fd8b4a6e5418e3ea672f9fdbfaa0e1fb2433299c2f09f4d
3
+ size 4697621272
model-00037-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5734bc734ee090d28edcba8be0181aa7a74779e4aff5092e48ef9807ebf07192
3
+ size 4845794129
model-00038-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b45c1e799f39894142db25a082c11a33f5f484fc8c6c59e00ef9350e45e6638e
3
+ size 4845794041
model-00039-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7cbb8623216a76634f2cc3c140ff9f6f0ce1dd247c63c2d1670880a6c6b3fe7
3
+ size 4697621268
model-00040-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6dc12d88249ef32382c530557e145049b02fe26d1b714206fece5d449551ad4
3
+ size 4845794069
model-00041-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:959d96d060596d90e5e494c25429c3070940c9dc3fd76d2002db8622a3cf224a
3
+ size 4845794043
model-00042-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42b63dc25a11421ba5a562521c741944819b04fae54c1242ed63f00ad0af5e19
3
+ size 4697621272
model-00043-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c4eb5ad5aa3eba836a7e4367967decab272a1b478a61a50229b915d94355e5a
3
+ size 4845794127
model-00044-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:affe4c9156c6c631bcf8fefff22af829d48f9fdda9a3d3adac2ed0d9e3afde0a
3
+ size 4845794067
model-00045-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:129d1794b40eed71307a591fd38946c538141d2d15175654f2a9ec6a8cf0ca30
3
+ size 4697621272
model-00046-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e76827e32577fac5f8bece6edaff3d3c61e6bd9a88642eef5a1a8896616c28b
3
+ size 4845794127
model-00047-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:964ebc9e587814ae9105042028d75e42230db98325f8404a75f9ac97c9daa230
3
+ size 4845794031
model-00048-of-00088.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8de92fa42b4492d36f4ba96a8990adb6e1bf7cd7392e88a8f976b19ad91dda95
3
+ size 4697621272