mobicham committed on
Commit
2edfda3
·
verified ·
1 Parent(s): 5ba05de

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -8,7 +8,7 @@ This is a version of the <a href="https://huggingface.co/deepseek-ai/DeepSeek-R1
8
 
9
  ## Performance
10
 
11
- | Models | <a href="https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B">DeepSeek-R1-Distill-Qwen-1.5B</a> | <a href="https://huggingface.co/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-1.5B-v1.1">DeepSeek-R1-ReDistill-Qwen-1.5B-v1.1</a> |
12
  |:-------------------:|:--------:|:----------------:|
13
  | ARC (25-shot) | 40.96 | <b>41.3</b> |
14
  | HellaSwag (10-shot)| 44 | <b>45.22</b> |
@@ -18,7 +18,7 @@ This is a version of the <a href="https://huggingface.co/deepseek-ai/DeepSeek-R1
18
  | GSM8K (5-shot) | 69.9 | <b>73.24</b> |
19
  | Average | 49.13 | <b>50.86</b> |
20
 
21
- | Models | <a href="https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B">DeepSeek-R1-Distill-Qwen-1.5B</a> | <a href="https://huggingface.co/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-1.5B-v1.1">DeepSeek-R1-ReDistill-Qwen-1.5B-v1.1</a> |
22
  |:-------------------:|:--------:|:----------------:|
23
  | GPQA (0-shot) | 26.96 | <b>27.8</b> |
24
  | MMLU PRO (5-shot) | 16.74 | <b>19.44</b> |
@@ -32,7 +32,7 @@ import torch
32
  from transformers import AutoModelForCausalLM, AutoTokenizer
33
  compute_dtype = torch.bfloat16
34
  device = 'cuda'
35
- model_id = "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-1.5B-v1.1"
36
 
37
  model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=compute_dtype, attn_implementation="sdpa", device_map=device)
38
  tokenizer = AutoTokenizer.from_pretrained(model_id)
 
8
 
9
  ## Performance
10
 
11
+ | Models | <a href="https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B">DeepSeek-R1-Distill-Qwen-1.5B</a> | <a href="https://huggingface.co/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-1.5B-v1.0">DeepSeek-R1-ReDistill-Qwen-1.5B-v1.0</a> |
12
  |:-------------------:|:--------:|:----------------:|
13
  | ARC (25-shot) | 40.96 | <b>41.3</b> |
14
  | HellaSwag (10-shot)| 44 | <b>45.22</b> |
 
18
  | GSM8K (5-shot) | 69.9 | <b>73.24</b> |
19
  | Average | 49.13 | <b>50.86</b> |
20
 
21
+ | Models | <a href="https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B">DeepSeek-R1-Distill-Qwen-1.5B</a> | <a href="https://huggingface.co/mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-1.5B-v1.0">DeepSeek-R1-ReDistill-Qwen-1.5B-v1.0</a> |
22
  |:-------------------:|:--------:|:----------------:|
23
  | GPQA (0-shot) | 26.96 | <b>27.8</b> |
24
  | MMLU PRO (5-shot) | 16.74 | <b>19.44</b> |
 
32
  from transformers import AutoModelForCausalLM, AutoTokenizer
33
  compute_dtype = torch.bfloat16
34
  device = 'cuda'
35
+ model_id = "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-1.5B-v1.0"
36
 
37
  model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=compute_dtype, attn_implementation="sdpa", device_map=device)
38
  tokenizer = AutoTokenizer.from_pretrained(model_id)