magnum-v4-123b quantized to 4-bit precision using [HQQ](https://github.com/mobiusml/hqq/).

HQQ provides a similar level of precision to AWQ at 4-bit, but with no need for calibration data.

This quant was generated on 8xA40s in only 10 minutes.

```py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, HqqConfig

model_path = "anthracite-org/magnum-v4-123b"

# 4-bit HQQ config: group size 128, grouping along axis 1
quant_config = HqqConfig(nbits=4, group_size=128, axis=1)

# The weights are quantized on the fly as the model is loaded
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    cache_dir=".",
    device_map="cuda:0",
    quantization_config=quant_config,
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Save the quantized model and tokenizer
output_path = "magnum-v4-123b-hqq-4bit"
model.save_pretrained(output_path)
tokenizer.save_pretrained(output_path)
```
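
For completeness, here is a minimal loading/inference sketch (not part of the original quantization script). It assumes a transformers version that can reload HQQ-serialized checkpoints and a GPU with enough memory for the 4-bit weights; the local path and the prompt are just placeholders.

```py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Path produced by save_pretrained above; a Hub repo id would work the same way
quant_path = "magnum-v4-123b-hqq-4bit"

model = AutoModelForCausalLM.from_pretrained(
    quant_path,
    torch_dtype=torch.float16,
    device_map="cuda:0",
)
tokenizer = AutoTokenizer.from_pretrained(quant_path)

prompt = "Write a short poem about quantization."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```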