Update README.md
Browse files
README.md
CHANGED
@@ -1,28 +1,92 @@
|
|
1 |
---
|
2 |
-
base_model:
|
|
|
|
|
|
|
|
|
3 |
library_name: transformers
|
4 |
tags:
|
5 |
- mergekit
|
6 |
- merge
|
7 |
|
8 |
---
|
9 |
-
#
|
10 |
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
## Merge Details
|
14 |
-
###
|
15 |
|
16 |
-
|
17 |
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
|
21 |
-
* Step1
|
22 |
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
-
|
26 |
|
27 |
```yaml
|
28 |
models:
|
@@ -34,5 +98,4 @@ parameters:
|
|
34 |
select_topk:
|
35 |
- value: [0.3, 0.35, 0.4, 0.35, 0.2]
|
36 |
dtype: bfloat16
|
37 |
-
|
38 |
-
```
|
|
|
1 |
---
|
2 |
+
base_model:
|
3 |
+
- allura-org/Qwen2.5-32b-RP-Ink
|
4 |
+
- deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
|
5 |
+
- Aryanne/QwentileSwap
|
6 |
+
- Daemontatox/Cogito-Ultima
|
7 |
library_name: transformers
|
8 |
tags:
|
9 |
- mergekit
|
10 |
- merge
|
11 |
|
12 |
---
|
13 |
+
# Qwetiapin
|
14 |
|
15 |
+
> There's no 'I' in 'brain damage'
|
16 |
+
|
17 |
+
![]()
|
18 |
+
|
19 |
+
### Overview
|
20 |
+
|
21 |
+
*Overview to be added.*
|
22 |
+
|
23 |
+
### Quants
|
24 |
+
|
25 |
+
-
|
26 |
|
27 |
## Merge Details
|
28 |
+
### Merging Steps
|
29 |
|
30 |
+
### Step1
|
31 |
|
32 |
+
```yaml
|
33 |
+
dtype: bfloat16
|
34 |
+
tokenizer_source: base
|
35 |
+
merge_method: della_linear
|
36 |
+
parameters:
|
37 |
+
  density: 0.5
|
38 |
+
  epsilon: 0.4
|
39 |
+
  lambda: 1.1
|
40 |
+
base_model: allura-org/Qwen2.5-32b-RP-Ink
|
41 |
+
models:
|
42 |
+
- model: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
|
43 |
+
  parameters:
|
44 |
+
    weight:
|
45 |
+
      - filter: v_proj
|
46 |
+
        value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
|
47 |
+
      - filter: o_proj
|
48 |
+
        value: [1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1]
|
49 |
+
      - filter: up_proj
|
50 |
+
        value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
|
51 |
+
      - filter: gate_proj
|
52 |
+
        value: [0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0]
|
53 |
+
      - filter: down_proj
|
54 |
+
        value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
55 |
+
      - value: 0
|
56 |
+
- model: allura-org/Qwen2.5-32b-RP-Ink
|
57 |
+
  parameters:
|
57 |
+
    weight:
|
58 |
+
      - filter: v_proj
|
59 |
+
        value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
|
60 |
+
      - filter: o_proj
|
61 |
+
        value: [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0]
|
62 |
+
      - filter: up_proj
|
63 |
+
        value: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
|
64 |
+
      - filter: gate_proj
|
65 |
+
        value: [1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1]
|
66 |
+
      - filter: down_proj
|
67 |
+
        value: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
|
68 |
+
      - value: 1
|
70 |
+
```
|
71 |
|
72 |
+
### Step2
|
|
|
73 |
|
74 |
+
```yaml
|
75 |
+
models:
|
76 |
+
- model: Aryanne/QwentileSwap
|
77 |
+
  parameters:
|
78 |
+
    weight: [1.0, 0.9, 0.8, 0.9, 1.0]
|
79 |
+
- model: Daemontatox/Cogito-Ultima
|
80 |
+
  parameters:
|
81 |
+
    weight: [0, 0.1, 0.2, 0.1, 0]
|
82 |
+
merge_method: nuslerp
|
83 |
+
parameters:
|
84 |
+
  nuslerp_row_wise: true
|
85 |
+
dtype: bfloat16
|
86 |
+
tokenizer_source: base
|
87 |
+
```
|
88 |
|
89 |
+
### Step3
|
90 |
|
91 |
```yaml
|
92 |
models:
|
|
|
98 |
select_topk:
|
99 |
- value: [0.3, 0.35, 0.4, 0.35, 0.2]
|
100 |
dtype: bfloat16
|
101 |
+
```
|
|