williambarberjr committed on
Commit
9378174
·
verified ·
1 Parent(s): 1442400

Upload snr_results_meta-llama-Meta-Llama-3.1-8B_unfrozenparameters_50percent.yaml with huggingface_hub

Browse files
snr_results_meta-llama-Meta-Llama-3.1-8B_unfrozenparameters_50percent.yaml ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ unfrozen_parameters:
2
+ - ^lm_head.weight$
3
+ - ^model.embed_tokens.weight$
4
+ # input_layernorm layers
5
+ - model.layers.0.input_layernorm
6
+ - model.layers.1.input_layernorm
7
+ - model.layers.2.input_layernorm
8
+ - model.layers.3.input_layernorm
9
+ - model.layers.4.input_layernorm
10
+ - model.layers.5.input_layernorm
11
+ - model.layers.6.input_layernorm
12
+ - model.layers.7.input_layernorm
13
+ - model.layers.8.input_layernorm
14
+ - model.layers.9.input_layernorm
15
+ - model.layers.10.input_layernorm
16
+ - model.layers.11.input_layernorm
17
+ - model.layers.12.input_layernorm
18
+ - model.layers.13.input_layernorm
19
+ - model.layers.14.input_layernorm
20
+ - model.layers.15.input_layernorm
21
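+ # lm_head layers (no entries in this section; lm_head.weight is unfrozen by the ^lm_head.weight$ pattern at the top of the list)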
22
+ # mlp.down_proj layers
23
+ - model.layers.1.mlp.down_proj
24
+ - model.layers.0.mlp.down_proj
25
+ - model.layers.30.mlp.down_proj
26
+ - model.layers.2.mlp.down_proj
27
+ - model.layers.21.mlp.down_proj
28
+ - model.layers.22.mlp.down_proj
29
+ - model.layers.29.mlp.down_proj
30
+ - model.layers.5.mlp.down_proj
31
+ - model.layers.4.mlp.down_proj
32
+ - model.layers.20.mlp.down_proj
33
+ - model.layers.23.mlp.down_proj
34
+ - model.layers.19.mlp.down_proj
35
+ - model.layers.3.mlp.down_proj
36
+ - model.layers.17.mlp.down_proj
37
+ - model.layers.6.mlp.down_proj
38
+ - model.layers.31.mlp.down_proj
39
+ # mlp.gate_proj layers
40
+ - model.layers.1.mlp.gate_proj
41
+ - model.layers.2.mlp.gate_proj
42
+ - model.layers.3.mlp.gate_proj
43
+ - model.layers.4.mlp.gate_proj
44
+ - model.layers.0.mlp.gate_proj
45
+ - model.layers.25.mlp.gate_proj
46
+ - model.layers.26.mlp.gate_proj
47
+ - model.layers.5.mlp.gate_proj
48
+ - model.layers.24.mlp.gate_proj
49
+ - model.layers.28.mlp.gate_proj
50
+ - model.layers.23.mlp.gate_proj
51
+ - model.layers.27.mlp.gate_proj
52
+ - model.layers.21.mlp.gate_proj
53
+ - model.layers.22.mlp.gate_proj
54
+ - model.layers.29.mlp.gate_proj
55
+ - model.layers.20.mlp.gate_proj
56
+ # mlp.up_proj layers
57
+ - model.layers.4.mlp.up_proj
58
+ - model.layers.3.mlp.up_proj
59
+ - model.layers.0.mlp.up_proj
60
+ - model.layers.5.mlp.up_proj
61
+ - model.layers.7.mlp.up_proj
62
+ - model.layers.6.mlp.up_proj
63
+ - model.layers.2.mlp.up_proj
64
+ - model.layers.1.mlp.up_proj
65
+ - model.layers.8.mlp.up_proj
66
+ - model.layers.12.mlp.up_proj
67
+ - model.layers.14.mlp.up_proj
68
+ - model.layers.9.mlp.up_proj
69
+ - model.layers.15.mlp.up_proj
70
+ - model.layers.17.mlp.up_proj
71
+ - model.layers.13.mlp.up_proj
72
+ - model.layers.19.mlp.up_proj
73
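+ # model.embed_tokens layers (no entries in this section; model.embed_tokens.weight is unfrozen by the ^model.embed_tokens.weight$ pattern at the top of the list)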
74
+ # model.norm layers (no entries selected)
75
+ # post_attention_layernorm layers
76
+ - model.layers.0.post_attention_layernorm
77
+ - model.layers.1.post_attention_layernorm
78
+ - model.layers.2.post_attention_layernorm
79
+ - model.layers.3.post_attention_layernorm
80
+ - model.layers.4.post_attention_layernorm
81
+ - model.layers.5.post_attention_layernorm
82
+ - model.layers.6.post_attention_layernorm
83
+ - model.layers.7.post_attention_layernorm
84
+ - model.layers.8.post_attention_layernorm
85
+ - model.layers.9.post_attention_layernorm
86
+ - model.layers.10.post_attention_layernorm
87
+ - model.layers.11.post_attention_layernorm
88
+ - model.layers.12.post_attention_layernorm
89
+ - model.layers.13.post_attention_layernorm
90
+ - model.layers.14.post_attention_layernorm
91
+ - model.layers.15.post_attention_layernorm
92
+ # self_attn.k_proj layers
93
+ - model.layers.29.self_attn.k_proj
94
+ - model.layers.25.self_attn.k_proj
95
+ - model.layers.23.self_attn.k_proj
96
+ - model.layers.28.self_attn.k_proj
97
+ - model.layers.21.self_attn.k_proj
98
+ - model.layers.19.self_attn.k_proj
99
+ - model.layers.22.self_attn.k_proj
100
+ - model.layers.20.self_attn.k_proj
101
+ - model.layers.24.self_attn.k_proj
102
+ - model.layers.31.self_attn.k_proj
103
+ - model.layers.27.self_attn.k_proj
104
+ - model.layers.26.self_attn.k_proj
105
+ - model.layers.17.self_attn.k_proj
106
+ - model.layers.11.self_attn.k_proj
107
+ - model.layers.18.self_attn.k_proj
108
+ - model.layers.14.self_attn.k_proj
109
+ # self_attn.o_proj layers
110
+ - model.layers.14.self_attn.o_proj
111
+ - model.layers.7.self_attn.o_proj
112
+ - model.layers.5.self_attn.o_proj
113
+ - model.layers.11.self_attn.o_proj
114
+ - model.layers.6.self_attn.o_proj
115
+ - model.layers.24.self_attn.o_proj
116
+ - model.layers.9.self_attn.o_proj
117
+ - model.layers.13.self_attn.o_proj
118
+ - model.layers.10.self_attn.o_proj
119
+ - model.layers.12.self_attn.o_proj
120
+ - model.layers.8.self_attn.o_proj
121
+ - model.layers.25.self_attn.o_proj
122
+ - model.layers.21.self_attn.o_proj
123
+ - model.layers.23.self_attn.o_proj
124
+ - model.layers.15.self_attn.o_proj
125
+ - model.layers.16.self_attn.o_proj
126
+ # self_attn.q_proj layers
127
+ - model.layers.8.self_attn.q_proj
128
+ - model.layers.13.self_attn.q_proj
129
+ - model.layers.9.self_attn.q_proj
130
+ - model.layers.14.self_attn.q_proj
131
+ - model.layers.10.self_attn.q_proj
132
+ - model.layers.11.self_attn.q_proj
133
+ - model.layers.0.self_attn.q_proj
134
+ - model.layers.15.self_attn.q_proj
135
+ - model.layers.1.self_attn.q_proj
136
+ - model.layers.6.self_attn.q_proj
137
+ - model.layers.5.self_attn.q_proj
138
+ - model.layers.7.self_attn.q_proj
139
+ - model.layers.12.self_attn.q_proj
140
+ - model.layers.16.self_attn.q_proj
141
+ - model.layers.17.self_attn.q_proj
142
+ - model.layers.26.self_attn.q_proj
143
+ # self_attn.v_proj layers
144
+ - model.layers.26.self_attn.v_proj
145
+ - model.layers.17.self_attn.v_proj
146
+ - model.layers.3.self_attn.v_proj
147
+ - model.layers.28.self_attn.v_proj
148
+ - model.layers.29.self_attn.v_proj
149
+ - model.layers.21.self_attn.v_proj
150
+ - model.layers.15.self_attn.v_proj
151
+ - model.layers.16.self_attn.v_proj
152
+ - model.layers.20.self_attn.v_proj
153
+ - model.layers.25.self_attn.v_proj
154
+ - model.layers.6.self_attn.v_proj
155
+ - model.layers.23.self_attn.v_proj
156
+ - model.layers.4.self_attn.v_proj
157
+ - model.layers.1.self_attn.v_proj
158
+ - model.layers.22.self_attn.v_proj
159
+ - model.layers.14.self_attn.v_proj