Generate perplexity and kld scores
Browse files- scores/DeepSeek-R1-Distill-Llama-8B-iq3_m.log +37 -0
- scores/DeepSeek-R1-Distill-Llama-8B-iq3_s.log +37 -0
- scores/DeepSeek-R1-Distill-Llama-8B-iq4_nl.log +37 -0
- scores/DeepSeek-R1-Distill-Llama-8B-q3_k_l.log +37 -0
- scores/DeepSeek-R1-Distill-Llama-8B-q3_k_m.log +37 -0
- scores/DeepSeek-R1-Distill-Llama-8B-q3_k_s.log +37 -0
- scores/DeepSeek-R1-Distill-Llama-8B-q4_k_m.log +37 -0
- scores/DeepSeek-R1-Distill-Llama-8B-q4_k_s.log +37 -0
- scores/DeepSeek-R1-Distill-Llama-8B-q5_k_m.log +37 -0
- scores/DeepSeek-R1-Distill-Llama-8B-q5_k_s.log +37 -0
- scores/DeepSeek-R1-Distill-Llama-8B-q6_k.log +37 -0
- scores/DeepSeek-R1-Distill-Llama-8B-q8_0.log +37 -0
scores/DeepSeek-R1-Distill-Llama-8B-iq3_m.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 15.074649 ± 0.125392
|
3 |
+
Mean PPL(base) : 14.009216 ± 0.118474
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 96.24%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.073299 ± 0.002303
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.076052 ± 0.002479
|
7 |
+
Mean PPL(Q)-PPL(base) : 1.065433 ± 0.034120
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.231125 ± 0.001065
|
11 |
+
Maximum KLD: 15.317444
|
12 |
+
99.9% KLD: 4.158607
|
13 |
+
99.0% KLD: 1.925704
|
14 |
+
99.0% KLD: 1.925704
|
15 |
+
Median KLD: 0.108549
|
16 |
+
10.0% KLD: 0.004011
|
17 |
+
5.0% KLD: 0.000968
|
18 |
+
1.0% KLD: 0.000097
|
19 |
+
Minimum KLD: -0.000002
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -2.727 ± 0.032 %
|
23 |
+
Maximum Δp: 99.189%
|
24 |
+
99.9% Δp: 58.452%
|
25 |
+
99.0% Δp: 28.292%
|
26 |
+
95.0% Δp: 11.053%
|
27 |
+
90.0% Δp: 5.190%
|
28 |
+
75.0% Δp: 0.345%
|
29 |
+
Median Δp: -0.134%
|
30 |
+
25.0% Δp: -4.078%
|
31 |
+
10.0% Δp: -14.463%
|
32 |
+
5.0% Δp: -24.044%
|
33 |
+
1.0% Δp: -50.741%
|
34 |
+
0.1% Δp: -83.958%
|
35 |
+
Minimum Δp: -99.686%
|
36 |
+
RMS Δp : 12.503 ± 0.061 %
|
37 |
+
Same top p: 79.332 ± 0.107 %
|
scores/DeepSeek-R1-Distill-Llama-8B-iq3_s.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 15.334981 ± 0.127912
|
3 |
+
Mean PPL(base) : 14.009216 ± 0.118474
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 95.98%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.090421 ± 0.002383
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.094635 ± 0.002609
|
7 |
+
Mean PPL(Q)-PPL(base) : 1.325764 ± 0.036143
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.248986 ± 0.001138
|
11 |
+
Maximum KLD: 17.916763
|
12 |
+
99.9% KLD: 4.568046
|
13 |
+
99.0% KLD: 2.008440
|
14 |
+
99.0% KLD: 2.008440
|
15 |
+
Median KLD: 0.118888
|
16 |
+
10.0% KLD: 0.004193
|
17 |
+
5.0% KLD: 0.000998
|
18 |
+
1.0% KLD: 0.000099
|
19 |
+
Minimum KLD: -0.000002
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -2.891 ± 0.033 %
|
23 |
+
Maximum Δp: 99.825%
|
24 |
+
99.9% Δp: 60.879%
|
25 |
+
99.0% Δp: 28.581%
|
26 |
+
95.0% Δp: 11.317%
|
27 |
+
90.0% Δp: 5.294%
|
28 |
+
75.0% Δp: 0.331%
|
29 |
+
Median Δp: -0.141%
|
30 |
+
25.0% Δp: -4.267%
|
31 |
+
10.0% Δp: -15.067%
|
32 |
+
5.0% Δp: -25.229%
|
33 |
+
1.0% Δp: -52.525%
|
34 |
+
0.1% Δp: -84.834%
|
35 |
+
Minimum Δp: -99.864%
|
36 |
+
RMS Δp : 12.940 ± 0.062 %
|
37 |
+
Same top p: 78.391 ± 0.109 %
|
scores/DeepSeek-R1-Distill-Llama-8B-iq4_nl.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 14.150903 ± 0.119732
|
3 |
+
Mean PPL(base) : 14.009216 ± 0.118474
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.72%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.010063 ± 0.001356
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.010114 ± 0.001369
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.141687 ± 0.019129
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.081957 ± 0.000567
|
11 |
+
Maximum KLD: 9.373822
|
12 |
+
99.9% KLD: 2.712739
|
13 |
+
99.0% KLD: 0.913387
|
14 |
+
99.0% KLD: 0.913387
|
15 |
+
Median KLD: 0.029345
|
16 |
+
10.0% KLD: 0.000732
|
17 |
+
5.0% KLD: 0.000163
|
18 |
+
1.0% KLD: 0.000012
|
19 |
+
Minimum KLD: -0.000014
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.342 ± 0.019 %
|
23 |
+
Maximum Δp: 95.532%
|
24 |
+
99.9% Δp: 53.636%
|
25 |
+
99.0% Δp: 21.914%
|
26 |
+
95.0% Δp: 8.298%
|
27 |
+
90.0% Δp: 4.309%
|
28 |
+
75.0% Δp: 0.568%
|
29 |
+
Median Δp: -0.003%
|
30 |
+
25.0% Δp: -1.122%
|
31 |
+
10.0% Δp: -5.580%
|
32 |
+
5.0% Δp: -10.127%
|
33 |
+
1.0% Δp: -23.871%
|
34 |
+
0.1% Δp: -51.466%
|
35 |
+
Minimum Δp: -97.545%
|
36 |
+
RMS Δp : 7.192 ± 0.050 %
|
37 |
+
Same top p: 87.782 ± 0.086 %
|
scores/DeepSeek-R1-Distill-Llama-8B-q3_k_l.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 14.871865 ± 0.126955
|
3 |
+
Mean PPL(base) : 14.009216 ± 0.118474
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 97.50%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.059756 ± 0.001903
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.061577 ± 0.002020
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.862649 ± 0.028722
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.154127 ± 0.000850
|
11 |
+
Maximum KLD: 11.128032
|
12 |
+
99.9% KLD: 3.615189
|
13 |
+
99.0% KLD: 1.488761
|
14 |
+
99.0% KLD: 1.488761
|
15 |
+
Median KLD: 0.061692
|
16 |
+
10.0% KLD: 0.001610
|
17 |
+
5.0% KLD: 0.000344
|
18 |
+
1.0% KLD: 0.000026
|
19 |
+
Minimum KLD: -0.000067
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -1.010 ± 0.027 %
|
23 |
+
Maximum Δp: 97.798%
|
24 |
+
99.9% Δp: 57.985%
|
25 |
+
99.0% Δp: 26.935%
|
26 |
+
95.0% Δp: 11.279%
|
27 |
+
90.0% Δp: 5.918%
|
28 |
+
75.0% Δp: 0.729%
|
29 |
+
Median Δp: -0.010%
|
30 |
+
25.0% Δp: -1.838%
|
31 |
+
10.0% Δp: -8.856%
|
32 |
+
5.0% Δp: -16.093%
|
33 |
+
1.0% Δp: -39.077%
|
34 |
+
0.1% Δp: -76.112%
|
35 |
+
Minimum Δp: -99.152%
|
36 |
+
RMS Δp : 10.103 ± 0.058 %
|
37 |
+
Same top p: 83.151 ± 0.099 %
|
scores/DeepSeek-R1-Distill-Llama-8B-q3_k_m.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 15.028835 ± 0.128791
|
3 |
+
Mean PPL(base) : 14.009216 ± 0.118474
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 97.12%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.070255 ± 0.002045
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.072782 ± 0.002194
|
7 |
+
Mean PPL(Q)-PPL(base) : 1.019619 ± 0.031378
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.176316 ± 0.000924
|
11 |
+
Maximum KLD: 12.648300
|
12 |
+
99.9% KLD: 3.905615
|
13 |
+
99.0% KLD: 1.629902
|
14 |
+
99.0% KLD: 1.629902
|
15 |
+
Median KLD: 0.073991
|
16 |
+
10.0% KLD: 0.001869
|
17 |
+
5.0% KLD: 0.000395
|
18 |
+
1.0% KLD: 0.000032
|
19 |
+
Minimum KLD: -0.000021
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -1.132 ± 0.028 %
|
23 |
+
Maximum Δp: 98.577%
|
24 |
+
99.9% Δp: 60.593%
|
25 |
+
99.0% Δp: 28.113%
|
26 |
+
95.0% Δp: 12.093%
|
27 |
+
90.0% Δp: 6.356%
|
28 |
+
75.0% Δp: 0.792%
|
29 |
+
Median Δp: -0.010%
|
30 |
+
25.0% Δp: -2.011%
|
31 |
+
10.0% Δp: -9.787%
|
32 |
+
5.0% Δp: -17.659%
|
33 |
+
1.0% Δp: -41.512%
|
34 |
+
0.1% Δp: -79.331%
|
35 |
+
Minimum Δp: -99.621%
|
36 |
+
RMS Δp : 10.735 ± 0.059 %
|
37 |
+
Same top p: 82.001 ± 0.101 %
|
scores/DeepSeek-R1-Distill-Llama-8B-q3_k_s.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 15.513099 ± 0.129202
|
3 |
+
Mean PPL(base) : 14.009216 ± 0.118474
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 94.85%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.101969 ± 0.002698
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.107350 ± 0.002987
|
7 |
+
Mean PPL(Q)-PPL(base) : 1.503883 ± 0.041148
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.320092 ± 0.001344
|
11 |
+
Maximum KLD: 17.145338
|
12 |
+
99.9% KLD: 5.248430
|
13 |
+
99.0% KLD: 2.399956
|
14 |
+
99.0% KLD: 2.399956
|
15 |
+
Median KLD: 0.171489
|
16 |
+
10.0% KLD: 0.005972
|
17 |
+
5.0% KLD: 0.001335
|
18 |
+
1.0% KLD: 0.000109
|
19 |
+
Minimum KLD: -0.000000
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -3.246 ± 0.037 %
|
23 |
+
Maximum Δp: 99.963%
|
24 |
+
99.9% Δp: 71.232%
|
25 |
+
99.0% Δp: 34.565%
|
26 |
+
95.0% Δp: 13.472%
|
27 |
+
90.0% Δp: 6.248%
|
28 |
+
75.0% Δp: 0.411%
|
29 |
+
Median Δp: -0.166%
|
30 |
+
25.0% Δp: -5.212%
|
31 |
+
10.0% Δp: -17.956%
|
32 |
+
5.0% Δp: -28.544%
|
33 |
+
1.0% Δp: -57.576%
|
34 |
+
0.1% Δp: -87.811%
|
35 |
+
Minimum Δp: -99.472%
|
36 |
+
RMS Δp : 14.572 ± 0.064 %
|
37 |
+
Same top p: 75.385 ± 0.114 %
|
scores/DeepSeek-R1-Distill-Llama-8B-q4_k_m.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 14.049490 ± 0.119296
|
3 |
+
Mean PPL(base) : 14.009216 ± 0.118474
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.90%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.002871 ± 0.001257
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.002875 ± 0.001261
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.040274 ± 0.017652
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.071652 ± 0.000563
|
11 |
+
Maximum KLD: 9.861359
|
12 |
+
99.9% KLD: 2.837626
|
13 |
+
99.0% KLD: 0.888377
|
14 |
+
99.0% KLD: 0.888377
|
15 |
+
Median KLD: 0.024927
|
16 |
+
10.0% KLD: 0.000580
|
17 |
+
5.0% KLD: 0.000121
|
18 |
+
1.0% KLD: 0.000009
|
19 |
+
Minimum KLD: -0.000063
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.076 ± 0.017 %
|
23 |
+
Maximum Δp: 97.871%
|
24 |
+
99.9% Δp: 57.205%
|
25 |
+
99.0% Δp: 21.178%
|
26 |
+
95.0% Δp: 8.128%
|
27 |
+
90.0% Δp: 4.301%
|
28 |
+
75.0% Δp: 0.634%
|
29 |
+
Median Δp: -0.001%
|
30 |
+
25.0% Δp: -0.896%
|
31 |
+
10.0% Δp: -4.761%
|
32 |
+
5.0% Δp: -8.767%
|
33 |
+
1.0% Δp: -20.512%
|
34 |
+
0.1% Δp: -41.932%
|
35 |
+
Minimum Δp: -99.017%
|
36 |
+
RMS Δp : 6.634 ± 0.049 %
|
37 |
+
Same top p: 89.014 ± 0.082 %
|
scores/DeepSeek-R1-Distill-Llama-8B-q4_k_s.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 14.156039 ± 0.120378
|
3 |
+
Mean PPL(base) : 14.009216 ± 0.118474
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.86%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.010426 ± 0.001279
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.010480 ± 0.001292
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.146822 ± 0.018099
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.073625 ± 0.000505
|
11 |
+
Maximum KLD: 9.293770
|
12 |
+
99.9% KLD: 2.486929
|
13 |
+
99.0% KLD: 0.821050
|
14 |
+
99.0% KLD: 0.821050
|
15 |
+
Median KLD: 0.028050
|
16 |
+
10.0% KLD: 0.000652
|
17 |
+
5.0% KLD: 0.000138
|
18 |
+
1.0% KLD: 0.000010
|
19 |
+
Minimum KLD: -0.000033
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.182 ± 0.018 %
|
23 |
+
Maximum Δp: 97.527%
|
24 |
+
99.9% Δp: 53.244%
|
25 |
+
99.0% Δp: 20.616%
|
26 |
+
95.0% Δp: 8.379%
|
27 |
+
90.0% Δp: 4.475%
|
28 |
+
75.0% Δp: 0.663%
|
29 |
+
Median Δp: -0.001%
|
30 |
+
25.0% Δp: -0.955%
|
31 |
+
10.0% Δp: -5.172%
|
32 |
+
5.0% Δp: -9.361%
|
33 |
+
1.0% Δp: -22.373%
|
34 |
+
0.1% Δp: -47.265%
|
35 |
+
Minimum Δp: -98.736%
|
36 |
+
RMS Δp : 6.819 ± 0.047 %
|
37 |
+
Same top p: 88.499 ± 0.084 %
|
scores/DeepSeek-R1-Distill-Llama-8B-q5_k_m.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 14.094598 ± 0.119327
|
3 |
+
Mean PPL(base) : 14.009216 ± 0.118474
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.25%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.006076 ± 0.001033
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.006095 ± 0.001039
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.085381 ± 0.014539
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.045327 ± 0.000475
|
11 |
+
Maximum KLD: 7.581351
|
12 |
+
99.9% KLD: 2.318281
|
13 |
+
99.0% KLD: 0.782267
|
14 |
+
99.0% KLD: 0.782267
|
15 |
+
Median KLD: 0.008818
|
16 |
+
10.0% KLD: 0.000216
|
17 |
+
5.0% KLD: 0.000044
|
18 |
+
1.0% KLD: 0.000003
|
19 |
+
Minimum KLD: -0.000050
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.184 ± 0.014 %
|
23 |
+
Maximum Δp: 98.658%
|
24 |
+
99.9% Δp: 37.143%
|
25 |
+
99.0% Δp: 13.901%
|
26 |
+
95.0% Δp: 5.155%
|
27 |
+
90.0% Δp: 2.665%
|
28 |
+
75.0% Δp: 0.413%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.512%
|
31 |
+
10.0% Δp: -2.979%
|
32 |
+
5.0% Δp: -5.673%
|
33 |
+
1.0% Δp: -17.165%
|
34 |
+
0.1% Δp: -55.297%
|
35 |
+
Minimum Δp: -98.261%
|
36 |
+
RMS Δp : 5.344 ± 0.055 %
|
37 |
+
Same top p: 92.340 ± 0.070 %
|
scores/DeepSeek-R1-Distill-Llama-8B-q5_k_s.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 14.060444 ± 0.119091
|
3 |
+
Mean PPL(base) : 14.009216 ± 0.118474
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.49%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.003650 ± 0.000853
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.003657 ± 0.000856
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.051228 ± 0.011989
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.030597 ± 0.000278
|
11 |
+
Maximum KLD: 6.643202
|
12 |
+
99.9% KLD: 1.283375
|
13 |
+
99.0% KLD: 0.440988
|
14 |
+
99.0% KLD: 0.440988
|
15 |
+
Median KLD: 0.008143
|
16 |
+
10.0% KLD: 0.000194
|
17 |
+
5.0% KLD: 0.000041
|
18 |
+
1.0% KLD: 0.000003
|
19 |
+
Minimum KLD: -0.000057
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.114 ± 0.012 %
|
23 |
+
Maximum Δp: 87.529%
|
24 |
+
99.9% Δp: 32.065%
|
25 |
+
99.0% Δp: 12.407%
|
26 |
+
95.0% Δp: 4.823%
|
27 |
+
90.0% Δp: 2.536%
|
28 |
+
75.0% Δp: 0.383%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.498%
|
31 |
+
10.0% Δp: -2.851%
|
32 |
+
5.0% Δp: -5.251%
|
33 |
+
1.0% Δp: -13.971%
|
34 |
+
0.1% Δp: -37.086%
|
35 |
+
Minimum Δp: -94.993%
|
36 |
+
RMS Δp : 4.385 ± 0.043 %
|
37 |
+
Same top p: 93.015 ± 0.067 %
|
scores/DeepSeek-R1-Distill-Llama-8B-q6_k.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 13.846273 ± 0.117603
|
3 |
+
Mean PPL(base) : 14.009216 ± 0.118474
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.39%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : -0.011699 ± 0.000939
|
6 |
+
Mean PPL(Q)/PPL(base) : 0.988369 ± 0.000928
|
7 |
+
Mean PPL(Q)-PPL(base) : -0.162943 ± 0.013097
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.044284 ± 0.000722
|
11 |
+
Maximum KLD: 12.169687
|
12 |
+
99.9% KLD: 3.787726
|
13 |
+
99.0% KLD: 1.212246
|
14 |
+
99.0% KLD: 1.212246
|
15 |
+
Median KLD: 0.004281
|
16 |
+
10.0% KLD: 0.000100
|
17 |
+
5.0% KLD: 0.000020
|
18 |
+
1.0% KLD: 0.000001
|
19 |
+
Minimum KLD: -0.000077
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: 0.326 ± 0.013 %
|
23 |
+
Maximum Δp: 98.618%
|
24 |
+
99.9% Δp: 65.197%
|
25 |
+
99.0% Δp: 15.137%
|
26 |
+
95.0% Δp: 4.119%
|
27 |
+
90.0% Δp: 2.167%
|
28 |
+
75.0% Δp: 0.376%
|
29 |
+
Median Δp: 0.000%
|
30 |
+
25.0% Δp: -0.274%
|
31 |
+
10.0% Δp: -1.794%
|
32 |
+
5.0% Δp: -3.369%
|
33 |
+
1.0% Δp: -8.767%
|
34 |
+
0.1% Δp: -26.074%
|
35 |
+
Minimum Δp: -85.045%
|
36 |
+
RMS Δp : 4.831 ± 0.068 %
|
37 |
+
Same top p: 94.289 ± 0.061 %
|
scores/DeepSeek-R1-Distill-Llama-8B-q8_0.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 14.034924 ± 0.119030
|
3 |
+
Mean PPL(base) : 14.009216 ± 0.118474
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.93%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.001833 ± 0.000321
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.001835 ± 0.000322
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.025708 ± 0.004525
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.001945 ± 0.000033
|
11 |
+
Maximum KLD: 2.361588
|
12 |
+
99.9% KLD: 0.088093
|
13 |
+
99.0% KLD: 0.026191
|
14 |
+
99.0% KLD: 0.026191
|
15 |
+
Median KLD: 0.000515
|
16 |
+
10.0% KLD: 0.000015
|
17 |
+
5.0% KLD: 0.000003
|
18 |
+
1.0% KLD: -0.000000
|
19 |
+
Minimum KLD: -0.000085
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.066 ± 0.003 %
|
23 |
+
Maximum Δp: 35.566%
|
24 |
+
99.9% Δp: 8.162%
|
25 |
+
99.0% Δp: 3.022%
|
26 |
+
95.0% Δp: 1.092%
|
27 |
+
90.0% Δp: 0.545%
|
28 |
+
75.0% Δp: 0.066%
|
29 |
+
Median Δp: -0.001%
|
30 |
+
25.0% Δp: -0.166%
|
31 |
+
10.0% Δp: -0.787%
|
32 |
+
5.0% Δp: -1.385%
|
33 |
+
1.0% Δp: -3.441%
|
34 |
+
0.1% Δp: -8.874%
|
35 |
+
Minimum Δp: -78.643%
|
36 |
+
RMS Δp : 1.131 ± 0.027 %
|
37 |
+
Same top p: 98.203 ± 0.035 %
|