Generate perplexity and kld scores
Browse files- scores/hammer2.1-7b-iq3_m.log +37 -0
- scores/hammer2.1-7b-iq3_s.log +37 -0
- scores/hammer2.1-7b-iq4_nl.log +37 -0
- scores/hammer2.1-7b-q3_k_l.log +37 -0
- scores/hammer2.1-7b-q3_k_m.log +37 -0
- scores/hammer2.1-7b-q3_k_s.log +37 -0
- scores/hammer2.1-7b-q4_k_m.log +37 -0
- scores/hammer2.1-7b-q4_k_s.log +37 -0
- scores/hammer2.1-7b-q5_k_m.log +37 -0
- scores/hammer2.1-7b-q5_k_s.log +37 -0
- scores/hammer2.1-7b-q6_k.log +37 -0
- scores/hammer2.1-7b-q8_0.log +37 -0
scores/hammer2.1-7b-iq3_m.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 10.349753 ± 0.074664
|
3 |
+
Mean PPL(base) : 10.036835 ± 0.072696
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.18%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.030701 ± 0.000925
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.031177 ± 0.000954
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.312918 ± 0.009631
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.048768 ± 0.000209
|
11 |
+
Maximum KLD: 3.758030
|
12 |
+
99.9% KLD: 0.887181
|
13 |
+
99.0% KLD: 0.364024
|
14 |
+
99.0% KLD: 0.364024
|
15 |
+
Median KLD: 0.029039
|
16 |
+
10.0% KLD: 0.001092
|
17 |
+
5.0% KLD: 0.000244
|
18 |
+
1.0% KLD: 0.000018
|
19 |
+
Minimum KLD: -0.000019
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -1.020 ± 0.015 %
|
23 |
+
Maximum Δp: 79.699%
|
24 |
+
99.9% Δp: 30.611%
|
25 |
+
99.0% Δp: 14.151%
|
26 |
+
95.0% Δp: 5.746%
|
27 |
+
90.0% Δp: 2.903%
|
28 |
+
75.0% Δp: 0.312%
|
29 |
+
Median Δp: -0.060%
|
30 |
+
25.0% Δp: -1.929%
|
31 |
+
10.0% Δp: -6.302%
|
32 |
+
5.0% Δp: -10.024%
|
33 |
+
1.0% Δp: -20.875%
|
34 |
+
0.1% Δp: -43.369%
|
35 |
+
Minimum Δp: -76.104%
|
36 |
+
RMS Δp : 5.764 ± 0.034 %
|
37 |
+
Same top p: 89.288 ± 0.080 %
|
scores/hammer2.1-7b-iq3_s.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 10.401451 ± 0.075250
|
3 |
+
Mean PPL(base) : 10.036835 ± 0.072696
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.14%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.035684 ± 0.000949
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.036328 ± 0.000983
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.364616 ± 0.010024
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.050746 ± 0.000215
|
11 |
+
Maximum KLD: 3.334370
|
12 |
+
99.9% KLD: 0.923916
|
13 |
+
99.0% KLD: 0.376239
|
14 |
+
99.0% KLD: 0.376239
|
15 |
+
Median KLD: 0.030081
|
16 |
+
10.0% KLD: 0.001119
|
17 |
+
5.0% KLD: 0.000250
|
18 |
+
1.0% KLD: 0.000020
|
19 |
+
Minimum KLD: -0.000007
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.975 ± 0.015 %
|
23 |
+
Maximum Δp: 72.807%
|
24 |
+
99.9% Δp: 31.246%
|
25 |
+
99.0% Δp: 14.309%
|
26 |
+
95.0% Δp: 6.030%
|
27 |
+
90.0% Δp: 3.075%
|
28 |
+
75.0% Δp: 0.343%
|
29 |
+
Median Δp: -0.055%
|
30 |
+
25.0% Δp: -1.867%
|
31 |
+
10.0% Δp: -6.234%
|
32 |
+
5.0% Δp: -10.091%
|
33 |
+
1.0% Δp: -21.519%
|
34 |
+
0.1% Δp: -44.651%
|
35 |
+
Minimum Δp: -88.497%
|
36 |
+
RMS Δp : 5.862 ± 0.035 %
|
37 |
+
Same top p: 89.036 ± 0.081 %
|
scores/hammer2.1-7b-iq4_nl.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 10.188365 ± 0.074008
|
3 |
+
Mean PPL(base) : 10.036835 ± 0.072696
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.71%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.014985 ± 0.000549
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.015097 ± 0.000557
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.151531 ± 0.005697
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.016340 ± 0.000073
|
11 |
+
Maximum KLD: 1.379349
|
12 |
+
99.9% KLD: 0.306994
|
13 |
+
99.0% KLD: 0.123776
|
14 |
+
99.0% KLD: 0.123776
|
15 |
+
Median KLD: 0.009530
|
16 |
+
10.0% KLD: 0.000284
|
17 |
+
5.0% KLD: 0.000056
|
18 |
+
1.0% KLD: 0.000003
|
19 |
+
Minimum KLD: -0.000110
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.221 ± 0.009 %
|
23 |
+
Maximum Δp: 58.518%
|
24 |
+
99.9% Δp: 19.108%
|
25 |
+
99.0% Δp: 9.095%
|
26 |
+
95.0% Δp: 4.085%
|
27 |
+
90.0% Δp: 2.282%
|
28 |
+
75.0% Δp: 0.400%
|
29 |
+
Median Δp: -0.002%
|
30 |
+
25.0% Δp: -0.680%
|
31 |
+
10.0% Δp: -2.932%
|
32 |
+
5.0% Δp: -5.028%
|
33 |
+
1.0% Δp: -11.387%
|
34 |
+
0.1% Δp: -25.096%
|
35 |
+
Minimum Δp: -63.543%
|
36 |
+
RMS Δp : 3.306 ± 0.023 %
|
37 |
+
Same top p: 93.581 ± 0.064 %
|
scores/hammer2.1-7b-q3_k_l.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 10.455858 ± 0.076831
|
3 |
+
Mean PPL(base) : 10.036835 ± 0.072696
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.30%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.040901 ± 0.000871
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.041749 ± 0.000907
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.419023 ± 0.009775
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.039060 ± 0.000178
|
11 |
+
Maximum KLD: 5.251009
|
12 |
+
99.9% KLD: 0.766137
|
13 |
+
99.0% KLD: 0.290706
|
14 |
+
99.0% KLD: 0.290706
|
15 |
+
Median KLD: 0.022797
|
16 |
+
10.0% KLD: 0.000638
|
17 |
+
5.0% KLD: 0.000122
|
18 |
+
1.0% KLD: 0.000007
|
19 |
+
Minimum KLD: -0.000132
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.404 ± 0.014 %
|
23 |
+
Maximum Δp: 84.448%
|
24 |
+
99.9% Δp: 27.062%
|
25 |
+
99.0% Δp: 13.834%
|
26 |
+
95.0% Δp: 6.500%
|
27 |
+
90.0% Δp: 3.713%
|
28 |
+
75.0% Δp: 0.682%
|
29 |
+
Median Δp: -0.003%
|
30 |
+
25.0% Δp: -1.065%
|
31 |
+
10.0% Δp: -4.833%
|
32 |
+
5.0% Δp: -8.289%
|
33 |
+
1.0% Δp: -18.737%
|
34 |
+
0.1% Δp: -40.529%
|
35 |
+
Minimum Δp: -97.396%
|
36 |
+
RMS Δp : 5.260 ± 0.034 %
|
37 |
+
Same top p: 90.124 ± 0.077 %
|
scores/hammer2.1-7b-q3_k_m.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 10.469704 ± 0.076871
|
3 |
+
Mean PPL(base) : 10.036835 ± 0.072696
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.20%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.042224 ± 0.000927
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.043128 ± 0.000967
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.432869 ± 0.010328
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.044002 ± 0.000201
|
11 |
+
Maximum KLD: 5.247561
|
12 |
+
99.9% KLD: 0.866438
|
13 |
+
99.0% KLD: 0.326842
|
14 |
+
99.0% KLD: 0.326842
|
15 |
+
Median KLD: 0.025367
|
16 |
+
10.0% KLD: 0.000731
|
17 |
+
5.0% KLD: 0.000142
|
18 |
+
1.0% KLD: 0.000009
|
19 |
+
Minimum KLD: -0.000234
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.465 ± 0.014 %
|
23 |
+
Maximum Δp: 82.693%
|
24 |
+
99.9% Δp: 28.711%
|
25 |
+
99.0% Δp: 14.576%
|
26 |
+
95.0% Δp: 6.726%
|
27 |
+
90.0% Δp: 3.801%
|
28 |
+
75.0% Δp: 0.673%
|
29 |
+
Median Δp: -0.005%
|
30 |
+
25.0% Δp: -1.170%
|
31 |
+
10.0% Δp: -5.104%
|
32 |
+
5.0% Δp: -8.747%
|
33 |
+
1.0% Δp: -19.949%
|
34 |
+
0.1% Δp: -42.515%
|
35 |
+
Minimum Δp: -96.762%
|
36 |
+
RMS Δp : 5.550 ± 0.036 %
|
37 |
+
Same top p: 89.641 ± 0.079 %
|
scores/hammer2.1-7b-q3_k_s.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 10.774104 ± 0.078933
|
3 |
+
Mean PPL(base) : 10.036835 ± 0.072696
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.73%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.070884 ± 0.001163
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.073456 ± 0.001249
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.737270 ± 0.013581
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.072166 ± 0.000301
|
11 |
+
Maximum KLD: 4.513918
|
12 |
+
99.9% KLD: 1.255733
|
13 |
+
99.0% KLD: 0.525244
|
14 |
+
99.0% KLD: 0.525244
|
15 |
+
Median KLD: 0.043659
|
16 |
+
10.0% KLD: 0.001389
|
17 |
+
5.0% KLD: 0.000295
|
18 |
+
1.0% KLD: 0.000023
|
19 |
+
Minimum KLD: -0.000033
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -1.248 ± 0.018 %
|
23 |
+
Maximum Δp: 86.155%
|
24 |
+
99.9% Δp: 34.118%
|
25 |
+
99.0% Δp: 16.809%
|
26 |
+
95.0% Δp: 7.276%
|
27 |
+
90.0% Δp: 3.730%
|
28 |
+
75.0% Δp: 0.431%
|
29 |
+
Median Δp: -0.052%
|
30 |
+
25.0% Δp: -2.217%
|
31 |
+
10.0% Δp: -7.749%
|
32 |
+
5.0% Δp: -12.608%
|
33 |
+
1.0% Δp: -27.291%
|
34 |
+
0.1% Δp: -55.755%
|
35 |
+
Minimum Δp: -90.520%
|
36 |
+
RMS Δp : 7.155 ± 0.041 %
|
37 |
+
Same top p: 86.887 ± 0.087 %
|
scores/hammer2.1-7b-q4_k_m.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 10.130249 ± 0.073552
|
3 |
+
Mean PPL(base) : 10.036835 ± 0.072696
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.79%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.009264 ± 0.000475
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.009307 ± 0.000479
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.093414 ± 0.004859
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.012012 ± 0.000059
|
11 |
+
Maximum KLD: 2.987801
|
12 |
+
99.9% KLD: 0.227534
|
13 |
+
99.0% KLD: 0.085719
|
14 |
+
99.0% KLD: 0.085719
|
15 |
+
Median KLD: 0.007136
|
16 |
+
10.0% KLD: 0.000206
|
17 |
+
5.0% KLD: 0.000040
|
18 |
+
1.0% KLD: 0.000002
|
19 |
+
Minimum KLD: -0.000146
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.144 ± 0.007 %
|
23 |
+
Maximum Δp: 88.099%
|
24 |
+
99.9% Δp: 17.014%
|
25 |
+
99.0% Δp: 8.223%
|
26 |
+
95.0% Δp: 3.729%
|
27 |
+
90.0% Δp: 2.080%
|
28 |
+
75.0% Δp: 0.351%
|
29 |
+
Median Δp: -0.002%
|
30 |
+
25.0% Δp: -0.591%
|
31 |
+
10.0% Δp: -2.522%
|
32 |
+
5.0% Δp: -4.245%
|
33 |
+
1.0% Δp: -9.152%
|
34 |
+
0.1% Δp: -21.093%
|
35 |
+
Minimum Δp: -54.638%
|
36 |
+
RMS Δp : 2.845 ± 0.021 %
|
37 |
+
Same top p: 94.427 ± 0.059 %
|
scores/hammer2.1-7b-q4_k_s.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 10.139939 ± 0.073658
|
3 |
+
Mean PPL(base) : 10.036835 ± 0.072696
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.75%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.010220 ± 0.000518
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.010273 ± 0.000523
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.103104 ± 0.005308
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.014417 ± 0.000070
|
11 |
+
Maximum KLD: 3.952321
|
12 |
+
99.9% KLD: 0.271614
|
13 |
+
99.0% KLD: 0.107147
|
14 |
+
99.0% KLD: 0.107147
|
15 |
+
Median KLD: 0.008493
|
16 |
+
10.0% KLD: 0.000243
|
17 |
+
5.0% KLD: 0.000047
|
18 |
+
1.0% KLD: 0.000002
|
19 |
+
Minimum KLD: -0.000098
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.146 ± 0.008 %
|
23 |
+
Maximum Δp: 90.019%
|
24 |
+
99.9% Δp: 18.437%
|
25 |
+
99.0% Δp: 9.000%
|
26 |
+
95.0% Δp: 4.047%
|
27 |
+
90.0% Δp: 2.286%
|
28 |
+
75.0% Δp: 0.407%
|
29 |
+
Median Δp: -0.001%
|
30 |
+
25.0% Δp: -0.621%
|
31 |
+
10.0% Δp: -2.714%
|
32 |
+
5.0% Δp: -4.622%
|
33 |
+
1.0% Δp: -10.223%
|
34 |
+
0.1% Δp: -22.645%
|
35 |
+
Minimum Δp: -68.862%
|
36 |
+
RMS Δp : 3.110 ± 0.023 %
|
37 |
+
Same top p: 94.018 ± 0.061 %
|
scores/hammer2.1-7b-q5_k_m.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 10.071679 ± 0.073178
|
3 |
+
Mean PPL(base) : 10.036835 ± 0.072696
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.93%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.003466 ± 0.000272
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.003472 ± 0.000273
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.034844 ± 0.002769
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.003501 ± 0.000018
|
11 |
+
Maximum KLD: 1.326304
|
12 |
+
99.9% KLD: 0.063485
|
13 |
+
99.0% KLD: 0.024422
|
14 |
+
99.0% KLD: 0.024422
|
15 |
+
Median KLD: 0.002144
|
16 |
+
10.0% KLD: 0.000053
|
17 |
+
5.0% KLD: 0.000008
|
18 |
+
1.0% KLD: -0.000001
|
19 |
+
Minimum KLD: -0.000235
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: 0.013 ± 0.004 %
|
23 |
+
Maximum Δp: 34.867%
|
24 |
+
99.9% Δp: 9.405%
|
25 |
+
99.0% Δp: 4.684%
|
26 |
+
95.0% Δp: 2.228%
|
27 |
+
90.0% Δp: 1.306%
|
28 |
+
75.0% Δp: 0.272%
|
29 |
+
Median Δp: 0.000%
|
30 |
+
25.0% Δp: -0.249%
|
31 |
+
10.0% Δp: -1.227%
|
32 |
+
5.0% Δp: -2.154%
|
33 |
+
1.0% Δp: -4.685%
|
34 |
+
0.1% Δp: -10.648%
|
35 |
+
Minimum Δp: -30.875%
|
36 |
+
RMS Δp : 1.532 ± 0.011 %
|
37 |
+
Same top p: 96.908 ± 0.045 %
|
scores/hammer2.1-7b-q5_k_s.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 10.092163 ± 0.073418
|
3 |
+
Mean PPL(base) : 10.036835 ± 0.072696
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.92%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.005497 ± 0.000290
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.005513 ± 0.000291
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.055329 ± 0.002985
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.004008 ± 0.000021
|
11 |
+
Maximum KLD: 1.392368
|
12 |
+
99.9% KLD: 0.076297
|
13 |
+
99.0% KLD: 0.029280
|
14 |
+
99.0% KLD: 0.029280
|
15 |
+
Median KLD: 0.002400
|
16 |
+
10.0% KLD: 0.000055
|
17 |
+
5.0% KLD: 0.000008
|
18 |
+
1.0% KLD: -0.000002
|
19 |
+
Minimum KLD: -0.000235
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: 0.003 ± 0.004 %
|
23 |
+
Maximum Δp: 44.164%
|
24 |
+
99.9% Δp: 10.258%
|
25 |
+
99.0% Δp: 4.882%
|
26 |
+
95.0% Δp: 2.317%
|
27 |
+
90.0% Δp: 1.357%
|
28 |
+
75.0% Δp: 0.286%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.264%
|
31 |
+
10.0% Δp: -1.302%
|
32 |
+
5.0% Δp: -2.305%
|
33 |
+
1.0% Δp: -5.091%
|
34 |
+
0.1% Δp: -11.711%
|
35 |
+
Minimum Δp: -28.870%
|
36 |
+
RMS Δp : 1.636 ± 0.011 %
|
37 |
+
Same top p: 96.720 ± 0.046 %
|
scores/hammer2.1-7b-q6_k.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 10.063662 ± 0.073025
|
3 |
+
Mean PPL(base) : 10.036835 ± 0.072696
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.97%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.002669 ± 0.000186
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.002673 ± 0.000186
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.026827 ± 0.001889
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.001360 ± 0.000005
|
11 |
+
Maximum KLD: 0.216463
|
12 |
+
99.9% KLD: 0.021616
|
13 |
+
99.0% KLD: 0.008655
|
14 |
+
99.0% KLD: 0.008655
|
15 |
+
Median KLD: 0.000893
|
16 |
+
10.0% KLD: 0.000021
|
17 |
+
5.0% KLD: 0.000003
|
18 |
+
1.0% KLD: -0.000002
|
19 |
+
Minimum KLD: -0.000123
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.011 ± 0.002 %
|
23 |
+
Maximum Δp: 26.248%
|
24 |
+
99.9% Δp: 5.746%
|
25 |
+
99.0% Δp: 2.880%
|
26 |
+
95.0% Δp: 1.388%
|
27 |
+
90.0% Δp: 0.805%
|
28 |
+
75.0% Δp: 0.157%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.176%
|
31 |
+
10.0% Δp: -0.830%
|
32 |
+
5.0% Δp: -1.418%
|
33 |
+
1.0% Δp: -2.952%
|
34 |
+
0.1% Δp: -6.178%
|
35 |
+
Minimum Δp: -16.853%
|
36 |
+
RMS Δp : 0.953 ± 0.006 %
|
37 |
+
Same top p: 98.010 ± 0.036 %
|
scores/hammer2.1-7b-q8_0.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 10.039829 ± 0.072796
|
3 |
+
Mean PPL(base) : 10.036835 ± 0.072696
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.99%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.000298 ± 0.000120
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.000298 ± 0.000120
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.002994 ± 0.001202
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.000162 ± 0.000001
|
11 |
+
Maximum KLD: 0.031653
|
12 |
+
99.9% KLD: 0.002376
|
13 |
+
99.0% KLD: 0.000973
|
14 |
+
99.0% KLD: 0.000973
|
15 |
+
Median KLD: 0.000112
|
16 |
+
10.0% KLD: 0.000003
|
17 |
+
5.0% KLD: 0.000000
|
18 |
+
1.0% KLD: -0.000004
|
19 |
+
Minimum KLD: -0.000071
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.006 ± 0.001 %
|
23 |
+
Maximum Δp: 7.425%
|
24 |
+
99.9% Δp: 1.985%
|
25 |
+
99.0% Δp: 0.955%
|
26 |
+
95.0% Δp: 0.463%
|
27 |
+
90.0% Δp: 0.270%
|
28 |
+
75.0% Δp: 0.053%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.064%
|
31 |
+
10.0% Δp: -0.293%
|
32 |
+
5.0% Δp: -0.489%
|
33 |
+
1.0% Δp: -0.973%
|
34 |
+
0.1% Δp: -1.929%
|
35 |
+
Minimum Δp: -9.811%
|
36 |
+
RMS Δp : 0.319 ± 0.002 %
|
37 |
+
Same top p: 99.303 ± 0.022 %
|