eaddario commited on
Commit
f0c2d6d
·
unverified ·
1 Parent(s): 87d3868

Generate perplexity and kld scores

Browse files
scores/DeepSeek-R1-Distill-Llama-8B-iq3_m.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 15.074649 ± 0.125392
3
+ Mean PPL(base) : 14.009216 ± 0.118474
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 96.24%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.073299 ± 0.002303
6
+ Mean PPL(Q)/PPL(base) : 1.076052 ± 0.002479
7
+ Mean PPL(Q)-PPL(base) : 1.065433 ± 0.034120
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.231125 ± 0.001065
11
+ Maximum KLD: 15.317444
12
+ 99.9% KLD: 4.158607
13
+ 99.0% KLD: 1.925704
14
+ 99.0% KLD: 1.925704
15
+ Median KLD: 0.108549
16
+ 10.0% KLD: 0.004011
17
+ 5.0% KLD: 0.000968
18
+ 1.0% KLD: 0.000097
19
+ Minimum KLD: -0.000002
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -2.727 ± 0.032 %
23
+ Maximum Δp: 99.189%
24
+ 99.9% Δp: 58.452%
25
+ 99.0% Δp: 28.292%
26
+ 95.0% Δp: 11.053%
27
+ 90.0% Δp: 5.190%
28
+ 75.0% Δp: 0.345%
29
+ Median Δp: -0.134%
30
+ 25.0% Δp: -4.078%
31
+ 10.0% Δp: -14.463%
32
+ 5.0% Δp: -24.044%
33
+ 1.0% Δp: -50.741%
34
+ 0.1% Δp: -83.958%
35
+ Minimum Δp: -99.686%
36
+ RMS Δp : 12.503 ± 0.061 %
37
+ Same top p: 79.332 ± 0.107 %
scores/DeepSeek-R1-Distill-Llama-8B-iq3_s.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 15.334981 ± 0.127912
3
+ Mean PPL(base) : 14.009216 ± 0.118474
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 95.98%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.090421 ± 0.002383
6
+ Mean PPL(Q)/PPL(base) : 1.094635 ± 0.002609
7
+ Mean PPL(Q)-PPL(base) : 1.325764 ± 0.036143
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.248986 ± 0.001138
11
+ Maximum KLD: 17.916763
12
+ 99.9% KLD: 4.568046
13
+ 99.0% KLD: 2.008440
14
+ 99.0% KLD: 2.008440
15
+ Median KLD: 0.118888
16
+ 10.0% KLD: 0.004193
17
+ 5.0% KLD: 0.000998
18
+ 1.0% KLD: 0.000099
19
+ Minimum KLD: -0.000002
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -2.891 ± 0.033 %
23
+ Maximum Δp: 99.825%
24
+ 99.9% Δp: 60.879%
25
+ 99.0% Δp: 28.581%
26
+ 95.0% Δp: 11.317%
27
+ 90.0% Δp: 5.294%
28
+ 75.0% Δp: 0.331%
29
+ Median Δp: -0.141%
30
+ 25.0% Δp: -4.267%
31
+ 10.0% Δp: -15.067%
32
+ 5.0% Δp: -25.229%
33
+ 1.0% Δp: -52.525%
34
+ 0.1% Δp: -84.834%
35
+ Minimum Δp: -99.864%
36
+ RMS Δp : 12.940 ± 0.062 %
37
+ Same top p: 78.391 ± 0.109 %
scores/DeepSeek-R1-Distill-Llama-8B-iq4_nl.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 14.150903 ± 0.119732
3
+ Mean PPL(base) : 14.009216 ± 0.118474
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 98.72%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.010063 ± 0.001356
6
+ Mean PPL(Q)/PPL(base) : 1.010114 ± 0.001369
7
+ Mean PPL(Q)-PPL(base) : 0.141687 ± 0.019129
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.081957 ± 0.000567
11
+ Maximum KLD: 9.373822
12
+ 99.9% KLD: 2.712739
13
+ 99.0% KLD: 0.913387
14
+ 99.0% KLD: 0.913387
15
+ Median KLD: 0.029345
16
+ 10.0% KLD: 0.000732
17
+ 5.0% KLD: 0.000163
18
+ 1.0% KLD: 0.000012
19
+ Minimum KLD: -0.000014
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.342 ± 0.019 %
23
+ Maximum Δp: 95.532%
24
+ 99.9% Δp: 53.636%
25
+ 99.0% Δp: 21.914%
26
+ 95.0% Δp: 8.298%
27
+ 90.0% Δp: 4.309%
28
+ 75.0% Δp: 0.568%
29
+ Median Δp: -0.003%
30
+ 25.0% Δp: -1.122%
31
+ 10.0% Δp: -5.580%
32
+ 5.0% Δp: -10.127%
33
+ 1.0% Δp: -23.871%
34
+ 0.1% Δp: -51.466%
35
+ Minimum Δp: -97.545%
36
+ RMS Δp : 7.192 ± 0.050 %
37
+ Same top p: 87.782 ± 0.086 %
scores/DeepSeek-R1-Distill-Llama-8B-q3_k_l.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 14.871865 ± 0.126955
3
+ Mean PPL(base) : 14.009216 ± 0.118474
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 97.50%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.059756 ± 0.001903
6
+ Mean PPL(Q)/PPL(base) : 1.061577 ± 0.002020
7
+ Mean PPL(Q)-PPL(base) : 0.862649 ± 0.028722
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.154127 ± 0.000850
11
+ Maximum KLD: 11.128032
12
+ 99.9% KLD: 3.615189
13
+ 99.0% KLD: 1.488761
14
+ 99.0% KLD: 1.488761
15
+ Median KLD: 0.061692
16
+ 10.0% KLD: 0.001610
17
+ 5.0% KLD: 0.000344
18
+ 1.0% KLD: 0.000026
19
+ Minimum KLD: -0.000067
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -1.010 ± 0.027 %
23
+ Maximum Δp: 97.798%
24
+ 99.9% Δp: 57.985%
25
+ 99.0% Δp: 26.935%
26
+ 95.0% Δp: 11.279%
27
+ 90.0% Δp: 5.918%
28
+ 75.0% Δp: 0.729%
29
+ Median Δp: -0.010%
30
+ 25.0% Δp: -1.838%
31
+ 10.0% Δp: -8.856%
32
+ 5.0% Δp: -16.093%
33
+ 1.0% Δp: -39.077%
34
+ 0.1% Δp: -76.112%
35
+ Minimum Δp: -99.152%
36
+ RMS Δp : 10.103 ± 0.058 %
37
+ Same top p: 83.151 ± 0.099 %
scores/DeepSeek-R1-Distill-Llama-8B-q3_k_m.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 15.028835 ± 0.128791
3
+ Mean PPL(base) : 14.009216 ± 0.118474
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 97.12%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.070255 ± 0.002045
6
+ Mean PPL(Q)/PPL(base) : 1.072782 ± 0.002194
7
+ Mean PPL(Q)-PPL(base) : 1.019619 ± 0.031378
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.176316 ± 0.000924
11
+ Maximum KLD: 12.648300
12
+ 99.9% KLD: 3.905615
13
+ 99.0% KLD: 1.629902
14
+ 99.0% KLD: 1.629902
15
+ Median KLD: 0.073991
16
+ 10.0% KLD: 0.001869
17
+ 5.0% KLD: 0.000395
18
+ 1.0% KLD: 0.000032
19
+ Minimum KLD: -0.000021
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -1.132 ± 0.028 %
23
+ Maximum Δp: 98.577%
24
+ 99.9% Δp: 60.593%
25
+ 99.0% Δp: 28.113%
26
+ 95.0% Δp: 12.093%
27
+ 90.0% Δp: 6.356%
28
+ 75.0% Δp: 0.792%
29
+ Median Δp: -0.010%
30
+ 25.0% Δp: -2.011%
31
+ 10.0% Δp: -9.787%
32
+ 5.0% Δp: -17.659%
33
+ 1.0% Δp: -41.512%
34
+ 0.1% Δp: -79.331%
35
+ Minimum Δp: -99.621%
36
+ RMS Δp : 10.735 ± 0.059 %
37
+ Same top p: 82.001 ± 0.101 %
scores/DeepSeek-R1-Distill-Llama-8B-q3_k_s.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 15.513099 ± 0.129202
3
+ Mean PPL(base) : 14.009216 ± 0.118474
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 94.85%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.101969 ± 0.002698
6
+ Mean PPL(Q)/PPL(base) : 1.107350 ± 0.002987
7
+ Mean PPL(Q)-PPL(base) : 1.503883 ± 0.041148
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.320092 ± 0.001344
11
+ Maximum KLD: 17.145338
12
+ 99.9% KLD: 5.248430
13
+ 99.0% KLD: 2.399956
14
+ 99.0% KLD: 2.399956
15
+ Median KLD: 0.171489
16
+ 10.0% KLD: 0.005972
17
+ 5.0% KLD: 0.001335
18
+ 1.0% KLD: 0.000109
19
+ Minimum KLD: -0.000000
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -3.246 ± 0.037 %
23
+ Maximum Δp: 99.963%
24
+ 99.9% Δp: 71.232%
25
+ 99.0% Δp: 34.565%
26
+ 95.0% Δp: 13.472%
27
+ 90.0% Δp: 6.248%
28
+ 75.0% Δp: 0.411%
29
+ Median Δp: -0.166%
30
+ 25.0% Δp: -5.212%
31
+ 10.0% Δp: -17.956%
32
+ 5.0% Δp: -28.544%
33
+ 1.0% Δp: -57.576%
34
+ 0.1% Δp: -87.811%
35
+ Minimum Δp: -99.472%
36
+ RMS Δp : 14.572 ± 0.064 %
37
+ Same top p: 75.385 ± 0.114 %
scores/DeepSeek-R1-Distill-Llama-8B-q4_k_m.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 14.049490 ± 0.119296
3
+ Mean PPL(base) : 14.009216 ± 0.118474
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 98.90%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.002871 ± 0.001257
6
+ Mean PPL(Q)/PPL(base) : 1.002875 ± 0.001261
7
+ Mean PPL(Q)-PPL(base) : 0.040274 ± 0.017652
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.071652 ± 0.000563
11
+ Maximum KLD: 9.861359
12
+ 99.9% KLD: 2.837626
13
+ 99.0% KLD: 0.888377
14
+ 99.0% KLD: 0.888377
15
+ Median KLD: 0.024927
16
+ 10.0% KLD: 0.000580
17
+ 5.0% KLD: 0.000121
18
+ 1.0% KLD: 0.000009
19
+ Minimum KLD: -0.000063
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.076 ± 0.017 %
23
+ Maximum Δp: 97.871%
24
+ 99.9% Δp: 57.205%
25
+ 99.0% Δp: 21.178%
26
+ 95.0% Δp: 8.128%
27
+ 90.0% Δp: 4.301%
28
+ 75.0% Δp: 0.634%
29
+ Median Δp: -0.001%
30
+ 25.0% Δp: -0.896%
31
+ 10.0% Δp: -4.761%
32
+ 5.0% Δp: -8.767%
33
+ 1.0% Δp: -20.512%
34
+ 0.1% Δp: -41.932%
35
+ Minimum Δp: -99.017%
36
+ RMS Δp : 6.634 ± 0.049 %
37
+ Same top p: 89.014 ± 0.082 %
scores/DeepSeek-R1-Distill-Llama-8B-q4_k_s.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 14.156039 ± 0.120378
3
+ Mean PPL(base) : 14.009216 ± 0.118474
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 98.86%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.010426 ± 0.001279
6
+ Mean PPL(Q)/PPL(base) : 1.010480 ± 0.001292
7
+ Mean PPL(Q)-PPL(base) : 0.146822 ± 0.018099
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.073625 ± 0.000505
11
+ Maximum KLD: 9.293770
12
+ 99.9% KLD: 2.486929
13
+ 99.0% KLD: 0.821050
14
+ 99.0% KLD: 0.821050
15
+ Median KLD: 0.028050
16
+ 10.0% KLD: 0.000652
17
+ 5.0% KLD: 0.000138
18
+ 1.0% KLD: 0.000010
19
+ Minimum KLD: -0.000033
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.182 ± 0.018 %
23
+ Maximum Δp: 97.527%
24
+ 99.9% Δp: 53.244%
25
+ 99.0% Δp: 20.616%
26
+ 95.0% Δp: 8.379%
27
+ 90.0% Δp: 4.475%
28
+ 75.0% Δp: 0.663%
29
+ Median Δp: -0.001%
30
+ 25.0% Δp: -0.955%
31
+ 10.0% Δp: -5.172%
32
+ 5.0% Δp: -9.361%
33
+ 1.0% Δp: -22.373%
34
+ 0.1% Δp: -47.265%
35
+ Minimum Δp: -98.736%
36
+ RMS Δp : 6.819 ± 0.047 %
37
+ Same top p: 88.499 ± 0.084 %
scores/DeepSeek-R1-Distill-Llama-8B-q5_k_m.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 14.094598 ± 0.119327
3
+ Mean PPL(base) : 14.009216 ± 0.118474
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.25%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.006076 ± 0.001033
6
+ Mean PPL(Q)/PPL(base) : 1.006095 ± 0.001039
7
+ Mean PPL(Q)-PPL(base) : 0.085381 ± 0.014539
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.045327 ± 0.000475
11
+ Maximum KLD: 7.581351
12
+ 99.9% KLD: 2.318281
13
+ 99.0% KLD: 0.782267
14
+ 99.0% KLD: 0.782267
15
+ Median KLD: 0.008818
16
+ 10.0% KLD: 0.000216
17
+ 5.0% KLD: 0.000044
18
+ 1.0% KLD: 0.000003
19
+ Minimum KLD: -0.000050
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.184 ± 0.014 %
23
+ Maximum Δp: 98.658%
24
+ 99.9% Δp: 37.143%
25
+ 99.0% Δp: 13.901%
26
+ 95.0% Δp: 5.155%
27
+ 90.0% Δp: 2.665%
28
+ 75.0% Δp: 0.413%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.512%
31
+ 10.0% Δp: -2.979%
32
+ 5.0% Δp: -5.673%
33
+ 1.0% Δp: -17.165%
34
+ 0.1% Δp: -55.297%
35
+ Minimum Δp: -98.261%
36
+ RMS Δp : 5.344 ± 0.055 %
37
+ Same top p: 92.340 ± 0.070 %
scores/DeepSeek-R1-Distill-Llama-8B-q5_k_s.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 14.060444 ± 0.119091
3
+ Mean PPL(base) : 14.009216 ± 0.118474
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.49%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.003650 ± 0.000853
6
+ Mean PPL(Q)/PPL(base) : 1.003657 ± 0.000856
7
+ Mean PPL(Q)-PPL(base) : 0.051228 ± 0.011989
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.030597 ± 0.000278
11
+ Maximum KLD: 6.643202
12
+ 99.9% KLD: 1.283375
13
+ 99.0% KLD: 0.440988
14
+ 99.0% KLD: 0.440988
15
+ Median KLD: 0.008143
16
+ 10.0% KLD: 0.000194
17
+ 5.0% KLD: 0.000041
18
+ 1.0% KLD: 0.000003
19
+ Minimum KLD: -0.000057
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.114 ± 0.012 %
23
+ Maximum Δp: 87.529%
24
+ 99.9% Δp: 32.065%
25
+ 99.0% Δp: 12.407%
26
+ 95.0% Δp: 4.823%
27
+ 90.0% Δp: 2.536%
28
+ 75.0% Δp: 0.383%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.498%
31
+ 10.0% Δp: -2.851%
32
+ 5.0% Δp: -5.251%
33
+ 1.0% Δp: -13.971%
34
+ 0.1% Δp: -37.086%
35
+ Minimum Δp: -94.993%
36
+ RMS Δp : 4.385 ± 0.043 %
37
+ Same top p: 93.015 ± 0.067 %
scores/DeepSeek-R1-Distill-Llama-8B-q6_k.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 13.846273 ± 0.117603
3
+ Mean PPL(base) : 14.009216 ± 0.118474
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.39%
5
+ Mean ln(PPL(Q)/PPL(base)) : -0.011699 ± 0.000939
6
+ Mean PPL(Q)/PPL(base) : 0.988369 ± 0.000928
7
+ Mean PPL(Q)-PPL(base) : -0.162943 ± 0.013097
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.044284 ± 0.000722
11
+ Maximum KLD: 12.169687
12
+ 99.9% KLD: 3.787726
13
+ 99.0% KLD: 1.212246
14
+ 99.0% KLD: 1.212246
15
+ Median KLD: 0.004281
16
+ 10.0% KLD: 0.000100
17
+ 5.0% KLD: 0.000020
18
+ 1.0% KLD: 0.000001
19
+ Minimum KLD: -0.000077
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: 0.326 ± 0.013 %
23
+ Maximum Δp: 98.618%
24
+ 99.9% Δp: 65.197%
25
+ 99.0% Δp: 15.137%
26
+ 95.0% Δp: 4.119%
27
+ 90.0% Δp: 2.167%
28
+ 75.0% Δp: 0.376%
29
+ Median Δp: 0.000%
30
+ 25.0% Δp: -0.274%
31
+ 10.0% Δp: -1.794%
32
+ 5.0% Δp: -3.369%
33
+ 1.0% Δp: -8.767%
34
+ 0.1% Δp: -26.074%
35
+ Minimum Δp: -85.045%
36
+ RMS Δp : 4.831 ± 0.068 %
37
+ Same top p: 94.289 ± 0.061 %
scores/DeepSeek-R1-Distill-Llama-8B-q8_0.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 14.034924 ± 0.119030
3
+ Mean PPL(base) : 14.009216 ± 0.118474
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.93%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.001833 ± 0.000321
6
+ Mean PPL(Q)/PPL(base) : 1.001835 ± 0.000322
7
+ Mean PPL(Q)-PPL(base) : 0.025708 ± 0.004525
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.001945 ± 0.000033
11
+ Maximum KLD: 2.361588
12
+ 99.9% KLD: 0.088093
13
+ 99.0% KLD: 0.026191
14
+ 99.0% KLD: 0.026191
15
+ Median KLD: 0.000515
16
+ 10.0% KLD: 0.000015
17
+ 5.0% KLD: 0.000003
18
+ 1.0% KLD: -0.000000
19
+ Minimum KLD: -0.000085
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.066 ± 0.003 %
23
+ Maximum Δp: 35.566%
24
+ 99.9% Δp: 8.162%
25
+ 99.0% Δp: 3.022%
26
+ 95.0% Δp: 1.092%
27
+ 90.0% Δp: 0.545%
28
+ 75.0% Δp: 0.066%
29
+ Median Δp: -0.001%
30
+ 25.0% Δp: -0.166%
31
+ 10.0% Δp: -0.787%
32
+ 5.0% Δp: -1.385%
33
+ 1.0% Δp: -3.441%
34
+ 0.1% Δp: -8.874%
35
+ Minimum Δp: -78.643%
36
+ RMS Δp : 1.131 ± 0.027 %
37
+ Same top p: 98.203 ± 0.035 %