eaddario commited on
Commit
75442e7
·
unverified ·
1 Parent(s): 10e6062

Generate perplexity and kld scores

Browse files
scores/deepseek-r1-distill-qwen-7b-iq3_m.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 26.530303 ± 0.256822
3
+ Mean PPL(base) : 24.931431 ± 0.241228
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 98.58%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.062158 ± 0.001634
6
+ Mean PPL(Q)/PPL(base) : 1.064131 ± 0.001738
7
+ Mean PPL(Q)-PPL(base) : 1.598873 ± 0.044813
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.110374 ± 0.000408
11
+ Maximum KLD: 4.856987
12
+ 99.9% KLD: 1.554827
13
+ 99.0% KLD: 0.706525
14
+ 99.0% KLD: 0.706525
15
+ Median KLD: 0.063658
16
+ 10.0% KLD: 0.001721
17
+ 5.0% KLD: 0.000315
18
+ 1.0% KLD: 0.000020
19
+ Minimum KLD: -0.000091
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -1.459 ± 0.021 %
23
+ Maximum Δp: 90.908%
24
+ 99.9% Δp: 41.637%
25
+ 99.0% Δp: 20.507%
26
+ 95.0% Δp: 8.036%
27
+ 90.0% Δp: 3.551%
28
+ 75.0% Δp: 0.185%
29
+ Median Δp: -0.025%
30
+ 25.0% Δp: -2.071%
31
+ 10.0% Δp: -9.115%
32
+ 5.0% Δp: -15.397%
33
+ 1.0% Δp: -30.740%
34
+ 0.1% Δp: -54.239%
35
+ Minimum Δp: -95.795%
36
+ RMS Δp : 8.130 ± 0.041 %
37
+ Same top p: 84.107 ± 0.095 %
scores/deepseek-r1-distill-qwen-7b-iq3_s.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 26.638550 ± 0.259075
3
+ Mean PPL(base) : 24.931431 ± 0.241228
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 98.51%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.066230 ± 0.001677
6
+ Mean PPL(Q)/PPL(base) : 1.068473 ± 0.001792
7
+ Mean PPL(Q)-PPL(base) : 1.707120 ± 0.046752
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.115060 ± 0.000427
11
+ Maximum KLD: 6.862354
12
+ 99.9% KLD: 1.668972
13
+ 99.0% KLD: 0.747263
14
+ 99.0% KLD: 0.747263
15
+ Median KLD: 0.065809
16
+ 10.0% KLD: 0.001661
17
+ 5.0% KLD: 0.000301
18
+ 1.0% KLD: 0.000018
19
+ Minimum KLD: -0.000103
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -1.254 ± 0.021 %
23
+ Maximum Δp: 92.550%
24
+ 99.9% Δp: 43.219%
25
+ 99.0% Δp: 21.697%
26
+ 95.0% Δp: 8.707%
27
+ 90.0% Δp: 4.022%
28
+ 75.0% Δp: 0.260%
29
+ Median Δp: -0.015%
30
+ 25.0% Δp: -1.839%
31
+ 10.0% Δp: -8.696%
32
+ 5.0% Δp: -15.122%
33
+ 1.0% Δp: -31.087%
34
+ 0.1% Δp: -57.206%
35
+ Minimum Δp: -96.125%
36
+ RMS Δp : 8.252 ± 0.042 %
37
+ Same top p: 83.845 ± 0.095 %
scores/deepseek-r1-distill-qwen-7b-iq4_nl.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 25.645935 ± 0.251542
3
+ Mean PPL(base) : 24.931431 ± 0.241228
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.48%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.028256 ± 0.001007
6
+ Mean PPL(Q)/PPL(base) : 1.028659 ± 0.001035
7
+ Mean PPL(Q)-PPL(base) : 0.714505 ± 0.027258
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.035420 ± 0.000140
11
+ Maximum KLD: 3.406450
12
+ 99.9% KLD: 0.527580
13
+ 99.0% KLD: 0.238919
14
+ 99.0% KLD: 0.238919
15
+ Median KLD: 0.019661
16
+ 10.0% KLD: 0.000409
17
+ 5.0% KLD: 0.000068
18
+ 1.0% KLD: 0.000002
19
+ Minimum KLD: -0.000216
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.143 ± 0.012 %
23
+ Maximum Δp: 79.304%
24
+ 99.9% Δp: 28.190%
25
+ 99.0% Δp: 14.472%
26
+ 95.0% Δp: 6.173%
27
+ 90.0% Δp: 3.204%
28
+ 75.0% Δp: 0.378%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.552%
31
+ 10.0% Δp: -3.690%
32
+ 5.0% Δp: -6.890%
33
+ 1.0% Δp: -15.483%
34
+ 0.1% Δp: -31.224%
35
+ Minimum Δp: -74.269%
36
+ RMS Δp : 4.584 ± 0.027 %
37
+ Same top p: 90.668 ± 0.075 %
scores/deepseek-r1-distill-qwen-7b-q3_k_l.log ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 26.027373 ± 0.255226
3
+ Mean PPL(base) : 24.931431 ± 0.241228
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 98.91%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.043020 ± 0.001445
6
+ Mean PPL(Q)/PPL(base) : 1.043958 ± 0.001509
7
+ Mean PPL(Q)-PPL(base) : 1.095943 ± 0.039245
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.080142 ± 0.000319
11
+ Maximum KLD: 6.251925
12
+ 99.9% KLD: 1.256165
13
+ 99.0% KLD: 0.545236
14
+ 99.0% KLD: 0.545236
15
+ Median KLD: 0.043817
16
+ 10.0% KLD: 0.000973
17
+ 5.0% KLD: 0.000158
18
+ 1.0% KLD: 0.000006
19
+ Minimum KLD: -0.000183
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.308 ± 0.018 %
23
+ Maximum Δp: 90.671%
24
+ 99.9% Δp: 40.078%
25
+ 99.0% Δp: 20.806%
26
+ 95.0% Δp: 9.213%
27
+ 90.0% Δp: 4.792%
28
+ 75.0% Δp: 0.558%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.862%
31
+ 10.0% Δp: -5.664%
32
+ 5.0% Δp: -10.659%
33
+ 1.0% Δp: -25.039%
34
+ 0.1% Δp: -47.819%
35
+ Minimum Δp: -89.317%
36
+ RMS Δp : 6.946 ± 0.038 %
37
+ Same top p: 86.343 ± 0.089 %
38
+
39
+ llama_perf_context_print: load time = 1821.07 ms
40
+ llama_perf_context_print: prompt eval time = 1019684.89 ms / 299008 tokens ( 3.41 ms per token, 293.24 tokens per second)
41
+ llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
42
+ llama_perf_context_print: total time = 1045149.32 ms / 299009 tokens
43
+ ggml_metal_free: deallocating
scores/deepseek-r1-distill-qwen-7b-q3_k_m.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 25.889366 ± 0.253265
3
+ Mean PPL(base) : 24.931431 ± 0.241228
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 98.77%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.037703 ± 0.001531
6
+ Mean PPL(Q)/PPL(base) : 1.038423 ± 0.001590
7
+ Mean PPL(Q)-PPL(base) : 0.957936 ± 0.040635
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.091519 ± 0.000361
11
+ Maximum KLD: 8.082105
12
+ 99.9% KLD: 1.367977
13
+ 99.0% KLD: 0.621550
14
+ 99.0% KLD: 0.621550
15
+ Median KLD: 0.049987
16
+ 10.0% KLD: 0.001159
17
+ 5.0% KLD: 0.000199
18
+ 1.0% KLD: 0.000009
19
+ Minimum KLD: -0.000175
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.368 ± 0.019 %
23
+ Maximum Δp: 86.561%
24
+ 99.9% Δp: 43.423%
25
+ 99.0% Δp: 22.085%
26
+ 95.0% Δp: 9.741%
27
+ 90.0% Δp: 5.056%
28
+ 75.0% Δp: 0.582%
29
+ Median Δp: -0.001%
30
+ 25.0% Δp: -0.958%
31
+ 10.0% Δp: -6.146%
32
+ 5.0% Δp: -11.492%
33
+ 1.0% Δp: -26.685%
34
+ 0.1% Δp: -50.676%
35
+ Minimum Δp: -89.036%
36
+ RMS Δp : 7.422 ± 0.040 %
37
+ Same top p: 85.436 ± 0.091 %
scores/deepseek-r1-distill-qwen-7b-q3_k_s.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 26.552814 ± 0.258782
3
+ Mean PPL(base) : 24.931431 ± 0.241228
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 97.86%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.063006 ± 0.002011
6
+ Mean PPL(Q)/PPL(base) : 1.065034 ± 0.002142
7
+ Mean PPL(Q)-PPL(base) : 1.621384 ± 0.054614
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.164859 ± 0.000610
11
+ Maximum KLD: 8.583980
12
+ 99.9% KLD: 2.315967
13
+ 99.0% KLD: 1.085407
14
+ 99.0% KLD: 1.085407
15
+ Median KLD: 0.093793
16
+ 10.0% KLD: 0.002759
17
+ 5.0% KLD: 0.000509
18
+ 1.0% KLD: 0.000027
19
+ Minimum KLD: -0.000053
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -1.105 ± 0.025 %
23
+ Maximum Δp: 95.542%
24
+ 99.9% Δp: 53.440%
25
+ 99.0% Δp: 27.342%
26
+ 95.0% Δp: 11.606%
27
+ 90.0% Δp: 5.754%
28
+ 75.0% Δp: 0.494%
29
+ Median Δp: -0.008%
30
+ 25.0% Δp: -1.857%
31
+ 10.0% Δp: -9.691%
32
+ 5.0% Δp: -17.127%
33
+ 1.0% Δp: -37.275%
34
+ 0.1% Δp: -65.081%
35
+ Minimum Δp: -94.606%
36
+ RMS Δp : 9.854 ± 0.049 %
37
+ Same top p: 80.956 ± 0.102 %
scores/deepseek-r1-distill-qwen-7b-q4_k_m.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 25.409863 ± 0.248331
3
+ Mean PPL(base) : 24.931431 ± 0.241228
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.58%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.019008 ± 0.000899
6
+ Mean PPL(Q)/PPL(base) : 1.019190 ± 0.000916
7
+ Mean PPL(Q)-PPL(base) : 0.478433 ± 0.023588
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.027115 ± 0.000104
11
+ Maximum KLD: 2.016436
12
+ 99.9% KLD: 0.391155
13
+ 99.0% KLD: 0.179995
14
+ 99.0% KLD: 0.179995
15
+ Median KLD: 0.015400
16
+ 10.0% KLD: 0.000310
17
+ 5.0% KLD: 0.000046
18
+ 1.0% KLD: 0.000000
19
+ Minimum KLD: -0.000275
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.073 ± 0.010 %
23
+ Maximum Δp: 66.267%
24
+ 99.9% Δp: 25.309%
25
+ 99.0% Δp: 12.706%
26
+ 95.0% Δp: 5.590%
27
+ 90.0% Δp: 2.959%
28
+ 75.0% Δp: 0.362%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.466%
31
+ 10.0% Δp: -3.166%
32
+ 5.0% Δp: -5.937%
33
+ 1.0% Δp: -13.474%
34
+ 0.1% Δp: -26.235%
35
+ Minimum Δp: -66.500%
36
+ RMS Δp : 4.030 ± 0.024 %
37
+ Same top p: 91.745 ± 0.071 %
scores/deepseek-r1-distill-qwen-7b-q4_k_s.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 25.379497 ± 0.247973
3
+ Mean PPL(base) : 24.931431 ± 0.241228
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.52%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.017812 ± 0.000955
6
+ Mean PPL(Q)/PPL(base) : 1.017972 ± 0.000972
7
+ Mean PPL(Q)-PPL(base) : 0.448067 ± 0.024845
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.032033 ± 0.000122
11
+ Maximum KLD: 1.726200
12
+ 99.9% KLD: 0.461810
13
+ 99.0% KLD: 0.211016
14
+ 99.0% KLD: 0.211016
15
+ Median KLD: 0.017953
16
+ 10.0% KLD: 0.000386
17
+ 5.0% KLD: 0.000063
18
+ 1.0% KLD: 0.000001
19
+ Minimum KLD: -0.000295
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.088 ± 0.011 %
23
+ Maximum Δp: 67.370%
24
+ 99.9% Δp: 27.270%
25
+ 99.0% Δp: 13.595%
26
+ 95.0% Δp: 5.977%
27
+ 90.0% Δp: 3.182%
28
+ 75.0% Δp: 0.383%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.511%
31
+ 10.0% Δp: -3.408%
32
+ 5.0% Δp: -6.399%
33
+ 1.0% Δp: -14.558%
34
+ 0.1% Δp: -29.089%
35
+ Minimum Δp: -68.450%
36
+ RMS Δp : 4.330 ± 0.025 %
37
+ Same top p: 91.049 ± 0.074 %
scores/deepseek-r1-distill-qwen-7b-q5_k_m.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 25.331908 ± 0.247609
3
+ Mean PPL(base) : 24.931431 ± 0.241228
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.81%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.015935 ± 0.000613
6
+ Mean PPL(Q)/PPL(base) : 1.016063 ± 0.000623
7
+ Mean PPL(Q)-PPL(base) : 0.400477 ± 0.016493
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.008227 ± 0.000037
11
+ Maximum KLD: 3.104945
12
+ 99.9% KLD: 0.117209
13
+ 99.0% KLD: 0.052437
14
+ 99.0% KLD: 0.052437
15
+ Median KLD: 0.004830
16
+ 10.0% KLD: 0.000092
17
+ 5.0% KLD: 0.000012
18
+ 1.0% KLD: -0.000002
19
+ Minimum KLD: -0.000180
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.032 ± 0.006 %
23
+ Maximum Δp: 38.164%
24
+ 99.9% Δp: 13.916%
25
+ 99.0% Δp: 6.928%
26
+ 95.0% Δp: 3.133%
27
+ 90.0% Δp: 1.652%
28
+ 75.0% Δp: 0.209%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.248%
31
+ 10.0% Δp: -1.741%
32
+ 5.0% Δp: -3.275%
33
+ 1.0% Δp: -7.358%
34
+ 0.1% Δp: -14.696%
35
+ Minimum Δp: -60.848%
36
+ RMS Δp : 2.211 ± 0.014 %
37
+ Same top p: 95.381 ± 0.054 %
scores/deepseek-r1-distill-qwen-7b-q5_k_s.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 25.403544 ± 0.248578
3
+ Mean PPL(base) : 24.931431 ± 0.241228
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.79%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.018759 ± 0.000634
6
+ Mean PPL(Q)/PPL(base) : 1.018936 ± 0.000646
7
+ Mean PPL(Q)-PPL(base) : 0.472114 ± 0.017348
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.009320 ± 0.000039
11
+ Maximum KLD: 2.831377
12
+ 99.9% KLD: 0.127923
13
+ 99.0% KLD: 0.059484
14
+ 99.0% KLD: 0.059484
15
+ Median KLD: 0.005393
16
+ 10.0% KLD: 0.000103
17
+ 5.0% KLD: 0.000013
18
+ 1.0% KLD: -0.000002
19
+ Minimum KLD: -0.000203
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.016 ± 0.006 %
23
+ Maximum Δp: 42.542%
24
+ 99.9% Δp: 15.234%
25
+ 99.0% Δp: 7.440%
26
+ 95.0% Δp: 3.334%
27
+ 90.0% Δp: 1.787%
28
+ 75.0% Δp: 0.232%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.256%
31
+ 10.0% Δp: -1.792%
32
+ 5.0% Δp: -3.371%
33
+ 1.0% Δp: -7.776%
34
+ 0.1% Δp: -15.189%
35
+ Minimum Δp: -60.684%
36
+ RMS Δp : 2.343 ± 0.015 %
37
+ Same top p: 95.099 ± 0.056 %
scores/deepseek-r1-distill-qwen-7b-q6_k.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 25.139045 ± 0.245198
3
+ Mean PPL(base) : 24.931431 ± 0.241228
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.87%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.008293 ± 0.000501
6
+ Mean PPL(Q)/PPL(base) : 1.008327 ± 0.000505
7
+ Mean PPL(Q)-PPL(base) : 0.207614 ± 0.013014
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.003335 ± 0.000014
11
+ Maximum KLD: 1.228119
12
+ 99.9% KLD: 0.039909
13
+ 99.0% KLD: 0.019418
14
+ 99.0% KLD: 0.019418
15
+ Median KLD: 0.002067
16
+ 10.0% KLD: 0.000037
17
+ 5.0% KLD: 0.000005
18
+ 1.0% KLD: -0.000004
19
+ Minimum KLD: -0.000159
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.007 ± 0.004 %
23
+ Maximum Δp: 22.155%
24
+ 99.9% Δp: 8.711%
25
+ 99.0% Δp: 4.504%
26
+ 95.0% Δp: 2.060%
27
+ 90.0% Δp: 1.096%
28
+ 75.0% Δp: 0.141%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.153%
31
+ 10.0% Δp: -1.126%
32
+ 5.0% Δp: -2.115%
33
+ 1.0% Δp: -4.485%
34
+ 0.1% Δp: -8.494%
35
+ Minimum Δp: -55.308%
36
+ RMS Δp : 1.402 ± 0.011 %
37
+ Same top p: 96.985 ± 0.044 %
scores/deepseek-r1-distill-qwen-7b-q8_0.log ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 25.118649 ± 0.245079
3
+ Mean PPL(base) : 24.931431 ± 0.241228
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.91%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.007481 ± 0.000431
6
+ Mean PPL(Q)/PPL(base) : 1.007509 ± 0.000434
7
+ Mean PPL(Q)-PPL(base) : 0.187218 ± 0.011259
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.000361 ± 0.000002
11
+ Maximum KLD: 0.215854
12
+ 99.9% KLD: 0.004745
13
+ 99.0% KLD: 0.002203
14
+ 99.0% KLD: 0.002203
15
+ Median KLD: 0.000212
16
+ 10.0% KLD: 0.000002
17
+ 5.0% KLD: -0.000000
18
+ 1.0% KLD: -0.000014
19
+ Minimum KLD: -0.000127
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: 0.003 ± 0.001 %
23
+ Maximum Δp: 24.559%
24
+ 99.9% Δp: 2.974%
25
+ 99.0% Δp: 1.523%
26
+ 95.0% Δp: 0.686%
27
+ 90.0% Δp: 0.364%
28
+ 75.0% Δp: 0.050%
29
+ Median Δp: 0.000%
30
+ 25.0% Δp: -0.047%
31
+ 10.0% Δp: -0.356%
32
+ 5.0% Δp: -0.670%
33
+ 1.0% Δp: -1.484%
34
+ 0.1% Δp: -2.907%
35
+ Minimum Δp: -7.835%
36
+ RMS Δp : 0.468 ± 0.005 %
37
+ Same top p: 98.983 ± 0.026 %