eaddario committed on
Commit
1f75c4c
·
1 Parent(s): 554e5f2

Generate perplexity and kld scores

Browse files
scores/Dolphin3.0-R1-Mistral-24B-IQ3_M.log ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 26.172864 ± 0.256006
3
+ Mean PPL(base) : 23.352232 ± 0.220841
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.01%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.114031 ± 0.001391
6
+ Mean PPL(Q)/PPL(base) : 1.120786 ± 0.001559
7
+ Mean PPL(Q)-PPL(base) : 2.820631 ± 0.048526
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.078326 ± 0.000305
11
+ Maximum KLD: 7.825869
12
+ 99.9% KLD: 1.127171
13
+ 99.0% KLD: 0.520127
14
+ 99.0% KLD: 0.520127
15
+ Median KLD: 0.038346
16
+ 10.0% KLD: 0.000902
17
+ 5.0% KLD: 0.000207
18
+ 1.0% KLD: 0.000001
19
+ Minimum KLD: -0.000505
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.107 ± 0.018 %
23
+ Maximum Δp: 86.241%
24
+ 99.9% Δp: 39.725%
25
+ 99.0% Δp: 22.115%
26
+ 95.0% Δp: 9.997%
27
+ 90.0% Δp: 5.175%
28
+ 75.0% Δp: 0.626%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.677%
31
+ 10.0% Δp: -5.458%
32
+ 5.0% Δp: -10.553%
33
+ 1.0% Δp: -23.853%
34
+ 0.1% Δp: -43.880%
35
+ Minimum Δp: -97.767%
36
+ RMS Δp : 6.900 ± 0.035 %
37
+ Same top p: 88.021 ± 0.083 %
38
+
39
+ llama_perf_context_print: load time = 82098.99 ms
40
+ llama_perf_context_print: prompt eval time = 1718829.57 ms / 304128 tokens ( 5.65 ms per token, 176.94 tokens per second)
41
+ llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
42
+ llama_perf_context_print: total time = 1775127.52 ms / 304129 tokens
scores/Dolphin3.0-R1-Mistral-24B-IQ3_S.log ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 26.143038 ± 0.254443
3
+ Mean PPL(base) : 23.352232 ± 0.220841
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 98.95%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.112890 ± 0.001420
6
+ Mean PPL(Q)/PPL(base) : 1.119509 ± 0.001590
7
+ Mean PPL(Q)-PPL(base) : 2.790806 ± 0.048102
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.082486 ± 0.000324
11
+ Maximum KLD: 8.223216
12
+ 99.9% KLD: 1.163796
13
+ 99.0% KLD: 0.554553
14
+ 99.0% KLD: 0.554553
15
+ Median KLD: 0.040744
16
+ 10.0% KLD: 0.000979
17
+ 5.0% KLD: 0.000241
18
+ 1.0% KLD: 0.000008
19
+ Minimum KLD: -0.000550
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.290 ± 0.018 %
23
+ Maximum Δp: 83.668%
24
+ 99.9% Δp: 39.492%
25
+ 99.0% Δp: 22.349%
26
+ 95.0% Δp: 10.000%
27
+ 90.0% Δp: 4.974%
28
+ 75.0% Δp: 0.538%
29
+ Median Δp: -0.001%
30
+ 25.0% Δp: -0.805%
31
+ 10.0% Δp: -5.962%
32
+ 5.0% Δp: -11.458%
33
+ 1.0% Δp: -25.491%
34
+ 0.1% Δp: -47.606%
35
+ Minimum Δp: -94.438%
36
+ RMS Δp : 7.163 ± 0.036 %
37
+ Same top p: 87.635 ± 0.085 %
38
+
39
+ llama_perf_context_print: load time = 80439.51 ms
40
+ llama_perf_context_print: prompt eval time = 1705180.19 ms / 304128 tokens ( 5.61 ms per token, 178.36 tokens per second)
41
+ llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
42
+ llama_perf_context_print: total time = 1852832.45 ms / 304129 tokens
scores/Dolphin3.0-R1-Mistral-24B-IQ4_NL.log ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 23.491557 ± 0.221754
3
+ Mean PPL(base) : 23.352232 ± 0.220841
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.65%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.005948 ± 0.000786
6
+ Mean PPL(Q)/PPL(base) : 1.005966 ± 0.000791
7
+ Mean PPL(Q)-PPL(base) : 0.139324 ± 0.018431
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.025519 ± 0.000102
11
+ Maximum KLD: 2.251977
12
+ 99.9% KLD: 0.379603
13
+ 99.0% KLD: 0.171840
14
+ 99.0% KLD: 0.171840
15
+ Median KLD: 0.012414
16
+ 10.0% KLD: 0.000339
17
+ 5.0% KLD: 0.000079
18
+ 1.0% KLD: -0.000015
19
+ Minimum KLD: -0.000684
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.210 ± 0.010 %
23
+ Maximum Δp: 56.116%
24
+ 99.9% Δp: 23.081%
25
+ 99.0% Δp: 12.242%
26
+ 95.0% Δp: 5.439%
27
+ 90.0% Δp: 2.734%
28
+ 75.0% Δp: 0.282%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.493%
31
+ 10.0% Δp: -3.448%
32
+ 5.0% Δp: -6.500%
33
+ 1.0% Δp: -14.293%
34
+ 0.1% Δp: -27.100%
35
+ Minimum Δp: -67.311%
36
+ RMS Δp : 4.044 ± 0.022 %
37
+ Same top p: 92.678 ± 0.067 %
38
+
39
+ llama_perf_context_print: load time = 103365.78 ms
40
+ llama_perf_context_print: prompt eval time = 1896464.64 ms / 304128 tokens ( 6.24 ms per token, 160.37 tokens per second)
41
+ llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
42
+ llama_perf_context_print: total time = 1956813.06 ms / 304129 tokens
scores/Dolphin3.0-R1-Mistral-24B-Q3_K_L.log ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 24.550639 ± 0.235362
3
+ Mean PPL(base) : 23.352232 ± 0.220841
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.30%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.050045 ± 0.001135
6
+ Mean PPL(Q)/PPL(base) : 1.051319 ± 0.001194
7
+ Mean PPL(Q)-PPL(base) : 1.198406 ± 0.030666
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.053089 ± 0.000213
11
+ Maximum KLD: 4.851488
12
+ 99.9% KLD: 0.770215
13
+ 99.0% KLD: 0.362664
14
+ 99.0% KLD: 0.362664
15
+ Median KLD: 0.025806
16
+ 10.0% KLD: 0.000577
17
+ 5.0% KLD: 0.000120
18
+ 1.0% KLD: -0.000018
19
+ Minimum KLD: -0.000444
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.115 ± 0.015 %
23
+ Maximum Δp: 82.459%
24
+ 99.9% Δp: 33.908%
25
+ 99.0% Δp: 18.275%
26
+ 95.0% Δp: 8.313%
27
+ 90.0% Δp: 4.302%
28
+ 75.0% Δp: 0.536%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.557%
31
+ 10.0% Δp: -4.600%
32
+ 5.0% Δp: -8.973%
33
+ 1.0% Δp: -20.298%
34
+ 0.1% Δp: -38.592%
35
+ Minimum Δp: -79.558%
36
+ RMS Δp : 5.840 ± 0.030 %
37
+ Same top p: 89.673 ± 0.078 %
38
+
39
+ llama_perf_context_print: load time = 95100.93 ms
40
+ llama_perf_context_print: prompt eval time = 1924525.06 ms / 304128 tokens ( 6.33 ms per token, 158.03 tokens per second)
41
+ llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
42
+ llama_perf_context_print: total time = 2042841.27 ms / 304129 tokens
scores/Dolphin3.0-R1-Mistral-24B-Q3_K_M.log ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 24.806925 ± 0.237610
3
+ Mean PPL(base) : 23.352232 ± 0.220841
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.16%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.060430 ± 0.001239
6
+ Mean PPL(Q)/PPL(base) : 1.062293 ± 0.001316
7
+ Mean PPL(Q)-PPL(base) : 1.454692 ± 0.034094
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.063828 ± 0.000254
11
+ Maximum KLD: 4.249581
12
+ 99.9% KLD: 0.920882
13
+ 99.0% KLD: 0.436885
14
+ 99.0% KLD: 0.436885
15
+ Median KLD: 0.031026
16
+ 10.0% KLD: 0.000750
17
+ 5.0% KLD: 0.000177
18
+ 1.0% KLD: 0.000000
19
+ Minimum KLD: -0.000455
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.257 ± 0.016 %
23
+ Maximum Δp: 73.489%
24
+ 99.9% Δp: 36.078%
25
+ 99.0% Δp: 19.828%
26
+ 95.0% Δp: 8.789%
27
+ 90.0% Δp: 4.486%
28
+ 75.0% Δp: 0.496%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.687%
31
+ 10.0% Δp: -5.257%
32
+ 5.0% Δp: -10.230%
33
+ 1.0% Δp: -22.714%
34
+ 0.1% Δp: -43.074%
35
+ Minimum Δp: -90.990%
36
+ RMS Δp : 6.411 ± 0.033 %
37
+ Same top p: 88.802 ± 0.081 %
38
+
39
+ llama_perf_context_print: load time = 87933.48 ms
40
+ llama_perf_context_print: prompt eval time = 1845252.27 ms / 304128 tokens ( 6.07 ms per token, 164.82 tokens per second)
41
+ llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
42
+ llama_perf_context_print: total time = 1979075.22 ms / 304129 tokens
scores/Dolphin3.0-R1-Mistral-24B-Q3_K_S.log ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 39.396539 ± 0.409082
3
+ Mean PPL(base) : 23.352232 ± 0.220841
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 94.50%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.522985 ± 0.003415
6
+ Mean PPL(Q)/PPL(base) : 1.687057 ± 0.005762
7
+ Mean PPL(Q)-PPL(base) : 16.044307 ± 0.213016
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.448856 ± 0.001660
11
+ Maximum KLD: 11.228906
12
+ 99.9% KLD: 5.432167
13
+ 99.0% KLD: 3.097294
14
+ 99.0% KLD: 3.097294
15
+ Median KLD: 0.213731
16
+ 10.0% KLD: 0.005922
17
+ 5.0% KLD: 0.001612
18
+ 1.0% KLD: 0.000166
19
+ Minimum KLD: -0.000425
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -2.648 ± 0.043 %
23
+ Maximum Δp: 96.523%
24
+ 99.9% Δp: 66.438%
25
+ 99.0% Δp: 40.723%
26
+ 95.0% Δp: 18.234%
27
+ 90.0% Δp: 8.706%
28
+ 75.0% Δp: 0.595%
29
+ Median Δp: -0.023%
30
+ 25.0% Δp: -2.797%
31
+ 10.0% Δp: -17.572%
32
+ 5.0% Δp: -33.515%
33
+ 1.0% Δp: -71.845%
34
+ 0.1% Δp: -94.533%
35
+ Minimum Δp: -99.575%
36
+ RMS Δp : 16.773 ± 0.071 %
37
+ Same top p: 74.363 ± 0.112 %
38
+
39
+ llama_perf_context_print: load time = 79800.46 ms
40
+ llama_perf_context_print: prompt eval time = 1798108.56 ms / 304128 tokens ( 5.91 ms per token, 169.14 tokens per second)
41
+ llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
42
+ llama_perf_context_print: total time = 1960563.16 ms / 304129 tokens
scores/Dolphin3.0-R1-Mistral-24B-Q4_K_M.log ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 24.940671 ± 0.241913
3
+ Mean PPL(base) : 23.352232 ± 0.220841
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.60%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.065807 ± 0.000886
6
+ Mean PPL(Q)/PPL(base) : 1.068021 ± 0.000947
7
+ Mean PPL(Q)-PPL(base) : 1.588438 ± 0.029452
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.030244 ± 0.000124
11
+ Maximum KLD: 2.276901
12
+ 99.9% KLD: 0.467041
13
+ 99.0% KLD: 0.202787
14
+ 99.0% KLD: 0.202787
15
+ Median KLD: 0.014692
16
+ 10.0% KLD: 0.000262
17
+ 5.0% KLD: 0.000030
18
+ 1.0% KLD: -0.000070
19
+ Minimum KLD: -0.000565
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: 0.252 ± 0.011 %
23
+ Maximum Δp: 56.165%
24
+ 99.9% Δp: 26.367%
25
+ 99.0% Δp: 14.890%
26
+ 95.0% Δp: 7.165%
27
+ 90.0% Δp: 3.916%
28
+ 75.0% Δp: 0.648%
29
+ Median Δp: 0.000%
30
+ 25.0% Δp: -0.249%
31
+ 10.0% Δp: -2.747%
32
+ 5.0% Δp: -5.791%
33
+ 1.0% Δp: -14.128%
34
+ 0.1% Δp: -27.579%
35
+ Minimum Δp: -73.858%
36
+ RMS Δp : 4.403 ± 0.023 %
37
+ Same top p: 92.309 ± 0.068 %
38
+
39
+ llama_perf_context_print: load time = 288598.22 ms
40
+ llama_perf_context_print: prompt eval time = 1999513.57 ms / 304128 tokens ( 6.57 ms per token, 152.10 tokens per second)
41
+ llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
42
+ llama_perf_context_print: total time = 2559785.48 ms / 304129 tokens
scores/Dolphin3.0-R1-Mistral-24B-Q4_K_S.log ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 23.315818 ± 0.220552
3
+ Mean PPL(base) : 23.352232 ± 0.220841
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.50%
5
+ Mean ln(PPL(Q)/PPL(base)) : -0.001561 ± 0.000949
6
+ Mean PPL(Q)/PPL(base) : 0.998441 ± 0.000948
7
+ Mean PPL(Q)-PPL(base) : -0.036415 ± 0.022157
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.037680 ± 0.000153
11
+ Maximum KLD: 5.026946
12
+ 99.9% KLD: 0.544697
13
+ 99.0% KLD: 0.251806
14
+ 99.0% KLD: 0.251806
15
+ Median KLD: 0.018222
16
+ 10.0% KLD: 0.000471
17
+ 5.0% KLD: 0.000107
18
+ 1.0% KLD: -0.000012
19
+ Minimum KLD: -0.000554
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: 0.151 ± 0.012 %
23
+ Maximum Δp: 80.430%
24
+ 99.9% Δp: 29.435%
25
+ 99.0% Δp: 16.228%
26
+ 95.0% Δp: 7.580%
27
+ 90.0% Δp: 4.031%
28
+ 75.0% Δp: 0.584%
29
+ Median Δp: 0.000%
30
+ 25.0% Δp: -0.350%
31
+ 10.0% Δp: -3.338%
32
+ 5.0% Δp: -6.778%
33
+ 1.0% Δp: -15.802%
34
+ 0.1% Δp: -30.165%
35
+ Minimum Δp: -63.405%
36
+ RMS Δp : 4.833 ± 0.025 %
37
+ Same top p: 91.410 ± 0.072 %
38
+
39
+ llama_perf_context_print: load time = 103826.14 ms
40
+ llama_perf_context_print: prompt eval time = 1941845.24 ms / 304128 tokens ( 6.38 ms per token, 156.62 tokens per second)
41
+ llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
42
+ llama_perf_context_print: total time = 2633698.11 ms / 304129 tokens
scores/Dolphin3.0-R1-Mistral-24B-Q5_K_M.log ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 24.304562 ± 0.233045
3
+ Mean PPL(base) : 23.352232 ± 0.220841
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.74%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.039971 ± 0.000693
6
+ Mean PPL(Q)/PPL(base) : 1.040781 ± 0.000721
7
+ Mean PPL(Q)-PPL(base) : 0.952329 ± 0.020290
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.019001 ± 0.000084
11
+ Maximum KLD: 2.667997
12
+ 99.9% KLD: 0.307100
13
+ 99.0% KLD: 0.131595
14
+ 99.0% KLD: 0.131595
15
+ Median KLD: 0.009005
16
+ 10.0% KLD: 0.000235
17
+ 5.0% KLD: 0.000046
18
+ 1.0% KLD: -0.000033
19
+ Minimum KLD: -0.000534
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.033 ± 0.009 %
23
+ Maximum Δp: 53.497%
24
+ 99.9% Δp: 21.400%
25
+ 99.0% Δp: 11.091%
26
+ 95.0% Δp: 4.986%
27
+ 90.0% Δp: 2.607%
28
+ 75.0% Δp: 0.323%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.333%
31
+ 10.0% Δp: -2.621%
32
+ 5.0% Δp: -5.142%
33
+ 1.0% Δp: -11.765%
34
+ 0.1% Δp: -23.329%
35
+ Minimum Δp: -88.396%
36
+ RMS Δp : 3.498 ± 0.021 %
37
+ Same top p: 93.797 ± 0.062 %
38
+
39
+ llama_perf_context_print: load time = 128672.98 ms
40
+ llama_perf_context_print: prompt eval time = 2206637.86 ms / 304128 tokens ( 7.26 ms per token, 137.82 tokens per second)
41
+ llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
42
+ llama_perf_context_print: total time = 2358244.53 ms / 304129 tokens
scores/Dolphin3.0-R1-Mistral-24B-Q6_K.log ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 24.415348 ± 0.234385
3
+ Mean PPL(base) : 23.352232 ± 0.220841
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.79%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.044519 ± 0.000634
6
+ Mean PPL(Q)/PPL(base) : 1.045525 ± 0.000663
7
+ Mean PPL(Q)-PPL(base) : 1.063116 ± 0.020031
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.015503 ± 0.000075
11
+ Maximum KLD: 3.272221
12
+ 99.9% KLD: 0.267841
13
+ 99.0% KLD: 0.107995
14
+ 99.0% KLD: 0.107995
15
+ Median KLD: 0.007291
16
+ 10.0% KLD: 0.000184
17
+ 5.0% KLD: 0.000033
18
+ 1.0% KLD: -0.000041
19
+ Minimum KLD: -0.000471
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: -0.008 ± 0.008 %
23
+ Maximum Δp: 80.786%
24
+ 99.9% Δp: 18.971%
25
+ 99.0% Δp: 10.037%
26
+ 95.0% Δp: 4.585%
27
+ 90.0% Δp: 2.431%
28
+ 75.0% Δp: 0.317%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.289%
31
+ 10.0% Δp: -2.327%
32
+ 5.0% Δp: -4.593%
33
+ 1.0% Δp: -10.622%
34
+ 0.1% Δp: -22.631%
35
+ Minimum Δp: -95.612%
36
+ RMS Δp : 3.201 ± 0.022 %
37
+ Same top p: 94.448 ± 0.059 %
38
+
39
+ llama_perf_context_print: load time = 134148.25 ms
40
+ llama_perf_context_print: prompt eval time = 2502976.05 ms / 304128 tokens ( 8.23 ms per token, 121.51 tokens per second)
41
+ llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
42
+ llama_perf_context_print: total time = 3009280.06 ms / 304129 tokens
scores/Dolphin3.0-R1-Mistral-24B-Q8_0.log ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ====== Perplexity statistics ======
2
+ Mean PPL(Q) : 23.519878 ± 0.223215
3
+ Mean PPL(base) : 23.352232 ± 0.220841
4
+ Cor(ln(PPL(Q)), ln(PPL(base))): 99.96%
5
+ Mean ln(PPL(Q)/PPL(base)) : 0.007153 ± 0.000271
6
+ Mean PPL(Q)/PPL(base) : 1.007179 ± 0.000273
7
+ Mean PPL(Q)-PPL(base) : 0.167646 ± 0.006738
8
+
9
+ ====== KL divergence statistics ======
10
+ Mean KLD: 0.002318 ± 0.000013
11
+ Maximum KLD: 0.848010
12
+ 99.9% KLD: 0.043823
13
+ 99.0% KLD: 0.015801
14
+ 99.0% KLD: 0.015801
15
+ Median KLD: 0.001166
16
+ 10.0% KLD: 0.000025
17
+ 5.0% KLD: -0.000001
18
+ 1.0% KLD: -0.000063
19
+ Minimum KLD: -0.000401
20
+
21
+ ====== Token probability statistics ======
22
+ Mean Δp: 0.006 ± 0.003 %
23
+ Maximum Δp: 26.906%
24
+ 99.9% Δp: 7.636%
25
+ 99.0% Δp: 4.056%
26
+ 95.0% Δp: 1.919%
27
+ 90.0% Δp: 0.994%
28
+ 75.0% Δp: 0.127%
29
+ Median Δp: -0.000%
30
+ 25.0% Δp: -0.119%
31
+ 10.0% Δp: -0.958%
32
+ 5.0% Δp: -1.871%
33
+ 1.0% Δp: -4.090%
34
+ 0.1% Δp: -7.817%
35
+ Minimum Δp: -20.640%
36
+ RMS Δp : 1.266 ± 0.008 %
37
+ Same top p: 97.778 ± 0.038 %
38
+
39
+ llama_perf_context_print: load time = 3238.55 ms
40
+ llama_perf_context_print: prompt eval time = 2842839.16 ms / 304128 tokens ( 9.35 ms per token, 106.98 tokens per second)
41
+ llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
42
+ llama_perf_context_print: total time = 2975924.16 ms / 304129 tokens