Generate perplexity and kld scores
Browse files- scores/deepseek-r1-distill-qwen-7b-iq3_m.log +37 -0
- scores/deepseek-r1-distill-qwen-7b-iq3_s.log +37 -0
- scores/deepseek-r1-distill-qwen-7b-iq4_nl.log +37 -0
- scores/deepseek-r1-distill-qwen-7b-q3_k_l.log +43 -0
- scores/deepseek-r1-distill-qwen-7b-q3_k_m.log +37 -0
- scores/deepseek-r1-distill-qwen-7b-q3_k_s.log +37 -0
- scores/deepseek-r1-distill-qwen-7b-q4_k_m.log +37 -0
- scores/deepseek-r1-distill-qwen-7b-q4_k_s.log +37 -0
- scores/deepseek-r1-distill-qwen-7b-q5_k_m.log +37 -0
- scores/deepseek-r1-distill-qwen-7b-q5_k_s.log +37 -0
- scores/deepseek-r1-distill-qwen-7b-q6_k.log +37 -0
- scores/deepseek-r1-distill-qwen-7b-q8_0.log +37 -0
scores/deepseek-r1-distill-qwen-7b-iq3_m.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 26.530303 ± 0.256822
|
3 |
+
Mean PPL(base) : 24.931431 ± 0.241228
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.58%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.062158 ± 0.001634
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.064131 ± 0.001738
|
7 |
+
Mean PPL(Q)-PPL(base) : 1.598873 ± 0.044813
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.110374 ± 0.000408
|
11 |
+
Maximum KLD: 4.856987
|
12 |
+
99.9% KLD: 1.554827
|
13 |
+
99.0% KLD: 0.706525
|
14 |
+
99.0% KLD: 0.706525
|
15 |
+
Median KLD: 0.063658
|
16 |
+
10.0% KLD: 0.001721
|
17 |
+
5.0% KLD: 0.000315
|
18 |
+
1.0% KLD: 0.000020
|
19 |
+
Minimum KLD: -0.000091
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -1.459 ± 0.021 %
|
23 |
+
Maximum Δp: 90.908%
|
24 |
+
99.9% Δp: 41.637%
|
25 |
+
99.0% Δp: 20.507%
|
26 |
+
95.0% Δp: 8.036%
|
27 |
+
90.0% Δp: 3.551%
|
28 |
+
75.0% Δp: 0.185%
|
29 |
+
Median Δp: -0.025%
|
30 |
+
25.0% Δp: -2.071%
|
31 |
+
10.0% Δp: -9.115%
|
32 |
+
5.0% Δp: -15.397%
|
33 |
+
1.0% Δp: -30.740%
|
34 |
+
0.1% Δp: -54.239%
|
35 |
+
Minimum Δp: -95.795%
|
36 |
+
RMS Δp : 8.130 ± 0.041 %
|
37 |
+
Same top p: 84.107 ± 0.095 %
|
scores/deepseek-r1-distill-qwen-7b-iq3_s.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 26.638550 ± 0.259075
|
3 |
+
Mean PPL(base) : 24.931431 ± 0.241228
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.51%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.066230 ± 0.001677
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.068473 ± 0.001792
|
7 |
+
Mean PPL(Q)-PPL(base) : 1.707120 ± 0.046752
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.115060 ± 0.000427
|
11 |
+
Maximum KLD: 6.862354
|
12 |
+
99.9% KLD: 1.668972
|
13 |
+
99.0% KLD: 0.747263
|
14 |
+
99.0% KLD: 0.747263
|
15 |
+
Median KLD: 0.065809
|
16 |
+
10.0% KLD: 0.001661
|
17 |
+
5.0% KLD: 0.000301
|
18 |
+
1.0% KLD: 0.000018
|
19 |
+
Minimum KLD: -0.000103
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -1.254 ± 0.021 %
|
23 |
+
Maximum Δp: 92.550%
|
24 |
+
99.9% Δp: 43.219%
|
25 |
+
99.0% Δp: 21.697%
|
26 |
+
95.0% Δp: 8.707%
|
27 |
+
90.0% Δp: 4.022%
|
28 |
+
75.0% Δp: 0.260%
|
29 |
+
Median Δp: -0.015%
|
30 |
+
25.0% Δp: -1.839%
|
31 |
+
10.0% Δp: -8.696%
|
32 |
+
5.0% Δp: -15.122%
|
33 |
+
1.0% Δp: -31.087%
|
34 |
+
0.1% Δp: -57.206%
|
35 |
+
Minimum Δp: -96.125%
|
36 |
+
RMS Δp : 8.252 ± 0.042 %
|
37 |
+
Same top p: 83.845 ± 0.095 %
|
scores/deepseek-r1-distill-qwen-7b-iq4_nl.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 25.645935 ± 0.251542
|
3 |
+
Mean PPL(base) : 24.931431 ± 0.241228
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.48%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.028256 ± 0.001007
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.028659 ± 0.001035
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.714505 ± 0.027258
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.035420 ± 0.000140
|
11 |
+
Maximum KLD: 3.406450
|
12 |
+
99.9% KLD: 0.527580
|
13 |
+
99.0% KLD: 0.238919
|
14 |
+
99.0% KLD: 0.238919
|
15 |
+
Median KLD: 0.019661
|
16 |
+
10.0% KLD: 0.000409
|
17 |
+
5.0% KLD: 0.000068
|
18 |
+
1.0% KLD: 0.000002
|
19 |
+
Minimum KLD: -0.000216
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.143 ± 0.012 %
|
23 |
+
Maximum Δp: 79.304%
|
24 |
+
99.9% Δp: 28.190%
|
25 |
+
99.0% Δp: 14.472%
|
26 |
+
95.0% Δp: 6.173%
|
27 |
+
90.0% Δp: 3.204%
|
28 |
+
75.0% Δp: 0.378%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.552%
|
31 |
+
10.0% Δp: -3.690%
|
32 |
+
5.0% Δp: -6.890%
|
33 |
+
1.0% Δp: -15.483%
|
34 |
+
0.1% Δp: -31.224%
|
35 |
+
Minimum Δp: -74.269%
|
36 |
+
RMS Δp : 4.584 ± 0.027 %
|
37 |
+
Same top p: 90.668 ± 0.075 %
|
scores/deepseek-r1-distill-qwen-7b-q3_k_l.log
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 26.027373 ± 0.255226
|
3 |
+
Mean PPL(base) : 24.931431 ± 0.241228
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.91%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.043020 ± 0.001445
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.043958 ± 0.001509
|
7 |
+
Mean PPL(Q)-PPL(base) : 1.095943 ± 0.039245
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.080142 ± 0.000319
|
11 |
+
Maximum KLD: 6.251925
|
12 |
+
99.9% KLD: 1.256165
|
13 |
+
99.0% KLD: 0.545236
|
14 |
+
99.0% KLD: 0.545236
|
15 |
+
Median KLD: 0.043817
|
16 |
+
10.0% KLD: 0.000973
|
17 |
+
5.0% KLD: 0.000158
|
18 |
+
1.0% KLD: 0.000006
|
19 |
+
Minimum KLD: -0.000183
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.308 ± 0.018 %
|
23 |
+
Maximum Δp: 90.671%
|
24 |
+
99.9% Δp: 40.078%
|
25 |
+
99.0% Δp: 20.806%
|
26 |
+
95.0% Δp: 9.213%
|
27 |
+
90.0% Δp: 4.792%
|
28 |
+
75.0% Δp: 0.558%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.862%
|
31 |
+
10.0% Δp: -5.664%
|
32 |
+
5.0% Δp: -10.659%
|
33 |
+
1.0% Δp: -25.039%
|
34 |
+
0.1% Δp: -47.819%
|
35 |
+
Minimum Δp: -89.317%
|
36 |
+
RMS Δp : 6.946 ± 0.038 %
|
37 |
+
Same top p: 86.343 ± 0.089 %
|
38 |
+
|
39 |
+
llama_perf_context_print: load time = 1821.07 ms
|
40 |
+
llama_perf_context_print: prompt eval time = 1019684.89 ms / 299008 tokens ( 3.41 ms per token, 293.24 tokens per second)
|
41 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
42 |
+
llama_perf_context_print: total time = 1045149.32 ms / 299009 tokens
|
43 |
+
ggml_metal_free: deallocating
|
scores/deepseek-r1-distill-qwen-7b-q3_k_m.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 25.889366 ± 0.253265
|
3 |
+
Mean PPL(base) : 24.931431 ± 0.241228
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.77%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.037703 ± 0.001531
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.038423 ± 0.001590
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.957936 ± 0.040635
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.091519 ± 0.000361
|
11 |
+
Maximum KLD: 8.082105
|
12 |
+
99.9% KLD: 1.367977
|
13 |
+
99.0% KLD: 0.621550
|
14 |
+
99.0% KLD: 0.621550
|
15 |
+
Median KLD: 0.049987
|
16 |
+
10.0% KLD: 0.001159
|
17 |
+
5.0% KLD: 0.000199
|
18 |
+
1.0% KLD: 0.000009
|
19 |
+
Minimum KLD: -0.000175
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.368 ± 0.019 %
|
23 |
+
Maximum Δp: 86.561%
|
24 |
+
99.9% Δp: 43.423%
|
25 |
+
99.0% Δp: 22.085%
|
26 |
+
95.0% Δp: 9.741%
|
27 |
+
90.0% Δp: 5.056%
|
28 |
+
75.0% Δp: 0.582%
|
29 |
+
Median Δp: -0.001%
|
30 |
+
25.0% Δp: -0.958%
|
31 |
+
10.0% Δp: -6.146%
|
32 |
+
5.0% Δp: -11.492%
|
33 |
+
1.0% Δp: -26.685%
|
34 |
+
0.1% Δp: -50.676%
|
35 |
+
Minimum Δp: -89.036%
|
36 |
+
RMS Δp : 7.422 ± 0.040 %
|
37 |
+
Same top p: 85.436 ± 0.091 %
|
scores/deepseek-r1-distill-qwen-7b-q3_k_s.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 26.552814 ± 0.258782
|
3 |
+
Mean PPL(base) : 24.931431 ± 0.241228
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 97.86%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.063006 ± 0.002011
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.065034 ± 0.002142
|
7 |
+
Mean PPL(Q)-PPL(base) : 1.621384 ± 0.054614
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.164859 ± 0.000610
|
11 |
+
Maximum KLD: 8.583980
|
12 |
+
99.9% KLD: 2.315967
|
13 |
+
99.0% KLD: 1.085407
|
14 |
+
99.0% KLD: 1.085407
|
15 |
+
Median KLD: 0.093793
|
16 |
+
10.0% KLD: 0.002759
|
17 |
+
5.0% KLD: 0.000509
|
18 |
+
1.0% KLD: 0.000027
|
19 |
+
Minimum KLD: -0.000053
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -1.105 ± 0.025 %
|
23 |
+
Maximum Δp: 95.542%
|
24 |
+
99.9% Δp: 53.440%
|
25 |
+
99.0% Δp: 27.342%
|
26 |
+
95.0% Δp: 11.606%
|
27 |
+
90.0% Δp: 5.754%
|
28 |
+
75.0% Δp: 0.494%
|
29 |
+
Median Δp: -0.008%
|
30 |
+
25.0% Δp: -1.857%
|
31 |
+
10.0% Δp: -9.691%
|
32 |
+
5.0% Δp: -17.127%
|
33 |
+
1.0% Δp: -37.275%
|
34 |
+
0.1% Δp: -65.081%
|
35 |
+
Minimum Δp: -94.606%
|
36 |
+
RMS Δp : 9.854 ± 0.049 %
|
37 |
+
Same top p: 80.956 ± 0.102 %
|
scores/deepseek-r1-distill-qwen-7b-q4_k_m.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 25.409863 ± 0.248331
|
3 |
+
Mean PPL(base) : 24.931431 ± 0.241228
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.58%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.019008 ± 0.000899
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.019190 ± 0.000916
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.478433 ± 0.023588
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.027115 ± 0.000104
|
11 |
+
Maximum KLD: 2.016436
|
12 |
+
99.9% KLD: 0.391155
|
13 |
+
99.0% KLD: 0.179995
|
14 |
+
99.0% KLD: 0.179995
|
15 |
+
Median KLD: 0.015400
|
16 |
+
10.0% KLD: 0.000310
|
17 |
+
5.0% KLD: 0.000046
|
18 |
+
1.0% KLD: 0.000000
|
19 |
+
Minimum KLD: -0.000275
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.073 ± 0.010 %
|
23 |
+
Maximum Δp: 66.267%
|
24 |
+
99.9% Δp: 25.309%
|
25 |
+
99.0% Δp: 12.706%
|
26 |
+
95.0% Δp: 5.590%
|
27 |
+
90.0% Δp: 2.959%
|
28 |
+
75.0% Δp: 0.362%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.466%
|
31 |
+
10.0% Δp: -3.166%
|
32 |
+
5.0% Δp: -5.937%
|
33 |
+
1.0% Δp: -13.474%
|
34 |
+
0.1% Δp: -26.235%
|
35 |
+
Minimum Δp: -66.500%
|
36 |
+
RMS Δp : 4.030 ± 0.024 %
|
37 |
+
Same top p: 91.745 ± 0.071 %
|
scores/deepseek-r1-distill-qwen-7b-q4_k_s.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 25.379497 ± 0.247973
|
3 |
+
Mean PPL(base) : 24.931431 ± 0.241228
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.52%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.017812 ± 0.000955
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.017972 ± 0.000972
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.448067 ± 0.024845
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.032033 ± 0.000122
|
11 |
+
Maximum KLD: 1.726200
|
12 |
+
99.9% KLD: 0.461810
|
13 |
+
99.0% KLD: 0.211016
|
14 |
+
99.0% KLD: 0.211016
|
15 |
+
Median KLD: 0.017953
|
16 |
+
10.0% KLD: 0.000386
|
17 |
+
5.0% KLD: 0.000063
|
18 |
+
1.0% KLD: 0.000001
|
19 |
+
Minimum KLD: -0.000295
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.088 ± 0.011 %
|
23 |
+
Maximum Δp: 67.370%
|
24 |
+
99.9% Δp: 27.270%
|
25 |
+
99.0% Δp: 13.595%
|
26 |
+
95.0% Δp: 5.977%
|
27 |
+
90.0% Δp: 3.182%
|
28 |
+
75.0% Δp: 0.383%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.511%
|
31 |
+
10.0% Δp: -3.408%
|
32 |
+
5.0% Δp: -6.399%
|
33 |
+
1.0% Δp: -14.558%
|
34 |
+
0.1% Δp: -29.089%
|
35 |
+
Minimum Δp: -68.450%
|
36 |
+
RMS Δp : 4.330 ± 0.025 %
|
37 |
+
Same top p: 91.049 ± 0.074 %
|
scores/deepseek-r1-distill-qwen-7b-q5_k_m.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 25.331908 ± 0.247609
|
3 |
+
Mean PPL(base) : 24.931431 ± 0.241228
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.81%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.015935 ± 0.000613
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.016063 ± 0.000623
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.400477 ± 0.016493
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.008227 ± 0.000037
|
11 |
+
Maximum KLD: 3.104945
|
12 |
+
99.9% KLD: 0.117209
|
13 |
+
99.0% KLD: 0.052437
|
14 |
+
99.0% KLD: 0.052437
|
15 |
+
Median KLD: 0.004830
|
16 |
+
10.0% KLD: 0.000092
|
17 |
+
5.0% KLD: 0.000012
|
18 |
+
1.0% KLD: -0.000002
|
19 |
+
Minimum KLD: -0.000180
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.032 ± 0.006 %
|
23 |
+
Maximum Δp: 38.164%
|
24 |
+
99.9% Δp: 13.916%
|
25 |
+
99.0% Δp: 6.928%
|
26 |
+
95.0% Δp: 3.133%
|
27 |
+
90.0% Δp: 1.652%
|
28 |
+
75.0% Δp: 0.209%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.248%
|
31 |
+
10.0% Δp: -1.741%
|
32 |
+
5.0% Δp: -3.275%
|
33 |
+
1.0% Δp: -7.358%
|
34 |
+
0.1% Δp: -14.696%
|
35 |
+
Minimum Δp: -60.848%
|
36 |
+
RMS Δp : 2.211 ± 0.014 %
|
37 |
+
Same top p: 95.381 ± 0.054 %
|
scores/deepseek-r1-distill-qwen-7b-q5_k_s.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 25.403544 ± 0.248578
|
3 |
+
Mean PPL(base) : 24.931431 ± 0.241228
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.79%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.018759 ± 0.000634
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.018936 ± 0.000646
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.472114 ± 0.017348
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.009320 ± 0.000039
|
11 |
+
Maximum KLD: 2.831377
|
12 |
+
99.9% KLD: 0.127923
|
13 |
+
99.0% KLD: 0.059484
|
14 |
+
99.0% KLD: 0.059484
|
15 |
+
Median KLD: 0.005393
|
16 |
+
10.0% KLD: 0.000103
|
17 |
+
5.0% KLD: 0.000013
|
18 |
+
1.0% KLD: -0.000002
|
19 |
+
Minimum KLD: -0.000203
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.016 ± 0.006 %
|
23 |
+
Maximum Δp: 42.542%
|
24 |
+
99.9% Δp: 15.234%
|
25 |
+
99.0% Δp: 7.440%
|
26 |
+
95.0% Δp: 3.334%
|
27 |
+
90.0% Δp: 1.787%
|
28 |
+
75.0% Δp: 0.232%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.256%
|
31 |
+
10.0% Δp: -1.792%
|
32 |
+
5.0% Δp: -3.371%
|
33 |
+
1.0% Δp: -7.776%
|
34 |
+
0.1% Δp: -15.189%
|
35 |
+
Minimum Δp: -60.684%
|
36 |
+
RMS Δp : 2.343 ± 0.015 %
|
37 |
+
Same top p: 95.099 ± 0.056 %
|
scores/deepseek-r1-distill-qwen-7b-q6_k.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 25.139045 ± 0.245198
|
3 |
+
Mean PPL(base) : 24.931431 ± 0.241228
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.87%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.008293 ± 0.000501
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.008327 ± 0.000505
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.207614 ± 0.013014
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.003335 ± 0.000014
|
11 |
+
Maximum KLD: 1.228119
|
12 |
+
99.9% KLD: 0.039909
|
13 |
+
99.0% KLD: 0.019418
|
14 |
+
99.0% KLD: 0.019418
|
15 |
+
Median KLD: 0.002067
|
16 |
+
10.0% KLD: 0.000037
|
17 |
+
5.0% KLD: 0.000005
|
18 |
+
1.0% KLD: -0.000004
|
19 |
+
Minimum KLD: -0.000159
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: -0.007 ± 0.004 %
|
23 |
+
Maximum Δp: 22.155%
|
24 |
+
99.9% Δp: 8.711%
|
25 |
+
99.0% Δp: 4.504%
|
26 |
+
95.0% Δp: 2.060%
|
27 |
+
90.0% Δp: 1.096%
|
28 |
+
75.0% Δp: 0.141%
|
29 |
+
Median Δp: -0.000%
|
30 |
+
25.0% Δp: -0.153%
|
31 |
+
10.0% Δp: -1.126%
|
32 |
+
5.0% Δp: -2.115%
|
33 |
+
1.0% Δp: -4.485%
|
34 |
+
0.1% Δp: -8.494%
|
35 |
+
Minimum Δp: -55.308%
|
36 |
+
RMS Δp : 1.402 ± 0.011 %
|
37 |
+
Same top p: 96.985 ± 0.044 %
|
scores/deepseek-r1-distill-qwen-7b-q8_0.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
====== Perplexity statistics ======
|
2 |
+
Mean PPL(Q) : 25.118649 ± 0.245079
|
3 |
+
Mean PPL(base) : 24.931431 ± 0.241228
|
4 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.91%
|
5 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.007481 ± 0.000431
|
6 |
+
Mean PPL(Q)/PPL(base) : 1.007509 ± 0.000434
|
7 |
+
Mean PPL(Q)-PPL(base) : 0.187218 ± 0.011259
|
8 |
+
|
9 |
+
====== KL divergence statistics ======
|
10 |
+
Mean KLD: 0.000361 ± 0.000002
|
11 |
+
Maximum KLD: 0.215854
|
12 |
+
99.9% KLD: 0.004745
|
13 |
+
99.0% KLD: 0.002203
|
14 |
+
99.0% KLD: 0.002203
|
15 |
+
Median KLD: 0.000212
|
16 |
+
10.0% KLD: 0.000002
|
17 |
+
5.0% KLD: -0.000000
|
18 |
+
1.0% KLD: -0.000014
|
19 |
+
Minimum KLD: -0.000127
|
20 |
+
|
21 |
+
====== Token probability statistics ======
|
22 |
+
Mean Δp: 0.003 ± 0.001 %
|
23 |
+
Maximum Δp: 24.559%
|
24 |
+
99.9% Δp: 2.974%
|
25 |
+
99.0% Δp: 1.523%
|
26 |
+
95.0% Δp: 0.686%
|
27 |
+
90.0% Δp: 0.364%
|
28 |
+
75.0% Δp: 0.050%
|
29 |
+
Median Δp: 0.000%
|
30 |
+
25.0% Δp: -0.047%
|
31 |
+
10.0% Δp: -0.356%
|
32 |
+
5.0% Δp: -0.670%
|
33 |
+
1.0% Δp: -1.484%
|
34 |
+
0.1% Δp: -2.907%
|
35 |
+
Minimum Δp: -7.835%
|
36 |
+
RMS Δp : 0.468 ± 0.005 %
|
37 |
+
Same top p: 98.983 ± 0.026 %
|