ai-maker-space/llama381binstruct_summarize_short

Browse files

Files changed (5)

README.md +21 -21
adapter_config.json +4 -4
adapter_model.safetensors +1 -1
runs/Sep19_00-10-20_5f37362b6dc2/events.out.tfevents.1726704622.5f37362b6dc2.5459.0 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.8264
 ## Model description
@@ -52,26 +52,26 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch   | Step | Validation Loss |
 |:-------------:|:-------:|:----:|:---------------:|
-| 1.6287        | 1.3889  | 25   | 1.7133          |
-| 0.5957        | 2.7778  | 50   | 1.8331          |
-| 0.4299        | 4.1667  | 75   | 1.9275          |
-| 0.0747        | 5.5556  | 100  | 2.3055          |
-| 0.0526        | 6.9444  | 125  | 2.3188          |
-| 0.0305        | 8.3333  | 150  | 2.4725          |
-| 0.022         | 9.7222  | 175  | 2.4846          |
-| 0.0162        | 11.1111 | 200  | 2.5001          |
-| 0.0109        | 12.5    | 225  | 2.5456          |
-| 0.0043        | 13.8889 | 250  | 2.5991          |
-| 0.0032        | 15.2778 | 275  | 2.6984          |
-| 0.0024        | 16.6667 | 300  | 2.7443          |
-| 0.0023        | 18.0556 | 325  | 2.7712          |
-| 0.0018        | 19.4444 | 350  | 2.7900          |
-| 0.002         | 20.8333 | 375  | 2.8010          |
-| 0.0019        | 22.2222 | 400  | 2.8107          |
-| 0.001         | 23.6111 | 425  | 2.8172          |
-| 0.0014        | 25.0    | 450  | 2.8223          |
-| 0.0014        | 26.3889 | 475  | 2.8254          |
-| 0.0011        | 27.7778 | 500  | 2.8264          |
 ### Framework versions

 This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.9125
 ## Model description
 | Training Loss | Epoch   | Step | Validation Loss |
 |:-------------:|:-------:|:----:|:---------------:|
+| 1.7217        | 1.3158  | 25   | 1.7570          |
+| 0.6773        | 2.6316  | 50   | 1.7855          |
+| 0.3713        | 3.9474  | 75   | 1.8944          |
+| 0.1498        | 5.2632  | 100  | 2.2601          |
+| 0.0738        | 6.5789  | 125  | 2.3539          |
+| 0.0395        | 7.8947  | 150  | 2.4572          |
+| 0.03          | 9.2105  | 175  | 2.4119          |
+| 0.0179        | 10.5263 | 200  | 2.4960          |
+| 0.0154        | 11.8421 | 225  | 2.5468          |
+| 0.0054        | 13.1579 | 250  | 2.6392          |
+| 0.0041        | 14.4737 | 275  | 2.7146          |
+| 0.0027        | 15.7895 | 300  | 2.8038          |
+| 0.003         | 17.1053 | 325  | 2.8446          |
+| 0.0024        | 18.4211 | 350  | 2.8672          |
+| 0.0019        | 19.7368 | 375  | 2.8810          |
+| 0.0021        | 21.0526 | 400  | 2.8946          |
+| 0.0021        | 22.3684 | 425  | 2.9023          |
+| 0.0019        | 23.6842 | 450  | 2.9083          |
+| 0.0018        | 25.0    | 475  | 2.9117          |
+| 0.0017        | 26.3158 | 500  | 2.9125          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
     "down_proj",
-    "up_proj",
-    "gate_proj",
     "q_proj",
     "k_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "down_proj",
     "q_proj",
     "k_proj",
+    "o_proj",
+    "v_proj",
+    "gate_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a711710bf203b7b69854de286094e5ebb498e4a440bf47ea411824ab9aefcb6f
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ebfdd704afc3714afbeec2d5318998287cf85f7252f1cde96b6ba07c7a34358
 size 167832240

runs/Sep19_00-10-20_5f37362b6dc2/events.out.tfevents.1726704622.5f37362b6dc2.5459.0 ADDED Viewed

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a4bb304026fff2857d82f7dfaefbe55c2163e59fa993ed46f01caa5d5dc16cf
+size 22329

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60b66582196cc948411e61abb10ba60f548565a17a17b7027b24472c197c1c02
 size 5496

 version https://git-lfs.github.com/spec/v1
+oid sha256:75faeb5623168ecbee11f31aaf052eb01702b7888be82753a112b3e80cbcf09a
 size 5496