|
{ |
|
"best_metric": 80.9666820924435, |
|
"best_model_checkpoint": "/root/turkic_qa/tr_kaz_models/tr_kaz_xlm_roberta_base_squad_model/checkpoint-5243", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 7490, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 749, |
|
"train_exact_match": 58.74125874125874, |
|
"train_f1": 76.91933391150722, |
|
"train_runtime": 17.3698, |
|
"train_samples_per_second": 87.508, |
|
"train_steps_per_second": 3.166 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 24.767520904541016, |
|
"learning_rate": 5e-06, |
|
"loss": 1.366, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 56.03125, |
|
"eval_f1": 75.5726701661797, |
|
"eval_runtime": 52.3069, |
|
"eval_samples_per_second": 88.287, |
|
"eval_steps_per_second": 3.154, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1498, |
|
"train_exact_match": 65.33466533466533, |
|
"train_f1": 81.16405259356212, |
|
"train_runtime": 16.8317, |
|
"train_samples_per_second": 88.048, |
|
"train_steps_per_second": 3.149 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 22.722991943359375, |
|
"learning_rate": 1e-05, |
|
"loss": 1.0296, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 60.6875, |
|
"eval_f1": 77.7357851907795, |
|
"eval_runtime": 52.4104, |
|
"eval_samples_per_second": 88.112, |
|
"eval_steps_per_second": 3.148, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2247, |
|
"train_exact_match": 74.52547452547452, |
|
"train_f1": 87.61463327451462, |
|
"train_runtime": 16.7088, |
|
"train_samples_per_second": 89.593, |
|
"train_steps_per_second": 3.232 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 18.895648956298828, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 0.8407, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 63.0625, |
|
"eval_f1": 79.25734099418638, |
|
"eval_runtime": 51.9473, |
|
"eval_samples_per_second": 88.898, |
|
"eval_steps_per_second": 3.176, |
|
"step": 2247 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2996, |
|
"train_exact_match": 75.02497502497502, |
|
"train_f1": 88.1924695005796, |
|
"train_runtime": 16.7212, |
|
"train_samples_per_second": 90.065, |
|
"train_steps_per_second": 3.229 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 21.92975425720215, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.6761, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 64.3125, |
|
"eval_f1": 80.41214611674394, |
|
"eval_runtime": 51.1453, |
|
"eval_samples_per_second": 90.292, |
|
"eval_steps_per_second": 3.226, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3745, |
|
"train_exact_match": 80.71928071928072, |
|
"train_f1": 91.80017345637452, |
|
"train_runtime": 16.6722, |
|
"train_samples_per_second": 89.73, |
|
"train_steps_per_second": 3.239 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 10.589529991149902, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.5642, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 65.0625, |
|
"eval_f1": 80.50496259716563, |
|
"eval_runtime": 51.2576, |
|
"eval_samples_per_second": 90.094, |
|
"eval_steps_per_second": 3.219, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 4494, |
|
"train_exact_match": 82.71728271728271, |
|
"train_f1": 92.8088019961768, |
|
"train_runtime": 16.7647, |
|
"train_samples_per_second": 88.997, |
|
"train_steps_per_second": 3.221 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 17.399965286254883, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4769, |
|
"step": 4494 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 65.1875, |
|
"eval_f1": 80.88709112615079, |
|
"eval_runtime": 51.1815, |
|
"eval_samples_per_second": 90.228, |
|
"eval_steps_per_second": 3.224, |
|
"step": 4494 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 5243, |
|
"train_exact_match": 84.41558441558442, |
|
"train_f1": 93.70818045593467, |
|
"train_runtime": 16.3897, |
|
"train_samples_per_second": 90.118, |
|
"train_steps_per_second": 3.234 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 15.382094383239746, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.4089, |
|
"step": 5243 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 65.21875, |
|
"eval_f1": 80.9666820924435, |
|
"eval_runtime": 51.2038, |
|
"eval_samples_per_second": 90.189, |
|
"eval_steps_per_second": 3.222, |
|
"step": 5243 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 5992, |
|
"train_exact_match": 84.41558441558442, |
|
"train_f1": 94.00170296456992, |
|
"train_runtime": 17.5397, |
|
"train_samples_per_second": 87.573, |
|
"train_steps_per_second": 3.136 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 48.553558349609375, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.3646, |
|
"step": 5992 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 64.71875, |
|
"eval_f1": 80.35415857951902, |
|
"eval_runtime": 52.7202, |
|
"eval_samples_per_second": 87.594, |
|
"eval_steps_per_second": 3.13, |
|
"step": 5992 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 6741, |
|
"train_exact_match": 87.61238761238761, |
|
"train_f1": 95.10743821560438, |
|
"train_runtime": 16.7145, |
|
"train_samples_per_second": 88.008, |
|
"train_steps_per_second": 3.171 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 54.76875305175781, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.3226, |
|
"step": 6741 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 65.40625, |
|
"eval_f1": 80.62880989809192, |
|
"eval_runtime": 52.4199, |
|
"eval_samples_per_second": 88.096, |
|
"eval_steps_per_second": 3.148, |
|
"step": 6741 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 7490, |
|
"train_exact_match": 88.1118881118881, |
|
"train_f1": 95.35905521584445, |
|
"train_runtime": 16.8298, |
|
"train_samples_per_second": 88.771, |
|
"train_steps_per_second": 3.209 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 20.260765075683594, |
|
"learning_rate": 0.0, |
|
"loss": 0.2978, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 65.59375, |
|
"eval_f1": 80.93066187820529, |
|
"eval_runtime": 51.2112, |
|
"eval_samples_per_second": 90.176, |
|
"eval_steps_per_second": 3.222, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 7490, |
|
"total_flos": 4.108760851295232e+16, |
|
"train_loss": 0.6347312815198911, |
|
"train_runtime": 4825.6442, |
|
"train_samples_per_second": 43.447, |
|
"train_steps_per_second": 1.552 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 7490, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 4.108760851295232e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|