|
{ |
|
"best_metric": 0.7517201834862385, |
|
"best_model_checkpoint": "/mnt/cachenew/gutianle/llama2-scorer-non-existent/checkpoint-1800", |
|
"epoch": 0.9174311926605505, |
|
"eval_steps": 100, |
|
"global_step": 1800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001, |
|
"loss": 1.409, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.4294724770642202, |
|
"eval_f1-score": 0.41676886432858656, |
|
"eval_loss": 1.2865188121795654, |
|
"eval_precision": 0.5191394843763429, |
|
"eval_recall": 0.4294724770642202, |
|
"eval_runtime": 100.35, |
|
"eval_samples_per_second": 17.379, |
|
"eval_steps_per_second": 2.172, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0433, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.6439220183486238, |
|
"eval_f1-score": 0.6287928936906987, |
|
"eval_loss": 0.9169857501983643, |
|
"eval_precision": 0.6326166016462452, |
|
"eval_recall": 0.6439220183486238, |
|
"eval_runtime": 99.1386, |
|
"eval_samples_per_second": 17.592, |
|
"eval_steps_per_second": 2.199, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9457, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.573394495412844, |
|
"eval_f1-score": 0.5780536942349181, |
|
"eval_loss": 0.9270332455635071, |
|
"eval_precision": 0.6119488919407539, |
|
"eval_recall": 0.573394495412844, |
|
"eval_runtime": 99.5145, |
|
"eval_samples_per_second": 17.525, |
|
"eval_steps_per_second": 2.191, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8176, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.6702981651376146, |
|
"eval_f1-score": 0.6704055625650827, |
|
"eval_loss": 0.8484154343605042, |
|
"eval_precision": 0.6748752577010106, |
|
"eval_recall": 0.6702981651376146, |
|
"eval_runtime": 99.7876, |
|
"eval_samples_per_second": 17.477, |
|
"eval_steps_per_second": 2.185, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7769, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.6955275229357798, |
|
"eval_f1-score": 0.6937246764104357, |
|
"eval_loss": 0.8070082068443298, |
|
"eval_precision": 0.6933530425142568, |
|
"eval_recall": 0.6955275229357798, |
|
"eval_runtime": 99.3769, |
|
"eval_samples_per_second": 17.549, |
|
"eval_steps_per_second": 2.194, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7069, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.6961009174311926, |
|
"eval_f1-score": 0.6962474525389031, |
|
"eval_loss": 0.8322665095329285, |
|
"eval_precision": 0.7002298404175326, |
|
"eval_recall": 0.6961009174311926, |
|
"eval_runtime": 99.2464, |
|
"eval_samples_per_second": 17.572, |
|
"eval_steps_per_second": 2.197, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6742, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.7276376146788991, |
|
"eval_f1-score": 0.7163514624677091, |
|
"eval_loss": 0.7878208160400391, |
|
"eval_precision": 0.731064800275646, |
|
"eval_recall": 0.7276376146788991, |
|
"eval_runtime": 99.1023, |
|
"eval_samples_per_second": 17.598, |
|
"eval_steps_per_second": 2.2, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7139, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.694954128440367, |
|
"eval_f1-score": 0.7017678594633088, |
|
"eval_loss": 0.7812691926956177, |
|
"eval_precision": 0.7171557777434424, |
|
"eval_recall": 0.694954128440367, |
|
"eval_runtime": 99.2742, |
|
"eval_samples_per_second": 17.568, |
|
"eval_steps_per_second": 2.196, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7566, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.6909403669724771, |
|
"eval_f1-score": 0.6922199349668261, |
|
"eval_loss": 0.7475219368934631, |
|
"eval_precision": 0.7040747986976179, |
|
"eval_recall": 0.6909403669724771, |
|
"eval_runtime": 99.2593, |
|
"eval_samples_per_second": 17.57, |
|
"eval_steps_per_second": 2.196, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6692, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.6857798165137615, |
|
"eval_f1-score": 0.6973150689879362, |
|
"eval_loss": 0.7810325026512146, |
|
"eval_precision": 0.7120290886583457, |
|
"eval_recall": 0.6857798165137615, |
|
"eval_runtime": 99.239, |
|
"eval_samples_per_second": 17.574, |
|
"eval_steps_per_second": 2.197, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6733, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.7368119266055045, |
|
"eval_f1-score": 0.7271862000397501, |
|
"eval_loss": 0.7271122336387634, |
|
"eval_precision": 0.74773502586483, |
|
"eval_recall": 0.7368119266055045, |
|
"eval_runtime": 99.0466, |
|
"eval_samples_per_second": 17.608, |
|
"eval_steps_per_second": 2.201, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6356, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.7253440366972477, |
|
"eval_f1-score": 0.7249868939903279, |
|
"eval_loss": 0.7682604193687439, |
|
"eval_precision": 0.7249192202979644, |
|
"eval_recall": 0.7253440366972477, |
|
"eval_runtime": 99.36, |
|
"eval_samples_per_second": 17.552, |
|
"eval_steps_per_second": 2.194, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6112, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.7264908256880734, |
|
"eval_f1-score": 0.7199307263891085, |
|
"eval_loss": 0.8078410029411316, |
|
"eval_precision": 0.7206056594932215, |
|
"eval_recall": 0.7264908256880734, |
|
"eval_runtime": 99.258, |
|
"eval_samples_per_second": 17.57, |
|
"eval_steps_per_second": 2.196, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0001, |
|
"loss": 0.646, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.7431192660550459, |
|
"eval_f1-score": 0.7350696097040301, |
|
"eval_loss": 0.7144489288330078, |
|
"eval_precision": 0.7421391963845961, |
|
"eval_recall": 0.7431192660550459, |
|
"eval_runtime": 99.3431, |
|
"eval_samples_per_second": 17.555, |
|
"eval_steps_per_second": 2.194, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6052, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.7471330275229358, |
|
"eval_f1-score": 0.7394703056430186, |
|
"eval_loss": 0.679287850856781, |
|
"eval_precision": 0.7525699542508473, |
|
"eval_recall": 0.7471330275229358, |
|
"eval_runtime": 99.4231, |
|
"eval_samples_per_second": 17.541, |
|
"eval_steps_per_second": 2.193, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6004, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.75, |
|
"eval_f1-score": 0.7503577703532133, |
|
"eval_loss": 0.7611460089683533, |
|
"eval_precision": 0.7582961750700967, |
|
"eval_recall": 0.75, |
|
"eval_runtime": 99.3947, |
|
"eval_samples_per_second": 17.546, |
|
"eval_steps_per_second": 2.193, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6041, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.7563073394495413, |
|
"eval_f1-score": 0.7497128952328905, |
|
"eval_loss": 0.7003496289253235, |
|
"eval_precision": 0.7580686951357652, |
|
"eval_recall": 0.7563073394495413, |
|
"eval_runtime": 99.3241, |
|
"eval_samples_per_second": 17.559, |
|
"eval_steps_per_second": 2.195, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5445, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.7517201834862385, |
|
"eval_f1-score": 0.7465676797118755, |
|
"eval_loss": 0.6659787893295288, |
|
"eval_precision": 0.7528237883767219, |
|
"eval_recall": 0.7517201834862385, |
|
"eval_runtime": 99.3316, |
|
"eval_samples_per_second": 17.557, |
|
"eval_steps_per_second": 2.195, |
|
"step": 1800 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 5886, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"total_flos": 2.868620669485056e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|