lesso02 committed on
Commit
9a6f0b1
·
verified ·
1 Parent(s): d2221b8

Training in progress, step 450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fd7f4457c1fed0ab75e710ec4c037e25e68910b692621792051a320e3709285
3
  size 100690184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a59db3e4434ebdb2c055739fbfc497d4193984e94417e1ef056752ba70a184f8
3
  size 100690184
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a99baa93ec18e9d69ea9e60b181526fba03bd2ead335d32174e4fc5f626253e4
3
  size 51345082
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:993e389f1929284472f2ab7229af0004eae130c9ad6011c0583f2498ca66eb93
3
  size 51345082
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90c7c1a2d9df7762ed596ff4cf1bcff93ba007dbe9aefcb62e6894b768573c96
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3e77348693ab93014486e58318571069128adf081adfe643fc83e768d147faf
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:935feb3a63b37324efd90f0473c874080c886e9eea264139d581d13a208d37c1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56d2b9cd8b76db862d8f36c96267d64ff69a21d07ce7f6eb4a0887810e44a8eb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.8264233469963074,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-400",
4
- "epoch": 0.19111323459149546,
5
  "eval_steps": 50,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -359,6 +359,49 @@
359
  "eval_samples_per_second": 77.078,
360
  "eval_steps_per_second": 19.313,
361
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  }
363
  ],
364
  "logging_steps": 10,
@@ -387,7 +430,7 @@
387
  "attributes": {}
388
  }
389
  },
390
- "total_flos": 3725608982937600.0,
391
  "train_batch_size": 4,
392
  "trial_name": null,
393
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8030063509941101,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-450",
4
+ "epoch": 0.21500238891543239,
5
  "eval_steps": 50,
6
+ "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
359
  "eval_samples_per_second": 77.078,
360
  "eval_steps_per_second": 19.313,
361
  "step": 400
362
+ },
363
+ {
364
+ "epoch": 0.19589106545628285,
365
+ "grad_norm": 8.18686294555664,
366
+ "learning_rate": 1.928928356813032e-05,
367
+ "loss": 1.0687,
368
+ "step": 410
369
+ },
370
+ {
371
+ "epoch": 0.20066889632107024,
372
+ "grad_norm": 10.289799690246582,
373
+ "learning_rate": 1.5347142288200977e-05,
374
+ "loss": 1.4437,
375
+ "step": 420
376
+ },
377
+ {
378
+ "epoch": 0.20544672718585763,
379
+ "grad_norm": 12.29814338684082,
380
+ "learning_rate": 1.1822293121248375e-05,
381
+ "loss": 1.6965,
382
+ "step": 430
383
+ },
384
+ {
385
+ "epoch": 0.210224558050645,
386
+ "grad_norm": 15.640246391296387,
387
+ "learning_rate": 8.731908778097302e-06,
388
+ "loss": 1.8951,
389
+ "step": 440
390
+ },
391
+ {
392
+ "epoch": 0.21500238891543239,
393
+ "grad_norm": 7.856279373168945,
394
+ "learning_rate": 6.09104530062326e-06,
395
+ "loss": 1.9909,
396
+ "step": 450
397
+ },
398
+ {
399
+ "epoch": 0.21500238891543239,
400
+ "eval_loss": 0.8030063509941101,
401
+ "eval_runtime": 11.7799,
402
+ "eval_samples_per_second": 74.874,
403
+ "eval_steps_per_second": 18.761,
404
+ "step": 450
405
  }
406
  ],
407
  "logging_steps": 10,
 
430
  "attributes": {}
431
  }
432
  },
433
+ "total_flos": 4191310105804800.0,
434
  "train_batch_size": 4,
435
  "trial_name": null,
436
  "trial_params": null