Nadav commited on
Commit
0313be2
·
1 Parent(s): f6314b5

Training in progress, step 30000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78c410b30ed46201dc76b3758a8f05ba2befb17e53aabac920b8594900bf6e8f
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b82214f5a6da233cbde612bdd1f848ffc727475d410f23d11d0ef77869df202
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:793325251fa149e905afc5251e0311b4cc842e9f6cbb39aa46ab80628503d14f
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72a1a14ed7bf6bc0725bb876cb7a38064b9cb2a4e01a8c1e6ca520325a13a6e9
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:138259c5b6d4472016db54172bb7e725edf4c52d33cd44c90cb79184c02490ab
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b911e5c773c97f0b6494ffc61a4bccbd0595e2f25c8540a15cd988ed7205ecc7
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d302e2bbe5f8cc1de61c1cfd61605591d0eb14f9a715e2ddd90f5a8190314e3
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be94e3813e8e1c0f62960e0a57904cf2dbed90df3bbe70219beb6f9114fec94f
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afdbfcefcd9e1661e5368ed0a7d4915dce2e8bd6a92fd4d33ed45da578d95703
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32b016f83d2aca2db829c6d70c41861d7f94941040995a7638b751ed5d1d2359
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.1765627720703464,
5
- "global_step": 25000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -346,11 +346,79 @@
346
  "eval_samples_per_second": 306.184,
347
  "eval_steps_per_second": 4.838,
348
  "step": 25000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  }
350
  ],
351
  "max_steps": 100000,
352
  "num_train_epochs": 9,
353
- "total_flos": 1.1774280960596666e+21,
354
  "trial_name": null,
355
  "trial_params": null
356
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.611875326484416,
5
+ "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
346
  "eval_samples_per_second": 306.184,
347
  "eval_steps_per_second": 4.838,
348
  "step": 25000
349
+ },
350
+ {
351
+ "epoch": 2.22,
352
+ "learning_rate": 8.698878062256518e-05,
353
+ "loss": 0.4355,
354
+ "step": 25500
355
+ },
356
+ {
357
+ "epoch": 2.26,
358
+ "learning_rate": 8.647303945562088e-05,
359
+ "loss": 0.4346,
360
+ "step": 26000
361
+ },
362
+ {
363
+ "epoch": 2.31,
364
+ "learning_rate": 8.594962916800582e-05,
365
+ "loss": 0.435,
366
+ "step": 26500
367
+ },
368
+ {
369
+ "epoch": 2.35,
370
+ "learning_rate": 8.54186789033765e-05,
371
+ "loss": 0.434,
372
+ "step": 27000
373
+ },
374
+ {
375
+ "epoch": 2.39,
376
+ "learning_rate": 8.488031966576593e-05,
377
+ "loss": 0.4341,
378
+ "step": 27500
379
+ },
380
+ {
381
+ "epoch": 2.44,
382
+ "learning_rate": 8.433468428726034e-05,
383
+ "loss": 0.4341,
384
+ "step": 28000
385
+ },
386
+ {
387
+ "epoch": 2.48,
388
+ "learning_rate": 8.378190739522489e-05,
389
+ "loss": 0.4336,
390
+ "step": 28500
391
+ },
392
+ {
393
+ "epoch": 2.52,
394
+ "learning_rate": 8.322212537908629e-05,
395
+ "loss": 0.4323,
396
+ "step": 29000
397
+ },
398
+ {
399
+ "epoch": 2.57,
400
+ "learning_rate": 8.265547635668083e-05,
401
+ "loss": 0.4331,
402
+ "step": 29500
403
+ },
404
+ {
405
+ "epoch": 2.61,
406
+ "learning_rate": 8.208325351258954e-05,
407
+ "loss": 0.4318,
408
+ "step": 30000
409
+ },
410
+ {
411
+ "epoch": 2.61,
412
+ "eval_loss": 0.4114779233932495,
413
+ "eval_runtime": 17.3867,
414
+ "eval_samples_per_second": 287.576,
415
+ "eval_steps_per_second": 4.544,
416
+ "step": 30000
417
  }
418
  ],
419
  "max_steps": 100000,
420
  "num_train_epochs": 9,
421
+ "total_flos": 1.4129211634244282e+21,
422
  "trial_name": null,
423
  "trial_params": null
424
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:793325251fa149e905afc5251e0311b4cc842e9f6cbb39aa46ab80628503d14f
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72a1a14ed7bf6bc0725bb876cb7a38064b9cb2a4e01a8c1e6ca520325a13a6e9
3
  size 449471589