Nadav commited on
Commit
c75d38c
·
1 Parent(s): d3530f2

Training in progress, step 10000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:426565ae7e122986bcb61feb0824a20d697e6e1d0d7ee61605f81249ac607053
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73489cdb01a4bb6cfe5252edc42886df8eebea1bb75734e470347a6522c023a1
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5975d5c14cb653070063ab664a542345e1d7b31d1f7f2691782cc33745d9b2b9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5920908de9e54da20823768f6ef8cc7bc5d55494a5080bb14683341420ff389e
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b802e5910fd078d7354b6fbe97850b2f1ec26c0f43e7f9e349a96b25ed8f557
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b44d07e2d139a22d271aa72093cf081d67bc950fe387651d7d6d2f2dba45fb6
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6775d0c71cbc2254c9932c38f18b38899f4e38dc0dfc75f710acd45f734e62b
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:351b48ff70cf2e76c396612d9003a4396dc5f5b7c719dcf507ee09719e12672a
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16ebe9ed2aebd8d40826f0d7b248340ae7726517e3fb98bf26aa87be67a5fb7a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d524b4cb1391ef7e50966a3eef7ac714ecb6ed976eedf165d99a07d29c73b99
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4353125544140693,
5
- "global_step": 5000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -74,11 +74,79 @@
74
  "eval_samples_per_second": 202.871,
75
  "eval_steps_per_second": 3.205,
76
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  }
78
  ],
79
  "max_steps": 100000,
80
  "num_train_epochs": 9,
81
- "total_flos": 2.354930673647616e+20,
82
  "trial_name": null,
83
  "trial_params": null
84
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8706251088281386,
5
+ "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
74
  "eval_samples_per_second": 202.871,
75
  "eval_steps_per_second": 3.205,
76
  "step": 5000
77
+ },
78
+ {
79
+ "epoch": 0.48,
80
+ "learning_rate": 0.00010010559815435491,
81
+ "loss": 0.4647,
82
+ "step": 5500
83
+ },
84
+ {
85
+ "epoch": 0.52,
86
+ "learning_rate": 9.997779155931062e-05,
87
+ "loss": 0.4633,
88
+ "step": 6000
89
+ },
90
+ {
91
+ "epoch": 0.57,
92
+ "learning_rate": 9.983873864536092e-05,
93
+ "loss": 0.4633,
94
+ "step": 6500
95
+ },
96
+ {
97
+ "epoch": 0.61,
98
+ "learning_rate": 9.968871882446063e-05,
99
+ "loss": 0.4599,
100
+ "step": 7000
101
+ },
102
+ {
103
+ "epoch": 0.65,
104
+ "learning_rate": 9.952776911175577e-05,
105
+ "loss": 0.4583,
106
+ "step": 7500
107
+ },
108
+ {
109
+ "epoch": 0.7,
110
+ "learning_rate": 9.935592921917959e-05,
111
+ "loss": 0.459,
112
+ "step": 8000
113
+ },
114
+ {
115
+ "epoch": 0.74,
116
+ "learning_rate": 9.91732415456543e-05,
117
+ "loss": 0.4574,
118
+ "step": 8500
119
+ },
120
+ {
121
+ "epoch": 0.78,
122
+ "learning_rate": 9.897975116662973e-05,
123
+ "loss": 0.4571,
124
+ "step": 9000
125
+ },
126
+ {
127
+ "epoch": 0.83,
128
+ "learning_rate": 9.877550582296162e-05,
129
+ "loss": 0.456,
130
+ "step": 9500
131
+ },
132
+ {
133
+ "epoch": 0.87,
134
+ "learning_rate": 9.856099645730841e-05,
135
+ "loss": 0.4546,
136
+ "step": 10000
137
+ },
138
+ {
139
+ "epoch": 0.87,
140
+ "eval_loss": 0.43266522884368896,
141
+ "eval_runtime": 17.119,
142
+ "eval_samples_per_second": 292.073,
143
+ "eval_steps_per_second": 4.615,
144
+ "step": 10000
145
  }
146
  ],
147
  "max_steps": 100000,
148
  "num_train_epochs": 9,
149
+ "total_flos": 4.709861347295232e+20,
150
  "trial_name": null,
151
  "trial_params": null
152
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5975d5c14cb653070063ab664a542345e1d7b31d1f7f2691782cc33745d9b2b9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5920908de9e54da20823768f6ef8cc7bc5d55494a5080bb14683341420ff389e
3
  size 449471589