ChiefTheLord committed on
Commit
8fd26e5
·
verified ·
1 Parent(s): 6228e7c

Delete checkpoints-v4.1-discrete-conditional/checkpoint-1792

Browse files
checkpoints-v4.1-discrete-conditional/checkpoint-1792/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:348def0448a17fc33dfd58e269365b3a7518263e219bda91f83e9de110663795
3
- size 24416696
 
 
 
 
checkpoints-v4.1-discrete-conditional/checkpoint-1792/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fccede8ce292e8b16ad8a02eafd56aba6f2498411f1075b01c8a3df4b4701cd
3
- size 816907
 
 
 
 
checkpoints-v4.1-discrete-conditional/checkpoint-1792/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d33ede87b7e42c95f7d6d7faccd1e35edd1370f62e10350592062c54fbbf34bf
3
- size 14645
 
 
 
 
checkpoints-v4.1-discrete-conditional/checkpoint-1792/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a42e33465160c5ace903c63375f0694cfab8943854b6c37c46848f754e8871c0
3
- size 1383
 
 
 
 
checkpoints-v4.1-discrete-conditional/checkpoint-1792/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:970abb983a5a0197c365bc7fdfdc8155569e58b62f56513c3b9d937587189b2d
3
- size 1465
 
 
 
 
checkpoints-v4.1-discrete-conditional/checkpoint-1792/trainer_state.json DELETED
@@ -1,251 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 0.8849382716049383,
6
- "eval_steps": 256,
7
- "global_step": 1792,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.06320987654320988,
14
- "grad_norm": 0.08904296904802322,
15
- "learning_rate": 0.000248046875,
16
- "loss": 0.4711284637451172,
17
- "step": 128
18
- },
19
- {
20
- "epoch": 0.12641975308641976,
21
- "grad_norm": 0.15019357204437256,
22
- "learning_rate": 0.000498046875,
23
- "loss": 0.4766996204853058,
24
- "step": 256
25
- },
26
- {
27
- "epoch": 0.12641975308641976,
28
- "eval_cos_loss": 0.13597975344192692,
29
- "eval_loss": 0.4843393615106257,
30
- "eval_mse_loss": 0.4843393615106257,
31
- "step": 256
32
- },
33
- {
34
- "epoch": 0.12641975308641976,
35
- "eval_cos_loss": 0.13597975344192692,
36
- "eval_loss": 0.4843393615106257,
37
- "eval_mse_loss": 0.4843393615106257,
38
- "eval_runtime": 5.5201,
39
- "eval_samples_per_second": 474.263,
40
- "eval_steps_per_second": 7.427,
41
- "step": 256
42
- },
43
- {
44
- "epoch": 0.18962962962962962,
45
- "grad_norm": 0.10759640485048294,
46
- "learning_rate": 0.000748046875,
47
- "loss": 0.47947996854782104,
48
- "step": 384
49
- },
50
- {
51
- "epoch": 0.2528395061728395,
52
- "grad_norm": 0.08275946974754333,
53
- "learning_rate": 0.000998046875,
54
- "loss": 0.47462955117225647,
55
- "step": 512
56
- },
57
- {
58
- "epoch": 0.2528395061728395,
59
- "eval_cos_loss": 0.13039116016248378,
60
- "eval_loss": 0.4657249777782254,
61
- "eval_mse_loss": 0.4657249777782254,
62
- "step": 512
63
- },
64
- {
65
- "epoch": 0.2528395061728395,
66
- "eval_cos_loss": 0.13039116016248378,
67
- "eval_loss": 0.4657249777782254,
68
- "eval_mse_loss": 0.4657249777782254,
69
- "eval_runtime": 5.7382,
70
- "eval_samples_per_second": 456.244,
71
- "eval_steps_per_second": 7.145,
72
- "step": 512
73
- },
74
- {
75
- "epoch": 0.3160493827160494,
76
- "grad_norm": 0.08074437826871872,
77
- "learning_rate": 0.0009827157247249464,
78
- "loss": 0.4701014459133148,
79
- "step": 640
80
- },
81
- {
82
- "epoch": 0.37925925925925924,
83
- "grad_norm": 0.059887129813432693,
84
- "learning_rate": 0.0009315344337660421,
85
- "loss": 0.47220277786254883,
86
- "step": 768
87
- },
88
- {
89
- "epoch": 0.37925925925925924,
90
- "eval_cos_loss": 0.13229864417779735,
91
- "eval_loss": 0.4738647668826871,
92
- "eval_mse_loss": 0.4738647668826871,
93
- "step": 768
94
- },
95
- {
96
- "epoch": 0.37925925925925924,
97
- "eval_cos_loss": 0.13229864417779735,
98
- "eval_loss": 0.4738647668826871,
99
- "eval_mse_loss": 0.4738647668826871,
100
- "eval_runtime": 5.7163,
101
- "eval_samples_per_second": 457.991,
102
- "eval_steps_per_second": 7.173,
103
- "step": 768
104
- },
105
- {
106
- "epoch": 0.44246913580246916,
107
- "grad_norm": 0.04632481560111046,
108
- "learning_rate": 0.0008500491898731988,
109
- "loss": 0.4803292453289032,
110
- "step": 896
111
- },
112
- {
113
- "epoch": 0.505679012345679,
114
- "grad_norm": 0.04784788191318512,
115
- "learning_rate": 0.0007439821899385376,
116
- "loss": 0.47202467918395996,
117
- "step": 1024
118
- },
119
- {
120
- "epoch": 0.505679012345679,
121
- "eval_cos_loss": 0.13205302797439622,
122
- "eval_loss": 0.4683481622033003,
123
- "eval_mse_loss": 0.4683481622033003,
124
- "step": 1024
125
- },
126
- {
127
- "epoch": 0.505679012345679,
128
- "eval_cos_loss": 0.13205302797439622,
129
- "eval_loss": 0.4683481622033003,
130
- "eval_mse_loss": 0.4683481622033003,
131
- "eval_runtime": 5.7275,
132
- "eval_samples_per_second": 457.091,
133
- "eval_steps_per_second": 7.158,
134
- "step": 1024
135
- },
136
- {
137
- "epoch": 0.5688888888888889,
138
- "grad_norm": 0.04520433768630028,
139
- "learning_rate": 0.0006207818531897271,
140
- "loss": 0.47000765800476074,
141
- "step": 1152
142
- },
143
- {
144
- "epoch": 0.6320987654320988,
145
- "grad_norm": 0.052492521703243256,
146
- "learning_rate": 0.0004890997654891032,
147
- "loss": 0.4752293825149536,
148
- "step": 1280
149
- },
150
- {
151
- "epoch": 0.6320987654320988,
152
- "eval_cos_loss": 0.13366234865857335,
153
- "eval_loss": 0.4745350596381397,
154
- "eval_mse_loss": 0.4745350596381397,
155
- "step": 1280
156
- },
157
- {
158
- "epoch": 0.6320987654320988,
159
- "eval_cos_loss": 0.13366234865857335,
160
- "eval_loss": 0.4745350596381397,
161
- "eval_mse_loss": 0.4745350596381397,
162
- "eval_runtime": 5.4531,
163
- "eval_samples_per_second": 480.092,
164
- "eval_steps_per_second": 7.519,
165
- "step": 1280
166
- },
167
- {
168
- "epoch": 0.6953086419753086,
169
- "grad_norm": 0.06958144158124924,
170
- "learning_rate": 0.00035818313279679524,
171
- "loss": 0.468513160943985,
172
- "step": 1408
173
- },
174
- {
175
- "epoch": 0.7585185185185185,
176
- "grad_norm": 0.07884542644023895,
177
- "learning_rate": 0.00023722540797531234,
178
- "loss": 0.4731239974498749,
179
- "step": 1536
180
- },
181
- {
182
- "epoch": 0.7585185185185185,
183
- "eval_cos_loss": 0.13044148719892268,
184
- "eval_loss": 0.46598181055813304,
185
- "eval_mse_loss": 0.46598181055813304,
186
- "step": 1536
187
- },
188
- {
189
- "epoch": 0.7585185185185185,
190
- "eval_cos_loss": 0.13044148719892268,
191
- "eval_loss": 0.46598181055813304,
192
- "eval_mse_loss": 0.46598181055813304,
193
- "eval_runtime": 5.2545,
194
- "eval_samples_per_second": 498.242,
195
- "eval_steps_per_second": 7.803,
196
- "step": 1536
197
- },
198
- {
199
- "epoch": 0.8217283950617283,
200
- "grad_norm": 0.08384841680526733,
201
- "learning_rate": 0.00013472069233656453,
202
- "loss": 0.47170335054397583,
203
- "step": 1664
204
- },
205
- {
206
- "epoch": 0.8849382716049383,
207
- "grad_norm": 0.08261716365814209,
208
- "learning_rate": 5.786724825584927e-05,
209
- "loss": 0.4725135862827301,
210
- "step": 1792
211
- },
212
- {
213
- "epoch": 0.8849382716049383,
214
- "eval_cos_loss": 0.13125932561915096,
215
- "eval_loss": 0.47124562975836964,
216
- "eval_mse_loss": 0.47124562975836964,
217
- "step": 1792
218
- },
219
- {
220
- "epoch": 0.8849382716049383,
221
- "eval_cos_loss": 0.13125932561915096,
222
- "eval_loss": 0.47124562975836964,
223
- "eval_mse_loss": 0.47124562975836964,
224
- "eval_runtime": 5.1931,
225
- "eval_samples_per_second": 504.135,
226
- "eval_steps_per_second": 7.895,
227
- "step": 1792
228
- }
229
- ],
230
- "logging_steps": 128,
231
- "max_steps": 2025,
232
- "num_input_tokens_seen": 0,
233
- "num_train_epochs": 1,
234
- "save_steps": 256,
235
- "stateful_callbacks": {
236
- "TrainerControl": {
237
- "args": {
238
- "should_epoch_stop": false,
239
- "should_evaluate": false,
240
- "should_log": false,
241
- "should_save": true,
242
- "should_training_stop": false
243
- },
244
- "attributes": {}
245
- }
246
- },
247
- "total_flos": 0.0,
248
- "train_batch_size": 64,
249
- "trial_name": null,
250
- "trial_params": null
251
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoints-v4.1-discrete-conditional/checkpoint-1792/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0839bffbc58eb6068cc228e4d756dbb22a9adf723766e40a7bc2a03aca92630
3
- size 5137