| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9975369458128078, |
| "eval_steps": 500, |
| "global_step": 270, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003694581280788177, |
| "grad_norm": 1.3164679266754151, |
| "learning_rate": 3.7037037037037036e-07, |
| "loss": 0.5867, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01847290640394089, |
| "grad_norm": 0.8941813303905811, |
| "learning_rate": 1.8518518518518519e-06, |
| "loss": 0.5684, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.03694581280788178, |
| "grad_norm": 0.4848149582760561, |
| "learning_rate": 3.7037037037037037e-06, |
| "loss": 0.5272, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.05541871921182266, |
| "grad_norm": 0.23489374203812283, |
| "learning_rate": 5.555555555555557e-06, |
| "loss": 0.3213, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.07389162561576355, |
| "grad_norm": 0.17590513475537736, |
| "learning_rate": 7.4074074074074075e-06, |
| "loss": 0.1719, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09236453201970443, |
| "grad_norm": 0.09924337039651394, |
| "learning_rate": 9.25925925925926e-06, |
| "loss": 0.1392, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.11083743842364532, |
| "grad_norm": 0.06829427058420333, |
| "learning_rate": 9.996239762521152e-06, |
| "loss": 0.1168, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.12931034482758622, |
| "grad_norm": 0.06324690561682543, |
| "learning_rate": 9.973281012033009e-06, |
| "loss": 0.1009, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1477832512315271, |
| "grad_norm": 0.049072943668117305, |
| "learning_rate": 9.929548316723983e-06, |
| "loss": 0.0886, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.16625615763546797, |
| "grad_norm": 0.04472692512923622, |
| "learning_rate": 9.86522435289912e-06, |
| "loss": 0.0772, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.18472906403940886, |
| "grad_norm": 0.04320806158867543, |
| "learning_rate": 9.7805778088694e-06, |
| "loss": 0.0677, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.20320197044334976, |
| "grad_norm": 0.04347088345515666, |
| "learning_rate": 9.67596226261095e-06, |
| "loss": 0.0652, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.22167487684729065, |
| "grad_norm": 0.04636829853932609, |
| "learning_rate": 9.551814704830734e-06, |
| "loss": 0.0618, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.24014778325123154, |
| "grad_norm": 0.04911651390500423, |
| "learning_rate": 9.40865371360804e-06, |
| "loss": 0.0564, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.25862068965517243, |
| "grad_norm": 0.046124586761473525, |
| "learning_rate": 9.247077288236488e-06, |
| "loss": 0.0496, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2770935960591133, |
| "grad_norm": 0.04282690739591252, |
| "learning_rate": 9.067760351314838e-06, |
| "loss": 0.0443, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.2955665024630542, |
| "grad_norm": 0.0501495787415551, |
| "learning_rate": 8.871451929520662e-06, |
| "loss": 0.0472, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.31403940886699505, |
| "grad_norm": 0.048856614296215864, |
| "learning_rate": 8.658972024843063e-06, |
| "loss": 0.045, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.33251231527093594, |
| "grad_norm": 0.052135344528722635, |
| "learning_rate": 8.43120818934367e-06, |
| "loss": 0.0407, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.35098522167487683, |
| "grad_norm": 0.04496874450828456, |
| "learning_rate": 8.18911181775353e-06, |
| "loss": 0.0443, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.3694581280788177, |
| "grad_norm": 0.046961464584805046, |
| "learning_rate": 7.93369417339209e-06, |
| "loss": 0.043, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3879310344827586, |
| "grad_norm": 0.03774079876533218, |
| "learning_rate": 7.666022164008458e-06, |
| "loss": 0.039, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.4064039408866995, |
| "grad_norm": 0.039388091387549375, |
| "learning_rate": 7.387213885189746e-06, |
| "loss": 0.043, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.4248768472906404, |
| "grad_norm": 0.04010524552891231, |
| "learning_rate": 7.098433949952146e-06, |
| "loss": 0.0418, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.4433497536945813, |
| "grad_norm": 0.03364245597783716, |
| "learning_rate": 6.800888624023552e-06, |
| "loss": 0.0396, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4618226600985222, |
| "grad_norm": 0.03214650791918716, |
| "learning_rate": 6.495820787138209e-06, |
| "loss": 0.0343, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.4802955665024631, |
| "grad_norm": 0.04248682387196562, |
| "learning_rate": 6.184504741390596e-06, |
| "loss": 0.0385, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.4987684729064039, |
| "grad_norm": 0.041745690695414894, |
| "learning_rate": 5.8682408883346535e-06, |
| "loss": 0.0354, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.5172413793103449, |
| "grad_norm": 0.043998119847084, |
| "learning_rate": 5.548350297062659e-06, |
| "loss": 0.0363, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5357142857142857, |
| "grad_norm": 0.03523911995673237, |
| "learning_rate": 5.2261691859535325e-06, |
| "loss": 0.0311, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.5541871921182266, |
| "grad_norm": 0.051683301339415226, |
| "learning_rate": 4.903043341140879e-06, |
| "loss": 0.0322, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5726600985221675, |
| "grad_norm": 0.029645536386539162, |
| "learning_rate": 4.580322495015466e-06, |
| "loss": 0.0297, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.5911330049261084, |
| "grad_norm": 0.038478089898929216, |
| "learning_rate": 4.259354688243758e-06, |
| "loss": 0.0373, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6096059113300493, |
| "grad_norm": 0.04016397060959619, |
| "learning_rate": 3.941480638852948e-06, |
| "loss": 0.0293, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.6280788177339901, |
| "grad_norm": 0.03295300026030036, |
| "learning_rate": 3.6280281419034934e-06, |
| "loss": 0.0306, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.646551724137931, |
| "grad_norm": 0.04371251122688708, |
| "learning_rate": 3.3203065231422904e-06, |
| "loss": 0.0292, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.6650246305418719, |
| "grad_norm": 0.02878432244430226, |
| "learning_rate": 3.019601169804216e-06, |
| "loss": 0.0348, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6834975369458128, |
| "grad_norm": 0.03582599925982462, |
| "learning_rate": 2.7271681614074973e-06, |
| "loss": 0.0292, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.7019704433497537, |
| "grad_norm": 0.04498392884678493, |
| "learning_rate": 2.4442290229706344e-06, |
| "loss": 0.0355, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7204433497536946, |
| "grad_norm": 0.03888587884987569, |
| "learning_rate": 2.171965622567308e-06, |
| "loss": 0.0311, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.7389162561576355, |
| "grad_norm": 0.04684421314626146, |
| "learning_rate": 1.9115152345327154e-06, |
| "loss": 0.0391, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7573891625615764, |
| "grad_norm": 0.031274018167510506, |
| "learning_rate": 1.6639657889429017e-06, |
| "loss": 0.0275, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.7758620689655172, |
| "grad_norm": 0.03793766745215515, |
| "learning_rate": 1.4303513272105057e-06, |
| "loss": 0.0312, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7943349753694581, |
| "grad_norm": 0.03084526133895099, |
| "learning_rate": 1.2116476827794104e-06, |
| "loss": 0.0334, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.812807881773399, |
| "grad_norm": 0.035909978135080484, |
| "learning_rate": 1.008768404960535e-06, |
| "loss": 0.0329, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8312807881773399, |
| "grad_norm": 0.03337500963867465, |
| "learning_rate": 8.225609429353187e-07, |
| "loss": 0.0299, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.8497536945812808, |
| "grad_norm": 0.04194808091582502, |
| "learning_rate": 6.53803105866761e-07, |
| "loss": 0.0312, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8682266009852216, |
| "grad_norm": 0.030185202275612954, |
| "learning_rate": 5.031998139045352e-07, |
| "loss": 0.0291, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.8866995073891626, |
| "grad_norm": 0.03678202209697851, |
| "learning_rate": 3.7138015365554834e-07, |
| "loss": 0.0329, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.9051724137931034, |
| "grad_norm": 0.026908956915127933, |
| "learning_rate": 2.5889475041961767e-07, |
| "loss": 0.0318, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.9236453201970444, |
| "grad_norm": 0.04036220391547687, |
| "learning_rate": 1.6621346816668993e-07, |
| "loss": 0.0304, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9421182266009852, |
| "grad_norm": 0.037246305929994866, |
| "learning_rate": 9.372344686307655e-08, |
| "loss": 0.036, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.9605911330049262, |
| "grad_norm": 0.031641745281581694, |
| "learning_rate": 4.172748534499449e-08, |
| "loss": 0.0296, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.979064039408867, |
| "grad_norm": 0.03135655037281048, |
| "learning_rate": 1.044277649433989e-08, |
| "loss": 0.0294, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.9975369458128078, |
| "grad_norm": 0.03097346411772941, |
| "learning_rate": 0.0, |
| "loss": 0.033, |
| "step": 270 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 270, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8.643970128528015e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|