{ "best_global_step": 1126, "best_metric": 0.9026876737720111, "best_model_checkpoint": "/workspace/code_langid/CodeLanguage-Encoder-v1/checkpoint-1126", "epoch": 2.0, "eval_steps": 500, "global_step": 1126, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.035555555555555556, "grad_norm": 50.418575286865234, "learning_rate": 6.666666666666667e-06, "loss": 1.0323, "step": 20 }, { "epoch": 0.07111111111111111, "grad_norm": 8.79090690612793, "learning_rate": 1.3684210526315791e-05, "loss": 0.3941, "step": 40 }, { "epoch": 0.10666666666666667, "grad_norm": 5.275986671447754, "learning_rate": 1.9999827267889127e-05, "loss": 0.3868, "step": 60 }, { "epoch": 0.14222222222222222, "grad_norm": 4.890424728393555, "learning_rate": 1.997910663401593e-05, "loss": 0.3957, "step": 80 }, { "epoch": 0.17777777777777778, "grad_norm": 2.00404953956604, "learning_rate": 1.9923921581420398e-05, "loss": 0.2849, "step": 100 }, { "epoch": 0.21333333333333335, "grad_norm": 4.090190887451172, "learning_rate": 1.9834462700385778e-05, "loss": 0.4143, "step": 120 }, { "epoch": 0.24888888888888888, "grad_norm": 3.5859761238098145, "learning_rate": 1.9711038951267685e-05, "loss": 0.3221, "step": 140 }, { "epoch": 0.28444444444444444, "grad_norm": 4.17680549621582, "learning_rate": 1.9554076597450804e-05, "loss": 0.3325, "step": 160 }, { "epoch": 0.32, "grad_norm": 5.892409324645996, "learning_rate": 1.9364117733181033e-05, "loss": 0.3185, "step": 180 }, { "epoch": 0.35555555555555557, "grad_norm": 15.038902282714844, "learning_rate": 1.9141818411357532e-05, "loss": 0.2261, "step": 200 }, { "epoch": 0.39111111111111113, "grad_norm": 5.424457550048828, "learning_rate": 1.8887946377750536e-05, "loss": 0.3511, "step": 220 }, { "epoch": 0.4266666666666667, "grad_norm": 5.554855823516846, "learning_rate": 1.8603378419470253e-05, "loss": 0.2405, "step": 240 }, { "epoch": 0.4622222222222222, "grad_norm": 5.886898040771484, "learning_rate": 1.8289097336844297e-05, "loss": 0.251, "step": 260 }, { "epoch": 0.49777777777777776, "grad_norm": 6.273799419403076, "learning_rate": 1.794618854916172e-05, "loss": 0.2398, "step": 280 }, { "epoch": 0.5333333333333333, "grad_norm": 19.23700523376465, "learning_rate": 1.7575836346006322e-05, "loss": 0.1587, "step": 300 }, { "epoch": 0.5688888888888889, "grad_norm": 6.060857772827148, "learning_rate": 1.717931979712576e-05, "loss": 0.2326, "step": 320 }, { "epoch": 0.6044444444444445, "grad_norm": 3.646226167678833, "learning_rate": 1.6758008334962462e-05, "loss": 0.1481, "step": 340 }, { "epoch": 0.64, "grad_norm": 7.208901882171631, "learning_rate": 1.6313357025102653e-05, "loss": 0.1729, "step": 360 }, { "epoch": 0.6755555555555556, "grad_norm": 4.766529560089111, "learning_rate": 1.584690154097788e-05, "loss": 0.1506, "step": 380 }, { "epoch": 0.7111111111111111, "grad_norm": 5.213943958282471, "learning_rate": 1.53602528601746e-05, "loss": 0.0915, "step": 400 }, { "epoch": 0.7466666666666667, "grad_norm": 6.959835529327393, "learning_rate": 1.485509170066898e-05, "loss": 0.165, "step": 420 }, { "epoch": 0.7822222222222223, "grad_norm": 5.660776138305664, "learning_rate": 1.433316271620227e-05, "loss": 0.1133, "step": 440 }, { "epoch": 0.8177777777777778, "grad_norm": 5.484377861022949, "learning_rate": 1.3796268470843958e-05, "loss": 0.1428, "step": 460 }, { "epoch": 0.8533333333333334, "grad_norm": 6.408914089202881, "learning_rate": 1.324626321355242e-05, "loss": 0.1192, "step": 480 }, { "epoch": 0.8888888888888888, "grad_norm": 6.384398937225342, "learning_rate": 1.2685046474233678e-05, "loss": 0.0774, "step": 500 }, { "epoch": 0.9244444444444444, "grad_norm": 8.599336624145508, "learning_rate": 1.211455650341529e-05, "loss": 0.1333, "step": 520 }, { "epoch": 0.96, "grad_norm": 3.2596817016601562, "learning_rate": 1.1536763578192413e-05, "loss": 0.0863, "step": 540 }, { "epoch": 0.9955555555555555, "grad_norm": 4.493932723999023, "learning_rate": 1.0953663197565112e-05, "loss": 0.1024, "step": 560 }, { "epoch": 1.0, "eval_category_set_accuracy": 0.749, "eval_is_valid_accuracy": 0.922, "eval_loss": 0.04746342822909355, "eval_macro_f1": 0.8320485546083991, "eval_micro_f1": 0.8457854406130269, "eval_runtime": 9.5375, "eval_samples_per_second": 104.849, "eval_steps_per_second": 13.106, "step": 563 }, { "epoch": 1.0302222222222222, "grad_norm": 3.14100980758667, "learning_rate": 1.0367269190667806e-05, "loss": 0.0791, "step": 580 }, { "epoch": 1.0657777777777777, "grad_norm": 5.287792682647705, "learning_rate": 9.779606761692735e-06, "loss": 0.0608, "step": 600 }, { "epoch": 1.1013333333333333, "grad_norm": 7.076214790344238, "learning_rate": 9.19270549552784e-06, "loss": 0.0675, "step": 620 }, { "epoch": 1.1368888888888888, "grad_norm": 5.072103500366211, "learning_rate": 8.608592348265239e-06, "loss": 0.0817, "step": 640 }, { "epoch": 1.1724444444444444, "grad_norm": 1.6247732639312744, "learning_rate": 8.029284646788679e-06, "loss": 0.0522, "step": 660 }, { "epoch": 1.208, "grad_norm": 8.46465015411377, "learning_rate": 7.456783121616925e-06, "loss": 0.0829, "step": 680 }, { "epoch": 1.2435555555555555, "grad_norm": 3.561216115951538, "learning_rate": 6.893064997065378e-06, "loss": 0.0676, "step": 700 }, { "epoch": 1.279111111111111, "grad_norm": 6.2991485595703125, "learning_rate": 6.340077162590021e-06, "loss": 0.0797, "step": 720 }, { "epoch": 1.3146666666666667, "grad_norm": 5.223042964935303, "learning_rate": 5.79972944889761e-06, "loss": 0.0614, "step": 740 }, { "epoch": 1.3502222222222222, "grad_norm": 3.579955577850342, "learning_rate": 5.2738880320441256e-06, "loss": 0.048, "step": 760 }, { "epoch": 1.3857777777777778, "grad_norm": 8.557352066040039, "learning_rate": 4.764368988301375e-06, "loss": 0.0794, "step": 780 }, { "epoch": 1.4213333333333333, "grad_norm": 3.6406803131103516, "learning_rate": 4.272932022051186e-06, "loss": 0.0589, "step": 800 }, { "epoch": 1.456888888888889, "grad_norm": 4.307910919189453, "learning_rate": 3.801274388368751e-06, "loss": 0.062, "step": 820 }, { "epoch": 1.4924444444444445, "grad_norm": 2.9349007606506348, "learning_rate": 3.3510250312845315e-06, "loss": 0.0515, "step": 840 }, { "epoch": 1.528, "grad_norm": 4.273534774780273, "learning_rate": 2.9237389579691232e-06, "loss": 0.0308, "step": 860 }, { "epoch": 1.5635555555555556, "grad_norm": 6.90522575378418, "learning_rate": 2.5208918682707185e-06, "loss": 0.0686, "step": 880 }, { "epoch": 1.5991111111111111, "grad_norm": 3.360746383666992, "learning_rate": 2.14387505815294e-06, "loss": 0.051, "step": 900 }, { "epoch": 1.6346666666666667, "grad_norm": 7.586688995361328, "learning_rate": 1.7939906146347751e-06, "loss": 0.0526, "step": 920 }, { "epoch": 1.6702222222222223, "grad_norm": 2.680347442626953, "learning_rate": 1.4724469188276935e-06, "loss": 0.0501, "step": 940 }, { "epoch": 1.7057777777777776, "grad_norm": 0.2218952476978302, "learning_rate": 1.1803544726008665e-06, "loss": 0.0321, "step": 960 }, { "epoch": 1.7413333333333334, "grad_norm": 2.8257687091827393, "learning_rate": 9.18722063287838e-07, "loss": 0.0545, "step": 980 }, { "epoch": 1.7768888888888887, "grad_norm": 4.3921380043029785, "learning_rate": 6.884532796803245e-07, "loss": 0.0543, "step": 1000 }, { "epoch": 1.8124444444444445, "grad_norm": 8.81263542175293, "learning_rate": 4.903433913418254e-07, "loss": 0.0402, "step": 1020 }, { "epoch": 1.8479999999999999, "grad_norm": 5.800553798675537, "learning_rate": 3.250766020187446e-07, "loss": 0.0669, "step": 1040 }, { "epoch": 1.8835555555555556, "grad_norm": 5.161985874176025, "learning_rate": 1.9322368663482693e-07, "loss": 0.0378, "step": 1060 }, { "epoch": 1.919111111111111, "grad_norm": 6.102690696716309, "learning_rate": 9.524002002995192e-08, "loss": 0.0736, "step": 1080 }, { "epoch": 1.9546666666666668, "grad_norm": 3.5213029384613037, "learning_rate": 3.146400425127927e-08, "loss": 0.0461, "step": 1100 }, { "epoch": 1.9902222222222221, "grad_norm": 2.1008975505828857, "learning_rate": 2.1158998284220855e-09, "loss": 0.0366, "step": 1120 }, { "epoch": 2.0, "eval_category_set_accuracy": 0.825, "eval_is_valid_accuracy": 0.944, "eval_loss": 0.03216760233044624, "eval_macro_f1": 0.893842732124518, "eval_micro_f1": 0.9026876737720111, "eval_runtime": 9.5148, "eval_samples_per_second": 105.099, "eval_steps_per_second": 13.137, "step": 1126 } ], "logging_steps": 20, "max_steps": 1126, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8505564385194240.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }