Yash1005's picture
upload Code Language-ID encoder (multi-label classifier)
7bc7eaf verified
{
"best_global_step": 1126,
"best_metric": 0.9026876737720111,
"best_model_checkpoint": "/workspace/code_langid/CodeLanguage-Encoder-v1/checkpoint-1126",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 1126,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.035555555555555556,
"grad_norm": 50.418575286865234,
"learning_rate": 6.666666666666667e-06,
"loss": 1.0323,
"step": 20
},
{
"epoch": 0.07111111111111111,
"grad_norm": 8.79090690612793,
"learning_rate": 1.3684210526315791e-05,
"loss": 0.3941,
"step": 40
},
{
"epoch": 0.10666666666666667,
"grad_norm": 5.275986671447754,
"learning_rate": 1.9999827267889127e-05,
"loss": 0.3868,
"step": 60
},
{
"epoch": 0.14222222222222222,
"grad_norm": 4.890424728393555,
"learning_rate": 1.997910663401593e-05,
"loss": 0.3957,
"step": 80
},
{
"epoch": 0.17777777777777778,
"grad_norm": 2.00404953956604,
"learning_rate": 1.9923921581420398e-05,
"loss": 0.2849,
"step": 100
},
{
"epoch": 0.21333333333333335,
"grad_norm": 4.090190887451172,
"learning_rate": 1.9834462700385778e-05,
"loss": 0.4143,
"step": 120
},
{
"epoch": 0.24888888888888888,
"grad_norm": 3.5859761238098145,
"learning_rate": 1.9711038951267685e-05,
"loss": 0.3221,
"step": 140
},
{
"epoch": 0.28444444444444444,
"grad_norm": 4.17680549621582,
"learning_rate": 1.9554076597450804e-05,
"loss": 0.3325,
"step": 160
},
{
"epoch": 0.32,
"grad_norm": 5.892409324645996,
"learning_rate": 1.9364117733181033e-05,
"loss": 0.3185,
"step": 180
},
{
"epoch": 0.35555555555555557,
"grad_norm": 15.038902282714844,
"learning_rate": 1.9141818411357532e-05,
"loss": 0.2261,
"step": 200
},
{
"epoch": 0.39111111111111113,
"grad_norm": 5.424457550048828,
"learning_rate": 1.8887946377750536e-05,
"loss": 0.3511,
"step": 220
},
{
"epoch": 0.4266666666666667,
"grad_norm": 5.554855823516846,
"learning_rate": 1.8603378419470253e-05,
"loss": 0.2405,
"step": 240
},
{
"epoch": 0.4622222222222222,
"grad_norm": 5.886898040771484,
"learning_rate": 1.8289097336844297e-05,
"loss": 0.251,
"step": 260
},
{
"epoch": 0.49777777777777776,
"grad_norm": 6.273799419403076,
"learning_rate": 1.794618854916172e-05,
"loss": 0.2398,
"step": 280
},
{
"epoch": 0.5333333333333333,
"grad_norm": 19.23700523376465,
"learning_rate": 1.7575836346006322e-05,
"loss": 0.1587,
"step": 300
},
{
"epoch": 0.5688888888888889,
"grad_norm": 6.060857772827148,
"learning_rate": 1.717931979712576e-05,
"loss": 0.2326,
"step": 320
},
{
"epoch": 0.6044444444444445,
"grad_norm": 3.646226167678833,
"learning_rate": 1.6758008334962462e-05,
"loss": 0.1481,
"step": 340
},
{
"epoch": 0.64,
"grad_norm": 7.208901882171631,
"learning_rate": 1.6313357025102653e-05,
"loss": 0.1729,
"step": 360
},
{
"epoch": 0.6755555555555556,
"grad_norm": 4.766529560089111,
"learning_rate": 1.584690154097788e-05,
"loss": 0.1506,
"step": 380
},
{
"epoch": 0.7111111111111111,
"grad_norm": 5.213943958282471,
"learning_rate": 1.53602528601746e-05,
"loss": 0.0915,
"step": 400
},
{
"epoch": 0.7466666666666667,
"grad_norm": 6.959835529327393,
"learning_rate": 1.485509170066898e-05,
"loss": 0.165,
"step": 420
},
{
"epoch": 0.7822222222222223,
"grad_norm": 5.660776138305664,
"learning_rate": 1.433316271620227e-05,
"loss": 0.1133,
"step": 440
},
{
"epoch": 0.8177777777777778,
"grad_norm": 5.484377861022949,
"learning_rate": 1.3796268470843958e-05,
"loss": 0.1428,
"step": 460
},
{
"epoch": 0.8533333333333334,
"grad_norm": 6.408914089202881,
"learning_rate": 1.324626321355242e-05,
"loss": 0.1192,
"step": 480
},
{
"epoch": 0.8888888888888888,
"grad_norm": 6.384398937225342,
"learning_rate": 1.2685046474233678e-05,
"loss": 0.0774,
"step": 500
},
{
"epoch": 0.9244444444444444,
"grad_norm": 8.599336624145508,
"learning_rate": 1.211455650341529e-05,
"loss": 0.1333,
"step": 520
},
{
"epoch": 0.96,
"grad_norm": 3.2596817016601562,
"learning_rate": 1.1536763578192413e-05,
"loss": 0.0863,
"step": 540
},
{
"epoch": 0.9955555555555555,
"grad_norm": 4.493932723999023,
"learning_rate": 1.0953663197565112e-05,
"loss": 0.1024,
"step": 560
},
{
"epoch": 1.0,
"eval_category_set_accuracy": 0.749,
"eval_is_valid_accuracy": 0.922,
"eval_loss": 0.04746342822909355,
"eval_macro_f1": 0.8320485546083991,
"eval_micro_f1": 0.8457854406130269,
"eval_runtime": 9.5375,
"eval_samples_per_second": 104.849,
"eval_steps_per_second": 13.106,
"step": 563
},
{
"epoch": 1.0302222222222222,
"grad_norm": 3.14100980758667,
"learning_rate": 1.0367269190667806e-05,
"loss": 0.0791,
"step": 580
},
{
"epoch": 1.0657777777777777,
"grad_norm": 5.287792682647705,
"learning_rate": 9.779606761692735e-06,
"loss": 0.0608,
"step": 600
},
{
"epoch": 1.1013333333333333,
"grad_norm": 7.076214790344238,
"learning_rate": 9.19270549552784e-06,
"loss": 0.0675,
"step": 620
},
{
"epoch": 1.1368888888888888,
"grad_norm": 5.072103500366211,
"learning_rate": 8.608592348265239e-06,
"loss": 0.0817,
"step": 640
},
{
"epoch": 1.1724444444444444,
"grad_norm": 1.6247732639312744,
"learning_rate": 8.029284646788679e-06,
"loss": 0.0522,
"step": 660
},
{
"epoch": 1.208,
"grad_norm": 8.46465015411377,
"learning_rate": 7.456783121616925e-06,
"loss": 0.0829,
"step": 680
},
{
"epoch": 1.2435555555555555,
"grad_norm": 3.561216115951538,
"learning_rate": 6.893064997065378e-06,
"loss": 0.0676,
"step": 700
},
{
"epoch": 1.279111111111111,
"grad_norm": 6.2991485595703125,
"learning_rate": 6.340077162590021e-06,
"loss": 0.0797,
"step": 720
},
{
"epoch": 1.3146666666666667,
"grad_norm": 5.223042964935303,
"learning_rate": 5.79972944889761e-06,
"loss": 0.0614,
"step": 740
},
{
"epoch": 1.3502222222222222,
"grad_norm": 3.579955577850342,
"learning_rate": 5.2738880320441256e-06,
"loss": 0.048,
"step": 760
},
{
"epoch": 1.3857777777777778,
"grad_norm": 8.557352066040039,
"learning_rate": 4.764368988301375e-06,
"loss": 0.0794,
"step": 780
},
{
"epoch": 1.4213333333333333,
"grad_norm": 3.6406803131103516,
"learning_rate": 4.272932022051186e-06,
"loss": 0.0589,
"step": 800
},
{
"epoch": 1.456888888888889,
"grad_norm": 4.307910919189453,
"learning_rate": 3.801274388368751e-06,
"loss": 0.062,
"step": 820
},
{
"epoch": 1.4924444444444445,
"grad_norm": 2.9349007606506348,
"learning_rate": 3.3510250312845315e-06,
"loss": 0.0515,
"step": 840
},
{
"epoch": 1.528,
"grad_norm": 4.273534774780273,
"learning_rate": 2.9237389579691232e-06,
"loss": 0.0308,
"step": 860
},
{
"epoch": 1.5635555555555556,
"grad_norm": 6.90522575378418,
"learning_rate": 2.5208918682707185e-06,
"loss": 0.0686,
"step": 880
},
{
"epoch": 1.5991111111111111,
"grad_norm": 3.360746383666992,
"learning_rate": 2.14387505815294e-06,
"loss": 0.051,
"step": 900
},
{
"epoch": 1.6346666666666667,
"grad_norm": 7.586688995361328,
"learning_rate": 1.7939906146347751e-06,
"loss": 0.0526,
"step": 920
},
{
"epoch": 1.6702222222222223,
"grad_norm": 2.680347442626953,
"learning_rate": 1.4724469188276935e-06,
"loss": 0.0501,
"step": 940
},
{
"epoch": 1.7057777777777776,
"grad_norm": 0.2218952476978302,
"learning_rate": 1.1803544726008665e-06,
"loss": 0.0321,
"step": 960
},
{
"epoch": 1.7413333333333334,
"grad_norm": 2.8257687091827393,
"learning_rate": 9.18722063287838e-07,
"loss": 0.0545,
"step": 980
},
{
"epoch": 1.7768888888888887,
"grad_norm": 4.3921380043029785,
"learning_rate": 6.884532796803245e-07,
"loss": 0.0543,
"step": 1000
},
{
"epoch": 1.8124444444444445,
"grad_norm": 8.81263542175293,
"learning_rate": 4.903433913418254e-07,
"loss": 0.0402,
"step": 1020
},
{
"epoch": 1.8479999999999999,
"grad_norm": 5.800553798675537,
"learning_rate": 3.250766020187446e-07,
"loss": 0.0669,
"step": 1040
},
{
"epoch": 1.8835555555555556,
"grad_norm": 5.161985874176025,
"learning_rate": 1.9322368663482693e-07,
"loss": 0.0378,
"step": 1060
},
{
"epoch": 1.919111111111111,
"grad_norm": 6.102690696716309,
"learning_rate": 9.524002002995192e-08,
"loss": 0.0736,
"step": 1080
},
{
"epoch": 1.9546666666666668,
"grad_norm": 3.5213029384613037,
"learning_rate": 3.146400425127927e-08,
"loss": 0.0461,
"step": 1100
},
{
"epoch": 1.9902222222222221,
"grad_norm": 2.1008975505828857,
"learning_rate": 2.1158998284220855e-09,
"loss": 0.0366,
"step": 1120
},
{
"epoch": 2.0,
"eval_category_set_accuracy": 0.825,
"eval_is_valid_accuracy": 0.944,
"eval_loss": 0.03216760233044624,
"eval_macro_f1": 0.893842732124518,
"eval_micro_f1": 0.9026876737720111,
"eval_runtime": 9.5148,
"eval_samples_per_second": 105.099,
"eval_steps_per_second": 13.137,
"step": 1126
}
],
"logging_steps": 20,
"max_steps": 1126,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8505564385194240.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}