Yash1005's picture
upload Code Language-ID encoder (multi-label classifier)
36550eb verified
{
"best_global_step": 3375,
"best_metric": 0.9395306859205776,
"best_model_checkpoint": "/workspace/code_langid/CodeLanguage-Encoder-v1/checkpoint-3375",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 3375,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017777777777777778,
"grad_norm": 91.99722290039062,
"learning_rate": 2.2485207100591717e-06,
"loss": 1.3883,
"step": 20
},
{
"epoch": 0.035555555555555556,
"grad_norm": 22.086933135986328,
"learning_rate": 4.615384615384616e-06,
"loss": 0.57,
"step": 40
},
{
"epoch": 0.05333333333333334,
"grad_norm": 21.929166793823242,
"learning_rate": 6.98224852071006e-06,
"loss": 0.394,
"step": 60
},
{
"epoch": 0.07111111111111111,
"grad_norm": 11.914956092834473,
"learning_rate": 9.349112426035503e-06,
"loss": 0.3654,
"step": 80
},
{
"epoch": 0.08888888888888889,
"grad_norm": 9.060860633850098,
"learning_rate": 1.1715976331360948e-05,
"loss": 0.3761,
"step": 100
},
{
"epoch": 0.10666666666666667,
"grad_norm": 26.926528930664062,
"learning_rate": 1.4082840236686392e-05,
"loss": 0.3866,
"step": 120
},
{
"epoch": 0.12444444444444444,
"grad_norm": 5.112385272979736,
"learning_rate": 1.6449704142011837e-05,
"loss": 0.3834,
"step": 140
},
{
"epoch": 0.14222222222222222,
"grad_norm": 4.676296234130859,
"learning_rate": 1.881656804733728e-05,
"loss": 0.36,
"step": 160
},
{
"epoch": 0.16,
"grad_norm": 3.6760458946228027,
"learning_rate": 1.9999519891672918e-05,
"loss": 0.3531,
"step": 180
},
{
"epoch": 0.17777777777777778,
"grad_norm": 4.675507068634033,
"learning_rate": 1.99956793016566e-05,
"loss": 0.3881,
"step": 200
},
{
"epoch": 0.19555555555555557,
"grad_norm": 6.470630645751953,
"learning_rate": 1.998799959670796e-05,
"loss": 0.3449,
"step": 220
},
{
"epoch": 0.21333333333333335,
"grad_norm": 4.329215049743652,
"learning_rate": 1.9976483726428423e-05,
"loss": 0.3406,
"step": 240
},
{
"epoch": 0.2311111111111111,
"grad_norm": 6.4355058670043945,
"learning_rate": 1.9961136113803982e-05,
"loss": 0.3488,
"step": 260
},
{
"epoch": 0.24888888888888888,
"grad_norm": 25.0487117767334,
"learning_rate": 1.9941962653506426e-05,
"loss": 0.3448,
"step": 280
},
{
"epoch": 0.26666666666666666,
"grad_norm": 6.033944129943848,
"learning_rate": 1.991897070962933e-05,
"loss": 0.3207,
"step": 300
},
{
"epoch": 0.28444444444444444,
"grad_norm": 4.8149919509887695,
"learning_rate": 1.9892169112859677e-05,
"loss": 0.3065,
"step": 320
},
{
"epoch": 0.3022222222222222,
"grad_norm": 16.133445739746094,
"learning_rate": 1.9861568157086182e-05,
"loss": 0.2826,
"step": 340
},
{
"epoch": 0.32,
"grad_norm": 4.365119457244873,
"learning_rate": 1.9827179595445644e-05,
"loss": 0.261,
"step": 360
},
{
"epoch": 0.3377777777777778,
"grad_norm": 5.095998764038086,
"learning_rate": 1.9789016635808836e-05,
"loss": 0.2536,
"step": 380
},
{
"epoch": 0.35555555555555557,
"grad_norm": 6.346428871154785,
"learning_rate": 1.9747093935707658e-05,
"loss": 0.2196,
"step": 400
},
{
"epoch": 0.37333333333333335,
"grad_norm": 4.671675205230713,
"learning_rate": 1.9701427596705504e-05,
"loss": 0.2519,
"step": 420
},
{
"epoch": 0.39111111111111113,
"grad_norm": 8.537422180175781,
"learning_rate": 1.9652035158213015e-05,
"loss": 0.2311,
"step": 440
},
{
"epoch": 0.4088888888888889,
"grad_norm": 4.477969169616699,
"learning_rate": 1.959893559075161e-05,
"loss": 0.2234,
"step": 460
},
{
"epoch": 0.4266666666666667,
"grad_norm": 26.87811279296875,
"learning_rate": 1.9542149288667295e-05,
"loss": 0.1985,
"step": 480
},
{
"epoch": 0.4444444444444444,
"grad_norm": 7.016199111938477,
"learning_rate": 1.9481698062297692e-05,
"loss": 0.2214,
"step": 500
},
{
"epoch": 0.4622222222222222,
"grad_norm": 9.351459503173828,
"learning_rate": 1.941760512959516e-05,
"loss": 0.2035,
"step": 520
},
{
"epoch": 0.48,
"grad_norm": 10.939650535583496,
"learning_rate": 1.9349895107209308e-05,
"loss": 0.1576,
"step": 540
},
{
"epoch": 0.49777777777777776,
"grad_norm": 6.640588283538818,
"learning_rate": 1.9278594001032302e-05,
"loss": 0.1763,
"step": 560
},
{
"epoch": 0.5155555555555555,
"grad_norm": 8.542006492614746,
"learning_rate": 1.920372919621057e-05,
"loss": 0.1562,
"step": 580
},
{
"epoch": 0.5333333333333333,
"grad_norm": 6.431937217712402,
"learning_rate": 1.9125329446626823e-05,
"loss": 0.1478,
"step": 600
},
{
"epoch": 0.5511111111111111,
"grad_norm": 5.852420330047607,
"learning_rate": 1.9043424863856286e-05,
"loss": 0.1169,
"step": 620
},
{
"epoch": 0.5688888888888889,
"grad_norm": 9.895511627197266,
"learning_rate": 1.895804690560156e-05,
"loss": 0.1085,
"step": 640
},
{
"epoch": 0.5866666666666667,
"grad_norm": 9.346630096435547,
"learning_rate": 1.8869228363610406e-05,
"loss": 0.1389,
"step": 660
},
{
"epoch": 0.6044444444444445,
"grad_norm": 2.5748658180236816,
"learning_rate": 1.87770033510812e-05,
"loss": 0.1102,
"step": 680
},
{
"epoch": 0.6222222222222222,
"grad_norm": 3.143669843673706,
"learning_rate": 1.868140728956079e-05,
"loss": 0.1177,
"step": 700
},
{
"epoch": 0.64,
"grad_norm": 2.2248847484588623,
"learning_rate": 1.8582476895339912e-05,
"loss": 0.1281,
"step": 720
},
{
"epoch": 0.6577777777777778,
"grad_norm": 6.335216999053955,
"learning_rate": 1.8480250165351256e-05,
"loss": 0.1279,
"step": 740
},
{
"epoch": 0.6755555555555556,
"grad_norm": 7.8944091796875,
"learning_rate": 1.837476636257574e-05,
"loss": 0.1102,
"step": 760
},
{
"epoch": 0.6933333333333334,
"grad_norm": 4.683923244476318,
"learning_rate": 1.8266066000962444e-05,
"loss": 0.072,
"step": 780
},
{
"epoch": 0.7111111111111111,
"grad_norm": 6.723703861236572,
"learning_rate": 1.8154190829868152e-05,
"loss": 0.1092,
"step": 800
},
{
"epoch": 0.7288888888888889,
"grad_norm": 6.945916175842285,
"learning_rate": 1.803918381802235e-05,
"loss": 0.1264,
"step": 820
},
{
"epoch": 0.7466666666666667,
"grad_norm": 7.265810012817383,
"learning_rate": 1.7921089137023897e-05,
"loss": 0.0858,
"step": 840
},
{
"epoch": 0.7644444444444445,
"grad_norm": 3.5829412937164307,
"learning_rate": 1.779995214437573e-05,
"loss": 0.1233,
"step": 860
},
{
"epoch": 0.7822222222222223,
"grad_norm": 5.7208123207092285,
"learning_rate": 1.767581936606406e-05,
"loss": 0.0956,
"step": 880
},
{
"epoch": 0.8,
"grad_norm": 5.497292995452881,
"learning_rate": 1.7548738478688785e-05,
"loss": 0.0926,
"step": 900
},
{
"epoch": 0.8177777777777778,
"grad_norm": 3.0958940982818604,
"learning_rate": 1.7418758291151995e-05,
"loss": 0.1092,
"step": 920
},
{
"epoch": 0.8355555555555556,
"grad_norm": 2.6405749320983887,
"learning_rate": 1.7285928725911562e-05,
"loss": 0.0991,
"step": 940
},
{
"epoch": 0.8533333333333334,
"grad_norm": 4.4936723709106445,
"learning_rate": 1.7150300799807067e-05,
"loss": 0.0767,
"step": 960
},
{
"epoch": 0.8711111111111111,
"grad_norm": 4.851830005645752,
"learning_rate": 1.7011926604465357e-05,
"loss": 0.0932,
"step": 980
},
{
"epoch": 0.8888888888888888,
"grad_norm": 4.944611549377441,
"learning_rate": 1.6870859286293354e-05,
"loss": 0.0814,
"step": 1000
},
{
"epoch": 0.9066666666666666,
"grad_norm": 4.881468296051025,
"learning_rate": 1.6727153026065707e-05,
"loss": 0.0932,
"step": 1020
},
{
"epoch": 0.9244444444444444,
"grad_norm": 8.76336669921875,
"learning_rate": 1.6580863018115163e-05,
"loss": 0.1098,
"step": 1040
},
{
"epoch": 0.9422222222222222,
"grad_norm": 2.69486141204834,
"learning_rate": 1.64320454491337e-05,
"loss": 0.0757,
"step": 1060
},
{
"epoch": 0.96,
"grad_norm": 2.511228322982788,
"learning_rate": 1.6280757476592467e-05,
"loss": 0.0683,
"step": 1080
},
{
"epoch": 0.9777777777777777,
"grad_norm": 8.538342475891113,
"learning_rate": 1.612705720678888e-05,
"loss": 0.0711,
"step": 1100
},
{
"epoch": 0.9955555555555555,
"grad_norm": 7.436290264129639,
"learning_rate": 1.5971003672529332e-05,
"loss": 0.0936,
"step": 1120
},
{
"epoch": 1.0,
"eval_category_set_accuracy": 0.77,
"eval_is_valid_accuracy": 0.929,
"eval_loss": 0.03886782005429268,
"eval_macro_f1": 0.8593521556125059,
"eval_micro_f1": 0.8701359587435537,
"eval_runtime": 17.3499,
"eval_samples_per_second": 57.637,
"eval_steps_per_second": 14.409,
"step": 1125
},
{
"epoch": 1.0133333333333334,
"grad_norm": 6.135354995727539,
"learning_rate": 1.5812656810455996e-05,
"loss": 0.0731,
"step": 1140
},
{
"epoch": 1.031111111111111,
"grad_norm": 7.639137268066406,
"learning_rate": 1.565207743802653e-05,
"loss": 0.0579,
"step": 1160
},
{
"epoch": 1.048888888888889,
"grad_norm": 9.30708122253418,
"learning_rate": 1.5489327230155455e-05,
"loss": 0.0575,
"step": 1180
},
{
"epoch": 1.0666666666666667,
"grad_norm": 0.26070043444633484,
"learning_rate": 1.5324468695526215e-05,
"loss": 0.0413,
"step": 1200
},
{
"epoch": 1.0844444444444445,
"grad_norm": 7.059542179107666,
"learning_rate": 1.5157565152583002e-05,
"loss": 0.0779,
"step": 1220
},
{
"epoch": 1.1022222222222222,
"grad_norm": 12.312825202941895,
"learning_rate": 1.4988680705211568e-05,
"loss": 0.0732,
"step": 1240
},
{
"epoch": 1.12,
"grad_norm": 0.6326771378517151,
"learning_rate": 1.481788021811837e-05,
"loss": 0.0522,
"step": 1260
},
{
"epoch": 1.1377777777777778,
"grad_norm": 6.887764930725098,
"learning_rate": 1.46452292919175e-05,
"loss": 0.0667,
"step": 1280
},
{
"epoch": 1.1555555555555554,
"grad_norm": 6.309450626373291,
"learning_rate": 1.4470794237934966e-05,
"loss": 0.0623,
"step": 1300
},
{
"epoch": 1.1733333333333333,
"grad_norm": 2.185997247695923,
"learning_rate": 1.4294642052740015e-05,
"loss": 0.0538,
"step": 1320
},
{
"epoch": 1.1911111111111112,
"grad_norm": 1.244521975517273,
"learning_rate": 1.4116840392413247e-05,
"loss": 0.0392,
"step": 1340
},
{
"epoch": 1.208888888888889,
"grad_norm": 7.956927299499512,
"learning_rate": 1.393745754656146e-05,
"loss": 0.0572,
"step": 1360
},
{
"epoch": 1.2266666666666666,
"grad_norm": 7.431176662445068,
"learning_rate": 1.3756562412089141e-05,
"loss": 0.059,
"step": 1380
},
{
"epoch": 1.2444444444444445,
"grad_norm": 2.3178868293762207,
"learning_rate": 1.3574224466736716e-05,
"loss": 0.0605,
"step": 1400
},
{
"epoch": 1.2622222222222224,
"grad_norm": 0.8065705895423889,
"learning_rate": 1.3390513742395725e-05,
"loss": 0.0377,
"step": 1420
},
{
"epoch": 1.28,
"grad_norm": 2.56118106842041,
"learning_rate": 1.3205500798211155e-05,
"loss": 0.0382,
"step": 1440
},
{
"epoch": 1.2977777777777777,
"grad_norm": 5.321646213531494,
"learning_rate": 1.3019256693481253e-05,
"loss": 0.0346,
"step": 1460
},
{
"epoch": 1.3155555555555556,
"grad_norm": 6.427070617675781,
"learning_rate": 1.2831852960365256e-05,
"loss": 0.0598,
"step": 1480
},
{
"epoch": 1.3333333333333333,
"grad_norm": 4.72144889831543,
"learning_rate": 1.2643361576409517e-05,
"loss": 0.0683,
"step": 1500
},
{
"epoch": 1.3511111111111112,
"grad_norm": 8.388670921325684,
"learning_rate": 1.2453854936902525e-05,
"loss": 0.0521,
"step": 1520
},
{
"epoch": 1.3688888888888888,
"grad_norm": 6.774533748626709,
"learning_rate": 1.2263405827069531e-05,
"loss": 0.0677,
"step": 1540
},
{
"epoch": 1.3866666666666667,
"grad_norm": 3.9661309719085693,
"learning_rate": 1.2072087394117382e-05,
"loss": 0.045,
"step": 1560
},
{
"epoch": 1.4044444444444444,
"grad_norm": 0.7026374340057373,
"learning_rate": 1.1879973119140316e-05,
"loss": 0.0363,
"step": 1580
},
{
"epoch": 1.4222222222222223,
"grad_norm": 8.110880851745605,
"learning_rate": 1.1687136788897544e-05,
"loss": 0.0474,
"step": 1600
},
{
"epoch": 1.44,
"grad_norm": 6.098938465118408,
"learning_rate": 1.1493652467473418e-05,
"loss": 0.063,
"step": 1620
},
{
"epoch": 1.4577777777777778,
"grad_norm": 6.203182697296143,
"learning_rate": 1.1299594467831079e-05,
"loss": 0.0445,
"step": 1640
},
{
"epoch": 1.4755555555555555,
"grad_norm": 4.827872276306152,
"learning_rate": 1.1105037323270538e-05,
"loss": 0.0467,
"step": 1660
},
{
"epoch": 1.4933333333333334,
"grad_norm": 1.4762051105499268,
"learning_rate": 1.09100557588021e-05,
"loss": 0.0466,
"step": 1680
},
{
"epoch": 1.511111111111111,
"grad_norm": 3.032404899597168,
"learning_rate": 1.0714724662446194e-05,
"loss": 0.0482,
"step": 1700
},
{
"epoch": 1.528888888888889,
"grad_norm": 2.016261100769043,
"learning_rate": 1.051911905647055e-05,
"loss": 0.0458,
"step": 1720
},
{
"epoch": 1.5466666666666666,
"grad_norm": 1.3372159004211426,
"learning_rate": 1.0323314068575858e-05,
"loss": 0.0473,
"step": 1740
},
{
"epoch": 1.5644444444444443,
"grad_norm": 9.508127212524414,
"learning_rate": 1.0127384903040907e-05,
"loss": 0.0547,
"step": 1760
},
{
"epoch": 1.5822222222222222,
"grad_norm": 7.226083278656006,
"learning_rate": 9.931406811838307e-06,
"loss": 0.0403,
"step": 1780
},
{
"epoch": 1.6,
"grad_norm": 6.973893165588379,
"learning_rate": 9.735455065731922e-06,
"loss": 0.0404,
"step": 1800
},
{
"epoch": 1.6177777777777778,
"grad_norm": 5.251108646392822,
"learning_rate": 9.539604925367052e-06,
"loss": 0.0358,
"step": 1820
},
{
"epoch": 1.6355555555555554,
"grad_norm": 3.708037853240967,
"learning_rate": 9.343931612364533e-06,
"loss": 0.0319,
"step": 1840
},
{
"epoch": 1.6533333333333333,
"grad_norm": 1.9106441736221313,
"learning_rate": 9.148510280429786e-06,
"loss": 0.03,
"step": 1860
},
{
"epoch": 1.6711111111111112,
"grad_norm": 5.1486592292785645,
"learning_rate": 8.95341598648801e-06,
"loss": 0.0474,
"step": 1880
},
{
"epoch": 1.6888888888888889,
"grad_norm": 0.31516969203948975,
"learning_rate": 8.758723661856465e-06,
"loss": 0.0239,
"step": 1900
},
{
"epoch": 1.7066666666666666,
"grad_norm": 2.8174779415130615,
"learning_rate": 8.56450808346508e-06,
"loss": 0.0405,
"step": 1920
},
{
"epoch": 1.7244444444444444,
"grad_norm": 2.6797842979431152,
"learning_rate": 8.370843845136307e-06,
"loss": 0.0338,
"step": 1940
},
{
"epoch": 1.7422222222222223,
"grad_norm": 7.398653984069824,
"learning_rate": 8.177805328935311e-06,
"loss": 0.0367,
"step": 1960
},
{
"epoch": 1.76,
"grad_norm": 0.9176409244537354,
"learning_rate": 7.98546667660151e-06,
"loss": 0.0514,
"step": 1980
},
{
"epoch": 1.7777777777777777,
"grad_norm": 6.95626974105835,
"learning_rate": 7.793901761072396e-06,
"loss": 0.0462,
"step": 2000
},
{
"epoch": 1.7955555555555556,
"grad_norm": 7.002866268157959,
"learning_rate": 7.6031841581106045e-06,
"loss": 0.0431,
"step": 2020
},
{
"epoch": 1.8133333333333335,
"grad_norm": 3.7514915466308594,
"learning_rate": 7.413387118045136e-06,
"loss": 0.0275,
"step": 2040
},
{
"epoch": 1.8311111111111111,
"grad_norm": 4.360569477081299,
"learning_rate": 7.224583537637544e-06,
"loss": 0.0545,
"step": 2060
},
{
"epoch": 1.8488888888888888,
"grad_norm": 4.490522861480713,
"learning_rate": 7.036845932083938e-06,
"loss": 0.0364,
"step": 2080
},
{
"epoch": 1.8666666666666667,
"grad_norm": 0.8960697650909424,
"learning_rate": 6.850246407163532e-06,
"loss": 0.025,
"step": 2100
},
{
"epoch": 1.8844444444444446,
"grad_norm": 8.298011779785156,
"learning_rate": 6.664856631544449e-06,
"loss": 0.0488,
"step": 2120
},
{
"epoch": 1.9022222222222223,
"grad_norm": 0.39041435718536377,
"learning_rate": 6.48074780925739e-06,
"loss": 0.0499,
"step": 2140
},
{
"epoch": 1.92,
"grad_norm": 4.386420249938965,
"learning_rate": 6.2979906523477765e-06,
"loss": 0.0357,
"step": 2160
},
{
"epoch": 1.9377777777777778,
"grad_norm": 0.0772632360458374,
"learning_rate": 6.1166553537168494e-06,
"loss": 0.0434,
"step": 2180
},
{
"epoch": 1.9555555555555557,
"grad_norm": 3.595348358154297,
"learning_rate": 5.936811560162169e-06,
"loss": 0.0356,
"step": 2200
},
{
"epoch": 1.9733333333333334,
"grad_norm": 2.2714860439300537,
"learning_rate": 5.758528345627828e-06,
"loss": 0.0268,
"step": 2220
},
{
"epoch": 1.991111111111111,
"grad_norm": 0.9474923610687256,
"learning_rate": 5.581874184674734e-06,
"loss": 0.0421,
"step": 2240
},
{
"epoch": 2.0,
"eval_category_set_accuracy": 0.874,
"eval_is_valid_accuracy": 0.965,
"eval_loss": 0.022557925432920456,
"eval_macro_f1": 0.9249589403924913,
"eval_micro_f1": 0.9310653536257834,
"eval_runtime": 17.6046,
"eval_samples_per_second": 56.803,
"eval_steps_per_second": 14.201,
"step": 2250
},
{
"epoch": 2.008888888888889,
"grad_norm": 0.81345134973526,
"learning_rate": 5.406916926181052e-06,
"loss": 0.0178,
"step": 2260
},
{
"epoch": 2.026666666666667,
"grad_norm": 0.6435673832893372,
"learning_rate": 5.2337237672830055e-06,
"loss": 0.0153,
"step": 2280
},
{
"epoch": 2.0444444444444443,
"grad_norm": 2.50960636138916,
"learning_rate": 5.062361227565946e-06,
"loss": 0.0147,
"step": 2300
},
{
"epoch": 2.062222222222222,
"grad_norm": 0.061879731714725494,
"learning_rate": 4.892895123515696e-06,
"loss": 0.011,
"step": 2320
},
{
"epoch": 2.08,
"grad_norm": 0.04990602657198906,
"learning_rate": 4.7253905432399295e-06,
"loss": 0.0127,
"step": 2340
},
{
"epoch": 2.097777777777778,
"grad_norm": 5.34898567199707,
"learning_rate": 4.559911821469275e-06,
"loss": 0.0157,
"step": 2360
},
{
"epoch": 2.1155555555555554,
"grad_norm": 0.2640538513660431,
"learning_rate": 4.396522514847811e-06,
"loss": 0.0113,
"step": 2380
},
{
"epoch": 2.1333333333333333,
"grad_norm": 0.38076090812683105,
"learning_rate": 4.235285377522401e-06,
"loss": 0.0069,
"step": 2400
},
{
"epoch": 2.151111111111111,
"grad_norm": 0.957832932472229,
"learning_rate": 4.076262337040223e-06,
"loss": 0.0136,
"step": 2420
},
{
"epoch": 2.168888888888889,
"grad_norm": 4.484457015991211,
"learning_rate": 3.9195144705638034e-06,
"loss": 0.0085,
"step": 2440
},
{
"epoch": 2.1866666666666665,
"grad_norm": 5.153985023498535,
"learning_rate": 3.7651019814126656e-06,
"loss": 0.0144,
"step": 2460
},
{
"epoch": 2.2044444444444444,
"grad_norm": 0.1879798024892807,
"learning_rate": 3.6130841759405776e-06,
"loss": 0.0041,
"step": 2480
},
{
"epoch": 2.2222222222222223,
"grad_norm": 5.104600429534912,
"learning_rate": 3.4635194407573247e-06,
"loss": 0.0154,
"step": 2500
},
{
"epoch": 2.24,
"grad_norm": 0.09507002681493759,
"learning_rate": 3.316465220303744e-06,
"loss": 0.0152,
"step": 2520
},
{
"epoch": 2.2577777777777777,
"grad_norm": 1.6253083944320679,
"learning_rate": 3.1719779947885863e-06,
"loss": 0.0063,
"step": 2540
},
{
"epoch": 2.2755555555555556,
"grad_norm": 4.733920574188232,
"learning_rate": 3.030113258495756e-06,
"loss": 0.0083,
"step": 2560
},
{
"epoch": 2.2933333333333334,
"grad_norm": 4.784224033355713,
"learning_rate": 2.890925498470213e-06,
"loss": 0.012,
"step": 2580
},
{
"epoch": 2.311111111111111,
"grad_norm": 0.4101342260837555,
"learning_rate": 2.754468173590713e-06,
"loss": 0.0034,
"step": 2600
},
{
"epoch": 2.328888888888889,
"grad_norm": 0.1665552854537964,
"learning_rate": 2.6207936940374767e-06,
"loss": 0.0199,
"step": 2620
},
{
"epoch": 2.3466666666666667,
"grad_norm": 0.27030205726623535,
"learning_rate": 2.4899534011626012e-06,
"loss": 0.0162,
"step": 2640
},
{
"epoch": 2.3644444444444446,
"grad_norm": 2.187560796737671,
"learning_rate": 2.36199754777102e-06,
"loss": 0.0035,
"step": 2660
},
{
"epoch": 2.3822222222222225,
"grad_norm": 0.5252636671066284,
"learning_rate": 2.2369752788195343e-06,
"loss": 0.012,
"step": 2680
},
{
"epoch": 2.4,
"grad_norm": 2.124925136566162,
"learning_rate": 2.1149346125413316e-06,
"loss": 0.0144,
"step": 2700
},
{
"epoch": 2.417777777777778,
"grad_norm": 2.135836601257324,
"learning_rate": 1.9959224220032747e-06,
"loss": 0.0187,
"step": 2720
},
{
"epoch": 2.4355555555555557,
"grad_norm": 0.13177770376205444,
"learning_rate": 1.879984417103017e-06,
"loss": 0.01,
"step": 2740
},
{
"epoch": 2.453333333333333,
"grad_norm": 0.11943187564611435,
"learning_rate": 1.7671651270128531e-06,
"loss": 0.0124,
"step": 2760
},
{
"epoch": 2.471111111111111,
"grad_norm": 2.2270348072052,
"learning_rate": 1.6575078830770708e-06,
"loss": 0.0109,
"step": 2780
},
{
"epoch": 2.488888888888889,
"grad_norm": 0.23311637341976166,
"learning_rate": 1.5510548021693693e-06,
"loss": 0.015,
"step": 2800
},
{
"epoch": 2.506666666666667,
"grad_norm": 0.0807090774178505,
"learning_rate": 1.447846770516701e-06,
"loss": 0.0089,
"step": 2820
},
{
"epoch": 2.5244444444444447,
"grad_norm": 2.7864654064178467,
"learning_rate": 1.3479234279958041e-06,
"loss": 0.013,
"step": 2840
},
{
"epoch": 2.542222222222222,
"grad_norm": 0.3439638018608093,
"learning_rate": 1.2513231529084269e-06,
"loss": 0.0096,
"step": 2860
},
{
"epoch": 2.56,
"grad_norm": 0.3769543766975403,
"learning_rate": 1.1580830472410709e-06,
"loss": 0.0065,
"step": 2880
},
{
"epoch": 2.5777777777777775,
"grad_norm": 4.0474677085876465,
"learning_rate": 1.0682389224149648e-06,
"loss": 0.0116,
"step": 2900
},
{
"epoch": 2.5955555555555554,
"grad_norm": 2.4342126846313477,
"learning_rate": 9.818252855317112e-07,
"loss": 0.0063,
"step": 2920
},
{
"epoch": 2.6133333333333333,
"grad_norm": 0.04185617342591286,
"learning_rate": 8.988753261198724e-07,
"loss": 0.0027,
"step": 2940
},
{
"epoch": 2.631111111111111,
"grad_norm": 1.2005906105041504,
"learning_rate": 8.19420903387631e-07,
"loss": 0.0084,
"step": 2960
},
{
"epoch": 2.648888888888889,
"grad_norm": 1.7181979417800903,
"learning_rate": 7.434925339863908e-07,
"loss": 0.0047,
"step": 2980
},
{
"epoch": 2.6666666666666665,
"grad_norm": 0.12996995449066162,
"learning_rate": 6.711193802900074e-07,
"loss": 0.0128,
"step": 3000
},
{
"epoch": 2.6844444444444444,
"grad_norm": 3.3893086910247803,
"learning_rate": 6.023292391941859e-07,
"loss": 0.0139,
"step": 3020
},
{
"epoch": 2.7022222222222223,
"grad_norm": 0.1592389941215515,
"learning_rate": 5.371485314403202e-07,
"loss": 0.0101,
"step": 3040
},
{
"epoch": 2.7199999999999998,
"grad_norm": 0.9441222548484802,
"learning_rate": 4.756022914678804e-07,
"loss": 0.0082,
"step": 3060
},
{
"epoch": 2.7377777777777776,
"grad_norm": 0.5016261339187622,
"learning_rate": 4.1771415779924826e-07,
"loss": 0.0082,
"step": 3080
},
{
"epoch": 2.7555555555555555,
"grad_norm": 0.2186020165681839,
"learning_rate": 3.6350636396069947e-07,
"loss": 0.0127,
"step": 3100
},
{
"epoch": 2.7733333333333334,
"grad_norm": 0.1453126072883606,
"learning_rate": 3.1299972994299874e-07,
"loss": 0.0058,
"step": 3120
},
{
"epoch": 2.7911111111111113,
"grad_norm": 1.516239881515503,
"learning_rate": 2.6621365420491984e-07,
"loss": 0.0151,
"step": 3140
},
{
"epoch": 2.8088888888888888,
"grad_norm": 5.567173480987549,
"learning_rate": 2.2316610622273082e-07,
"loss": 0.0108,
"step": 3160
},
{
"epoch": 2.8266666666666667,
"grad_norm": 0.06365057826042175,
"learning_rate": 1.8387361958852378e-07,
"loss": 0.0141,
"step": 3180
},
{
"epoch": 2.8444444444444446,
"grad_norm": 0.7341110706329346,
"learning_rate": 1.4835128566003553e-07,
"loss": 0.0132,
"step": 3200
},
{
"epoch": 2.862222222222222,
"grad_norm": 0.07594721764326096,
"learning_rate": 1.1661274776439857e-07,
"loss": 0.0067,
"step": 3220
},
{
"epoch": 2.88,
"grad_norm": 1.0531156063079834,
"learning_rate": 8.867019595804272e-08,
"loss": 0.0089,
"step": 3240
},
{
"epoch": 2.897777777777778,
"grad_norm": 0.6338114738464355,
"learning_rate": 6.453436234477805e-08,
"loss": 0.0084,
"step": 3260
},
{
"epoch": 2.9155555555555557,
"grad_norm": 0.8637073040008545,
"learning_rate": 4.4214516953825505e-08,
"loss": 0.0037,
"step": 3280
},
{
"epoch": 2.9333333333333336,
"grad_norm": 2.1685738563537598,
"learning_rate": 2.7718464179415928e-08,
"loss": 0.0062,
"step": 3300
},
{
"epoch": 2.951111111111111,
"grad_norm": 1.4732089042663574,
"learning_rate": 1.5052539783292353e-08,
"loss": 0.0061,
"step": 3320
},
{
"epoch": 2.968888888888889,
"grad_norm": 1.7118995189666748,
"learning_rate": 6.2216084612931606e-09,
"loss": 0.0166,
"step": 3340
},
{
"epoch": 2.986666666666667,
"grad_norm": 0.22905214130878448,
"learning_rate": 1.2290619749244504e-09,
"loss": 0.0174,
"step": 3360
},
{
"epoch": 3.0,
"eval_category_set_accuracy": 0.883,
"eval_is_valid_accuracy": 0.966,
"eval_loss": 0.021968627348542213,
"eval_macro_f1": 0.9343878654799134,
"eval_micro_f1": 0.9395306859205776,
"eval_runtime": 17.5098,
"eval_samples_per_second": 57.111,
"eval_steps_per_second": 14.278,
"step": 3375
}
],
"logging_steps": 20,
"max_steps": 3375,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.318107799420652e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}