Text Classification
Transformers
Safetensors
modernbert
code
language-identification
multi-label
llm-guard
encoder
text-embeddings-inference
Instructions to use Accuknoxtechnologies/CodeLanguage-Encoder-v1 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Accuknoxtechnologies/CodeLanguage-Encoder-v1 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="Accuknoxtechnologies/CodeLanguage-Encoder-v1")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("Accuknoxtechnologies/CodeLanguage-Encoder-v1")
model = AutoModelForSequenceClassification.from_pretrained("Accuknoxtechnologies/CodeLanguage-Encoder-v1")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": 3375, | |
| "best_metric": 0.9395306859205776, | |
| "best_model_checkpoint": "/workspace/code_langid/CodeLanguage-Encoder-v1/checkpoint-3375", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 3375, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.017777777777777778, | |
| "grad_norm": 91.99722290039062, | |
| "learning_rate": 2.2485207100591717e-06, | |
| "loss": 1.3883, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.035555555555555556, | |
| "grad_norm": 22.086933135986328, | |
| "learning_rate": 4.615384615384616e-06, | |
| "loss": 0.57, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.05333333333333334, | |
| "grad_norm": 21.929166793823242, | |
| "learning_rate": 6.98224852071006e-06, | |
| "loss": 0.394, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07111111111111111, | |
| "grad_norm": 11.914956092834473, | |
| "learning_rate": 9.349112426035503e-06, | |
| "loss": 0.3654, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08888888888888889, | |
| "grad_norm": 9.060860633850098, | |
| "learning_rate": 1.1715976331360948e-05, | |
| "loss": 0.3761, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10666666666666667, | |
| "grad_norm": 26.926528930664062, | |
| "learning_rate": 1.4082840236686392e-05, | |
| "loss": 0.3866, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.12444444444444444, | |
| "grad_norm": 5.112385272979736, | |
| "learning_rate": 1.6449704142011837e-05, | |
| "loss": 0.3834, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.14222222222222222, | |
| "grad_norm": 4.676296234130859, | |
| "learning_rate": 1.881656804733728e-05, | |
| "loss": 0.36, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 3.6760458946228027, | |
| "learning_rate": 1.9999519891672918e-05, | |
| "loss": 0.3531, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.17777777777777778, | |
| "grad_norm": 4.675507068634033, | |
| "learning_rate": 1.99956793016566e-05, | |
| "loss": 0.3881, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.19555555555555557, | |
| "grad_norm": 6.470630645751953, | |
| "learning_rate": 1.998799959670796e-05, | |
| "loss": 0.3449, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.21333333333333335, | |
| "grad_norm": 4.329215049743652, | |
| "learning_rate": 1.9976483726428423e-05, | |
| "loss": 0.3406, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2311111111111111, | |
| "grad_norm": 6.4355058670043945, | |
| "learning_rate": 1.9961136113803982e-05, | |
| "loss": 0.3488, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.24888888888888888, | |
| "grad_norm": 25.0487117767334, | |
| "learning_rate": 1.9941962653506426e-05, | |
| "loss": 0.3448, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 6.033944129943848, | |
| "learning_rate": 1.991897070962933e-05, | |
| "loss": 0.3207, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.28444444444444444, | |
| "grad_norm": 4.8149919509887695, | |
| "learning_rate": 1.9892169112859677e-05, | |
| "loss": 0.3065, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.3022222222222222, | |
| "grad_norm": 16.133445739746094, | |
| "learning_rate": 1.9861568157086182e-05, | |
| "loss": 0.2826, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 4.365119457244873, | |
| "learning_rate": 1.9827179595445644e-05, | |
| "loss": 0.261, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3377777777777778, | |
| "grad_norm": 5.095998764038086, | |
| "learning_rate": 1.9789016635808836e-05, | |
| "loss": 0.2536, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.35555555555555557, | |
| "grad_norm": 6.346428871154785, | |
| "learning_rate": 1.9747093935707658e-05, | |
| "loss": 0.2196, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.37333333333333335, | |
| "grad_norm": 4.671675205230713, | |
| "learning_rate": 1.9701427596705504e-05, | |
| "loss": 0.2519, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.39111111111111113, | |
| "grad_norm": 8.537422180175781, | |
| "learning_rate": 1.9652035158213015e-05, | |
| "loss": 0.2311, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4088888888888889, | |
| "grad_norm": 4.477969169616699, | |
| "learning_rate": 1.959893559075161e-05, | |
| "loss": 0.2234, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.4266666666666667, | |
| "grad_norm": 26.87811279296875, | |
| "learning_rate": 1.9542149288667295e-05, | |
| "loss": 0.1985, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 7.016199111938477, | |
| "learning_rate": 1.9481698062297692e-05, | |
| "loss": 0.2214, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4622222222222222, | |
| "grad_norm": 9.351459503173828, | |
| "learning_rate": 1.941760512959516e-05, | |
| "loss": 0.2035, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 10.939650535583496, | |
| "learning_rate": 1.9349895107209308e-05, | |
| "loss": 0.1576, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.49777777777777776, | |
| "grad_norm": 6.640588283538818, | |
| "learning_rate": 1.9278594001032302e-05, | |
| "loss": 0.1763, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5155555555555555, | |
| "grad_norm": 8.542006492614746, | |
| "learning_rate": 1.920372919621057e-05, | |
| "loss": 0.1562, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 6.431937217712402, | |
| "learning_rate": 1.9125329446626823e-05, | |
| "loss": 0.1478, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5511111111111111, | |
| "grad_norm": 5.852420330047607, | |
| "learning_rate": 1.9043424863856286e-05, | |
| "loss": 0.1169, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.5688888888888889, | |
| "grad_norm": 9.895511627197266, | |
| "learning_rate": 1.895804690560156e-05, | |
| "loss": 0.1085, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5866666666666667, | |
| "grad_norm": 9.346630096435547, | |
| "learning_rate": 1.8869228363610406e-05, | |
| "loss": 0.1389, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6044444444444445, | |
| "grad_norm": 2.5748658180236816, | |
| "learning_rate": 1.87770033510812e-05, | |
| "loss": 0.1102, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.6222222222222222, | |
| "grad_norm": 3.143669843673706, | |
| "learning_rate": 1.868140728956079e-05, | |
| "loss": 0.1177, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 2.2248847484588623, | |
| "learning_rate": 1.8582476895339912e-05, | |
| "loss": 0.1281, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6577777777777778, | |
| "grad_norm": 6.335216999053955, | |
| "learning_rate": 1.8480250165351256e-05, | |
| "loss": 0.1279, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.6755555555555556, | |
| "grad_norm": 7.8944091796875, | |
| "learning_rate": 1.837476636257574e-05, | |
| "loss": 0.1102, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.6933333333333334, | |
| "grad_norm": 4.683923244476318, | |
| "learning_rate": 1.8266066000962444e-05, | |
| "loss": 0.072, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.7111111111111111, | |
| "grad_norm": 6.723703861236572, | |
| "learning_rate": 1.8154190829868152e-05, | |
| "loss": 0.1092, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7288888888888889, | |
| "grad_norm": 6.945916175842285, | |
| "learning_rate": 1.803918381802235e-05, | |
| "loss": 0.1264, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.7466666666666667, | |
| "grad_norm": 7.265810012817383, | |
| "learning_rate": 1.7921089137023897e-05, | |
| "loss": 0.0858, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.7644444444444445, | |
| "grad_norm": 3.5829412937164307, | |
| "learning_rate": 1.779995214437573e-05, | |
| "loss": 0.1233, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.7822222222222223, | |
| "grad_norm": 5.7208123207092285, | |
| "learning_rate": 1.767581936606406e-05, | |
| "loss": 0.0956, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 5.497292995452881, | |
| "learning_rate": 1.7548738478688785e-05, | |
| "loss": 0.0926, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.8177777777777778, | |
| "grad_norm": 3.0958940982818604, | |
| "learning_rate": 1.7418758291151995e-05, | |
| "loss": 0.1092, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.8355555555555556, | |
| "grad_norm": 2.6405749320983887, | |
| "learning_rate": 1.7285928725911562e-05, | |
| "loss": 0.0991, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.8533333333333334, | |
| "grad_norm": 4.4936723709106445, | |
| "learning_rate": 1.7150300799807067e-05, | |
| "loss": 0.0767, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.8711111111111111, | |
| "grad_norm": 4.851830005645752, | |
| "learning_rate": 1.7011926604465357e-05, | |
| "loss": 0.0932, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 4.944611549377441, | |
| "learning_rate": 1.6870859286293354e-05, | |
| "loss": 0.0814, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.9066666666666666, | |
| "grad_norm": 4.881468296051025, | |
| "learning_rate": 1.6727153026065707e-05, | |
| "loss": 0.0932, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.9244444444444444, | |
| "grad_norm": 8.76336669921875, | |
| "learning_rate": 1.6580863018115163e-05, | |
| "loss": 0.1098, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.9422222222222222, | |
| "grad_norm": 2.69486141204834, | |
| "learning_rate": 1.64320454491337e-05, | |
| "loss": 0.0757, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 2.511228322982788, | |
| "learning_rate": 1.6280757476592467e-05, | |
| "loss": 0.0683, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.9777777777777777, | |
| "grad_norm": 8.538342475891113, | |
| "learning_rate": 1.612705720678888e-05, | |
| "loss": 0.0711, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.9955555555555555, | |
| "grad_norm": 7.436290264129639, | |
| "learning_rate": 1.5971003672529332e-05, | |
| "loss": 0.0936, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_category_set_accuracy": 0.77, | |
| "eval_is_valid_accuracy": 0.929, | |
| "eval_loss": 0.03886782005429268, | |
| "eval_macro_f1": 0.8593521556125059, | |
| "eval_micro_f1": 0.8701359587435537, | |
| "eval_runtime": 17.3499, | |
| "eval_samples_per_second": 57.637, | |
| "eval_steps_per_second": 14.409, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.0133333333333334, | |
| "grad_norm": 6.135354995727539, | |
| "learning_rate": 1.5812656810455996e-05, | |
| "loss": 0.0731, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.031111111111111, | |
| "grad_norm": 7.639137268066406, | |
| "learning_rate": 1.565207743802653e-05, | |
| "loss": 0.0579, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.048888888888889, | |
| "grad_norm": 9.30708122253418, | |
| "learning_rate": 1.5489327230155455e-05, | |
| "loss": 0.0575, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.0666666666666667, | |
| "grad_norm": 0.26070043444633484, | |
| "learning_rate": 1.5324468695526215e-05, | |
| "loss": 0.0413, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.0844444444444445, | |
| "grad_norm": 7.059542179107666, | |
| "learning_rate": 1.5157565152583002e-05, | |
| "loss": 0.0779, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.1022222222222222, | |
| "grad_norm": 12.312825202941895, | |
| "learning_rate": 1.4988680705211568e-05, | |
| "loss": 0.0732, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.6326771378517151, | |
| "learning_rate": 1.481788021811837e-05, | |
| "loss": 0.0522, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.1377777777777778, | |
| "grad_norm": 6.887764930725098, | |
| "learning_rate": 1.46452292919175e-05, | |
| "loss": 0.0667, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.1555555555555554, | |
| "grad_norm": 6.309450626373291, | |
| "learning_rate": 1.4470794237934966e-05, | |
| "loss": 0.0623, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.1733333333333333, | |
| "grad_norm": 2.185997247695923, | |
| "learning_rate": 1.4294642052740015e-05, | |
| "loss": 0.0538, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.1911111111111112, | |
| "grad_norm": 1.244521975517273, | |
| "learning_rate": 1.4116840392413247e-05, | |
| "loss": 0.0392, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.208888888888889, | |
| "grad_norm": 7.956927299499512, | |
| "learning_rate": 1.393745754656146e-05, | |
| "loss": 0.0572, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.2266666666666666, | |
| "grad_norm": 7.431176662445068, | |
| "learning_rate": 1.3756562412089141e-05, | |
| "loss": 0.059, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.2444444444444445, | |
| "grad_norm": 2.3178868293762207, | |
| "learning_rate": 1.3574224466736716e-05, | |
| "loss": 0.0605, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.2622222222222224, | |
| "grad_norm": 0.8065705895423889, | |
| "learning_rate": 1.3390513742395725e-05, | |
| "loss": 0.0377, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 2.56118106842041, | |
| "learning_rate": 1.3205500798211155e-05, | |
| "loss": 0.0382, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.2977777777777777, | |
| "grad_norm": 5.321646213531494, | |
| "learning_rate": 1.3019256693481253e-05, | |
| "loss": 0.0346, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.3155555555555556, | |
| "grad_norm": 6.427070617675781, | |
| "learning_rate": 1.2831852960365256e-05, | |
| "loss": 0.0598, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 4.72144889831543, | |
| "learning_rate": 1.2643361576409517e-05, | |
| "loss": 0.0683, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.3511111111111112, | |
| "grad_norm": 8.388670921325684, | |
| "learning_rate": 1.2453854936902525e-05, | |
| "loss": 0.0521, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.3688888888888888, | |
| "grad_norm": 6.774533748626709, | |
| "learning_rate": 1.2263405827069531e-05, | |
| "loss": 0.0677, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.3866666666666667, | |
| "grad_norm": 3.9661309719085693, | |
| "learning_rate": 1.2072087394117382e-05, | |
| "loss": 0.045, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.4044444444444444, | |
| "grad_norm": 0.7026374340057373, | |
| "learning_rate": 1.1879973119140316e-05, | |
| "loss": 0.0363, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.4222222222222223, | |
| "grad_norm": 8.110880851745605, | |
| "learning_rate": 1.1687136788897544e-05, | |
| "loss": 0.0474, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 6.098938465118408, | |
| "learning_rate": 1.1493652467473418e-05, | |
| "loss": 0.063, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.4577777777777778, | |
| "grad_norm": 6.203182697296143, | |
| "learning_rate": 1.1299594467831079e-05, | |
| "loss": 0.0445, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.4755555555555555, | |
| "grad_norm": 4.827872276306152, | |
| "learning_rate": 1.1105037323270538e-05, | |
| "loss": 0.0467, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.4933333333333334, | |
| "grad_norm": 1.4762051105499268, | |
| "learning_rate": 1.09100557588021e-05, | |
| "loss": 0.0466, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.511111111111111, | |
| "grad_norm": 3.032404899597168, | |
| "learning_rate": 1.0714724662446194e-05, | |
| "loss": 0.0482, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.528888888888889, | |
| "grad_norm": 2.016261100769043, | |
| "learning_rate": 1.051911905647055e-05, | |
| "loss": 0.0458, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.5466666666666666, | |
| "grad_norm": 1.3372159004211426, | |
| "learning_rate": 1.0323314068575858e-05, | |
| "loss": 0.0473, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.5644444444444443, | |
| "grad_norm": 9.508127212524414, | |
| "learning_rate": 1.0127384903040907e-05, | |
| "loss": 0.0547, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.5822222222222222, | |
| "grad_norm": 7.226083278656006, | |
| "learning_rate": 9.931406811838307e-06, | |
| "loss": 0.0403, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 6.973893165588379, | |
| "learning_rate": 9.735455065731922e-06, | |
| "loss": 0.0404, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.6177777777777778, | |
| "grad_norm": 5.251108646392822, | |
| "learning_rate": 9.539604925367052e-06, | |
| "loss": 0.0358, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.6355555555555554, | |
| "grad_norm": 3.708037853240967, | |
| "learning_rate": 9.343931612364533e-06, | |
| "loss": 0.0319, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.6533333333333333, | |
| "grad_norm": 1.9106441736221313, | |
| "learning_rate": 9.148510280429786e-06, | |
| "loss": 0.03, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.6711111111111112, | |
| "grad_norm": 5.1486592292785645, | |
| "learning_rate": 8.95341598648801e-06, | |
| "loss": 0.0474, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.6888888888888889, | |
| "grad_norm": 0.31516969203948975, | |
| "learning_rate": 8.758723661856465e-06, | |
| "loss": 0.0239, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.7066666666666666, | |
| "grad_norm": 2.8174779415130615, | |
| "learning_rate": 8.56450808346508e-06, | |
| "loss": 0.0405, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.7244444444444444, | |
| "grad_norm": 2.6797842979431152, | |
| "learning_rate": 8.370843845136307e-06, | |
| "loss": 0.0338, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.7422222222222223, | |
| "grad_norm": 7.398653984069824, | |
| "learning_rate": 8.177805328935311e-06, | |
| "loss": 0.0367, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 0.9176409244537354, | |
| "learning_rate": 7.98546667660151e-06, | |
| "loss": 0.0514, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 6.95626974105835, | |
| "learning_rate": 7.793901761072396e-06, | |
| "loss": 0.0462, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.7955555555555556, | |
| "grad_norm": 7.002866268157959, | |
| "learning_rate": 7.6031841581106045e-06, | |
| "loss": 0.0431, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 1.8133333333333335, | |
| "grad_norm": 3.7514915466308594, | |
| "learning_rate": 7.413387118045136e-06, | |
| "loss": 0.0275, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 1.8311111111111111, | |
| "grad_norm": 4.360569477081299, | |
| "learning_rate": 7.224583537637544e-06, | |
| "loss": 0.0545, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 1.8488888888888888, | |
| "grad_norm": 4.490522861480713, | |
| "learning_rate": 7.036845932083938e-06, | |
| "loss": 0.0364, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 1.8666666666666667, | |
| "grad_norm": 0.8960697650909424, | |
| "learning_rate": 6.850246407163532e-06, | |
| "loss": 0.025, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.8844444444444446, | |
| "grad_norm": 8.298011779785156, | |
| "learning_rate": 6.664856631544449e-06, | |
| "loss": 0.0488, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 1.9022222222222223, | |
| "grad_norm": 0.39041435718536377, | |
| "learning_rate": 6.48074780925739e-06, | |
| "loss": 0.0499, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 4.386420249938965, | |
| "learning_rate": 6.2979906523477765e-06, | |
| "loss": 0.0357, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 1.9377777777777778, | |
| "grad_norm": 0.0772632360458374, | |
| "learning_rate": 6.1166553537168494e-06, | |
| "loss": 0.0434, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 1.9555555555555557, | |
| "grad_norm": 3.595348358154297, | |
| "learning_rate": 5.936811560162169e-06, | |
| "loss": 0.0356, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.9733333333333334, | |
| "grad_norm": 2.2714860439300537, | |
| "learning_rate": 5.758528345627828e-06, | |
| "loss": 0.0268, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 1.991111111111111, | |
| "grad_norm": 0.9474923610687256, | |
| "learning_rate": 5.581874184674734e-06, | |
| "loss": 0.0421, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_category_set_accuracy": 0.874, | |
| "eval_is_valid_accuracy": 0.965, | |
| "eval_loss": 0.022557925432920456, | |
| "eval_macro_f1": 0.9249589403924913, | |
| "eval_micro_f1": 0.9310653536257834, | |
| "eval_runtime": 17.6046, | |
| "eval_samples_per_second": 56.803, | |
| "eval_steps_per_second": 14.201, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.008888888888889, | |
| "grad_norm": 0.81345134973526, | |
| "learning_rate": 5.406916926181052e-06, | |
| "loss": 0.0178, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.026666666666667, | |
| "grad_norm": 0.6435673832893372, | |
| "learning_rate": 5.2337237672830055e-06, | |
| "loss": 0.0153, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.0444444444444443, | |
| "grad_norm": 2.50960636138916, | |
| "learning_rate": 5.062361227565946e-06, | |
| "loss": 0.0147, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.062222222222222, | |
| "grad_norm": 0.061879731714725494, | |
| "learning_rate": 4.892895123515696e-06, | |
| "loss": 0.011, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.04990602657198906, | |
| "learning_rate": 4.7253905432399295e-06, | |
| "loss": 0.0127, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.097777777777778, | |
| "grad_norm": 5.34898567199707, | |
| "learning_rate": 4.559911821469275e-06, | |
| "loss": 0.0157, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.1155555555555554, | |
| "grad_norm": 0.2640538513660431, | |
| "learning_rate": 4.396522514847811e-06, | |
| "loss": 0.0113, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.1333333333333333, | |
| "grad_norm": 0.38076090812683105, | |
| "learning_rate": 4.235285377522401e-06, | |
| "loss": 0.0069, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.151111111111111, | |
| "grad_norm": 0.957832932472229, | |
| "learning_rate": 4.076262337040223e-06, | |
| "loss": 0.0136, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.168888888888889, | |
| "grad_norm": 4.484457015991211, | |
| "learning_rate": 3.9195144705638034e-06, | |
| "loss": 0.0085, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.1866666666666665, | |
| "grad_norm": 5.153985023498535, | |
| "learning_rate": 3.7651019814126656e-06, | |
| "loss": 0.0144, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.2044444444444444, | |
| "grad_norm": 0.1879798024892807, | |
| "learning_rate": 3.6130841759405776e-06, | |
| "loss": 0.0041, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 5.104600429534912, | |
| "learning_rate": 3.4635194407573247e-06, | |
| "loss": 0.0154, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.09507002681493759, | |
| "learning_rate": 3.316465220303744e-06, | |
| "loss": 0.0152, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.2577777777777777, | |
| "grad_norm": 1.6253083944320679, | |
| "learning_rate": 3.1719779947885863e-06, | |
| "loss": 0.0063, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.2755555555555556, | |
| "grad_norm": 4.733920574188232, | |
| "learning_rate": 3.030113258495756e-06, | |
| "loss": 0.0083, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.2933333333333334, | |
| "grad_norm": 4.784224033355713, | |
| "learning_rate": 2.890925498470213e-06, | |
| "loss": 0.012, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.311111111111111, | |
| "grad_norm": 0.4101342260837555, | |
| "learning_rate": 2.754468173590713e-06, | |
| "loss": 0.0034, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.328888888888889, | |
| "grad_norm": 0.1665552854537964, | |
| "learning_rate": 2.6207936940374767e-06, | |
| "loss": 0.0199, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.3466666666666667, | |
| "grad_norm": 0.27030205726623535, | |
| "learning_rate": 2.4899534011626012e-06, | |
| "loss": 0.0162, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.3644444444444446, | |
| "grad_norm": 2.187560796737671, | |
| "learning_rate": 2.36199754777102e-06, | |
| "loss": 0.0035, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.3822222222222225, | |
| "grad_norm": 0.5252636671066284, | |
| "learning_rate": 2.2369752788195343e-06, | |
| "loss": 0.012, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 2.124925136566162, | |
| "learning_rate": 2.1149346125413316e-06, | |
| "loss": 0.0144, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.417777777777778, | |
| "grad_norm": 2.135836601257324, | |
| "learning_rate": 1.9959224220032747e-06, | |
| "loss": 0.0187, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.4355555555555557, | |
| "grad_norm": 0.13177770376205444, | |
| "learning_rate": 1.879984417103017e-06, | |
| "loss": 0.01, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.453333333333333, | |
| "grad_norm": 0.11943187564611435, | |
| "learning_rate": 1.7671651270128531e-06, | |
| "loss": 0.0124, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.471111111111111, | |
| "grad_norm": 2.2270348072052, | |
| "learning_rate": 1.6575078830770708e-06, | |
| "loss": 0.0109, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.488888888888889, | |
| "grad_norm": 0.23311637341976166, | |
| "learning_rate": 1.5510548021693693e-06, | |
| "loss": 0.015, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.506666666666667, | |
| "grad_norm": 0.0807090774178505, | |
| "learning_rate": 1.447846770516701e-06, | |
| "loss": 0.0089, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.5244444444444447, | |
| "grad_norm": 2.7864654064178467, | |
| "learning_rate": 1.3479234279958041e-06, | |
| "loss": 0.013, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.542222222222222, | |
| "grad_norm": 0.3439638018608093, | |
| "learning_rate": 1.2513231529084269e-06, | |
| "loss": 0.0096, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.3769543766975403, | |
| "learning_rate": 1.1580830472410709e-06, | |
| "loss": 0.0065, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.5777777777777775, | |
| "grad_norm": 4.0474677085876465, | |
| "learning_rate": 1.0682389224149648e-06, | |
| "loss": 0.0116, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.5955555555555554, | |
| "grad_norm": 2.4342126846313477, | |
| "learning_rate": 9.818252855317112e-07, | |
| "loss": 0.0063, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.6133333333333333, | |
| "grad_norm": 0.04185617342591286, | |
| "learning_rate": 8.988753261198724e-07, | |
| "loss": 0.0027, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 2.631111111111111, | |
| "grad_norm": 1.2005906105041504, | |
| "learning_rate": 8.19420903387631e-07, | |
| "loss": 0.0084, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 2.648888888888889, | |
| "grad_norm": 1.7181979417800903, | |
| "learning_rate": 7.434925339863908e-07, | |
| "loss": 0.0047, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 0.12996995449066162, | |
| "learning_rate": 6.711193802900074e-07, | |
| "loss": 0.0128, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.6844444444444444, | |
| "grad_norm": 3.3893086910247803, | |
| "learning_rate": 6.023292391941859e-07, | |
| "loss": 0.0139, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 2.7022222222222223, | |
| "grad_norm": 0.1592389941215515, | |
| "learning_rate": 5.371485314403202e-07, | |
| "loss": 0.0101, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 2.7199999999999998, | |
| "grad_norm": 0.9441222548484802, | |
| "learning_rate": 4.756022914678804e-07, | |
| "loss": 0.0082, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 2.7377777777777776, | |
| "grad_norm": 0.5016261339187622, | |
| "learning_rate": 4.1771415779924826e-07, | |
| "loss": 0.0082, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 2.7555555555555555, | |
| "grad_norm": 0.2186020165681839, | |
| "learning_rate": 3.6350636396069947e-07, | |
| "loss": 0.0127, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.7733333333333334, | |
| "grad_norm": 0.1453126072883606, | |
| "learning_rate": 3.1299972994299874e-07, | |
| "loss": 0.0058, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 2.7911111111111113, | |
| "grad_norm": 1.516239881515503, | |
| "learning_rate": 2.6621365420491984e-07, | |
| "loss": 0.0151, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 2.8088888888888888, | |
| "grad_norm": 5.567173480987549, | |
| "learning_rate": 2.2316610622273082e-07, | |
| "loss": 0.0108, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 2.8266666666666667, | |
| "grad_norm": 0.06365057826042175, | |
| "learning_rate": 1.8387361958852378e-07, | |
| "loss": 0.0141, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 2.8444444444444446, | |
| "grad_norm": 0.7341110706329346, | |
| "learning_rate": 1.4835128566003553e-07, | |
| "loss": 0.0132, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.862222222222222, | |
| "grad_norm": 0.07594721764326096, | |
| "learning_rate": 1.1661274776439857e-07, | |
| "loss": 0.0067, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 1.0531156063079834, | |
| "learning_rate": 8.867019595804272e-08, | |
| "loss": 0.0089, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 2.897777777777778, | |
| "grad_norm": 0.6338114738464355, | |
| "learning_rate": 6.453436234477805e-08, | |
| "loss": 0.0084, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 2.9155555555555557, | |
| "grad_norm": 0.8637073040008545, | |
| "learning_rate": 4.4214516953825505e-08, | |
| "loss": 0.0037, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 2.9333333333333336, | |
| "grad_norm": 2.1685738563537598, | |
| "learning_rate": 2.7718464179415928e-08, | |
| "loss": 0.0062, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.951111111111111, | |
| "grad_norm": 1.4732089042663574, | |
| "learning_rate": 1.5052539783292353e-08, | |
| "loss": 0.0061, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 2.968888888888889, | |
| "grad_norm": 1.7118995189666748, | |
| "learning_rate": 6.2216084612931606e-09, | |
| "loss": 0.0166, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 2.986666666666667, | |
| "grad_norm": 0.22905214130878448, | |
| "learning_rate": 1.2290619749244504e-09, | |
| "loss": 0.0174, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_category_set_accuracy": 0.883, | |
| "eval_is_valid_accuracy": 0.966, | |
| "eval_loss": 0.021968627348542213, | |
| "eval_macro_f1": 0.9343878654799134, | |
| "eval_micro_f1": 0.9395306859205776, | |
| "eval_runtime": 17.5098, | |
| "eval_samples_per_second": 57.111, | |
| "eval_steps_per_second": 14.278, | |
| "step": 3375 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 3375, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.318107799420652e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |