{ "best_global_step": 3800, "best_metric": 3.8959919379522034e-07, "best_model_checkpoint": "./code_corruptor_model_v2\\checkpoint-3800", "epoch": 10.0, "eval_steps": 500, "global_step": 3800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13157894736842105, "grad_norm": 0.10753314197063446, "learning_rate": 4.948464912280702e-05, "loss": 0.4466, "step": 50 }, { "epoch": 0.2631578947368421, "grad_norm": 0.006062635686248541, "learning_rate": 4.8936403508771935e-05, "loss": 0.0037, "step": 100 }, { "epoch": 0.39473684210526316, "grad_norm": 0.008465762250125408, "learning_rate": 4.838815789473685e-05, "loss": 0.001, "step": 150 }, { "epoch": 0.5263157894736842, "grad_norm": 0.01755240559577942, "learning_rate": 4.7839912280701754e-05, "loss": 0.0015, "step": 200 }, { "epoch": 0.6578947368421053, "grad_norm": 0.042610205709934235, "learning_rate": 4.7291666666666666e-05, "loss": 0.0009, "step": 250 }, { "epoch": 0.7894736842105263, "grad_norm": 0.05834781005978584, "learning_rate": 4.674342105263158e-05, "loss": 0.0006, "step": 300 }, { "epoch": 0.9210526315789473, "grad_norm": 0.0006559228058904409, "learning_rate": 4.619517543859649e-05, "loss": 0.0005, "step": 350 }, { "epoch": 1.0, "eval_loss": 1.6028698155423626e-05, "eval_runtime": 6.2183, "eval_samples_per_second": 24.444, "eval_steps_per_second": 12.222, "step": 380 }, { "epoch": 1.0526315789473684, "grad_norm": 0.055409740656614304, "learning_rate": 4.5646929824561405e-05, "loss": 0.0005, "step": 400 }, { "epoch": 1.1842105263157894, "grad_norm": 0.07685278356075287, "learning_rate": 4.509868421052632e-05, "loss": 0.0007, "step": 450 }, { "epoch": 1.3157894736842106, "grad_norm": 0.0034991835709661245, "learning_rate": 4.455043859649123e-05, "loss": 0.0004, "step": 500 }, { "epoch": 1.4473684210526316, "grad_norm": 0.0010067017283290625, "learning_rate": 4.400219298245614e-05, "loss": 0.0002, "step": 550 }, { "epoch": 1.5789473684210527, "grad_norm": 0.0004999448428861797, "learning_rate": 4.3453947368421056e-05, "loss": 0.0005, "step": 600 }, { "epoch": 1.7105263157894737, "grad_norm": 0.0048265825025737286, "learning_rate": 4.290570175438597e-05, "loss": 0.0001, "step": 650 }, { "epoch": 1.8421052631578947, "grad_norm": 0.0009590413537807763, "learning_rate": 4.235745614035088e-05, "loss": 0.0002, "step": 700 }, { "epoch": 1.973684210526316, "grad_norm": 0.0026596838142722845, "learning_rate": 4.180921052631579e-05, "loss": 0.0002, "step": 750 }, { "epoch": 2.0, "eval_loss": 9.181250788969919e-06, "eval_runtime": 6.0411, "eval_samples_per_second": 25.161, "eval_steps_per_second": 12.58, "step": 760 }, { "epoch": 2.1052631578947367, "grad_norm": 0.0004917697515338659, "learning_rate": 4.12609649122807e-05, "loss": 0.0003, "step": 800 }, { "epoch": 2.236842105263158, "grad_norm": 0.0010589464800432324, "learning_rate": 4.071271929824562e-05, "loss": 0.0005, "step": 850 }, { "epoch": 2.3684210526315788, "grad_norm": 0.001737129525281489, "learning_rate": 4.016447368421053e-05, "loss": 0.0004, "step": 900 }, { "epoch": 2.5, "grad_norm": 0.00043353348155505955, "learning_rate": 3.9616228070175445e-05, "loss": 0.0002, "step": 950 }, { "epoch": 2.6315789473684212, "grad_norm": 0.000876229431014508, "learning_rate": 3.906798245614035e-05, "loss": 0.0001, "step": 1000 }, { "epoch": 2.763157894736842, "grad_norm": 0.006859931629151106, "learning_rate": 3.8519736842105264e-05, "loss": 0.0, "step": 1050 }, { "epoch": 2.8947368421052633, "grad_norm": 0.00029508452280424535, "learning_rate": 3.7971491228070176e-05, "loss": 0.0005, "step": 1100 }, { "epoch": 3.0, "eval_loss": 4.4037507905159146e-05, "eval_runtime": 6.0562, "eval_samples_per_second": 25.098, "eval_steps_per_second": 12.549, "step": 1140 }, { "epoch": 3.026315789473684, "grad_norm": 0.02006879821419716, "learning_rate": 3.742324561403509e-05, "loss": 0.001, "step": 1150 }, { "epoch": 3.1578947368421053, "grad_norm": 0.0015200282214209437, "learning_rate": 3.6875e-05, "loss": 0.0001, "step": 1200 }, { "epoch": 3.2894736842105265, "grad_norm": 0.0007221988635137677, "learning_rate": 3.6326754385964915e-05, "loss": 0.0, "step": 1250 }, { "epoch": 3.4210526315789473, "grad_norm": 0.0002022625703830272, "learning_rate": 3.577850877192983e-05, "loss": 0.0002, "step": 1300 }, { "epoch": 3.5526315789473686, "grad_norm": 0.2107187658548355, "learning_rate": 3.523026315789474e-05, "loss": 0.0009, "step": 1350 }, { "epoch": 3.6842105263157894, "grad_norm": 0.0004320333246141672, "learning_rate": 3.468201754385965e-05, "loss": 0.0009, "step": 1400 }, { "epoch": 3.8157894736842106, "grad_norm": 0.0014643239555880427, "learning_rate": 3.4133771929824566e-05, "loss": 0.0005, "step": 1450 }, { "epoch": 3.9473684210526314, "grad_norm": 0.0002134596143150702, "learning_rate": 3.358552631578947e-05, "loss": 0.0, "step": 1500 }, { "epoch": 4.0, "eval_loss": 1.688454176473897e-05, "eval_runtime": 6.1697, "eval_samples_per_second": 24.637, "eval_steps_per_second": 12.318, "step": 1520 }, { "epoch": 4.078947368421052, "grad_norm": 0.007211349904537201, "learning_rate": 3.3037280701754384e-05, "loss": 0.0002, "step": 1550 }, { "epoch": 4.2105263157894735, "grad_norm": 0.00045391780440695584, "learning_rate": 3.24890350877193e-05, "loss": 0.0003, "step": 1600 }, { "epoch": 4.342105263157895, "grad_norm": 0.00023115136718843132, "learning_rate": 3.194078947368421e-05, "loss": 0.0001, "step": 1650 }, { "epoch": 4.473684210526316, "grad_norm": 0.14033561944961548, "learning_rate": 3.139254385964913e-05, "loss": 0.0001, "step": 1700 }, { "epoch": 4.605263157894737, "grad_norm": 0.0017103628488257527, "learning_rate": 3.0844298245614035e-05, "loss": 0.0001, "step": 1750 }, { "epoch": 4.7368421052631575, "grad_norm": 0.0032462095841765404, "learning_rate": 3.0296052631578948e-05, "loss": 0.0, "step": 1800 }, { "epoch": 4.868421052631579, "grad_norm": 0.002826864365488291, "learning_rate": 2.974780701754386e-05, "loss": 0.0001, "step": 1850 }, { "epoch": 5.0, "grad_norm": 0.0006184170488268137, "learning_rate": 2.9199561403508774e-05, "loss": 0.0005, "step": 1900 }, { "epoch": 5.0, "eval_loss": 3.343406660860637e-06, "eval_runtime": 6.1148, "eval_samples_per_second": 24.858, "eval_steps_per_second": 12.429, "step": 1900 }, { "epoch": 5.131578947368421, "grad_norm": 0.0009092154796235263, "learning_rate": 2.8651315789473686e-05, "loss": 0.0, "step": 1950 }, { "epoch": 5.2631578947368425, "grad_norm": 0.0006162663921713829, "learning_rate": 2.81030701754386e-05, "loss": 0.0, "step": 2000 }, { "epoch": 5.394736842105263, "grad_norm": 0.0013979279901832342, "learning_rate": 2.755482456140351e-05, "loss": 0.0001, "step": 2050 }, { "epoch": 5.526315789473684, "grad_norm": 0.00045684297219850123, "learning_rate": 2.700657894736842e-05, "loss": 0.0, "step": 2100 }, { "epoch": 5.657894736842105, "grad_norm": 0.017059462144970894, "learning_rate": 2.6458333333333334e-05, "loss": 0.0, "step": 2150 }, { "epoch": 5.7894736842105265, "grad_norm": 9.451019286643714e-05, "learning_rate": 2.591008771929825e-05, "loss": 0.0001, "step": 2200 }, { "epoch": 5.921052631578947, "grad_norm": 0.0002531503851059824, "learning_rate": 2.5361842105263163e-05, "loss": 0.0002, "step": 2250 }, { "epoch": 6.0, "eval_loss": 8.532630317859002e-07, "eval_runtime": 6.1846, "eval_samples_per_second": 24.577, "eval_steps_per_second": 12.289, "step": 2280 }, { "epoch": 6.052631578947368, "grad_norm": 0.00012987718218937516, "learning_rate": 2.4813596491228072e-05, "loss": 0.0, "step": 2300 }, { "epoch": 6.184210526315789, "grad_norm": 8.179421274689957e-05, "learning_rate": 2.426535087719298e-05, "loss": 0.0, "step": 2350 }, { "epoch": 6.315789473684211, "grad_norm": 0.0004638760001398623, "learning_rate": 2.3717105263157898e-05, "loss": 0.0, "step": 2400 }, { "epoch": 6.447368421052632, "grad_norm": 0.0002866844297386706, "learning_rate": 2.316885964912281e-05, "loss": 0.0005, "step": 2450 }, { "epoch": 6.578947368421053, "grad_norm": 7.315115362871438e-05, "learning_rate": 2.262061403508772e-05, "loss": 0.0, "step": 2500 }, { "epoch": 6.7105263157894735, "grad_norm": 0.00015406313468702137, "learning_rate": 2.2072368421052632e-05, "loss": 0.0, "step": 2550 }, { "epoch": 6.842105263157895, "grad_norm": 8.98495563887991e-05, "learning_rate": 2.1524122807017545e-05, "loss": 0.0, "step": 2600 }, { "epoch": 6.973684210526316, "grad_norm": 0.00020581792341545224, "learning_rate": 2.0975877192982458e-05, "loss": 0.0, "step": 2650 }, { "epoch": 7.0, "eval_loss": 8.157680895237718e-07, "eval_runtime": 6.1011, "eval_samples_per_second": 24.914, "eval_steps_per_second": 12.457, "step": 2660 }, { "epoch": 7.105263157894737, "grad_norm": 0.00012650905409827828, "learning_rate": 2.042763157894737e-05, "loss": 0.0, "step": 2700 }, { "epoch": 7.2368421052631575, "grad_norm": 0.00016898708418011665, "learning_rate": 1.987938596491228e-05, "loss": 0.0, "step": 2750 }, { "epoch": 7.368421052631579, "grad_norm": 0.0003560652839951217, "learning_rate": 1.9331140350877193e-05, "loss": 0.0, "step": 2800 }, { "epoch": 7.5, "grad_norm": 8.456506475340575e-05, "learning_rate": 1.8782894736842105e-05, "loss": 0.0, "step": 2850 }, { "epoch": 7.631578947368421, "grad_norm": 0.00023642393352929503, "learning_rate": 1.8234649122807018e-05, "loss": 0.0, "step": 2900 }, { "epoch": 7.7631578947368425, "grad_norm": 8.907222945708781e-05, "learning_rate": 1.768640350877193e-05, "loss": 0.0, "step": 2950 }, { "epoch": 7.894736842105263, "grad_norm": 6.318661326076835e-05, "learning_rate": 1.7138157894736844e-05, "loss": 0.0, "step": 3000 }, { "epoch": 8.0, "eval_loss": 5.999586392135825e-07, "eval_runtime": 6.1117, "eval_samples_per_second": 24.87, "eval_steps_per_second": 12.435, "step": 3040 }, { "epoch": 8.026315789473685, "grad_norm": 0.04527832567691803, "learning_rate": 1.6589912280701756e-05, "loss": 0.0002, "step": 3050 }, { "epoch": 8.157894736842104, "grad_norm": 0.0002466822334099561, "learning_rate": 1.604166666666667e-05, "loss": 0.0, "step": 3100 }, { "epoch": 8.289473684210526, "grad_norm": 0.00023732382396701723, "learning_rate": 1.549342105263158e-05, "loss": 0.0, "step": 3150 }, { "epoch": 8.421052631578947, "grad_norm": 8.941477426560596e-05, "learning_rate": 1.4945175438596493e-05, "loss": 0.0001, "step": 3200 }, { "epoch": 8.552631578947368, "grad_norm": 0.00011901962716365233, "learning_rate": 1.4396929824561402e-05, "loss": 0.0, "step": 3250 }, { "epoch": 8.68421052631579, "grad_norm": 9.798636165214702e-05, "learning_rate": 1.3848684210526317e-05, "loss": 0.0, "step": 3300 }, { "epoch": 8.81578947368421, "grad_norm": 3.7345100281527266e-05, "learning_rate": 1.330043859649123e-05, "loss": 0.0, "step": 3350 }, { "epoch": 8.947368421052632, "grad_norm": 2.8350032152957283e-05, "learning_rate": 1.275219298245614e-05, "loss": 0.0, "step": 3400 }, { "epoch": 9.0, "eval_loss": 4.4665341647487367e-07, "eval_runtime": 6.1054, "eval_samples_per_second": 24.896, "eval_steps_per_second": 12.448, "step": 3420 }, { "epoch": 9.078947368421053, "grad_norm": 8.318301843246445e-05, "learning_rate": 1.2203947368421053e-05, "loss": 0.0, "step": 3450 }, { "epoch": 9.210526315789474, "grad_norm": 4.566019197227433e-05, "learning_rate": 1.1655701754385966e-05, "loss": 0.0, "step": 3500 }, { "epoch": 9.342105263157896, "grad_norm": 0.03128642588853836, "learning_rate": 1.1107456140350877e-05, "loss": 0.0, "step": 3550 }, { "epoch": 9.473684210526315, "grad_norm": 4.535232073976658e-05, "learning_rate": 1.055921052631579e-05, "loss": 0.0, "step": 3600 }, { "epoch": 9.605263157894736, "grad_norm": 7.957038906170055e-05, "learning_rate": 1.0010964912280703e-05, "loss": 0.0, "step": 3650 }, { "epoch": 9.736842105263158, "grad_norm": 5.397196946432814e-05, "learning_rate": 9.462719298245615e-06, "loss": 0.0, "step": 3700 }, { "epoch": 9.868421052631579, "grad_norm": 0.00015735568013042212, "learning_rate": 8.914473684210526e-06, "loss": 0.0, "step": 3750 }, { "epoch": 10.0, "grad_norm": 3.112037666141987e-05, "learning_rate": 8.36622807017544e-06, "loss": 0.0, "step": 3800 }, { "epoch": 10.0, "eval_loss": 3.8959919379522034e-07, "eval_runtime": 6.0027, "eval_samples_per_second": 25.322, "eval_steps_per_second": 12.661, "step": 3800 } ], "logging_steps": 50, "max_steps": 4560, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4621990389350400.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }