diff --git "a/metrics.json" "b/metrics.json" new file mode 100644--- /dev/null +++ "b/metrics.json" @@ -0,0 +1,14452 @@ +{ + "train_runtime": 15528.6205, + "train_samples_per_second": 0.103, + "train_steps_per_second": 0.103, + "train_loss": 0.19463951266836374, + "epoch": 1.0, + "config": { + "vein_name": "html_r64_v2", + "start_step": 600, + "end_step": 2200, + "batch_size": 1, + "grad_accum": 1, + "lora_r": 64, + "lora_alpha": 64, + "warmup_steps": 10, + "rs_lora": false, + "learning_rate": 0.0002, + "max_length": 23401, + "dataset_path": "dataset.jsonl", + "dataset_index_start": 1618, + "dataset_index_end": 1436, + "total_samples_processed": 5000 + }, + "gpu_memory": { + "peak_reserved_gb": 19.139 + }, + "step_by_step_metrics": [ + { + "step": 601, + "timestamp": "2025-12-28T08:58:29.750718", + "elapsed_time": 24.035809755325317, + "loss": 0.2155, + "grad_norm": 0.1604655683040619, + "learning_rate": 0.0, + "epoch": 0.000625 + }, + { + "step": 602, + "timestamp": "2025-12-28T08:58:36.494748", + "elapsed_time": 30.779838800430298, + "loss": 0.152, + "grad_norm": 0.1441534161567688, + "learning_rate": 2e-05, + "epoch": 0.00125 + }, + { + "step": 603, + "timestamp": "2025-12-28T08:58:46.390445", + "elapsed_time": 40.67553472518921, + "loss": 0.1768, + "grad_norm": 0.14501173794269562, + "learning_rate": 4e-05, + "epoch": 0.001875 + }, + { + "step": 604, + "timestamp": "2025-12-28T08:58:52.949267", + "elapsed_time": 47.234357595443726, + "loss": 0.3154, + "grad_norm": 0.20344674587249756, + "learning_rate": 6e-05, + "epoch": 0.0025 + }, + { + "step": 605, + "timestamp": "2025-12-28T08:59:02.976637", + "elapsed_time": 57.26172733306885, + "loss": 0.1981, + "grad_norm": 0.1389472484588623, + "learning_rate": 8e-05, + "epoch": 0.003125 + }, + { + "step": 606, + "timestamp": "2025-12-28T08:59:14.217365", + "elapsed_time": 68.50245547294617, + "loss": 0.1454, + "grad_norm": 0.12427599728107452, + "learning_rate": 0.0001, + "epoch": 0.00375 + }, + { + "step": 607, + "timestamp": "2025-12-28T08:59:19.569352", + "elapsed_time": 73.85444259643555, + "loss": 0.3674, + "grad_norm": 0.21498622000217438, + "learning_rate": 0.00012, + "epoch": 0.004375 + }, + { + "step": 608, + "timestamp": "2025-12-28T08:59:32.803346", + "elapsed_time": 87.08843612670898, + "loss": 0.1358, + "grad_norm": 0.1065230667591095, + "learning_rate": 0.00014, + "epoch": 0.005 + }, + { + "step": 609, + "timestamp": "2025-12-28T08:59:43.029459", + "elapsed_time": 97.31454968452454, + "loss": 0.1995, + "grad_norm": 0.19915856420993805, + "learning_rate": 0.00016, + "epoch": 0.005625 + }, + { + "step": 610, + "timestamp": "2025-12-28T08:59:47.972807", + "elapsed_time": 102.25789737701416, + "loss": 0.2193, + "grad_norm": 0.18390510976314545, + "learning_rate": 0.00018, + "epoch": 0.00625 + }, + { + "step": 611, + "timestamp": "2025-12-28T08:59:56.025895", + "elapsed_time": 110.31098580360413, + "loss": 0.1661, + "grad_norm": 0.14340169727802277, + "learning_rate": 0.0002, + "epoch": 0.006875 + }, + { + "step": 612, + "timestamp": "2025-12-28T09:00:04.192186", + "elapsed_time": 118.47727656364441, + "loss": 0.1867, + "grad_norm": 0.17027480900287628, + "learning_rate": 0.000199874213836478, + "epoch": 0.0075 + }, + { + "step": 613, + "timestamp": "2025-12-28T09:00:15.363335", + "elapsed_time": 129.64842581748962, + "loss": 0.1322, + "grad_norm": 0.12804803252220154, + "learning_rate": 0.000199748427672956, + "epoch": 0.008125 + }, + { + "step": 614, + "timestamp": "2025-12-28T09:00:22.801954", + "elapsed_time": 137.0870442390442, + "loss": 0.3998, + "grad_norm": 0.2402307242155075, + "learning_rate": 0.00019962264150943397, + "epoch": 0.00875 + }, + { + "step": 615, + "timestamp": "2025-12-28T09:00:41.397275", + "elapsed_time": 155.68236541748047, + "loss": 0.167, + "grad_norm": 0.12692536413669586, + "learning_rate": 0.00019949685534591195, + "epoch": 0.009375 + }, + { + "step": 616, + "timestamp": "2025-12-28T09:00:57.925605", + "elapsed_time": 172.2106957435608, + "loss": 0.1226, + "grad_norm": 0.1139947846531868, + "learning_rate": 0.00019937106918238996, + "epoch": 0.01 + }, + { + "step": 617, + "timestamp": "2025-12-28T09:01:03.902990", + "elapsed_time": 178.1880807876587, + "loss": 0.2534, + "grad_norm": 0.2593400478363037, + "learning_rate": 0.00019924528301886794, + "epoch": 0.010625 + }, + { + "step": 618, + "timestamp": "2025-12-28T09:01:19.300000", + "elapsed_time": 193.58509039878845, + "loss": 0.1422, + "grad_norm": 0.12689131498336792, + "learning_rate": 0.00019911949685534592, + "epoch": 0.01125 + }, + { + "step": 619, + "timestamp": "2025-12-28T09:01:25.737299", + "elapsed_time": 200.0223891735077, + "loss": 0.1783, + "grad_norm": 0.16301578283309937, + "learning_rate": 0.0001989937106918239, + "epoch": 0.011875 + }, + { + "step": 620, + "timestamp": "2025-12-28T09:01:38.363267", + "elapsed_time": 212.64835739135742, + "loss": 0.1295, + "grad_norm": 0.13712646067142487, + "learning_rate": 0.0001988679245283019, + "epoch": 0.0125 + }, + { + "step": 621, + "timestamp": "2025-12-28T09:01:43.657825", + "elapsed_time": 217.9429154396057, + "loss": 0.2815, + "grad_norm": 0.21860435605049133, + "learning_rate": 0.00019874213836477988, + "epoch": 0.013125 + }, + { + "step": 622, + "timestamp": "2025-12-28T09:01:52.793187", + "elapsed_time": 227.07827711105347, + "loss": 0.3943, + "grad_norm": 0.20962341129779816, + "learning_rate": 0.00019861635220125786, + "epoch": 0.01375 + }, + { + "step": 623, + "timestamp": "2025-12-28T09:02:00.534713", + "elapsed_time": 234.81980347633362, + "loss": 0.3958, + "grad_norm": 0.17121204733848572, + "learning_rate": 0.00019849056603773587, + "epoch": 0.014375 + }, + { + "step": 624, + "timestamp": "2025-12-28T09:02:09.703390", + "elapsed_time": 243.98848390579224, + "loss": 0.2354, + "grad_norm": 0.16160890460014343, + "learning_rate": 0.00019836477987421385, + "epoch": 0.015 + }, + { + "step": 625, + "timestamp": "2025-12-28T09:02:15.193169", + "elapsed_time": 249.47825932502747, + "loss": 0.2254, + "grad_norm": 0.2050420343875885, + "learning_rate": 0.00019823899371069183, + "epoch": 0.015625 + }, + { + "step": 626, + "timestamp": "2025-12-28T09:02:23.621480", + "elapsed_time": 257.9065706729889, + "loss": 0.3533, + "grad_norm": 0.16951978206634521, + "learning_rate": 0.00019811320754716983, + "epoch": 0.01625 + }, + { + "step": 627, + "timestamp": "2025-12-28T09:02:40.925797", + "elapsed_time": 275.21088790893555, + "loss": 0.1368, + "grad_norm": 0.12023195624351501, + "learning_rate": 0.0001979874213836478, + "epoch": 0.016875 + }, + { + "step": 628, + "timestamp": "2025-12-28T09:02:47.264513", + "elapsed_time": 281.54960775375366, + "loss": 0.1796, + "grad_norm": 0.3685634434223175, + "learning_rate": 0.0001978616352201258, + "epoch": 0.0175 + }, + { + "step": 629, + "timestamp": "2025-12-28T09:02:54.630161", + "elapsed_time": 288.9152555465698, + "loss": 0.1811, + "grad_norm": 0.1793571263551712, + "learning_rate": 0.0001977358490566038, + "epoch": 0.018125 + }, + { + "step": 630, + "timestamp": "2025-12-28T09:03:01.180041", + "elapsed_time": 295.4651312828064, + "loss": 0.2804, + "grad_norm": 0.23330993950366974, + "learning_rate": 0.00019761006289308177, + "epoch": 0.01875 + }, + { + "step": 631, + "timestamp": "2025-12-28T09:03:11.837408", + "elapsed_time": 306.12249875068665, + "loss": 0.2404, + "grad_norm": 0.20730850100517273, + "learning_rate": 0.00019748427672955975, + "epoch": 0.019375 + }, + { + "step": 632, + "timestamp": "2025-12-28T09:03:16.778563", + "elapsed_time": 311.0636534690857, + "loss": 0.3396, + "grad_norm": 0.22279325127601624, + "learning_rate": 0.00019735849056603773, + "epoch": 0.02 + }, + { + "step": 633, + "timestamp": "2025-12-28T09:03:25.959314", + "elapsed_time": 320.2444043159485, + "loss": 0.161, + "grad_norm": 0.17951494455337524, + "learning_rate": 0.00019723270440251574, + "epoch": 0.020625 + }, + { + "step": 634, + "timestamp": "2025-12-28T09:03:42.789283", + "elapsed_time": 337.07437324523926, + "loss": 0.1691, + "grad_norm": 0.12446315586566925, + "learning_rate": 0.00019710691823899372, + "epoch": 0.02125 + }, + { + "step": 635, + "timestamp": "2025-12-28T09:03:54.312342", + "elapsed_time": 348.5974371433258, + "loss": 0.1439, + "grad_norm": 0.19256795942783356, + "learning_rate": 0.0001969811320754717, + "epoch": 0.021875 + }, + { + "step": 636, + "timestamp": "2025-12-28T09:04:03.177467", + "elapsed_time": 357.46255707740784, + "loss": 0.2008, + "grad_norm": 0.13904313743114471, + "learning_rate": 0.0001968553459119497, + "epoch": 0.0225 + }, + { + "step": 637, + "timestamp": "2025-12-28T09:04:10.990148", + "elapsed_time": 365.2752380371094, + "loss": 0.1878, + "grad_norm": 0.1624336838722229, + "learning_rate": 0.00019672955974842768, + "epoch": 0.023125 + }, + { + "step": 638, + "timestamp": "2025-12-28T09:04:17.546438", + "elapsed_time": 371.83152866363525, + "loss": 0.1937, + "grad_norm": 0.17041485011577606, + "learning_rate": 0.00019660377358490566, + "epoch": 0.02375 + }, + { + "step": 639, + "timestamp": "2025-12-28T09:04:21.596846", + "elapsed_time": 375.88193678855896, + "loss": 0.2582, + "grad_norm": 0.20617352426052094, + "learning_rate": 0.00019647798742138367, + "epoch": 0.024375 + }, + { + "step": 640, + "timestamp": "2025-12-28T09:04:32.606180", + "elapsed_time": 386.8912706375122, + "loss": 0.1758, + "grad_norm": 0.1377355009317398, + "learning_rate": 0.00019635220125786165, + "epoch": 0.025 + }, + { + "step": 641, + "timestamp": "2025-12-28T09:04:43.506700", + "elapsed_time": 397.7917902469635, + "loss": 0.3234, + "grad_norm": 0.1705474555492401, + "learning_rate": 0.00019622641509433963, + "epoch": 0.025625 + }, + { + "step": 642, + "timestamp": "2025-12-28T09:04:51.282799", + "elapsed_time": 405.5678901672363, + "loss": 0.2184, + "grad_norm": 0.2311810404062271, + "learning_rate": 0.00019610062893081763, + "epoch": 0.02625 + }, + { + "step": 643, + "timestamp": "2025-12-28T09:04:57.361632", + "elapsed_time": 411.6467225551605, + "loss": 0.188, + "grad_norm": 0.1845218986272812, + "learning_rate": 0.0001959748427672956, + "epoch": 0.026875 + }, + { + "step": 644, + "timestamp": "2025-12-28T09:05:06.307667", + "elapsed_time": 420.5927574634552, + "loss": 0.2218, + "grad_norm": 0.18984770774841309, + "learning_rate": 0.0001958490566037736, + "epoch": 0.0275 + }, + { + "step": 645, + "timestamp": "2025-12-28T09:05:20.346611", + "elapsed_time": 434.6317012310028, + "loss": 0.1774, + "grad_norm": 0.1433676928281784, + "learning_rate": 0.00019572327044025157, + "epoch": 0.028125 + }, + { + "step": 646, + "timestamp": "2025-12-28T09:05:25.128238", + "elapsed_time": 439.4133291244507, + "loss": 0.3369, + "grad_norm": 0.22627809643745422, + "learning_rate": 0.00019559748427672958, + "epoch": 0.02875 + }, + { + "step": 647, + "timestamp": "2025-12-28T09:05:36.797870", + "elapsed_time": 451.08296036720276, + "loss": 0.1735, + "grad_norm": 0.16844865679740906, + "learning_rate": 0.00019547169811320755, + "epoch": 0.029375 + }, + { + "step": 648, + "timestamp": "2025-12-28T09:05:57.683316", + "elapsed_time": 471.96840620040894, + "loss": 0.1199, + "grad_norm": 0.11961396783590317, + "learning_rate": 0.00019534591194968553, + "epoch": 0.03 + }, + { + "step": 649, + "timestamp": "2025-12-28T09:06:05.657286", + "elapsed_time": 479.9423773288727, + "loss": 0.2306, + "grad_norm": 0.1685563325881958, + "learning_rate": 0.00019522012578616354, + "epoch": 0.030625 + }, + { + "step": 650, + "timestamp": "2025-12-28T09:06:20.462627", + "elapsed_time": 494.7477180957794, + "loss": 0.1896, + "grad_norm": 0.12992839515209198, + "learning_rate": 0.00019509433962264152, + "epoch": 0.03125 + }, + { + "step": 651, + "timestamp": "2025-12-28T09:06:31.640433", + "elapsed_time": 505.92552399635315, + "loss": 0.135, + "grad_norm": 0.12081814557313919, + "learning_rate": 0.0001949685534591195, + "epoch": 0.031875 + }, + { + "step": 652, + "timestamp": "2025-12-28T09:06:42.823577", + "elapsed_time": 517.1086673736572, + "loss": 0.1369, + "grad_norm": 0.11642343550920486, + "learning_rate": 0.0001948427672955975, + "epoch": 0.0325 + }, + { + "step": 653, + "timestamp": "2025-12-28T09:06:51.916795", + "elapsed_time": 526.2018857002258, + "loss": 0.1889, + "grad_norm": 0.13910572230815887, + "learning_rate": 0.00019471698113207548, + "epoch": 0.033125 + }, + { + "step": 654, + "timestamp": "2025-12-28T09:07:02.155700", + "elapsed_time": 536.4407908916473, + "loss": 0.3567, + "grad_norm": 0.1917208731174469, + "learning_rate": 0.00019459119496855346, + "epoch": 0.03375 + }, + { + "step": 655, + "timestamp": "2025-12-28T09:07:15.706369", + "elapsed_time": 549.9914598464966, + "loss": 0.1709, + "grad_norm": 0.12028059363365173, + "learning_rate": 0.00019446540880503147, + "epoch": 0.034375 + }, + { + "step": 656, + "timestamp": "2025-12-28T09:07:32.250659", + "elapsed_time": 566.535749912262, + "loss": 0.1228, + "grad_norm": 0.19263313710689545, + "learning_rate": 0.00019433962264150945, + "epoch": 0.035 + }, + { + "step": 657, + "timestamp": "2025-12-28T09:07:39.316479", + "elapsed_time": 573.601569890976, + "loss": 0.2289, + "grad_norm": 0.2995343506336212, + "learning_rate": 0.00019421383647798743, + "epoch": 0.035625 + }, + { + "step": 658, + "timestamp": "2025-12-28T09:07:54.181478", + "elapsed_time": 588.4665679931641, + "loss": 0.1498, + "grad_norm": 0.13047035038471222, + "learning_rate": 0.00019408805031446543, + "epoch": 0.03625 + }, + { + "step": 659, + "timestamp": "2025-12-28T09:08:01.427506", + "elapsed_time": 595.7125968933105, + "loss": 0.1957, + "grad_norm": 0.16221193969249725, + "learning_rate": 0.0001939622641509434, + "epoch": 0.036875 + }, + { + "step": 660, + "timestamp": "2025-12-28T09:08:07.362296", + "elapsed_time": 601.6473863124847, + "loss": 0.1755, + "grad_norm": 0.1594441831111908, + "learning_rate": 0.0001938364779874214, + "epoch": 0.0375 + }, + { + "step": 661, + "timestamp": "2025-12-28T09:08:13.438049", + "elapsed_time": 607.7231397628784, + "loss": 0.2449, + "grad_norm": 0.18605917692184448, + "learning_rate": 0.00019371069182389937, + "epoch": 0.038125 + }, + { + "step": 662, + "timestamp": "2025-12-28T09:08:25.983999", + "elapsed_time": 620.2690892219543, + "loss": 0.1445, + "grad_norm": 0.11945810914039612, + "learning_rate": 0.00019358490566037738, + "epoch": 0.03875 + }, + { + "step": 663, + "timestamp": "2025-12-28T09:08:34.320553", + "elapsed_time": 628.6056432723999, + "loss": 0.1952, + "grad_norm": 0.15212717652320862, + "learning_rate": 0.00019345911949685536, + "epoch": 0.039375 + }, + { + "step": 664, + "timestamp": "2025-12-28T09:08:47.204012", + "elapsed_time": 641.4891028404236, + "loss": 0.1264, + "grad_norm": 0.1063343733549118, + "learning_rate": 0.00019333333333333333, + "epoch": 0.04 + }, + { + "step": 665, + "timestamp": "2025-12-28T09:08:55.268548", + "elapsed_time": 649.5536382198334, + "loss": 0.1998, + "grad_norm": 0.1476004421710968, + "learning_rate": 0.00019320754716981134, + "epoch": 0.040625 + }, + { + "step": 666, + "timestamp": "2025-12-28T09:09:11.683318", + "elapsed_time": 665.9684083461761, + "loss": 0.1221, + "grad_norm": 0.1132006123661995, + "learning_rate": 0.00019308176100628932, + "epoch": 0.04125 + }, + { + "step": 667, + "timestamp": "2025-12-28T09:09:22.208240", + "elapsed_time": 676.493331193924, + "loss": 0.1504, + "grad_norm": 0.13121221959590912, + "learning_rate": 0.0001929559748427673, + "epoch": 0.041875 + }, + { + "step": 668, + "timestamp": "2025-12-28T09:09:42.700506", + "elapsed_time": 696.9855964183807, + "loss": 0.121, + "grad_norm": 0.0900096669793129, + "learning_rate": 0.0001928301886792453, + "epoch": 0.0425 + }, + { + "step": 669, + "timestamp": "2025-12-28T09:09:57.623005", + "elapsed_time": 711.9080958366394, + "loss": 0.1276, + "grad_norm": 0.11430996656417847, + "learning_rate": 0.00019270440251572328, + "epoch": 0.043125 + }, + { + "step": 670, + "timestamp": "2025-12-28T09:10:04.247302", + "elapsed_time": 718.5323920249939, + "loss": 0.1923, + "grad_norm": 0.19039106369018555, + "learning_rate": 0.00019257861635220126, + "epoch": 0.04375 + }, + { + "step": 671, + "timestamp": "2025-12-28T09:10:16.730363", + "elapsed_time": 731.0154540538788, + "loss": 0.314, + "grad_norm": 0.1691213995218277, + "learning_rate": 0.00019245283018867927, + "epoch": 0.044375 + }, + { + "step": 672, + "timestamp": "2025-12-28T09:10:26.112523", + "elapsed_time": 740.3976130485535, + "loss": 0.164, + "grad_norm": 0.13402487337589264, + "learning_rate": 0.00019232704402515725, + "epoch": 0.045 + }, + { + "step": 673, + "timestamp": "2025-12-28T09:10:34.372277", + "elapsed_time": 748.6573669910431, + "loss": 0.1849, + "grad_norm": 0.14694516360759735, + "learning_rate": 0.00019220125786163523, + "epoch": 0.045625 + }, + { + "step": 674, + "timestamp": "2025-12-28T09:10:44.792607", + "elapsed_time": 759.0776975154877, + "loss": 0.1962, + "grad_norm": 0.15182200074195862, + "learning_rate": 0.0001920754716981132, + "epoch": 0.04625 + }, + { + "step": 675, + "timestamp": "2025-12-28T09:10:54.996775", + "elapsed_time": 769.2818658351898, + "loss": 0.1313, + "grad_norm": 0.11995584517717361, + "learning_rate": 0.0001919496855345912, + "epoch": 0.046875 + }, + { + "step": 676, + "timestamp": "2025-12-28T09:11:03.629001", + "elapsed_time": 777.9140913486481, + "loss": 0.2438, + "grad_norm": 0.14396265149116516, + "learning_rate": 0.0001918238993710692, + "epoch": 0.0475 + }, + { + "step": 677, + "timestamp": "2025-12-28T09:11:12.607355", + "elapsed_time": 786.8924453258514, + "loss": 0.2567, + "grad_norm": 0.1717308908700943, + "learning_rate": 0.00019169811320754717, + "epoch": 0.048125 + }, + { + "step": 678, + "timestamp": "2025-12-28T09:11:25.911563", + "elapsed_time": 800.196653842926, + "loss": 0.1644, + "grad_norm": 0.14998085796833038, + "learning_rate": 0.00019157232704402518, + "epoch": 0.04875 + }, + { + "step": 679, + "timestamp": "2025-12-28T09:11:33.380615", + "elapsed_time": 807.6657054424286, + "loss": 0.3003, + "grad_norm": 0.16212862730026245, + "learning_rate": 0.00019144654088050316, + "epoch": 0.049375 + }, + { + "step": 680, + "timestamp": "2025-12-28T09:11:39.286109", + "elapsed_time": 813.5711998939514, + "loss": 0.261, + "grad_norm": 0.19972242414951324, + "learning_rate": 0.00019132075471698114, + "epoch": 0.05 + }, + { + "step": 681, + "timestamp": "2025-12-28T09:11:48.163985", + "elapsed_time": 822.4490756988525, + "loss": 0.3926, + "grad_norm": 0.17470583319664001, + "learning_rate": 0.00019119496855345914, + "epoch": 0.050625 + }, + { + "step": 682, + "timestamp": "2025-12-28T09:12:01.357382", + "elapsed_time": 835.6424729824066, + "loss": 0.1516, + "grad_norm": 0.1337703913450241, + "learning_rate": 0.00019106918238993712, + "epoch": 0.05125 + }, + { + "step": 683, + "timestamp": "2025-12-28T09:12:11.393661", + "elapsed_time": 845.678751707077, + "loss": 0.2966, + "grad_norm": 0.14767307043075562, + "learning_rate": 0.0001909433962264151, + "epoch": 0.051875 + }, + { + "step": 684, + "timestamp": "2025-12-28T09:12:17.843729", + "elapsed_time": 852.1288199424744, + "loss": 0.217, + "grad_norm": 0.16958969831466675, + "learning_rate": 0.0001908176100628931, + "epoch": 0.0525 + }, + { + "step": 685, + "timestamp": "2025-12-28T09:12:25.829441", + "elapsed_time": 860.1145317554474, + "loss": 0.1656, + "grad_norm": 0.14111389219760895, + "learning_rate": 0.00019069182389937108, + "epoch": 0.053125 + }, + { + "step": 686, + "timestamp": "2025-12-28T09:12:37.674364", + "elapsed_time": 871.9594547748566, + "loss": 0.1438, + "grad_norm": 0.11669319868087769, + "learning_rate": 0.00019056603773584906, + "epoch": 0.05375 + }, + { + "step": 687, + "timestamp": "2025-12-28T09:12:56.652637", + "elapsed_time": 890.9377274513245, + "loss": 0.1478, + "grad_norm": 0.12338529527187347, + "learning_rate": 0.00019044025157232704, + "epoch": 0.054375 + }, + { + "step": 688, + "timestamp": "2025-12-28T09:13:02.953778", + "elapsed_time": 897.2388682365417, + "loss": 0.198, + "grad_norm": 0.1538494974374771, + "learning_rate": 0.00019031446540880505, + "epoch": 0.055 + }, + { + "step": 689, + "timestamp": "2025-12-28T09:13:11.878858", + "elapsed_time": 906.1639485359192, + "loss": 0.1424, + "grad_norm": 0.12444575875997543, + "learning_rate": 0.00019018867924528303, + "epoch": 0.055625 + }, + { + "step": 690, + "timestamp": "2025-12-28T09:13:20.334671", + "elapsed_time": 914.61976146698, + "loss": 0.1823, + "grad_norm": 0.19447259604930878, + "learning_rate": 0.000190062893081761, + "epoch": 0.05625 + }, + { + "step": 691, + "timestamp": "2025-12-28T09:13:27.616304", + "elapsed_time": 921.9013941287994, + "loss": 0.1547, + "grad_norm": 0.14511334896087646, + "learning_rate": 0.00018993710691823901, + "epoch": 0.056875 + }, + { + "step": 692, + "timestamp": "2025-12-28T09:13:41.557565", + "elapsed_time": 935.8426558971405, + "loss": 0.1594, + "grad_norm": 0.11656054854393005, + "learning_rate": 0.000189811320754717, + "epoch": 0.0575 + }, + { + "step": 693, + "timestamp": "2025-12-28T09:13:51.373998", + "elapsed_time": 945.6590890884399, + "loss": 0.2355, + "grad_norm": 0.1568254977464676, + "learning_rate": 0.00018968553459119497, + "epoch": 0.058125 + }, + { + "step": 694, + "timestamp": "2025-12-28T09:14:06.517264", + "elapsed_time": 960.8023540973663, + "loss": 0.2098, + "grad_norm": 0.12919512391090393, + "learning_rate": 0.00018955974842767298, + "epoch": 0.05875 + }, + { + "step": 695, + "timestamp": "2025-12-28T09:14:14.033362", + "elapsed_time": 968.3184523582458, + "loss": 0.2264, + "grad_norm": 0.21954892575740814, + "learning_rate": 0.00018943396226415096, + "epoch": 0.059375 + }, + { + "step": 696, + "timestamp": "2025-12-28T09:14:23.154345", + "elapsed_time": 977.4394352436066, + "loss": 0.197, + "grad_norm": 0.1527150720357895, + "learning_rate": 0.00018930817610062894, + "epoch": 0.06 + }, + { + "step": 697, + "timestamp": "2025-12-28T09:14:30.973861", + "elapsed_time": 985.258951663971, + "loss": 0.2027, + "grad_norm": 0.16911649703979492, + "learning_rate": 0.00018918238993710694, + "epoch": 0.060625 + }, + { + "step": 698, + "timestamp": "2025-12-28T09:14:38.748666", + "elapsed_time": 993.0337567329407, + "loss": 0.2625, + "grad_norm": 0.15294122695922852, + "learning_rate": 0.00018905660377358492, + "epoch": 0.06125 + }, + { + "step": 699, + "timestamp": "2025-12-28T09:14:46.484732", + "elapsed_time": 1000.7698221206665, + "loss": 0.2257, + "grad_norm": 0.16110914945602417, + "learning_rate": 0.0001889308176100629, + "epoch": 0.061875 + }, + { + "step": 700, + "timestamp": "2025-12-28T09:14:52.017443", + "elapsed_time": 1006.3025336265564, + "loss": 0.2229, + "grad_norm": 0.19415181875228882, + "learning_rate": 0.00018880503144654088, + "epoch": 0.0625 + }, + { + "step": 701, + "timestamp": "2025-12-28T09:15:00.051403", + "elapsed_time": 1014.336493730545, + "loss": 0.1913, + "grad_norm": 0.1673995554447174, + "learning_rate": 0.00018867924528301889, + "epoch": 0.063125 + }, + { + "step": 702, + "timestamp": "2025-12-28T09:15:16.601964", + "elapsed_time": 1030.8870537281036, + "loss": 0.1377, + "grad_norm": 0.09899748116731644, + "learning_rate": 0.00018855345911949686, + "epoch": 0.06375 + }, + { + "step": 703, + "timestamp": "2025-12-28T09:15:30.488977", + "elapsed_time": 1044.7740678787231, + "loss": 0.1405, + "grad_norm": 0.11365586519241333, + "learning_rate": 0.00018842767295597484, + "epoch": 0.064375 + }, + { + "step": 704, + "timestamp": "2025-12-28T09:15:36.750160", + "elapsed_time": 1051.0352516174316, + "loss": 0.1852, + "grad_norm": 0.1651289463043213, + "learning_rate": 0.00018830188679245285, + "epoch": 0.065 + }, + { + "step": 705, + "timestamp": "2025-12-28T09:15:49.379817", + "elapsed_time": 1063.6649072170258, + "loss": 0.1602, + "grad_norm": 0.12038940191268921, + "learning_rate": 0.00018817610062893083, + "epoch": 0.065625 + }, + { + "step": 706, + "timestamp": "2025-12-28T09:15:54.708518", + "elapsed_time": 1068.9936089515686, + "loss": 0.3003, + "grad_norm": 0.26949650049209595, + "learning_rate": 0.0001880503144654088, + "epoch": 0.06625 + }, + { + "step": 707, + "timestamp": "2025-12-28T09:16:05.981736", + "elapsed_time": 1080.2668268680573, + "loss": 0.1754, + "grad_norm": 0.133348748087883, + "learning_rate": 0.00018792452830188681, + "epoch": 0.066875 + }, + { + "step": 708, + "timestamp": "2025-12-28T09:16:11.541142", + "elapsed_time": 1085.8262326717377, + "loss": 0.1986, + "grad_norm": 0.1636824756860733, + "learning_rate": 0.0001877987421383648, + "epoch": 0.0675 + }, + { + "step": 709, + "timestamp": "2025-12-28T09:16:17.744602", + "elapsed_time": 1092.0296926498413, + "loss": 0.2282, + "grad_norm": 0.18413999676704407, + "learning_rate": 0.00018767295597484277, + "epoch": 0.068125 + }, + { + "step": 710, + "timestamp": "2025-12-28T09:16:28.622419", + "elapsed_time": 1102.9075095653534, + "loss": 0.1403, + "grad_norm": 0.1099625825881958, + "learning_rate": 0.00018754716981132078, + "epoch": 0.06875 + }, + { + "step": 711, + "timestamp": "2025-12-28T09:16:38.899862", + "elapsed_time": 1113.1849522590637, + "loss": 0.3365, + "grad_norm": 0.1869092434644699, + "learning_rate": 0.00018742138364779876, + "epoch": 0.069375 + }, + { + "step": 712, + "timestamp": "2025-12-28T09:16:58.976813", + "elapsed_time": 1133.2619035243988, + "loss": 0.1024, + "grad_norm": 0.08866474777460098, + "learning_rate": 0.00018729559748427674, + "epoch": 0.07 + }, + { + "step": 713, + "timestamp": "2025-12-28T09:17:05.528653", + "elapsed_time": 1139.8137435913086, + "loss": 0.2417, + "grad_norm": 0.1919388324022293, + "learning_rate": 0.00018716981132075472, + "epoch": 0.070625 + }, + { + "step": 714, + "timestamp": "2025-12-28T09:17:16.181514", + "elapsed_time": 1150.466604232788, + "loss": 0.1628, + "grad_norm": 0.12882591784000397, + "learning_rate": 0.00018704402515723272, + "epoch": 0.07125 + }, + { + "step": 715, + "timestamp": "2025-12-28T09:17:21.921176", + "elapsed_time": 1156.2062666416168, + "loss": 0.2503, + "grad_norm": 0.17477668821811676, + "learning_rate": 0.0001869182389937107, + "epoch": 0.071875 + }, + { + "step": 716, + "timestamp": "2025-12-28T09:17:31.060009", + "elapsed_time": 1165.3450994491577, + "loss": 0.19, + "grad_norm": 0.15098389983177185, + "learning_rate": 0.00018679245283018868, + "epoch": 0.0725 + }, + { + "step": 717, + "timestamp": "2025-12-28T09:17:51.950466", + "elapsed_time": 1186.235556602478, + "loss": 0.1353, + "grad_norm": 0.12708379328250885, + "learning_rate": 0.0001866666666666667, + "epoch": 0.073125 + }, + { + "step": 718, + "timestamp": "2025-12-28T09:17:57.215989", + "elapsed_time": 1191.5010793209076, + "loss": 0.2758, + "grad_norm": 0.20731349289417267, + "learning_rate": 0.00018654088050314467, + "epoch": 0.07375 + }, + { + "step": 719, + "timestamp": "2025-12-28T09:18:05.159270", + "elapsed_time": 1199.4443612098694, + "loss": 0.226, + "grad_norm": 0.16173875331878662, + "learning_rate": 0.00018641509433962264, + "epoch": 0.074375 + }, + { + "step": 720, + "timestamp": "2025-12-28T09:18:10.909682", + "elapsed_time": 1205.1947722434998, + "loss": 0.2709, + "grad_norm": 0.21631106734275818, + "learning_rate": 0.00018628930817610065, + "epoch": 0.075 + }, + { + "step": 721, + "timestamp": "2025-12-28T09:18:23.590226", + "elapsed_time": 1217.875316619873, + "loss": 0.2306, + "grad_norm": 0.12810084223747253, + "learning_rate": 0.00018616352201257863, + "epoch": 0.075625 + }, + { + "step": 722, + "timestamp": "2025-12-28T09:18:38.922154", + "elapsed_time": 1233.2072455883026, + "loss": 0.1657, + "grad_norm": 0.10849788039922714, + "learning_rate": 0.0001860377358490566, + "epoch": 0.07625 + }, + { + "step": 723, + "timestamp": "2025-12-28T09:18:48.500418", + "elapsed_time": 1242.7855124473572, + "loss": 0.1657, + "grad_norm": 0.17049764096736908, + "learning_rate": 0.00018591194968553462, + "epoch": 0.076875 + }, + { + "step": 724, + "timestamp": "2025-12-28T09:18:57.124927", + "elapsed_time": 1251.4100172519684, + "loss": 0.1443, + "grad_norm": 0.1467668116092682, + "learning_rate": 0.0001857861635220126, + "epoch": 0.0775 + }, + { + "step": 725, + "timestamp": "2025-12-28T09:19:04.646596", + "elapsed_time": 1258.9316868782043, + "loss": 0.2106, + "grad_norm": 0.16145600378513336, + "learning_rate": 0.00018566037735849057, + "epoch": 0.078125 + }, + { + "step": 726, + "timestamp": "2025-12-28T09:19:13.415801", + "elapsed_time": 1267.7008922100067, + "loss": 0.1683, + "grad_norm": 0.13938355445861816, + "learning_rate": 0.00018553459119496855, + "epoch": 0.07875 + }, + { + "step": 727, + "timestamp": "2025-12-28T09:19:19.791168", + "elapsed_time": 1274.0762577056885, + "loss": 0.1834, + "grad_norm": 0.16343441605567932, + "learning_rate": 0.00018540880503144656, + "epoch": 0.079375 + }, + { + "step": 728, + "timestamp": "2025-12-28T09:19:26.780643", + "elapsed_time": 1281.065733909607, + "loss": 0.3605, + "grad_norm": 0.18578511476516724, + "learning_rate": 0.00018528301886792454, + "epoch": 0.08 + }, + { + "step": 729, + "timestamp": "2025-12-28T09:19:40.770904", + "elapsed_time": 1295.0559949874878, + "loss": 0.1423, + "grad_norm": 0.10042066127061844, + "learning_rate": 0.00018515723270440252, + "epoch": 0.080625 + }, + { + "step": 730, + "timestamp": "2025-12-28T09:19:46.875385", + "elapsed_time": 1301.1604759693146, + "loss": 0.2364, + "grad_norm": 0.2259899079799652, + "learning_rate": 0.00018503144654088052, + "epoch": 0.08125 + }, + { + "step": 731, + "timestamp": "2025-12-28T09:19:57.704231", + "elapsed_time": 1311.9893221855164, + "loss": 0.1339, + "grad_norm": 0.15420939028263092, + "learning_rate": 0.0001849056603773585, + "epoch": 0.081875 + }, + { + "step": 732, + "timestamp": "2025-12-28T09:20:05.181888", + "elapsed_time": 1319.4669790267944, + "loss": 0.1448, + "grad_norm": 0.13649825751781464, + "learning_rate": 0.00018477987421383648, + "epoch": 0.0825 + }, + { + "step": 733, + "timestamp": "2025-12-28T09:20:22.787205", + "elapsed_time": 1337.0722954273224, + "loss": 0.1804, + "grad_norm": 0.1483648419380188, + "learning_rate": 0.0001846540880503145, + "epoch": 0.083125 + }, + { + "step": 734, + "timestamp": "2025-12-28T09:20:32.228745", + "elapsed_time": 1346.5138351917267, + "loss": 0.1621, + "grad_norm": 0.1357170045375824, + "learning_rate": 0.00018452830188679247, + "epoch": 0.08375 + }, + { + "step": 735, + "timestamp": "2025-12-28T09:20:39.593318", + "elapsed_time": 1353.8784093856812, + "loss": 0.1985, + "grad_norm": 0.18031319975852966, + "learning_rate": 0.00018440251572327045, + "epoch": 0.084375 + }, + { + "step": 736, + "timestamp": "2025-12-28T09:20:46.467101", + "elapsed_time": 1360.752191543579, + "loss": 0.4623, + "grad_norm": 0.2359013855457306, + "learning_rate": 0.00018427672955974845, + "epoch": 0.085 + }, + { + "step": 737, + "timestamp": "2025-12-28T09:20:53.603967", + "elapsed_time": 1367.8890571594238, + "loss": 0.2163, + "grad_norm": 0.16415388882160187, + "learning_rate": 0.00018415094339622643, + "epoch": 0.085625 + }, + { + "step": 738, + "timestamp": "2025-12-28T09:20:59.877010", + "elapsed_time": 1374.1621007919312, + "loss": 0.2737, + "grad_norm": 0.18287940323352814, + "learning_rate": 0.0001840251572327044, + "epoch": 0.08625 + }, + { + "step": 739, + "timestamp": "2025-12-28T09:21:17.007779", + "elapsed_time": 1391.2928698062897, + "loss": 0.144, + "grad_norm": 0.10500773787498474, + "learning_rate": 0.0001838993710691824, + "epoch": 0.086875 + }, + { + "step": 740, + "timestamp": "2025-12-28T09:21:27.416174", + "elapsed_time": 1401.70126414299, + "loss": 0.1542, + "grad_norm": 0.11694104224443436, + "learning_rate": 0.0001837735849056604, + "epoch": 0.0875 + }, + { + "step": 741, + "timestamp": "2025-12-28T09:21:32.682215", + "elapsed_time": 1406.9673054218292, + "loss": 0.2283, + "grad_norm": 0.17158034443855286, + "learning_rate": 0.00018364779874213837, + "epoch": 0.088125 + }, + { + "step": 742, + "timestamp": "2025-12-28T09:21:42.891669", + "elapsed_time": 1417.17675948143, + "loss": 0.1413, + "grad_norm": 0.12412276864051819, + "learning_rate": 0.00018352201257861635, + "epoch": 0.08875 + }, + { + "step": 743, + "timestamp": "2025-12-28T09:21:50.263790", + "elapsed_time": 1424.5488805770874, + "loss": 0.322, + "grad_norm": 0.2420777678489685, + "learning_rate": 0.00018339622641509436, + "epoch": 0.089375 + }, + { + "step": 744, + "timestamp": "2025-12-28T09:22:01.677433", + "elapsed_time": 1435.9625227451324, + "loss": 0.1475, + "grad_norm": 0.10151813179254532, + "learning_rate": 0.00018327044025157234, + "epoch": 0.09 + }, + { + "step": 745, + "timestamp": "2025-12-28T09:22:11.434022", + "elapsed_time": 1445.7191128730774, + "loss": 0.3527, + "grad_norm": 0.16911114752292633, + "learning_rate": 0.00018314465408805032, + "epoch": 0.090625 + }, + { + "step": 746, + "timestamp": "2025-12-28T09:22:23.572820", + "elapsed_time": 1457.8579106330872, + "loss": 0.1674, + "grad_norm": 0.12217868864536285, + "learning_rate": 0.00018301886792452832, + "epoch": 0.09125 + }, + { + "step": 747, + "timestamp": "2025-12-28T09:22:36.319867", + "elapsed_time": 1470.6049571037292, + "loss": 0.136, + "grad_norm": 0.10214618593454361, + "learning_rate": 0.0001828930817610063, + "epoch": 0.091875 + }, + { + "step": 748, + "timestamp": "2025-12-28T09:22:46.737961", + "elapsed_time": 1481.023051738739, + "loss": 0.2034, + "grad_norm": 0.14041507244110107, + "learning_rate": 0.00018276729559748428, + "epoch": 0.0925 + }, + { + "step": 749, + "timestamp": "2025-12-28T09:22:54.486010", + "elapsed_time": 1488.7711000442505, + "loss": 0.203, + "grad_norm": 0.23749075829982758, + "learning_rate": 0.0001826415094339623, + "epoch": 0.093125 + }, + { + "step": 750, + "timestamp": "2025-12-28T09:23:15.381836", + "elapsed_time": 1509.6669268608093, + "loss": 0.1252, + "grad_norm": 0.09619986265897751, + "learning_rate": 0.00018251572327044027, + "epoch": 0.09375 + }, + { + "step": 751, + "timestamp": "2025-12-28T09:23:26.138595", + "elapsed_time": 1520.4236857891083, + "loss": 0.1759, + "grad_norm": 0.1735697239637375, + "learning_rate": 0.00018238993710691825, + "epoch": 0.094375 + }, + { + "step": 752, + "timestamp": "2025-12-28T09:23:31.779100", + "elapsed_time": 1526.064194202423, + "loss": 0.4079, + "grad_norm": 0.2254641205072403, + "learning_rate": 0.00018226415094339625, + "epoch": 0.095 + }, + { + "step": 753, + "timestamp": "2025-12-28T09:23:48.621990", + "elapsed_time": 1542.9070808887482, + "loss": 0.1389, + "grad_norm": 0.09768623113632202, + "learning_rate": 0.00018213836477987423, + "epoch": 0.095625 + }, + { + "step": 754, + "timestamp": "2025-12-28T09:23:56.146839", + "elapsed_time": 1550.4319293498993, + "loss": 0.3221, + "grad_norm": 0.17994655668735504, + "learning_rate": 0.0001820125786163522, + "epoch": 0.09625 + }, + { + "step": 755, + "timestamp": "2025-12-28T09:24:17.041799", + "elapsed_time": 1571.3268892765045, + "loss": 0.1407, + "grad_norm": 0.09752582013607025, + "learning_rate": 0.0001818867924528302, + "epoch": 0.096875 + }, + { + "step": 756, + "timestamp": "2025-12-28T09:24:27.104362", + "elapsed_time": 1581.389452457428, + "loss": 0.1681, + "grad_norm": 0.21668905019760132, + "learning_rate": 0.0001817610062893082, + "epoch": 0.0975 + }, + { + "step": 757, + "timestamp": "2025-12-28T09:24:38.052954", + "elapsed_time": 1592.3380448818207, + "loss": 0.2357, + "grad_norm": 0.14341306686401367, + "learning_rate": 0.00018163522012578617, + "epoch": 0.098125 + }, + { + "step": 758, + "timestamp": "2025-12-28T09:24:46.883893", + "elapsed_time": 1601.168983221054, + "loss": 0.1544, + "grad_norm": 0.12626543641090393, + "learning_rate": 0.00018150943396226415, + "epoch": 0.09875 + }, + { + "step": 759, + "timestamp": "2025-12-28T09:24:56.091568", + "elapsed_time": 1610.3766589164734, + "loss": 0.3366, + "grad_norm": 0.1486847847700119, + "learning_rate": 0.00018138364779874216, + "epoch": 0.099375 + }, + { + "step": 760, + "timestamp": "2025-12-28T09:25:02.464933", + "elapsed_time": 1616.7500236034393, + "loss": 0.2171, + "grad_norm": 0.1620669662952423, + "learning_rate": 0.00018125786163522014, + "epoch": 0.1 + }, + { + "step": 761, + "timestamp": "2025-12-28T09:25:12.378673", + "elapsed_time": 1626.6637637615204, + "loss": 0.1958, + "grad_norm": 0.13596412539482117, + "learning_rate": 0.00018113207547169812, + "epoch": 0.100625 + }, + { + "step": 762, + "timestamp": "2025-12-28T09:25:24.048320", + "elapsed_time": 1638.3334102630615, + "loss": 0.1543, + "grad_norm": 0.19701559841632843, + "learning_rate": 0.00018100628930817612, + "epoch": 0.10125 + }, + { + "step": 763, + "timestamp": "2025-12-28T09:25:31.137409", + "elapsed_time": 1645.4224989414215, + "loss": 0.3084, + "grad_norm": 0.16929014027118683, + "learning_rate": 0.0001808805031446541, + "epoch": 0.101875 + }, + { + "step": 764, + "timestamp": "2025-12-28T09:25:37.301475", + "elapsed_time": 1651.5865650177002, + "loss": 0.2045, + "grad_norm": 0.1783858984708786, + "learning_rate": 0.00018075471698113208, + "epoch": 0.1025 + }, + { + "step": 765, + "timestamp": "2025-12-28T09:25:42.794771", + "elapsed_time": 1657.0798616409302, + "loss": 0.2026, + "grad_norm": 0.1741485893726349, + "learning_rate": 0.0001806289308176101, + "epoch": 0.103125 + }, + { + "step": 766, + "timestamp": "2025-12-28T09:25:53.503557", + "elapsed_time": 1667.788647890091, + "loss": 0.1651, + "grad_norm": 0.39134204387664795, + "learning_rate": 0.00018050314465408807, + "epoch": 0.10375 + }, + { + "step": 767, + "timestamp": "2025-12-28T09:26:03.591001", + "elapsed_time": 1677.8760917186737, + "loss": 0.2385, + "grad_norm": 0.19072601199150085, + "learning_rate": 0.00018037735849056605, + "epoch": 0.104375 + }, + { + "step": 768, + "timestamp": "2025-12-28T09:26:12.895494", + "elapsed_time": 1687.180584192276, + "loss": 0.2075, + "grad_norm": 0.14767950773239136, + "learning_rate": 0.00018025157232704403, + "epoch": 0.105 + }, + { + "step": 769, + "timestamp": "2025-12-28T09:26:28.488803", + "elapsed_time": 1702.7738931179047, + "loss": 0.1246, + "grad_norm": 0.11118397116661072, + "learning_rate": 0.00018012578616352203, + "epoch": 0.105625 + }, + { + "step": 770, + "timestamp": "2025-12-28T09:26:35.655701", + "elapsed_time": 1709.9407913684845, + "loss": 0.2173, + "grad_norm": 0.15238629281520844, + "learning_rate": 0.00018, + "epoch": 0.10625 + }, + { + "step": 771, + "timestamp": "2025-12-28T09:26:42.135442", + "elapsed_time": 1716.420532464981, + "loss": 0.4145, + "grad_norm": 0.17681153118610382, + "learning_rate": 0.000179874213836478, + "epoch": 0.106875 + }, + { + "step": 772, + "timestamp": "2025-12-28T09:26:54.831081", + "elapsed_time": 1729.1161713600159, + "loss": 0.2015, + "grad_norm": 0.17193950712680817, + "learning_rate": 0.000179748427672956, + "epoch": 0.1075 + }, + { + "step": 773, + "timestamp": "2025-12-28T09:27:05.543585", + "elapsed_time": 1739.8286757469177, + "loss": 0.1725, + "grad_norm": 0.11407608538866043, + "learning_rate": 0.00017962264150943398, + "epoch": 0.108125 + }, + { + "step": 774, + "timestamp": "2025-12-28T09:27:09.428086", + "elapsed_time": 1743.713176727295, + "loss": 0.3777, + "grad_norm": 0.2451329529285431, + "learning_rate": 0.00017949685534591195, + "epoch": 0.10875 + }, + { + "step": 775, + "timestamp": "2025-12-28T09:27:23.100645", + "elapsed_time": 1757.3857352733612, + "loss": 0.1818, + "grad_norm": 0.11372605711221695, + "learning_rate": 0.00017937106918238996, + "epoch": 0.109375 + }, + { + "step": 776, + "timestamp": "2025-12-28T09:27:33.162588", + "elapsed_time": 1767.447678565979, + "loss": 0.1858, + "grad_norm": 0.1400919407606125, + "learning_rate": 0.00017924528301886794, + "epoch": 0.11 + }, + { + "step": 777, + "timestamp": "2025-12-28T09:27:39.454731", + "elapsed_time": 1773.7398252487183, + "loss": 0.2219, + "grad_norm": 0.16964372992515564, + "learning_rate": 0.00017911949685534592, + "epoch": 0.110625 + }, + { + "step": 778, + "timestamp": "2025-12-28T09:27:44.743958", + "elapsed_time": 1779.0290484428406, + "loss": 0.2554, + "grad_norm": 0.17479604482650757, + "learning_rate": 0.00017899371069182393, + "epoch": 0.11125 + }, + { + "step": 779, + "timestamp": "2025-12-28T09:27:53.377780", + "elapsed_time": 1787.6628749370575, + "loss": 0.1559, + "grad_norm": 0.12139801681041718, + "learning_rate": 0.0001788679245283019, + "epoch": 0.111875 + }, + { + "step": 780, + "timestamp": "2025-12-28T09:28:09.536850", + "elapsed_time": 1803.821940422058, + "loss": 0.1215, + "grad_norm": 0.11527290940284729, + "learning_rate": 0.00017874213836477988, + "epoch": 0.1125 + }, + { + "step": 781, + "timestamp": "2025-12-28T09:28:16.916052", + "elapsed_time": 1811.2011427879333, + "loss": 0.191, + "grad_norm": 0.2477898895740509, + "learning_rate": 0.00017861635220125786, + "epoch": 0.113125 + }, + { + "step": 782, + "timestamp": "2025-12-28T09:28:24.055187", + "elapsed_time": 1818.3402771949768, + "loss": 0.2501, + "grad_norm": 0.18624109029769897, + "learning_rate": 0.00017849056603773587, + "epoch": 0.11375 + }, + { + "step": 783, + "timestamp": "2025-12-28T09:28:35.480491", + "elapsed_time": 1829.7655820846558, + "loss": 0.1427, + "grad_norm": 0.10521696507930756, + "learning_rate": 0.00017836477987421385, + "epoch": 0.114375 + }, + { + "step": 784, + "timestamp": "2025-12-28T09:28:46.488517", + "elapsed_time": 1840.7736072540283, + "loss": 0.153, + "grad_norm": 0.14191673696041107, + "learning_rate": 0.00017823899371069183, + "epoch": 0.115 + }, + { + "step": 785, + "timestamp": "2025-12-28T09:28:57.330380", + "elapsed_time": 1851.615470647812, + "loss": 0.1657, + "grad_norm": 0.11947259306907654, + "learning_rate": 0.00017811320754716983, + "epoch": 0.115625 + }, + { + "step": 786, + "timestamp": "2025-12-28T09:29:02.999607", + "elapsed_time": 1857.2846972942352, + "loss": 0.3162, + "grad_norm": 0.20225848257541656, + "learning_rate": 0.0001779874213836478, + "epoch": 0.11625 + }, + { + "step": 787, + "timestamp": "2025-12-28T09:29:14.010140", + "elapsed_time": 1868.29523062706, + "loss": 0.199, + "grad_norm": 0.20261730253696442, + "learning_rate": 0.0001778616352201258, + "epoch": 0.116875 + }, + { + "step": 788, + "timestamp": "2025-12-28T09:29:27.747132", + "elapsed_time": 1882.0322229862213, + "loss": 0.1879, + "grad_norm": 0.1322057545185089, + "learning_rate": 0.0001777358490566038, + "epoch": 0.1175 + }, + { + "step": 789, + "timestamp": "2025-12-28T09:29:32.947439", + "elapsed_time": 1887.2325296401978, + "loss": 0.2144, + "grad_norm": 0.1755441427230835, + "learning_rate": 0.00017761006289308178, + "epoch": 0.118125 + }, + { + "step": 790, + "timestamp": "2025-12-28T09:29:43.515994", + "elapsed_time": 1897.8010845184326, + "loss": 0.1669, + "grad_norm": 0.12597765028476715, + "learning_rate": 0.00017748427672955976, + "epoch": 0.11875 + }, + { + "step": 791, + "timestamp": "2025-12-28T09:29:49.003240", + "elapsed_time": 1903.288330078125, + "loss": 0.3016, + "grad_norm": 0.17289644479751587, + "learning_rate": 0.00017735849056603776, + "epoch": 0.119375 + }, + { + "step": 792, + "timestamp": "2025-12-28T09:29:57.868876", + "elapsed_time": 1912.153966665268, + "loss": 0.2346, + "grad_norm": 0.13611753284931183, + "learning_rate": 0.00017723270440251574, + "epoch": 0.12 + }, + { + "step": 793, + "timestamp": "2025-12-28T09:30:09.439964", + "elapsed_time": 1923.7250542640686, + "loss": 0.1734, + "grad_norm": 0.1182570829987526, + "learning_rate": 0.00017710691823899372, + "epoch": 0.120625 + }, + { + "step": 794, + "timestamp": "2025-12-28T09:30:21.281246", + "elapsed_time": 1935.5663363933563, + "loss": 0.2104, + "grad_norm": 0.16117580235004425, + "learning_rate": 0.0001769811320754717, + "epoch": 0.12125 + }, + { + "step": 795, + "timestamp": "2025-12-28T09:30:35.176733", + "elapsed_time": 1949.4618237018585, + "loss": 0.2394, + "grad_norm": 0.12246831506490707, + "learning_rate": 0.0001768553459119497, + "epoch": 0.121875 + }, + { + "step": 796, + "timestamp": "2025-12-28T09:30:45.890549", + "elapsed_time": 1960.1756389141083, + "loss": 0.222, + "grad_norm": 0.14199329912662506, + "learning_rate": 0.00017672955974842768, + "epoch": 0.1225 + }, + { + "step": 797, + "timestamp": "2025-12-28T09:30:56.848050", + "elapsed_time": 1971.1331400871277, + "loss": 0.1856, + "grad_norm": 0.15696978569030762, + "learning_rate": 0.00017660377358490566, + "epoch": 0.123125 + }, + { + "step": 798, + "timestamp": "2025-12-28T09:31:05.711194", + "elapsed_time": 1979.9962842464447, + "loss": 0.1754, + "grad_norm": 0.12661588191986084, + "learning_rate": 0.00017647798742138367, + "epoch": 0.12375 + }, + { + "step": 799, + "timestamp": "2025-12-28T09:31:14.996719", + "elapsed_time": 1989.2818095684052, + "loss": 0.202, + "grad_norm": 0.1417084038257599, + "learning_rate": 0.00017635220125786165, + "epoch": 0.124375 + }, + { + "step": 800, + "timestamp": "2025-12-28T09:31:23.000694", + "elapsed_time": 1997.285789012909, + "loss": 0.2166, + "grad_norm": 0.13480763137340546, + "learning_rate": 0.00017622641509433963, + "epoch": 0.125 + }, + { + "step": 801, + "timestamp": "2025-12-28T09:31:35.762309", + "elapsed_time": 2010.0473990440369, + "loss": 0.1369, + "grad_norm": 0.45576173067092896, + "learning_rate": 0.00017610062893081763, + "epoch": 0.125625 + }, + { + "step": 802, + "timestamp": "2025-12-28T09:31:43.012873", + "elapsed_time": 2017.2979636192322, + "loss": 0.2186, + "grad_norm": 0.1421194076538086, + "learning_rate": 0.0001759748427672956, + "epoch": 0.12625 + }, + { + "step": 803, + "timestamp": "2025-12-28T09:32:02.199762", + "elapsed_time": 2036.484852552414, + "loss": 0.1557, + "grad_norm": 0.08416479825973511, + "learning_rate": 0.0001758490566037736, + "epoch": 0.126875 + }, + { + "step": 804, + "timestamp": "2025-12-28T09:32:07.151235", + "elapsed_time": 2041.4363248348236, + "loss": 0.2531, + "grad_norm": 0.17905178666114807, + "learning_rate": 0.0001757232704402516, + "epoch": 0.1275 + }, + { + "step": 805, + "timestamp": "2025-12-28T09:32:15.632157", + "elapsed_time": 2049.9172480106354, + "loss": 0.1335, + "grad_norm": 0.1256638765335083, + "learning_rate": 0.00017559748427672958, + "epoch": 0.128125 + }, + { + "step": 806, + "timestamp": "2025-12-28T09:32:31.298185", + "elapsed_time": 2065.583275079727, + "loss": 0.1427, + "grad_norm": 0.0954441949725151, + "learning_rate": 0.00017547169811320756, + "epoch": 0.12875 + }, + { + "step": 807, + "timestamp": "2025-12-28T09:32:39.447526", + "elapsed_time": 2073.7326169013977, + "loss": 0.1878, + "grad_norm": 0.13530845940113068, + "learning_rate": 0.00017534591194968554, + "epoch": 0.129375 + }, + { + "step": 808, + "timestamp": "2025-12-28T09:32:50.466826", + "elapsed_time": 2084.751916408539, + "loss": 0.1094, + "grad_norm": 0.1460101455450058, + "learning_rate": 0.00017522012578616354, + "epoch": 0.13 + }, + { + "step": 809, + "timestamp": "2025-12-28T09:33:11.356705", + "elapsed_time": 2105.6417951583862, + "loss": 0.1233, + "grad_norm": 0.09432196617126465, + "learning_rate": 0.00017509433962264152, + "epoch": 0.130625 + }, + { + "step": 810, + "timestamp": "2025-12-28T09:33:18.523039", + "elapsed_time": 2112.808129787445, + "loss": 0.2119, + "grad_norm": 0.15918022394180298, + "learning_rate": 0.0001749685534591195, + "epoch": 0.13125 + }, + { + "step": 811, + "timestamp": "2025-12-28T09:33:26.829092", + "elapsed_time": 2121.114182472229, + "loss": 0.1979, + "grad_norm": 0.14457164704799652, + "learning_rate": 0.0001748427672955975, + "epoch": 0.131875 + }, + { + "step": 812, + "timestamp": "2025-12-28T09:33:32.394758", + "elapsed_time": 2126.6798486709595, + "loss": 0.3052, + "grad_norm": 0.19710645079612732, + "learning_rate": 0.00017471698113207549, + "epoch": 0.1325 + }, + { + "step": 813, + "timestamp": "2025-12-28T09:33:45.262163", + "elapsed_time": 2139.5472536087036, + "loss": 0.1832, + "grad_norm": 0.17278793454170227, + "learning_rate": 0.00017459119496855346, + "epoch": 0.133125 + }, + { + "step": 814, + "timestamp": "2025-12-28T09:33:55.968938", + "elapsed_time": 2150.254028081894, + "loss": 0.1271, + "grad_norm": 0.10092293471097946, + "learning_rate": 0.00017446540880503147, + "epoch": 0.13375 + }, + { + "step": 815, + "timestamp": "2025-12-28T09:34:03.945511", + "elapsed_time": 2158.23060131073, + "loss": 0.1929, + "grad_norm": 0.1362391710281372, + "learning_rate": 0.00017433962264150945, + "epoch": 0.134375 + }, + { + "step": 816, + "timestamp": "2025-12-28T09:34:17.206545", + "elapsed_time": 2171.4916355609894, + "loss": 0.1908, + "grad_norm": 0.10968425869941711, + "learning_rate": 0.00017421383647798743, + "epoch": 0.135 + }, + { + "step": 817, + "timestamp": "2025-12-28T09:34:23.275224", + "elapsed_time": 2177.5603144168854, + "loss": 0.2587, + "grad_norm": 0.17565354704856873, + "learning_rate": 0.00017408805031446543, + "epoch": 0.135625 + }, + { + "step": 818, + "timestamp": "2025-12-28T09:34:30.707329", + "elapsed_time": 2184.9924190044403, + "loss": 0.4115, + "grad_norm": 0.21891839802265167, + "learning_rate": 0.00017396226415094341, + "epoch": 0.13625 + }, + { + "step": 819, + "timestamp": "2025-12-28T09:34:40.426629", + "elapsed_time": 2194.7117190361023, + "loss": 0.1647, + "grad_norm": 0.1077812984585762, + "learning_rate": 0.0001738364779874214, + "epoch": 0.136875 + }, + { + "step": 820, + "timestamp": "2025-12-28T09:34:51.747535", + "elapsed_time": 2206.032625436783, + "loss": 0.1475, + "grad_norm": 0.14761823415756226, + "learning_rate": 0.00017371069182389937, + "epoch": 0.1375 + }, + { + "step": 821, + "timestamp": "2025-12-28T09:34:55.071621", + "elapsed_time": 2209.356710910797, + "loss": 0.3967, + "grad_norm": 0.24036414921283722, + "learning_rate": 0.00017358490566037738, + "epoch": 0.138125 + }, + { + "step": 822, + "timestamp": "2025-12-28T09:35:15.970127", + "elapsed_time": 2230.255217552185, + "loss": 0.0833, + "grad_norm": 0.09146321564912796, + "learning_rate": 0.00017345911949685536, + "epoch": 0.13875 + }, + { + "step": 823, + "timestamp": "2025-12-28T09:35:26.728440", + "elapsed_time": 2241.0135345458984, + "loss": 0.2568, + "grad_norm": 0.1330772191286087, + "learning_rate": 0.00017333333333333334, + "epoch": 0.139375 + }, + { + "step": 824, + "timestamp": "2025-12-28T09:35:35.721884", + "elapsed_time": 2250.006974697113, + "loss": 0.3728, + "grad_norm": 0.1559283286333084, + "learning_rate": 0.00017320754716981134, + "epoch": 0.14 + }, + { + "step": 825, + "timestamp": "2025-12-28T09:35:44.080694", + "elapsed_time": 2258.3657846450806, + "loss": 0.1528, + "grad_norm": 0.12386928498744965, + "learning_rate": 0.00017308176100628932, + "epoch": 0.140625 + }, + { + "step": 826, + "timestamp": "2025-12-28T09:35:57.707994", + "elapsed_time": 2271.9930849075317, + "loss": 0.1711, + "grad_norm": 0.10447162389755249, + "learning_rate": 0.0001729559748427673, + "epoch": 0.14125 + }, + { + "step": 827, + "timestamp": "2025-12-28T09:36:13.573665", + "elapsed_time": 2287.8587548732758, + "loss": 0.1396, + "grad_norm": 0.09571991115808487, + "learning_rate": 0.0001728301886792453, + "epoch": 0.141875 + }, + { + "step": 828, + "timestamp": "2025-12-28T09:36:20.525116", + "elapsed_time": 2294.810206890106, + "loss": 0.192, + "grad_norm": 0.1702854335308075, + "learning_rate": 0.00017270440251572329, + "epoch": 0.1425 + }, + { + "step": 829, + "timestamp": "2025-12-28T09:36:31.229422", + "elapsed_time": 2305.514511823654, + "loss": 0.1648, + "grad_norm": 0.11315345764160156, + "learning_rate": 0.00017257861635220126, + "epoch": 0.143125 + }, + { + "step": 830, + "timestamp": "2025-12-28T09:36:42.076098", + "elapsed_time": 2316.3611884117126, + "loss": 0.176, + "grad_norm": 0.12335264682769775, + "learning_rate": 0.00017245283018867927, + "epoch": 0.14375 + }, + { + "step": 831, + "timestamp": "2025-12-28T09:36:51.299215", + "elapsed_time": 2325.5843057632446, + "loss": 0.2268, + "grad_norm": 0.2178533375263214, + "learning_rate": 0.00017232704402515725, + "epoch": 0.144375 + }, + { + "step": 832, + "timestamp": "2025-12-28T09:37:04.287014", + "elapsed_time": 2338.5721044540405, + "loss": 0.1242, + "grad_norm": 0.11771131306886673, + "learning_rate": 0.00017220125786163523, + "epoch": 0.145 + }, + { + "step": 833, + "timestamp": "2025-12-28T09:37:12.634424", + "elapsed_time": 2346.919515132904, + "loss": 0.1969, + "grad_norm": 0.13342277705669403, + "learning_rate": 0.00017207547169811324, + "epoch": 0.145625 + }, + { + "step": 834, + "timestamp": "2025-12-28T09:37:24.771648", + "elapsed_time": 2359.0567383766174, + "loss": 0.1777, + "grad_norm": 0.10993700474500656, + "learning_rate": 0.00017194968553459121, + "epoch": 0.14625 + }, + { + "step": 835, + "timestamp": "2025-12-28T09:37:32.205060", + "elapsed_time": 2366.49015045166, + "loss": 0.2091, + "grad_norm": 0.14296327531337738, + "learning_rate": 0.0001718238993710692, + "epoch": 0.146875 + }, + { + "step": 836, + "timestamp": "2025-12-28T09:37:40.267010", + "elapsed_time": 2374.552100419998, + "loss": 0.2618, + "grad_norm": 0.18104122579097748, + "learning_rate": 0.00017169811320754717, + "epoch": 0.1475 + }, + { + "step": 837, + "timestamp": "2025-12-28T09:37:56.881841", + "elapsed_time": 2391.166932106018, + "loss": 0.1163, + "grad_norm": 0.11203698068857193, + "learning_rate": 0.00017157232704402518, + "epoch": 0.148125 + }, + { + "step": 838, + "timestamp": "2025-12-28T09:38:15.789255", + "elapsed_time": 2410.0743453502655, + "loss": 0.1306, + "grad_norm": 0.12151342630386353, + "learning_rate": 0.00017144654088050316, + "epoch": 0.14875 + }, + { + "step": 839, + "timestamp": "2025-12-28T09:38:22.694989", + "elapsed_time": 2416.9800794124603, + "loss": 0.1991, + "grad_norm": 0.14583951234817505, + "learning_rate": 0.00017132075471698114, + "epoch": 0.149375 + }, + { + "step": 840, + "timestamp": "2025-12-28T09:38:32.405677", + "elapsed_time": 2426.6907675266266, + "loss": 0.5209, + "grad_norm": 0.1858104020357132, + "learning_rate": 0.00017119496855345914, + "epoch": 0.15 + }, + { + "step": 841, + "timestamp": "2025-12-28T09:38:41.693108", + "elapsed_time": 2435.97819852829, + "loss": 0.2115, + "grad_norm": 0.12431403249502182, + "learning_rate": 0.00017106918238993712, + "epoch": 0.150625 + }, + { + "step": 842, + "timestamp": "2025-12-28T09:38:49.200296", + "elapsed_time": 2443.4853858947754, + "loss": 0.2078, + "grad_norm": 0.1477956473827362, + "learning_rate": 0.0001709433962264151, + "epoch": 0.15125 + }, + { + "step": 843, + "timestamp": "2025-12-28T09:38:55.712143", + "elapsed_time": 2449.9972331523895, + "loss": 0.3907, + "grad_norm": 0.184353306889534, + "learning_rate": 0.0001708176100628931, + "epoch": 0.151875 + }, + { + "step": 844, + "timestamp": "2025-12-28T09:38:58.933553", + "elapsed_time": 2453.218643426895, + "loss": 0.2667, + "grad_norm": 0.262251079082489, + "learning_rate": 0.0001706918238993711, + "epoch": 0.1525 + }, + { + "step": 845, + "timestamp": "2025-12-28T09:39:05.378036", + "elapsed_time": 2459.663126707077, + "loss": 0.1593, + "grad_norm": 0.12699125707149506, + "learning_rate": 0.00017056603773584907, + "epoch": 0.153125 + }, + { + "step": 846, + "timestamp": "2025-12-28T09:39:15.452772", + "elapsed_time": 2469.737862586975, + "loss": 0.2266, + "grad_norm": 0.134856179356575, + "learning_rate": 0.00017044025157232707, + "epoch": 0.15375 + }, + { + "step": 847, + "timestamp": "2025-12-28T09:39:29.349192", + "elapsed_time": 2483.63428235054, + "loss": 0.156, + "grad_norm": 0.09759867191314697, + "learning_rate": 0.00017031446540880505, + "epoch": 0.154375 + }, + { + "step": 848, + "timestamp": "2025-12-28T09:39:35.325679", + "elapsed_time": 2489.6107692718506, + "loss": 0.1556, + "grad_norm": 0.13298504054546356, + "learning_rate": 0.00017018867924528303, + "epoch": 0.155 + }, + { + "step": 849, + "timestamp": "2025-12-28T09:39:44.060500", + "elapsed_time": 2498.345590353012, + "loss": 0.1904, + "grad_norm": 0.11852674186229706, + "learning_rate": 0.000170062893081761, + "epoch": 0.155625 + }, + { + "step": 850, + "timestamp": "2025-12-28T09:39:54.425345", + "elapsed_time": 2508.710435152054, + "loss": 0.1842, + "grad_norm": 0.22243794798851013, + "learning_rate": 0.00016993710691823902, + "epoch": 0.15625 + }, + { + "step": 851, + "timestamp": "2025-12-28T09:40:05.019054", + "elapsed_time": 2519.3041446208954, + "loss": 0.1675, + "grad_norm": 0.12315836548805237, + "learning_rate": 0.000169811320754717, + "epoch": 0.156875 + }, + { + "step": 852, + "timestamp": "2025-12-28T09:40:18.805001", + "elapsed_time": 2533.0900909900665, + "loss": 0.186, + "grad_norm": 0.1145598366856575, + "learning_rate": 0.00016968553459119497, + "epoch": 0.1575 + }, + { + "step": 853, + "timestamp": "2025-12-28T09:40:26.649585", + "elapsed_time": 2540.9346754550934, + "loss": 0.2299, + "grad_norm": 0.15518611669540405, + "learning_rate": 0.00016955974842767298, + "epoch": 0.158125 + }, + { + "step": 854, + "timestamp": "2025-12-28T09:40:34.119488", + "elapsed_time": 2548.4045779705048, + "loss": 0.2426, + "grad_norm": 0.14574050903320312, + "learning_rate": 0.00016943396226415096, + "epoch": 0.15875 + }, + { + "step": 855, + "timestamp": "2025-12-28T09:40:40.131033", + "elapsed_time": 2554.416123867035, + "loss": 0.2763, + "grad_norm": 0.1946178823709488, + "learning_rate": 0.00016930817610062894, + "epoch": 0.159375 + }, + { + "step": 856, + "timestamp": "2025-12-28T09:40:48.080229", + "elapsed_time": 2562.3653190135956, + "loss": 0.4301, + "grad_norm": 0.16829311847686768, + "learning_rate": 0.00016918238993710694, + "epoch": 0.16 + }, + { + "step": 857, + "timestamp": "2025-12-28T09:41:03.320074", + "elapsed_time": 2577.6051642894745, + "loss": 0.1631, + "grad_norm": 0.19184663891792297, + "learning_rate": 0.00016905660377358492, + "epoch": 0.160625 + }, + { + "step": 858, + "timestamp": "2025-12-28T09:41:12.493085", + "elapsed_time": 2586.7781751155853, + "loss": 0.1824, + "grad_norm": 0.125960111618042, + "learning_rate": 0.0001689308176100629, + "epoch": 0.16125 + }, + { + "step": 859, + "timestamp": "2025-12-28T09:41:21.589208", + "elapsed_time": 2595.874298810959, + "loss": 0.1654, + "grad_norm": 0.12101560086011887, + "learning_rate": 0.0001688050314465409, + "epoch": 0.161875 + }, + { + "step": 860, + "timestamp": "2025-12-28T09:41:30.728410", + "elapsed_time": 2605.0134999752045, + "loss": 0.3724, + "grad_norm": 0.1491203010082245, + "learning_rate": 0.0001686792452830189, + "epoch": 0.1625 + }, + { + "step": 861, + "timestamp": "2025-12-28T09:41:40.445296", + "elapsed_time": 2614.7303869724274, + "loss": 0.1735, + "grad_norm": 0.12737800180912018, + "learning_rate": 0.00016855345911949687, + "epoch": 0.163125 + }, + { + "step": 862, + "timestamp": "2025-12-28T09:41:55.948429", + "elapsed_time": 2630.2335200309753, + "loss": 0.126, + "grad_norm": 0.08794135600328445, + "learning_rate": 0.00016842767295597485, + "epoch": 0.16375 + }, + { + "step": 863, + "timestamp": "2025-12-28T09:41:59.637376", + "elapsed_time": 2633.9224672317505, + "loss": 0.302, + "grad_norm": 0.2143140286207199, + "learning_rate": 0.00016830188679245285, + "epoch": 0.164375 + }, + { + "step": 864, + "timestamp": "2025-12-28T09:42:06.917332", + "elapsed_time": 2641.202422618866, + "loss": 0.2941, + "grad_norm": 0.15600278973579407, + "learning_rate": 0.00016817610062893083, + "epoch": 0.165 + }, + { + "step": 865, + "timestamp": "2025-12-28T09:42:18.849851", + "elapsed_time": 2653.134941339493, + "loss": 0.1691, + "grad_norm": 0.11934607475996017, + "learning_rate": 0.0001680503144654088, + "epoch": 0.165625 + }, + { + "step": 866, + "timestamp": "2025-12-28T09:42:28.174345", + "elapsed_time": 2662.459435939789, + "loss": 0.1558, + "grad_norm": 0.11288256198167801, + "learning_rate": 0.00016792452830188682, + "epoch": 0.16625 + }, + { + "step": 867, + "timestamp": "2025-12-28T09:42:39.834999", + "elapsed_time": 2674.1200897693634, + "loss": 0.1402, + "grad_norm": 0.11293160915374756, + "learning_rate": 0.0001677987421383648, + "epoch": 0.166875 + }, + { + "step": 868, + "timestamp": "2025-12-28T09:42:45.101829", + "elapsed_time": 2679.3869194984436, + "loss": 0.3336, + "grad_norm": 0.21127207577228546, + "learning_rate": 0.00016767295597484277, + "epoch": 0.1675 + }, + { + "step": 869, + "timestamp": "2025-12-28T09:42:53.397932", + "elapsed_time": 2687.683022260666, + "loss": 0.2122, + "grad_norm": 0.13150571286678314, + "learning_rate": 0.00016754716981132078, + "epoch": 0.168125 + }, + { + "step": 870, + "timestamp": "2025-12-28T09:43:02.604043", + "elapsed_time": 2696.889132976532, + "loss": 0.2443, + "grad_norm": 0.16025897860527039, + "learning_rate": 0.00016742138364779876, + "epoch": 0.16875 + }, + { + "step": 871, + "timestamp": "2025-12-28T09:43:16.218580", + "elapsed_time": 2710.503670692444, + "loss": 0.1361, + "grad_norm": 0.1163235530257225, + "learning_rate": 0.00016729559748427674, + "epoch": 0.169375 + }, + { + "step": 872, + "timestamp": "2025-12-28T09:43:23.734614", + "elapsed_time": 2718.01970911026, + "loss": 0.2797, + "grad_norm": 0.3218298852443695, + "learning_rate": 0.00016716981132075474, + "epoch": 0.17 + }, + { + "step": 873, + "timestamp": "2025-12-28T09:43:40.199422", + "elapsed_time": 2734.4845123291016, + "loss": 0.1531, + "grad_norm": 0.09483516216278076, + "learning_rate": 0.00016704402515723272, + "epoch": 0.170625 + }, + { + "step": 874, + "timestamp": "2025-12-28T09:43:46.722337", + "elapsed_time": 2741.007427930832, + "loss": 0.1846, + "grad_norm": 0.13699615001678467, + "learning_rate": 0.0001669182389937107, + "epoch": 0.17125 + }, + { + "step": 875, + "timestamp": "2025-12-28T09:44:01.014473", + "elapsed_time": 2755.2995631694794, + "loss": 0.2334, + "grad_norm": 0.14246909320354462, + "learning_rate": 0.00016679245283018868, + "epoch": 0.171875 + }, + { + "step": 876, + "timestamp": "2025-12-28T09:44:13.506506", + "elapsed_time": 2767.7915959358215, + "loss": 0.1595, + "grad_norm": 0.11375102400779724, + "learning_rate": 0.0001666666666666667, + "epoch": 0.1725 + }, + { + "step": 877, + "timestamp": "2025-12-28T09:44:19.673893", + "elapsed_time": 2773.9589836597443, + "loss": 0.2133, + "grad_norm": 0.1539752185344696, + "learning_rate": 0.00016654088050314467, + "epoch": 0.173125 + }, + { + "step": 878, + "timestamp": "2025-12-28T09:44:31.607564", + "elapsed_time": 2785.8926544189453, + "loss": 0.3027, + "grad_norm": 0.13873888552188873, + "learning_rate": 0.00016641509433962265, + "epoch": 0.17375 + }, + { + "step": 879, + "timestamp": "2025-12-28T09:44:36.488223", + "elapsed_time": 2790.773313522339, + "loss": 0.2748, + "grad_norm": 0.17139685153961182, + "learning_rate": 0.00016628930817610065, + "epoch": 0.174375 + }, + { + "step": 880, + "timestamp": "2025-12-28T09:44:44.545212", + "elapsed_time": 2798.83030295372, + "loss": 0.192, + "grad_norm": 0.14361582696437836, + "learning_rate": 0.00016616352201257863, + "epoch": 0.175 + }, + { + "step": 881, + "timestamp": "2025-12-28T09:44:52.364196", + "elapsed_time": 2806.6492867469788, + "loss": 0.2773, + "grad_norm": 0.18555279076099396, + "learning_rate": 0.0001660377358490566, + "epoch": 0.175625 + }, + { + "step": 882, + "timestamp": "2025-12-28T09:44:58.645574", + "elapsed_time": 2812.9306647777557, + "loss": 0.3019, + "grad_norm": 0.1758921891450882, + "learning_rate": 0.00016591194968553462, + "epoch": 0.17625 + }, + { + "step": 883, + "timestamp": "2025-12-28T09:45:09.067925", + "elapsed_time": 2823.353015899658, + "loss": 0.3553, + "grad_norm": 0.15257853269577026, + "learning_rate": 0.0001657861635220126, + "epoch": 0.176875 + }, + { + "step": 884, + "timestamp": "2025-12-28T09:45:13.656005", + "elapsed_time": 2827.94109582901, + "loss": 0.2704, + "grad_norm": 0.17811451852321625, + "learning_rate": 0.00016566037735849058, + "epoch": 0.1775 + }, + { + "step": 885, + "timestamp": "2025-12-28T09:45:19.215450", + "elapsed_time": 2833.500540494919, + "loss": 0.1815, + "grad_norm": 0.1703587919473648, + "learning_rate": 0.00016553459119496858, + "epoch": 0.178125 + }, + { + "step": 886, + "timestamp": "2025-12-28T09:45:24.676685", + "elapsed_time": 2838.9617760181427, + "loss": 0.4395, + "grad_norm": 0.2093503326177597, + "learning_rate": 0.00016540880503144656, + "epoch": 0.17875 + }, + { + "step": 887, + "timestamp": "2025-12-28T09:45:29.125709", + "elapsed_time": 2843.410799264908, + "loss": 0.2659, + "grad_norm": 0.1815217286348343, + "learning_rate": 0.00016528301886792454, + "epoch": 0.179375 + }, + { + "step": 888, + "timestamp": "2025-12-28T09:45:35.113254", + "elapsed_time": 2849.3983447551727, + "loss": 0.2541, + "grad_norm": 0.18482376635074615, + "learning_rate": 0.00016515723270440252, + "epoch": 0.18 + }, + { + "step": 889, + "timestamp": "2025-12-28T09:45:41.738048", + "elapsed_time": 2856.023138523102, + "loss": 0.1837, + "grad_norm": 0.15669426321983337, + "learning_rate": 0.00016503144654088052, + "epoch": 0.180625 + }, + { + "step": 890, + "timestamp": "2025-12-28T09:45:50.170533", + "elapsed_time": 2864.455623626709, + "loss": 0.2074, + "grad_norm": 0.16513699293136597, + "learning_rate": 0.0001649056603773585, + "epoch": 0.18125 + }, + { + "step": 891, + "timestamp": "2025-12-28T09:45:58.910483", + "elapsed_time": 2873.1955733299255, + "loss": 0.1612, + "grad_norm": 0.1310487985610962, + "learning_rate": 0.00016477987421383648, + "epoch": 0.181875 + }, + { + "step": 892, + "timestamp": "2025-12-28T09:46:08.665921", + "elapsed_time": 2882.9510111808777, + "loss": 0.1793, + "grad_norm": 0.1385423094034195, + "learning_rate": 0.0001646540880503145, + "epoch": 0.1825 + }, + { + "step": 893, + "timestamp": "2025-12-28T09:46:28.595204", + "elapsed_time": 2902.8802947998047, + "loss": 0.1056, + "grad_norm": 0.09160617738962173, + "learning_rate": 0.00016452830188679247, + "epoch": 0.183125 + }, + { + "step": 894, + "timestamp": "2025-12-28T09:46:37.456263", + "elapsed_time": 2911.7413532733917, + "loss": 0.2347, + "grad_norm": 0.1339615136384964, + "learning_rate": 0.00016440251572327045, + "epoch": 0.18375 + }, + { + "step": 895, + "timestamp": "2025-12-28T09:46:43.090133", + "elapsed_time": 2917.3752233982086, + "loss": 0.2097, + "grad_norm": 0.1673922836780548, + "learning_rate": 0.00016427672955974845, + "epoch": 0.184375 + }, + { + "step": 896, + "timestamp": "2025-12-28T09:46:48.742520", + "elapsed_time": 2923.0276103019714, + "loss": 0.2041, + "grad_norm": 0.17525748908519745, + "learning_rate": 0.00016415094339622643, + "epoch": 0.185 + }, + { + "step": 897, + "timestamp": "2025-12-28T09:46:56.108721", + "elapsed_time": 2930.3938117027283, + "loss": 0.1659, + "grad_norm": 0.15093988180160522, + "learning_rate": 0.0001640251572327044, + "epoch": 0.185625 + }, + { + "step": 898, + "timestamp": "2025-12-28T09:47:09.846290", + "elapsed_time": 2944.1313841342926, + "loss": 0.1298, + "grad_norm": 0.09404166042804718, + "learning_rate": 0.00016389937106918242, + "epoch": 0.18625 + }, + { + "step": 899, + "timestamp": "2025-12-28T09:47:21.835574", + "elapsed_time": 2956.120668411255, + "loss": 0.1425, + "grad_norm": 0.10061768442392349, + "learning_rate": 0.0001637735849056604, + "epoch": 0.186875 + }, + { + "step": 900, + "timestamp": "2025-12-28T09:47:32.792316", + "elapsed_time": 2967.0774064064026, + "loss": 0.124, + "grad_norm": 0.09524894505739212, + "learning_rate": 0.00016364779874213838, + "epoch": 0.1875 + }, + { + "step": 901, + "timestamp": "2025-12-28T09:47:44.729124", + "elapsed_time": 2979.0142147541046, + "loss": 0.1498, + "grad_norm": 2.4628686904907227, + "learning_rate": 0.00016352201257861635, + "epoch": 0.188125 + }, + { + "step": 902, + "timestamp": "2025-12-28T09:47:52.982666", + "elapsed_time": 2987.267756462097, + "loss": 0.2408, + "grad_norm": 0.13900604844093323, + "learning_rate": 0.00016339622641509436, + "epoch": 0.18875 + }, + { + "step": 903, + "timestamp": "2025-12-28T09:48:00.120207", + "elapsed_time": 2994.4052975177765, + "loss": 0.2353, + "grad_norm": 0.15295051038265228, + "learning_rate": 0.00016327044025157234, + "epoch": 0.189375 + }, + { + "step": 904, + "timestamp": "2025-12-28T09:48:08.263388", + "elapsed_time": 3002.5484788417816, + "loss": 0.2088, + "grad_norm": 0.14227791130542755, + "learning_rate": 0.00016314465408805032, + "epoch": 0.19 + }, + { + "step": 905, + "timestamp": "2025-12-28T09:48:21.400428", + "elapsed_time": 3015.6855177879333, + "loss": 0.1279, + "grad_norm": 0.13115034997463226, + "learning_rate": 0.00016301886792452833, + "epoch": 0.190625 + }, + { + "step": 906, + "timestamp": "2025-12-28T09:48:30.333067", + "elapsed_time": 3024.618157148361, + "loss": 0.1772, + "grad_norm": 0.13131241500377655, + "learning_rate": 0.0001628930817610063, + "epoch": 0.19125 + }, + { + "step": 907, + "timestamp": "2025-12-28T09:48:39.473331", + "elapsed_time": 3033.7584216594696, + "loss": 0.1563, + "grad_norm": 0.11939848214387894, + "learning_rate": 0.00016276729559748428, + "epoch": 0.191875 + }, + { + "step": 908, + "timestamp": "2025-12-28T09:48:55.692279", + "elapsed_time": 3049.9773693084717, + "loss": 0.1491, + "grad_norm": 0.14415167272090912, + "learning_rate": 0.0001626415094339623, + "epoch": 0.1925 + }, + { + "step": 909, + "timestamp": "2025-12-28T09:49:04.121120", + "elapsed_time": 3058.406210422516, + "loss": 0.1965, + "grad_norm": 0.17701658606529236, + "learning_rate": 0.00016251572327044027, + "epoch": 0.193125 + }, + { + "step": 910, + "timestamp": "2025-12-28T09:49:15.530418", + "elapsed_time": 3069.815508365631, + "loss": 0.2012, + "grad_norm": 0.12482966482639313, + "learning_rate": 0.00016238993710691825, + "epoch": 0.19375 + }, + { + "step": 911, + "timestamp": "2025-12-28T09:49:21.168387", + "elapsed_time": 3075.45347738266, + "loss": 0.3984, + "grad_norm": 0.17656728625297546, + "learning_rate": 0.00016226415094339625, + "epoch": 0.194375 + }, + { + "step": 912, + "timestamp": "2025-12-28T09:49:33.856071", + "elapsed_time": 3088.141161441803, + "loss": 0.131, + "grad_norm": 0.0940021201968193, + "learning_rate": 0.00016213836477987423, + "epoch": 0.195 + }, + { + "step": 913, + "timestamp": "2025-12-28T09:49:45.752863", + "elapsed_time": 3100.0379536151886, + "loss": 0.165, + "grad_norm": 0.19799897074699402, + "learning_rate": 0.0001620125786163522, + "epoch": 0.195625 + }, + { + "step": 914, + "timestamp": "2025-12-28T09:49:51.223696", + "elapsed_time": 3105.50878572464, + "loss": 0.2392, + "grad_norm": 0.39207378029823303, + "learning_rate": 0.0001618867924528302, + "epoch": 0.19625 + }, + { + "step": 915, + "timestamp": "2025-12-28T09:50:04.960806", + "elapsed_time": 3119.245896577835, + "loss": 0.1131, + "grad_norm": 0.09941917657852173, + "learning_rate": 0.0001617610062893082, + "epoch": 0.196875 + }, + { + "step": 916, + "timestamp": "2025-12-28T09:50:12.135355", + "elapsed_time": 3126.4204454421997, + "loss": 0.1949, + "grad_norm": 0.16558235883712769, + "learning_rate": 0.00016163522012578618, + "epoch": 0.1975 + }, + { + "step": 917, + "timestamp": "2025-12-28T09:50:24.159964", + "elapsed_time": 3138.4450545310974, + "loss": 0.1532, + "grad_norm": 0.1185847669839859, + "learning_rate": 0.00016150943396226416, + "epoch": 0.198125 + }, + { + "step": 918, + "timestamp": "2025-12-28T09:50:36.336876", + "elapsed_time": 3150.6219668388367, + "loss": 0.1423, + "grad_norm": 0.11089828610420227, + "learning_rate": 0.00016138364779874216, + "epoch": 0.19875 + }, + { + "step": 919, + "timestamp": "2025-12-28T09:50:42.776184", + "elapsed_time": 3157.0612740516663, + "loss": 0.3044, + "grad_norm": 0.19416838884353638, + "learning_rate": 0.00016125786163522014, + "epoch": 0.199375 + }, + { + "step": 920, + "timestamp": "2025-12-28T09:50:50.019743", + "elapsed_time": 3164.304833650589, + "loss": 0.4452, + "grad_norm": 0.3824036419391632, + "learning_rate": 0.00016113207547169812, + "epoch": 0.2 + }, + { + "step": 921, + "timestamp": "2025-12-28T09:50:57.804129", + "elapsed_time": 3172.0892198085785, + "loss": 0.3216, + "grad_norm": 0.16429851949214935, + "learning_rate": 0.00016100628930817613, + "epoch": 0.200625 + }, + { + "step": 922, + "timestamp": "2025-12-28T09:51:13.052604", + "elapsed_time": 3187.337694168091, + "loss": 0.1217, + "grad_norm": 0.13496516644954681, + "learning_rate": 0.0001608805031446541, + "epoch": 0.20125 + }, + { + "step": 923, + "timestamp": "2025-12-28T09:51:24.570532", + "elapsed_time": 3198.855621814728, + "loss": 0.1566, + "grad_norm": 0.12458086013793945, + "learning_rate": 0.00016075471698113208, + "epoch": 0.201875 + }, + { + "step": 924, + "timestamp": "2025-12-28T09:51:31.848741", + "elapsed_time": 3206.133831501007, + "loss": 0.2315, + "grad_norm": 0.14897367358207703, + "learning_rate": 0.0001606289308176101, + "epoch": 0.2025 + }, + { + "step": 925, + "timestamp": "2025-12-28T09:51:45.734245", + "elapsed_time": 3220.019335269928, + "loss": 0.111, + "grad_norm": 0.09125252813100815, + "learning_rate": 0.00016050314465408807, + "epoch": 0.203125 + }, + { + "step": 926, + "timestamp": "2025-12-28T09:51:58.601974", + "elapsed_time": 3232.8870646953583, + "loss": 0.1471, + "grad_norm": 0.11020820587873459, + "learning_rate": 0.00016037735849056605, + "epoch": 0.20375 + }, + { + "step": 927, + "timestamp": "2025-12-28T09:52:06.148283", + "elapsed_time": 3240.43337392807, + "loss": 0.1875, + "grad_norm": 0.12961116433143616, + "learning_rate": 0.00016025157232704405, + "epoch": 0.204375 + }, + { + "step": 928, + "timestamp": "2025-12-28T09:52:13.614144", + "elapsed_time": 3247.899234056473, + "loss": 0.1729, + "grad_norm": 0.14182396233081818, + "learning_rate": 0.00016012578616352203, + "epoch": 0.205 + }, + { + "step": 929, + "timestamp": "2025-12-28T09:52:18.583980", + "elapsed_time": 3252.8690705299377, + "loss": 0.3456, + "grad_norm": 0.20903103053569794, + "learning_rate": 0.00016, + "epoch": 0.205625 + }, + { + "step": 930, + "timestamp": "2025-12-28T09:52:25.638732", + "elapsed_time": 3259.9238221645355, + "loss": 0.1962, + "grad_norm": 0.13587729632854462, + "learning_rate": 0.000159874213836478, + "epoch": 0.20625 + }, + { + "step": 931, + "timestamp": "2025-12-28T09:52:34.937513", + "elapsed_time": 3269.222603082657, + "loss": 0.2363, + "grad_norm": 0.1760166883468628, + "learning_rate": 0.000159748427672956, + "epoch": 0.206875 + }, + { + "step": 932, + "timestamp": "2025-12-28T09:52:43.090395", + "elapsed_time": 3277.3754856586456, + "loss": 0.2063, + "grad_norm": 0.13720373809337616, + "learning_rate": 0.00015962264150943398, + "epoch": 0.2075 + }, + { + "step": 933, + "timestamp": "2025-12-28T09:52:49.474895", + "elapsed_time": 3283.7599856853485, + "loss": 0.229, + "grad_norm": 0.14343424141407013, + "learning_rate": 0.00015949685534591196, + "epoch": 0.208125 + }, + { + "step": 934, + "timestamp": "2025-12-28T09:53:03.465281", + "elapsed_time": 3297.7503714561462, + "loss": 0.1478, + "grad_norm": 0.15561024844646454, + "learning_rate": 0.00015937106918238996, + "epoch": 0.20875 + }, + { + "step": 935, + "timestamp": "2025-12-28T09:53:10.342764", + "elapsed_time": 3304.627854824066, + "loss": 0.2767, + "grad_norm": 0.15607592463493347, + "learning_rate": 0.00015924528301886794, + "epoch": 0.209375 + }, + { + "step": 936, + "timestamp": "2025-12-28T09:53:15.162542", + "elapsed_time": 3309.447632074356, + "loss": 0.2736, + "grad_norm": 0.18114545941352844, + "learning_rate": 0.00015911949685534592, + "epoch": 0.21 + }, + { + "step": 937, + "timestamp": "2025-12-28T09:53:26.422739", + "elapsed_time": 3320.7078297138214, + "loss": 0.1715, + "grad_norm": 0.11876345425844193, + "learning_rate": 0.00015899371069182393, + "epoch": 0.210625 + }, + { + "step": 938, + "timestamp": "2025-12-28T09:53:32.911148", + "elapsed_time": 3327.1962430477142, + "loss": 0.1795, + "grad_norm": 0.1508261114358902, + "learning_rate": 0.0001588679245283019, + "epoch": 0.21125 + }, + { + "step": 939, + "timestamp": "2025-12-28T09:53:38.919335", + "elapsed_time": 3333.2044246196747, + "loss": 0.1735, + "grad_norm": 0.14189061522483826, + "learning_rate": 0.00015874213836477989, + "epoch": 0.211875 + }, + { + "step": 940, + "timestamp": "2025-12-28T09:53:52.060423", + "elapsed_time": 3346.345513343811, + "loss": 0.1236, + "grad_norm": 0.09674876928329468, + "learning_rate": 0.0001586163522012579, + "epoch": 0.2125 + }, + { + "step": 941, + "timestamp": "2025-12-28T09:54:12.941873", + "elapsed_time": 3367.22696352005, + "loss": 0.1143, + "grad_norm": 0.10512091219425201, + "learning_rate": 0.00015849056603773587, + "epoch": 0.213125 + }, + { + "step": 942, + "timestamp": "2025-12-28T09:54:23.471024", + "elapsed_time": 3377.756114244461, + "loss": 0.1029, + "grad_norm": 0.21176813542842865, + "learning_rate": 0.00015836477987421385, + "epoch": 0.21375 + }, + { + "step": 943, + "timestamp": "2025-12-28T09:54:33.191585", + "elapsed_time": 3387.4766755104065, + "loss": 0.1839, + "grad_norm": 0.12542878091335297, + "learning_rate": 0.00015823899371069183, + "epoch": 0.214375 + }, + { + "step": 944, + "timestamp": "2025-12-28T09:54:53.926827", + "elapsed_time": 3408.2119178771973, + "loss": 0.1474, + "grad_norm": 0.12502753734588623, + "learning_rate": 0.00015811320754716983, + "epoch": 0.215 + }, + { + "step": 945, + "timestamp": "2025-12-28T09:55:05.593186", + "elapsed_time": 3419.8782770633698, + "loss": 0.1523, + "grad_norm": 0.11267740279436111, + "learning_rate": 0.00015798742138364781, + "epoch": 0.215625 + }, + { + "step": 946, + "timestamp": "2025-12-28T09:55:14.970581", + "elapsed_time": 3429.2556715011597, + "loss": 0.1716, + "grad_norm": 0.14452391862869263, + "learning_rate": 0.0001578616352201258, + "epoch": 0.21625 + }, + { + "step": 947, + "timestamp": "2025-12-28T09:55:23.786812", + "elapsed_time": 3438.0719022750854, + "loss": 0.2105, + "grad_norm": 0.14315542578697205, + "learning_rate": 0.0001577358490566038, + "epoch": 0.216875 + }, + { + "step": 948, + "timestamp": "2025-12-28T09:55:27.890524", + "elapsed_time": 3442.175614595413, + "loss": 0.3344, + "grad_norm": 0.2096068263053894, + "learning_rate": 0.00015761006289308178, + "epoch": 0.2175 + }, + { + "step": 949, + "timestamp": "2025-12-28T09:55:37.602478", + "elapsed_time": 3451.887568950653, + "loss": 0.1969, + "grad_norm": 0.12928517162799835, + "learning_rate": 0.00015748427672955976, + "epoch": 0.218125 + }, + { + "step": 950, + "timestamp": "2025-12-28T09:55:42.477278", + "elapsed_time": 3456.7623686790466, + "loss": 0.2521, + "grad_norm": 0.17746160924434662, + "learning_rate": 0.00015735849056603776, + "epoch": 0.21875 + }, + { + "step": 951, + "timestamp": "2025-12-28T09:55:48.417273", + "elapsed_time": 3462.702367544174, + "loss": 0.1949, + "grad_norm": 0.14048974215984344, + "learning_rate": 0.00015723270440251574, + "epoch": 0.219375 + }, + { + "step": 952, + "timestamp": "2025-12-28T09:55:56.843770", + "elapsed_time": 3471.1288611888885, + "loss": 0.3451, + "grad_norm": 0.14857220649719238, + "learning_rate": 0.00015710691823899372, + "epoch": 0.22 + }, + { + "step": 953, + "timestamp": "2025-12-28T09:56:03.831991", + "elapsed_time": 3478.117082118988, + "loss": 0.2241, + "grad_norm": 0.16286444664001465, + "learning_rate": 0.00015698113207547173, + "epoch": 0.220625 + }, + { + "step": 954, + "timestamp": "2025-12-28T09:56:09.773840", + "elapsed_time": 3484.0589311122894, + "loss": 0.4378, + "grad_norm": 0.25359323620796204, + "learning_rate": 0.0001568553459119497, + "epoch": 0.22125 + }, + { + "step": 955, + "timestamp": "2025-12-28T09:56:18.031656", + "elapsed_time": 3492.316746234894, + "loss": 0.3404, + "grad_norm": 0.16270765662193298, + "learning_rate": 0.00015672955974842769, + "epoch": 0.221875 + }, + { + "step": 956, + "timestamp": "2025-12-28T09:56:31.017715", + "elapsed_time": 3505.302805662155, + "loss": 0.1701, + "grad_norm": 0.11110091954469681, + "learning_rate": 0.00015660377358490567, + "epoch": 0.2225 + }, + { + "step": 957, + "timestamp": "2025-12-28T09:56:35.074678", + "elapsed_time": 3509.359768629074, + "loss": 0.3194, + "grad_norm": 0.20245419442653656, + "learning_rate": 0.00015647798742138367, + "epoch": 0.223125 + }, + { + "step": 958, + "timestamp": "2025-12-28T09:56:53.824765", + "elapsed_time": 3528.109855890274, + "loss": 0.1034, + "grad_norm": 0.07926256209611893, + "learning_rate": 0.00015635220125786165, + "epoch": 0.22375 + }, + { + "step": 959, + "timestamp": "2025-12-28T09:57:00.356123", + "elapsed_time": 3534.6412131786346, + "loss": 0.2092, + "grad_norm": 0.17986778914928436, + "learning_rate": 0.00015622641509433963, + "epoch": 0.224375 + }, + { + "step": 960, + "timestamp": "2025-12-28T09:57:08.990782", + "elapsed_time": 3543.2758724689484, + "loss": 0.1359, + "grad_norm": 0.13381750881671906, + "learning_rate": 0.00015610062893081764, + "epoch": 0.225 + }, + { + "step": 961, + "timestamp": "2025-12-28T09:57:26.985511", + "elapsed_time": 3561.2706019878387, + "loss": 0.0902, + "grad_norm": 0.07347288727760315, + "learning_rate": 0.00015597484276729561, + "epoch": 0.225625 + }, + { + "step": 962, + "timestamp": "2025-12-28T09:57:31.711118", + "elapsed_time": 3565.996208190918, + "loss": 0.2244, + "grad_norm": 0.17614322900772095, + "learning_rate": 0.0001558490566037736, + "epoch": 0.22625 + }, + { + "step": 963, + "timestamp": "2025-12-28T09:57:40.681045", + "elapsed_time": 3574.9661359786987, + "loss": 0.2077, + "grad_norm": 0.1338813602924347, + "learning_rate": 0.0001557232704402516, + "epoch": 0.226875 + }, + { + "step": 964, + "timestamp": "2025-12-28T09:57:44.579572", + "elapsed_time": 3578.8646624088287, + "loss": 0.2926, + "grad_norm": 0.22020283341407776, + "learning_rate": 0.00015559748427672958, + "epoch": 0.2275 + }, + { + "step": 965, + "timestamp": "2025-12-28T09:57:52.917233", + "elapsed_time": 3587.2023231983185, + "loss": 0.1769, + "grad_norm": 0.13740260899066925, + "learning_rate": 0.00015547169811320756, + "epoch": 0.228125 + }, + { + "step": 966, + "timestamp": "2025-12-28T09:58:01.752612", + "elapsed_time": 3596.0377027988434, + "loss": 0.1681, + "grad_norm": 0.12490309774875641, + "learning_rate": 0.00015534591194968556, + "epoch": 0.22875 + }, + { + "step": 967, + "timestamp": "2025-12-28T09:58:08.641082", + "elapsed_time": 3602.9261722564697, + "loss": 0.1626, + "grad_norm": 0.12916311621665955, + "learning_rate": 0.00015522012578616354, + "epoch": 0.229375 + }, + { + "step": 968, + "timestamp": "2025-12-28T09:58:21.326631", + "elapsed_time": 3615.611721277237, + "loss": 0.1157, + "grad_norm": 0.10117348283529282, + "learning_rate": 0.00015509433962264152, + "epoch": 0.23 + }, + { + "step": 969, + "timestamp": "2025-12-28T09:58:32.168538", + "elapsed_time": 3626.4536283016205, + "loss": 0.2099, + "grad_norm": 0.21025630831718445, + "learning_rate": 0.0001549685534591195, + "epoch": 0.230625 + }, + { + "step": 970, + "timestamp": "2025-12-28T09:58:40.157526", + "elapsed_time": 3634.4426164627075, + "loss": 0.2095, + "grad_norm": 0.13846275210380554, + "learning_rate": 0.0001548427672955975, + "epoch": 0.23125 + }, + { + "step": 971, + "timestamp": "2025-12-28T09:58:58.528736", + "elapsed_time": 3652.8138258457184, + "loss": 0.6305, + "grad_norm": 0.14148321747779846, + "learning_rate": 0.0001547169811320755, + "epoch": 0.231875 + }, + { + "step": 972, + "timestamp": "2025-12-28T09:59:09.985061", + "elapsed_time": 3664.270151615143, + "loss": 0.1818, + "grad_norm": 0.4846095144748688, + "learning_rate": 0.00015459119496855347, + "epoch": 0.2325 + }, + { + "step": 973, + "timestamp": "2025-12-28T09:59:15.643684", + "elapsed_time": 3669.9287745952606, + "loss": 0.3468, + "grad_norm": 0.18205124139785767, + "learning_rate": 0.00015446540880503147, + "epoch": 0.233125 + }, + { + "step": 974, + "timestamp": "2025-12-28T09:59:24.573388", + "elapsed_time": 3678.858478307724, + "loss": 0.2059, + "grad_norm": 0.13004031777381897, + "learning_rate": 0.00015433962264150945, + "epoch": 0.23375 + }, + { + "step": 975, + "timestamp": "2025-12-28T09:59:32.752142", + "elapsed_time": 3687.037232398987, + "loss": 0.2575, + "grad_norm": 0.1793992817401886, + "learning_rate": 0.00015421383647798743, + "epoch": 0.234375 + }, + { + "step": 976, + "timestamp": "2025-12-28T09:59:42.141949", + "elapsed_time": 3696.4270396232605, + "loss": 0.1674, + "grad_norm": 0.11684457212686539, + "learning_rate": 0.00015408805031446544, + "epoch": 0.235 + }, + { + "step": 977, + "timestamp": "2025-12-28T09:59:49.029099", + "elapsed_time": 3703.3141901493073, + "loss": 0.1885, + "grad_norm": 0.1317356675863266, + "learning_rate": 0.00015396226415094342, + "epoch": 0.235625 + }, + { + "step": 978, + "timestamp": "2025-12-28T09:59:56.964854", + "elapsed_time": 3711.2499442100525, + "loss": 0.2512, + "grad_norm": 0.1530027985572815, + "learning_rate": 0.0001538364779874214, + "epoch": 0.23625 + }, + { + "step": 979, + "timestamp": "2025-12-28T10:00:05.695516", + "elapsed_time": 3719.9806060791016, + "loss": 0.2406, + "grad_norm": 0.13735264539718628, + "learning_rate": 0.0001537106918238994, + "epoch": 0.236875 + }, + { + "step": 980, + "timestamp": "2025-12-28T10:00:13.159866", + "elapsed_time": 3727.444956302643, + "loss": 0.265, + "grad_norm": 0.14935627579689026, + "learning_rate": 0.00015358490566037738, + "epoch": 0.2375 + }, + { + "step": 981, + "timestamp": "2025-12-28T10:00:20.073453", + "elapsed_time": 3734.3585438728333, + "loss": 0.2331, + "grad_norm": 0.15793128311634064, + "learning_rate": 0.00015345911949685536, + "epoch": 0.238125 + }, + { + "step": 982, + "timestamp": "2025-12-28T10:00:28.700639", + "elapsed_time": 3742.9857289791107, + "loss": 0.3142, + "grad_norm": 0.14131158590316772, + "learning_rate": 0.00015333333333333334, + "epoch": 0.23875 + }, + { + "step": 983, + "timestamp": "2025-12-28T10:00:37.338005", + "elapsed_time": 3751.623096227646, + "loss": 0.1871, + "grad_norm": 0.12910977005958557, + "learning_rate": 0.00015320754716981134, + "epoch": 0.239375 + }, + { + "step": 984, + "timestamp": "2025-12-28T10:00:51.515626", + "elapsed_time": 3765.8007164001465, + "loss": 0.2466, + "grad_norm": 0.15984784066677094, + "learning_rate": 0.00015308176100628932, + "epoch": 0.24 + }, + { + "step": 985, + "timestamp": "2025-12-28T10:00:58.996365", + "elapsed_time": 3773.2814559936523, + "loss": 0.1202, + "grad_norm": 0.11847283691167831, + "learning_rate": 0.0001529559748427673, + "epoch": 0.240625 + }, + { + "step": 986, + "timestamp": "2025-12-28T10:01:09.966903", + "elapsed_time": 3784.25199341774, + "loss": 0.1645, + "grad_norm": 0.15656305849552155, + "learning_rate": 0.0001528301886792453, + "epoch": 0.24125 + }, + { + "step": 987, + "timestamp": "2025-12-28T10:01:23.517576", + "elapsed_time": 3797.802666902542, + "loss": 0.1248, + "grad_norm": 0.0928470715880394, + "learning_rate": 0.0001527044025157233, + "epoch": 0.241875 + }, + { + "step": 988, + "timestamp": "2025-12-28T10:01:34.217983", + "elapsed_time": 3808.503073453903, + "loss": 0.1338, + "grad_norm": 0.11139731854200363, + "learning_rate": 0.00015257861635220127, + "epoch": 0.2425 + }, + { + "step": 989, + "timestamp": "2025-12-28T10:01:41.199518", + "elapsed_time": 3815.484607934952, + "loss": 0.2724, + "grad_norm": 0.15163938701152802, + "learning_rate": 0.00015245283018867927, + "epoch": 0.243125 + }, + { + "step": 990, + "timestamp": "2025-12-28T10:01:48.450348", + "elapsed_time": 3822.7354385852814, + "loss": 0.2476, + "grad_norm": 0.17267391085624695, + "learning_rate": 0.00015232704402515725, + "epoch": 0.24375 + }, + { + "step": 991, + "timestamp": "2025-12-28T10:01:56.433279", + "elapsed_time": 3830.718369960785, + "loss": 0.1465, + "grad_norm": 0.11863212287425995, + "learning_rate": 0.00015220125786163523, + "epoch": 0.244375 + }, + { + "step": 992, + "timestamp": "2025-12-28T10:02:09.167701", + "elapsed_time": 3843.4527916908264, + "loss": 0.1302, + "grad_norm": 0.10036662966012955, + "learning_rate": 0.00015207547169811324, + "epoch": 0.245 + }, + { + "step": 993, + "timestamp": "2025-12-28T10:02:14.112829", + "elapsed_time": 3848.3979198932648, + "loss": 0.3477, + "grad_norm": 0.19196945428848267, + "learning_rate": 0.00015194968553459122, + "epoch": 0.245625 + }, + { + "step": 994, + "timestamp": "2025-12-28T10:02:24.995027", + "elapsed_time": 3859.2801179885864, + "loss": 0.1563, + "grad_norm": 0.11495956778526306, + "learning_rate": 0.0001518238993710692, + "epoch": 0.24625 + }, + { + "step": 995, + "timestamp": "2025-12-28T10:02:33.036433", + "elapsed_time": 3867.3215239048004, + "loss": 0.1802, + "grad_norm": 0.11796751618385315, + "learning_rate": 0.00015169811320754717, + "epoch": 0.246875 + }, + { + "step": 996, + "timestamp": "2025-12-28T10:02:50.626582", + "elapsed_time": 3884.911673307419, + "loss": 0.1721, + "grad_norm": 0.10495149344205856, + "learning_rate": 0.00015157232704402518, + "epoch": 0.2475 + }, + { + "step": 997, + "timestamp": "2025-12-28T10:02:56.189074", + "elapsed_time": 3890.474164247513, + "loss": 0.2895, + "grad_norm": 0.18693897128105164, + "learning_rate": 0.00015144654088050316, + "epoch": 0.248125 + }, + { + "step": 998, + "timestamp": "2025-12-28T10:03:03.931060", + "elapsed_time": 3898.216150522232, + "loss": 0.2034, + "grad_norm": 0.1423393189907074, + "learning_rate": 0.00015132075471698114, + "epoch": 0.24875 + }, + { + "step": 999, + "timestamp": "2025-12-28T10:03:21.397855", + "elapsed_time": 3915.6829454898834, + "loss": 0.1197, + "grad_norm": 0.0859493762254715, + "learning_rate": 0.00015119496855345914, + "epoch": 0.249375 + }, + { + "step": 1000, + "timestamp": "2025-12-28T10:03:29.241271", + "elapsed_time": 3923.5263612270355, + "loss": 0.2176, + "grad_norm": 0.13979171216487885, + "learning_rate": 0.00015106918238993712, + "epoch": 0.25 + }, + { + "step": 1001, + "timestamp": "2025-12-28T10:03:35.258000", + "elapsed_time": 3929.543091058731, + "loss": 0.2195, + "grad_norm": 0.15846221148967743, + "learning_rate": 0.0001509433962264151, + "epoch": 0.250625 + }, + { + "step": 1002, + "timestamp": "2025-12-28T10:03:40.199295", + "elapsed_time": 3934.484385251999, + "loss": 0.345, + "grad_norm": 0.18337437510490417, + "learning_rate": 0.0001508176100628931, + "epoch": 0.25125 + }, + { + "step": 1003, + "timestamp": "2025-12-28T10:03:49.416164", + "elapsed_time": 3943.701254606247, + "loss": 0.2019, + "grad_norm": 0.11637191474437714, + "learning_rate": 0.0001506918238993711, + "epoch": 0.251875 + }, + { + "step": 1004, + "timestamp": "2025-12-28T10:04:02.545538", + "elapsed_time": 3956.830629825592, + "loss": 0.1278, + "grad_norm": 0.1031939908862114, + "learning_rate": 0.00015056603773584907, + "epoch": 0.2525 + }, + { + "step": 1005, + "timestamp": "2025-12-28T10:04:08.991273", + "elapsed_time": 3963.2763633728027, + "loss": 0.2647, + "grad_norm": 0.16900090873241425, + "learning_rate": 0.00015044025157232707, + "epoch": 0.253125 + }, + { + "step": 1006, + "timestamp": "2025-12-28T10:04:15.995166", + "elapsed_time": 3970.2802562713623, + "loss": 0.2239, + "grad_norm": 0.1869828701019287, + "learning_rate": 0.00015031446540880505, + "epoch": 0.25375 + }, + { + "step": 1007, + "timestamp": "2025-12-28T10:04:26.260626", + "elapsed_time": 3980.5457170009613, + "loss": 0.1438, + "grad_norm": 0.1253536194562912, + "learning_rate": 0.00015018867924528303, + "epoch": 0.254375 + }, + { + "step": 1008, + "timestamp": "2025-12-28T10:04:33.781669", + "elapsed_time": 3988.0667593479156, + "loss": 0.2621, + "grad_norm": 0.17986273765563965, + "learning_rate": 0.00015006289308176104, + "epoch": 0.255 + }, + { + "step": 1009, + "timestamp": "2025-12-28T10:04:40.413811", + "elapsed_time": 3994.698902130127, + "loss": 0.2008, + "grad_norm": 0.14311149716377258, + "learning_rate": 0.000149937106918239, + "epoch": 0.255625 + }, + { + "step": 1010, + "timestamp": "2025-12-28T10:04:59.959869", + "elapsed_time": 4014.2449600696564, + "loss": 0.1501, + "grad_norm": 0.08852102607488632, + "learning_rate": 0.00014981132075471697, + "epoch": 0.25625 + }, + { + "step": 1011, + "timestamp": "2025-12-28T10:05:07.239700", + "elapsed_time": 4021.5247910022736, + "loss": 0.2402, + "grad_norm": 0.19574478268623352, + "learning_rate": 0.00014968553459119498, + "epoch": 0.256875 + }, + { + "step": 1012, + "timestamp": "2025-12-28T10:05:19.732988", + "elapsed_time": 4034.0180780887604, + "loss": 0.1703, + "grad_norm": 0.10853290557861328, + "learning_rate": 0.00014955974842767295, + "epoch": 0.2575 + }, + { + "step": 1013, + "timestamp": "2025-12-28T10:05:28.914807", + "elapsed_time": 4043.1998975276947, + "loss": 0.2656, + "grad_norm": 0.16859759390354156, + "learning_rate": 0.00014943396226415093, + "epoch": 0.258125 + }, + { + "step": 1014, + "timestamp": "2025-12-28T10:05:40.579825", + "elapsed_time": 4054.8649151325226, + "loss": 0.1861, + "grad_norm": 0.12188176810741425, + "learning_rate": 0.00014930817610062894, + "epoch": 0.25875 + }, + { + "step": 1015, + "timestamp": "2025-12-28T10:05:52.247391", + "elapsed_time": 4066.5324816703796, + "loss": 0.172, + "grad_norm": 0.11021149158477783, + "learning_rate": 0.00014918238993710692, + "epoch": 0.259375 + }, + { + "step": 1016, + "timestamp": "2025-12-28T10:06:02.667177", + "elapsed_time": 4076.9522676467896, + "loss": 0.1577, + "grad_norm": 0.11924094706773758, + "learning_rate": 0.0001490566037735849, + "epoch": 0.26 + }, + { + "step": 1017, + "timestamp": "2025-12-28T10:06:10.516680", + "elapsed_time": 4084.8017704486847, + "loss": 0.1642, + "grad_norm": 0.12346573173999786, + "learning_rate": 0.00014893081761006288, + "epoch": 0.260625 + }, + { + "step": 1018, + "timestamp": "2025-12-28T10:06:15.781551", + "elapsed_time": 4090.066641330719, + "loss": 0.4633, + "grad_norm": 0.2274809330701828, + "learning_rate": 0.00014880503144654088, + "epoch": 0.26125 + }, + { + "step": 1019, + "timestamp": "2025-12-28T10:06:23.024176", + "elapsed_time": 4097.30926656723, + "loss": 0.2659, + "grad_norm": 0.1639455407857895, + "learning_rate": 0.00014867924528301886, + "epoch": 0.261875 + }, + { + "step": 1020, + "timestamp": "2025-12-28T10:06:29.002861", + "elapsed_time": 4103.28795170784, + "loss": 0.1842, + "grad_norm": 0.12965330481529236, + "learning_rate": 0.00014855345911949684, + "epoch": 0.2625 + }, + { + "step": 1021, + "timestamp": "2025-12-28T10:06:41.542093", + "elapsed_time": 4115.827183961868, + "loss": 0.159, + "grad_norm": 0.10215835273265839, + "learning_rate": 0.00014842767295597485, + "epoch": 0.263125 + }, + { + "step": 1022, + "timestamp": "2025-12-28T10:06:47.805495", + "elapsed_time": 4122.090589284897, + "loss": 0.2087, + "grad_norm": 0.19713939726352692, + "learning_rate": 0.00014830188679245283, + "epoch": 0.26375 + }, + { + "step": 1023, + "timestamp": "2025-12-28T10:06:53.583302", + "elapsed_time": 4127.8683931827545, + "loss": 0.2987, + "grad_norm": 0.18019935488700867, + "learning_rate": 0.0001481761006289308, + "epoch": 0.264375 + }, + { + "step": 1024, + "timestamp": "2025-12-28T10:07:00.180550", + "elapsed_time": 4134.465640544891, + "loss": 0.2478, + "grad_norm": 0.1467757374048233, + "learning_rate": 0.0001480503144654088, + "epoch": 0.265 + }, + { + "step": 1025, + "timestamp": "2025-12-28T10:07:10.795758", + "elapsed_time": 4145.0808482170105, + "loss": 0.1668, + "grad_norm": 0.10882839560508728, + "learning_rate": 0.0001479245283018868, + "epoch": 0.265625 + }, + { + "step": 1026, + "timestamp": "2025-12-28T10:07:17.133433", + "elapsed_time": 4151.418524265289, + "loss": 0.2731, + "grad_norm": 0.15907247364521027, + "learning_rate": 0.00014779874213836477, + "epoch": 0.26625 + }, + { + "step": 1027, + "timestamp": "2025-12-28T10:07:24.120391", + "elapsed_time": 4158.405481100082, + "loss": 0.2377, + "grad_norm": 0.13879650831222534, + "learning_rate": 0.00014767295597484278, + "epoch": 0.266875 + }, + { + "step": 1028, + "timestamp": "2025-12-28T10:07:32.464337", + "elapsed_time": 4166.749427556992, + "loss": 0.1956, + "grad_norm": 0.13655611872673035, + "learning_rate": 0.00014754716981132076, + "epoch": 0.2675 + }, + { + "step": 1029, + "timestamp": "2025-12-28T10:07:46.125666", + "elapsed_time": 4180.410757303238, + "loss": 0.119, + "grad_norm": 0.2348964810371399, + "learning_rate": 0.00014742138364779873, + "epoch": 0.268125 + }, + { + "step": 1030, + "timestamp": "2025-12-28T10:07:50.593098", + "elapsed_time": 4184.878188371658, + "loss": 0.2267, + "grad_norm": 0.16901762783527374, + "learning_rate": 0.0001472955974842767, + "epoch": 0.26875 + }, + { + "step": 1031, + "timestamp": "2025-12-28T10:08:03.007589", + "elapsed_time": 4197.292679548264, + "loss": 0.1174, + "grad_norm": 0.08787506818771362, + "learning_rate": 0.00014716981132075472, + "epoch": 0.269375 + }, + { + "step": 1032, + "timestamp": "2025-12-28T10:08:09.168697", + "elapsed_time": 4203.453787326813, + "loss": 0.3684, + "grad_norm": 0.18056993186473846, + "learning_rate": 0.0001470440251572327, + "epoch": 0.27 + }, + { + "step": 1033, + "timestamp": "2025-12-28T10:08:20.157096", + "elapsed_time": 4214.442186117172, + "loss": 0.1727, + "grad_norm": 0.11187569051980972, + "learning_rate": 0.00014691823899371068, + "epoch": 0.270625 + }, + { + "step": 1034, + "timestamp": "2025-12-28T10:08:27.489866", + "elapsed_time": 4221.774956703186, + "loss": 0.1853, + "grad_norm": 0.132780984044075, + "learning_rate": 0.00014679245283018868, + "epoch": 0.27125 + }, + { + "step": 1035, + "timestamp": "2025-12-28T10:08:32.101038", + "elapsed_time": 4226.38612818718, + "loss": 0.2988, + "grad_norm": 0.21782910823822021, + "learning_rate": 0.00014666666666666666, + "epoch": 0.271875 + }, + { + "step": 1036, + "timestamp": "2025-12-28T10:08:38.180903", + "elapsed_time": 4232.465993881226, + "loss": 0.173, + "grad_norm": 0.1362728774547577, + "learning_rate": 0.00014654088050314464, + "epoch": 0.2725 + }, + { + "step": 1037, + "timestamp": "2025-12-28T10:08:47.148440", + "elapsed_time": 4241.433530807495, + "loss": 0.1494, + "grad_norm": 0.11238773167133331, + "learning_rate": 0.00014641509433962265, + "epoch": 0.273125 + }, + { + "step": 1038, + "timestamp": "2025-12-28T10:08:53.691649", + "elapsed_time": 4247.976739406586, + "loss": 0.2181, + "grad_norm": 0.15294326841831207, + "learning_rate": 0.00014628930817610063, + "epoch": 0.27375 + }, + { + "step": 1039, + "timestamp": "2025-12-28T10:08:59.801991", + "elapsed_time": 4254.0870814323425, + "loss": 0.2035, + "grad_norm": 0.14438562095165253, + "learning_rate": 0.0001461635220125786, + "epoch": 0.274375 + }, + { + "step": 1040, + "timestamp": "2025-12-28T10:09:04.922147", + "elapsed_time": 4259.2072377204895, + "loss": 0.1839, + "grad_norm": 0.163107767701149, + "learning_rate": 0.0001460377358490566, + "epoch": 0.275 + }, + { + "step": 1041, + "timestamp": "2025-12-28T10:09:11.363913", + "elapsed_time": 4265.649003267288, + "loss": 0.2156, + "grad_norm": 0.14970509707927704, + "learning_rate": 0.0001459119496855346, + "epoch": 0.275625 + }, + { + "step": 1042, + "timestamp": "2025-12-28T10:09:21.257050", + "elapsed_time": 4275.542140722275, + "loss": 0.1636, + "grad_norm": 0.12284188717603683, + "learning_rate": 0.00014578616352201257, + "epoch": 0.27625 + }, + { + "step": 1043, + "timestamp": "2025-12-28T10:09:28.778477", + "elapsed_time": 4283.063568115234, + "loss": 0.4273, + "grad_norm": 0.18576599657535553, + "learning_rate": 0.00014566037735849055, + "epoch": 0.276875 + }, + { + "step": 1044, + "timestamp": "2025-12-28T10:09:36.716585", + "elapsed_time": 4291.001675605774, + "loss": 0.187, + "grad_norm": 0.13979823887348175, + "learning_rate": 0.00014553459119496856, + "epoch": 0.2775 + }, + { + "step": 1045, + "timestamp": "2025-12-28T10:09:47.537065", + "elapsed_time": 4301.822155952454, + "loss": 0.1229, + "grad_norm": 0.11963897943496704, + "learning_rate": 0.00014540880503144653, + "epoch": 0.278125 + }, + { + "step": 1046, + "timestamp": "2025-12-28T10:09:53.307725", + "elapsed_time": 4307.5928156375885, + "loss": 0.2356, + "grad_norm": 0.18368114531040192, + "learning_rate": 0.00014528301886792451, + "epoch": 0.27875 + }, + { + "step": 1047, + "timestamp": "2025-12-28T10:09:59.635756", + "elapsed_time": 4313.920850515366, + "loss": 0.2149, + "grad_norm": 0.2329953908920288, + "learning_rate": 0.00014515723270440252, + "epoch": 0.279375 + }, + { + "step": 1048, + "timestamp": "2025-12-28T10:10:04.571410", + "elapsed_time": 4318.856500864029, + "loss": 0.2881, + "grad_norm": 0.24254077672958374, + "learning_rate": 0.0001450314465408805, + "epoch": 0.28 + }, + { + "step": 1049, + "timestamp": "2025-12-28T10:10:09.018520", + "elapsed_time": 4323.303614139557, + "loss": 0.3995, + "grad_norm": 0.20608584582805634, + "learning_rate": 0.00014490566037735848, + "epoch": 0.280625 + }, + { + "step": 1050, + "timestamp": "2025-12-28T10:10:19.733959", + "elapsed_time": 4334.019049882889, + "loss": 0.1759, + "grad_norm": 0.12463296949863434, + "learning_rate": 0.00014477987421383648, + "epoch": 0.28125 + }, + { + "step": 1051, + "timestamp": "2025-12-28T10:10:27.504878", + "elapsed_time": 4341.789968252182, + "loss": 0.1602, + "grad_norm": 0.13327348232269287, + "learning_rate": 0.00014465408805031446, + "epoch": 0.281875 + }, + { + "step": 1052, + "timestamp": "2025-12-28T10:10:40.425643", + "elapsed_time": 4354.710733413696, + "loss": 0.1314, + "grad_norm": 0.09431233257055283, + "learning_rate": 0.00014452830188679244, + "epoch": 0.2825 + }, + { + "step": 1053, + "timestamp": "2025-12-28T10:10:58.475011", + "elapsed_time": 4372.760101318359, + "loss": 0.1314, + "grad_norm": 0.08956651389598846, + "learning_rate": 0.00014440251572327045, + "epoch": 0.283125 + }, + { + "step": 1054, + "timestamp": "2025-12-28T10:11:05.726827", + "elapsed_time": 4380.011917591095, + "loss": 0.2106, + "grad_norm": 0.14600154757499695, + "learning_rate": 0.00014427672955974843, + "epoch": 0.28375 + }, + { + "step": 1055, + "timestamp": "2025-12-28T10:11:19.021552", + "elapsed_time": 4393.30664229393, + "loss": 0.176, + "grad_norm": 0.1056523472070694, + "learning_rate": 0.0001441509433962264, + "epoch": 0.284375 + }, + { + "step": 1056, + "timestamp": "2025-12-28T10:11:29.958671", + "elapsed_time": 4404.243761062622, + "loss": 0.1629, + "grad_norm": 0.10517208278179169, + "learning_rate": 0.00014402515723270439, + "epoch": 0.285 + }, + { + "step": 1057, + "timestamp": "2025-12-28T10:11:36.541158", + "elapsed_time": 4410.826248168945, + "loss": 0.3887, + "grad_norm": 0.16218851506710052, + "learning_rate": 0.0001438993710691824, + "epoch": 0.285625 + }, + { + "step": 1058, + "timestamp": "2025-12-28T10:11:45.563207", + "elapsed_time": 4419.848297119141, + "loss": 0.1683, + "grad_norm": 0.1224561482667923, + "learning_rate": 0.00014377358490566037, + "epoch": 0.28625 + }, + { + "step": 1059, + "timestamp": "2025-12-28T10:11:58.056565", + "elapsed_time": 4432.34165596962, + "loss": 0.125, + "grad_norm": 0.08662772178649902, + "learning_rate": 0.00014364779874213835, + "epoch": 0.286875 + }, + { + "step": 1060, + "timestamp": "2025-12-28T10:12:05.305261", + "elapsed_time": 4439.590351343155, + "loss": 0.2103, + "grad_norm": 0.14773836731910706, + "learning_rate": 0.00014352201257861636, + "epoch": 0.2875 + }, + { + "step": 1061, + "timestamp": "2025-12-28T10:12:17.136857", + "elapsed_time": 4451.4219472408295, + "loss": 0.2254, + "grad_norm": 0.128709077835083, + "learning_rate": 0.00014339622641509434, + "epoch": 0.288125 + }, + { + "step": 1062, + "timestamp": "2025-12-28T10:12:26.358416", + "elapsed_time": 4460.643507003784, + "loss": 0.1654, + "grad_norm": 0.12667128443717957, + "learning_rate": 0.00014327044025157231, + "epoch": 0.28875 + }, + { + "step": 1063, + "timestamp": "2025-12-28T10:12:35.697244", + "elapsed_time": 4469.982335090637, + "loss": 0.1426, + "grad_norm": 0.10940206795930862, + "learning_rate": 0.00014314465408805032, + "epoch": 0.289375 + }, + { + "step": 1064, + "timestamp": "2025-12-28T10:12:41.676751", + "elapsed_time": 4475.961841106415, + "loss": 0.1529, + "grad_norm": 0.1327073872089386, + "learning_rate": 0.0001430188679245283, + "epoch": 0.29 + }, + { + "step": 1065, + "timestamp": "2025-12-28T10:12:54.040253", + "elapsed_time": 4488.325343132019, + "loss": 0.4812, + "grad_norm": 0.15044333040714264, + "learning_rate": 0.00014289308176100628, + "epoch": 0.290625 + }, + { + "step": 1066, + "timestamp": "2025-12-28T10:13:01.898646", + "elapsed_time": 4496.183736562729, + "loss": 0.19, + "grad_norm": 0.12461165338754654, + "learning_rate": 0.00014276729559748429, + "epoch": 0.29125 + }, + { + "step": 1067, + "timestamp": "2025-12-28T10:13:11.281195", + "elapsed_time": 4505.566284894943, + "loss": 0.375, + "grad_norm": 0.2037774622440338, + "learning_rate": 0.00014264150943396226, + "epoch": 0.291875 + }, + { + "step": 1068, + "timestamp": "2025-12-28T10:13:20.374473", + "elapsed_time": 4514.659563064575, + "loss": 0.1759, + "grad_norm": 0.11888349056243896, + "learning_rate": 0.00014251572327044024, + "epoch": 0.2925 + }, + { + "step": 1069, + "timestamp": "2025-12-28T10:13:33.299520", + "elapsed_time": 4527.58461022377, + "loss": 0.1318, + "grad_norm": 0.09523271024227142, + "learning_rate": 0.00014238993710691825, + "epoch": 0.293125 + }, + { + "step": 1070, + "timestamp": "2025-12-28T10:13:41.047563", + "elapsed_time": 4535.332653284073, + "loss": 0.2335, + "grad_norm": 0.13759227097034454, + "learning_rate": 0.00014226415094339623, + "epoch": 0.29375 + }, + { + "step": 1071, + "timestamp": "2025-12-28T10:13:56.504224", + "elapsed_time": 4550.789314746857, + "loss": 0.1236, + "grad_norm": 0.08639674633741379, + "learning_rate": 0.0001421383647798742, + "epoch": 0.294375 + }, + { + "step": 1072, + "timestamp": "2025-12-28T10:14:06.307720", + "elapsed_time": 4560.59281039238, + "loss": 0.1721, + "grad_norm": 0.1233757734298706, + "learning_rate": 0.0001420125786163522, + "epoch": 0.295 + }, + { + "step": 1073, + "timestamp": "2025-12-28T10:14:11.665841", + "elapsed_time": 4565.950931310654, + "loss": 0.2988, + "grad_norm": 0.1682165414094925, + "learning_rate": 0.0001418867924528302, + "epoch": 0.295625 + }, + { + "step": 1074, + "timestamp": "2025-12-28T10:14:21.737669", + "elapsed_time": 4576.022758960724, + "loss": 0.1494, + "grad_norm": 0.10447243601083755, + "learning_rate": 0.00014176100628930817, + "epoch": 0.29625 + }, + { + "step": 1075, + "timestamp": "2025-12-28T10:14:40.552859", + "elapsed_time": 4594.837973356247, + "loss": 0.1524, + "grad_norm": 0.0825522392988205, + "learning_rate": 0.00014163522012578615, + "epoch": 0.296875 + }, + { + "step": 1076, + "timestamp": "2025-12-28T10:14:49.284505", + "elapsed_time": 4603.569595575333, + "loss": 0.2119, + "grad_norm": 0.12849776446819305, + "learning_rate": 0.00014150943396226416, + "epoch": 0.2975 + }, + { + "step": 1077, + "timestamp": "2025-12-28T10:15:02.082705", + "elapsed_time": 4616.367795705795, + "loss": 0.1306, + "grad_norm": 0.09817694872617722, + "learning_rate": 0.00014138364779874214, + "epoch": 0.298125 + }, + { + "step": 1078, + "timestamp": "2025-12-28T10:15:12.968494", + "elapsed_time": 4627.253584623337, + "loss": 0.125, + "grad_norm": 0.10283590853214264, + "learning_rate": 0.00014125786163522012, + "epoch": 0.29875 + }, + { + "step": 1079, + "timestamp": "2025-12-28T10:15:20.706362", + "elapsed_time": 4634.991452932358, + "loss": 0.3718, + "grad_norm": 0.16993916034698486, + "learning_rate": 0.00014113207547169812, + "epoch": 0.299375 + }, + { + "step": 1080, + "timestamp": "2025-12-28T10:15:24.915200", + "elapsed_time": 4639.200289726257, + "loss": 0.1865, + "grad_norm": 0.16090618073940277, + "learning_rate": 0.0001410062893081761, + "epoch": 0.3 + }, + { + "step": 1081, + "timestamp": "2025-12-28T10:15:33.659663", + "elapsed_time": 4647.944753646851, + "loss": 0.1715, + "grad_norm": 0.1435355693101883, + "learning_rate": 0.00014088050314465408, + "epoch": 0.300625 + }, + { + "step": 1082, + "timestamp": "2025-12-28T10:15:50.278302", + "elapsed_time": 4664.563392162323, + "loss": 0.1291, + "grad_norm": 0.08483153581619263, + "learning_rate": 0.00014075471698113209, + "epoch": 0.30125 + }, + { + "step": 1083, + "timestamp": "2025-12-28T10:15:56.389680", + "elapsed_time": 4670.674770593643, + "loss": 0.2924, + "grad_norm": 0.17434731125831604, + "learning_rate": 0.00014062893081761007, + "epoch": 0.301875 + }, + { + "step": 1084, + "timestamp": "2025-12-28T10:16:06.200612", + "elapsed_time": 4680.48570227623, + "loss": 0.1588, + "grad_norm": 0.10816198587417603, + "learning_rate": 0.00014050314465408804, + "epoch": 0.3025 + }, + { + "step": 1085, + "timestamp": "2025-12-28T10:16:12.114394", + "elapsed_time": 4686.399484395981, + "loss": 0.2117, + "grad_norm": 0.15836821496486664, + "learning_rate": 0.00014037735849056602, + "epoch": 0.303125 + }, + { + "step": 1086, + "timestamp": "2025-12-28T10:16:20.183071", + "elapsed_time": 4694.468160867691, + "loss": 0.2609, + "grad_norm": 0.16809400916099548, + "learning_rate": 0.00014025157232704403, + "epoch": 0.30375 + }, + { + "step": 1087, + "timestamp": "2025-12-28T10:16:28.614605", + "elapsed_time": 4702.8996958732605, + "loss": 0.497, + "grad_norm": 0.17253176867961884, + "learning_rate": 0.000140125786163522, + "epoch": 0.304375 + }, + { + "step": 1088, + "timestamp": "2025-12-28T10:16:34.729314", + "elapsed_time": 4709.014405012131, + "loss": 0.2396, + "grad_norm": 0.1900101751089096, + "learning_rate": 0.00014, + "epoch": 0.305 + }, + { + "step": 1089, + "timestamp": "2025-12-28T10:16:47.236814", + "elapsed_time": 4721.521904230118, + "loss": 0.2017, + "grad_norm": 0.1198599562048912, + "learning_rate": 0.000139874213836478, + "epoch": 0.305625 + }, + { + "step": 1090, + "timestamp": "2025-12-28T10:16:56.587644", + "elapsed_time": 4730.872734546661, + "loss": 0.1482, + "grad_norm": 0.10646126419305801, + "learning_rate": 0.00013974842767295597, + "epoch": 0.30625 + }, + { + "step": 1091, + "timestamp": "2025-12-28T10:17:09.729699", + "elapsed_time": 4744.014789104462, + "loss": 0.1493, + "grad_norm": 0.1059768870472908, + "learning_rate": 0.00013962264150943395, + "epoch": 0.306875 + }, + { + "step": 1092, + "timestamp": "2025-12-28T10:17:15.846589", + "elapsed_time": 4750.131683349609, + "loss": 0.2455, + "grad_norm": 0.15631641447544098, + "learning_rate": 0.00013949685534591196, + "epoch": 0.3075 + }, + { + "step": 1093, + "timestamp": "2025-12-28T10:17:26.226547", + "elapsed_time": 4760.511637687683, + "loss": 0.158, + "grad_norm": 0.11631731688976288, + "learning_rate": 0.00013937106918238994, + "epoch": 0.308125 + }, + { + "step": 1094, + "timestamp": "2025-12-28T10:17:35.012584", + "elapsed_time": 4769.297674417496, + "loss": 0.1526, + "grad_norm": 0.1259499490261078, + "learning_rate": 0.00013924528301886792, + "epoch": 0.30875 + }, + { + "step": 1095, + "timestamp": "2025-12-28T10:17:45.261282", + "elapsed_time": 4779.546371936798, + "loss": 0.241, + "grad_norm": 0.15224315226078033, + "learning_rate": 0.00013911949685534592, + "epoch": 0.309375 + }, + { + "step": 1096, + "timestamp": "2025-12-28T10:17:57.287033", + "elapsed_time": 4791.572123765945, + "loss": 0.139, + "grad_norm": 0.1223248764872551, + "learning_rate": 0.0001389937106918239, + "epoch": 0.31 + }, + { + "step": 1097, + "timestamp": "2025-12-28T10:18:09.370759", + "elapsed_time": 4803.655849456787, + "loss": 0.2395, + "grad_norm": 0.13052494823932648, + "learning_rate": 0.00013886792452830188, + "epoch": 0.310625 + }, + { + "step": 1098, + "timestamp": "2025-12-28T10:18:22.367902", + "elapsed_time": 4816.652992963791, + "loss": 0.1401, + "grad_norm": 0.11577942967414856, + "learning_rate": 0.00013874213836477986, + "epoch": 0.31125 + }, + { + "step": 1099, + "timestamp": "2025-12-28T10:18:37.241574", + "elapsed_time": 4831.526664972305, + "loss": 0.139, + "grad_norm": 0.09500917792320251, + "learning_rate": 0.00013861635220125787, + "epoch": 0.311875 + }, + { + "step": 1100, + "timestamp": "2025-12-28T10:18:45.367638", + "elapsed_time": 4839.6527326107025, + "loss": 0.1616, + "grad_norm": 0.12062890082597733, + "learning_rate": 0.00013849056603773585, + "epoch": 0.3125 + }, + { + "step": 1101, + "timestamp": "2025-12-28T10:18:50.501032", + "elapsed_time": 4844.786121845245, + "loss": 0.5666, + "grad_norm": 0.21491988003253937, + "learning_rate": 0.00013836477987421382, + "epoch": 0.313125 + }, + { + "step": 1102, + "timestamp": "2025-12-28T10:18:57.091853", + "elapsed_time": 4851.37694311142, + "loss": 0.1821, + "grad_norm": 0.14800046384334564, + "learning_rate": 0.00013823899371069183, + "epoch": 0.31375 + }, + { + "step": 1103, + "timestamp": "2025-12-28T10:19:06.683644", + "elapsed_time": 4860.968734264374, + "loss": 0.1477, + "grad_norm": 0.11978691071271896, + "learning_rate": 0.0001381132075471698, + "epoch": 0.314375 + }, + { + "step": 1104, + "timestamp": "2025-12-28T10:19:21.833042", + "elapsed_time": 4876.118132352829, + "loss": 0.1662, + "grad_norm": 0.09653400629758835, + "learning_rate": 0.0001379874213836478, + "epoch": 0.315 + }, + { + "step": 1105, + "timestamp": "2025-12-28T10:19:26.190650", + "elapsed_time": 4880.475740432739, + "loss": 0.2282, + "grad_norm": 0.20169983804225922, + "learning_rate": 0.0001378616352201258, + "epoch": 0.315625 + }, + { + "step": 1106, + "timestamp": "2025-12-28T10:19:38.616678", + "elapsed_time": 4892.901768684387, + "loss": 0.1446, + "grad_norm": 0.10674963146448135, + "learning_rate": 0.00013773584905660377, + "epoch": 0.31625 + }, + { + "step": 1107, + "timestamp": "2025-12-28T10:19:59.515344", + "elapsed_time": 4913.800434112549, + "loss": 0.1037, + "grad_norm": 0.06956765800714493, + "learning_rate": 0.00013761006289308175, + "epoch": 0.316875 + }, + { + "step": 1108, + "timestamp": "2025-12-28T10:20:06.485473", + "elapsed_time": 4920.77056312561, + "loss": 0.182, + "grad_norm": 0.23156726360321045, + "learning_rate": 0.00013748427672955976, + "epoch": 0.3175 + }, + { + "step": 1109, + "timestamp": "2025-12-28T10:20:23.683657", + "elapsed_time": 4937.968747854233, + "loss": 0.1634, + "grad_norm": 0.1053549200296402, + "learning_rate": 0.00013735849056603774, + "epoch": 0.318125 + }, + { + "step": 1110, + "timestamp": "2025-12-28T10:20:30.071074", + "elapsed_time": 4944.356164216995, + "loss": 0.1761, + "grad_norm": 0.1309899240732193, + "learning_rate": 0.00013723270440251572, + "epoch": 0.31875 + }, + { + "step": 1111, + "timestamp": "2025-12-28T10:20:48.259766", + "elapsed_time": 4962.5448570251465, + "loss": 0.0991, + "grad_norm": 0.07965648174285889, + "learning_rate": 0.0001371069182389937, + "epoch": 0.319375 + }, + { + "step": 1112, + "timestamp": "2025-12-28T10:20:54.336910", + "elapsed_time": 4968.622000217438, + "loss": 0.1765, + "grad_norm": 0.16077613830566406, + "learning_rate": 0.0001369811320754717, + "epoch": 0.32 + }, + { + "step": 1113, + "timestamp": "2025-12-28T10:21:01.708820", + "elapsed_time": 4975.993910551071, + "loss": 0.2535, + "grad_norm": 0.19687579572200775, + "learning_rate": 0.00013685534591194968, + "epoch": 0.320625 + }, + { + "step": 1114, + "timestamp": "2025-12-28T10:21:08.888514", + "elapsed_time": 4983.173604011536, + "loss": 0.1733, + "grad_norm": 0.1458161175251007, + "learning_rate": 0.00013672955974842766, + "epoch": 0.32125 + }, + { + "step": 1115, + "timestamp": "2025-12-28T10:21:21.027050", + "elapsed_time": 4995.312140703201, + "loss": 0.1462, + "grad_norm": 0.10044913738965988, + "learning_rate": 0.00013660377358490567, + "epoch": 0.321875 + }, + { + "step": 1116, + "timestamp": "2025-12-28T10:21:32.969714", + "elapsed_time": 5007.2548043727875, + "loss": 0.1104, + "grad_norm": 0.08253983408212662, + "learning_rate": 0.00013647798742138365, + "epoch": 0.3225 + }, + { + "step": 1117, + "timestamp": "2025-12-28T10:21:38.181094", + "elapsed_time": 5012.466184139252, + "loss": 0.2278, + "grad_norm": 0.1712544858455658, + "learning_rate": 0.00013635220125786162, + "epoch": 0.323125 + }, + { + "step": 1118, + "timestamp": "2025-12-28T10:21:48.132268", + "elapsed_time": 5022.4173583984375, + "loss": 0.1665, + "grad_norm": 0.1129027009010315, + "learning_rate": 0.00013622641509433963, + "epoch": 0.32375 + }, + { + "step": 1119, + "timestamp": "2025-12-28T10:21:58.890656", + "elapsed_time": 5033.175746202469, + "loss": 0.1526, + "grad_norm": 0.12182003259658813, + "learning_rate": 0.0001361006289308176, + "epoch": 0.324375 + }, + { + "step": 1120, + "timestamp": "2025-12-28T10:22:16.895465", + "elapsed_time": 5051.1805555820465, + "loss": 0.1335, + "grad_norm": 0.0835099071264267, + "learning_rate": 0.0001359748427672956, + "epoch": 0.325 + }, + { + "step": 1121, + "timestamp": "2025-12-28T10:22:23.340365", + "elapsed_time": 5057.62545633316, + "loss": 0.2159, + "grad_norm": 0.15998020768165588, + "learning_rate": 0.0001358490566037736, + "epoch": 0.325625 + }, + { + "step": 1122, + "timestamp": "2025-12-28T10:22:33.758550", + "elapsed_time": 5068.04364490509, + "loss": 0.1589, + "grad_norm": 0.115847647190094, + "learning_rate": 0.00013572327044025157, + "epoch": 0.32625 + }, + { + "step": 1123, + "timestamp": "2025-12-28T10:22:44.939902", + "elapsed_time": 5079.224991798401, + "loss": 0.1524, + "grad_norm": 0.10123994946479797, + "learning_rate": 0.00013559748427672955, + "epoch": 0.326875 + }, + { + "step": 1124, + "timestamp": "2025-12-28T10:22:50.271896", + "elapsed_time": 5084.556986808777, + "loss": 0.2074, + "grad_norm": 0.1583547741174698, + "learning_rate": 0.00013547169811320753, + "epoch": 0.3275 + }, + { + "step": 1125, + "timestamp": "2025-12-28T10:22:58.007020", + "elapsed_time": 5092.292110919952, + "loss": 0.2013, + "grad_norm": 0.13498209416866302, + "learning_rate": 0.00013534591194968554, + "epoch": 0.328125 + }, + { + "step": 1126, + "timestamp": "2025-12-28T10:23:06.472685", + "elapsed_time": 5100.75777554512, + "loss": 0.1787, + "grad_norm": 0.12321489304304123, + "learning_rate": 0.00013522012578616352, + "epoch": 0.32875 + }, + { + "step": 1127, + "timestamp": "2025-12-28T10:23:16.415604", + "elapsed_time": 5110.700694322586, + "loss": 0.2123, + "grad_norm": 0.13657422363758087, + "learning_rate": 0.0001350943396226415, + "epoch": 0.329375 + }, + { + "step": 1128, + "timestamp": "2025-12-28T10:23:21.550030", + "elapsed_time": 5115.835124254227, + "loss": 0.3319, + "grad_norm": 0.19260597229003906, + "learning_rate": 0.0001349685534591195, + "epoch": 0.33 + }, + { + "step": 1129, + "timestamp": "2025-12-28T10:23:37.207751", + "elapsed_time": 5131.492841243744, + "loss": 0.1309, + "grad_norm": 0.08664542436599731, + "learning_rate": 0.00013484276729559748, + "epoch": 0.330625 + }, + { + "step": 1130, + "timestamp": "2025-12-28T10:23:43.286231", + "elapsed_time": 5137.57132101059, + "loss": 0.2417, + "grad_norm": 0.21089886128902435, + "learning_rate": 0.00013471698113207546, + "epoch": 0.33125 + }, + { + "step": 1131, + "timestamp": "2025-12-28T10:23:51.484950", + "elapsed_time": 5145.7700407505035, + "loss": 0.1542, + "grad_norm": 0.12316355109214783, + "learning_rate": 0.00013459119496855347, + "epoch": 0.331875 + }, + { + "step": 1132, + "timestamp": "2025-12-28T10:23:58.046371", + "elapsed_time": 5152.331461429596, + "loss": 0.1719, + "grad_norm": 0.1278318166732788, + "learning_rate": 0.00013446540880503145, + "epoch": 0.3325 + }, + { + "step": 1133, + "timestamp": "2025-12-28T10:24:04.389165", + "elapsed_time": 5158.674255847931, + "loss": 0.2467, + "grad_norm": 0.16408215463161469, + "learning_rate": 0.00013433962264150943, + "epoch": 0.333125 + }, + { + "step": 1134, + "timestamp": "2025-12-28T10:24:14.662588", + "elapsed_time": 5168.947679042816, + "loss": 0.136, + "grad_norm": 0.1054520383477211, + "learning_rate": 0.00013421383647798743, + "epoch": 0.33375 + }, + { + "step": 1135, + "timestamp": "2025-12-28T10:24:24.914867", + "elapsed_time": 5179.199957847595, + "loss": 0.1625, + "grad_norm": 0.11112856864929199, + "learning_rate": 0.0001340880503144654, + "epoch": 0.334375 + }, + { + "step": 1136, + "timestamp": "2025-12-28T10:24:33.704996", + "elapsed_time": 5187.990086078644, + "loss": 0.2022, + "grad_norm": 0.14535382390022278, + "learning_rate": 0.0001339622641509434, + "epoch": 0.335 + }, + { + "step": 1137, + "timestamp": "2025-12-28T10:24:54.598309", + "elapsed_time": 5208.883399009705, + "loss": 0.1032, + "grad_norm": 0.07190191745758057, + "learning_rate": 0.00013383647798742137, + "epoch": 0.335625 + }, + { + "step": 1138, + "timestamp": "2025-12-28T10:24:59.727226", + "elapsed_time": 5214.0123155117035, + "loss": 0.2176, + "grad_norm": 0.1817207783460617, + "learning_rate": 0.00013371069182389938, + "epoch": 0.33625 + }, + { + "step": 1139, + "timestamp": "2025-12-28T10:25:05.999597", + "elapsed_time": 5220.284687042236, + "loss": 0.1639, + "grad_norm": 0.1274854838848114, + "learning_rate": 0.00013358490566037735, + "epoch": 0.336875 + }, + { + "step": 1140, + "timestamp": "2025-12-28T10:25:13.369606", + "elapsed_time": 5227.654696941376, + "loss": 0.2805, + "grad_norm": 0.15669360756874084, + "learning_rate": 0.00013345911949685533, + "epoch": 0.3375 + }, + { + "step": 1141, + "timestamp": "2025-12-28T10:25:26.173526", + "elapsed_time": 5240.458616495132, + "loss": 0.2244, + "grad_norm": 0.10920794308185577, + "learning_rate": 0.00013333333333333334, + "epoch": 0.338125 + }, + { + "step": 1142, + "timestamp": "2025-12-28T10:25:34.387144", + "elapsed_time": 5248.672234773636, + "loss": 0.1525, + "grad_norm": 0.12012416124343872, + "learning_rate": 0.00013320754716981132, + "epoch": 0.33875 + }, + { + "step": 1143, + "timestamp": "2025-12-28T10:25:40.867120", + "elapsed_time": 5255.15221118927, + "loss": 0.2315, + "grad_norm": 0.17467765510082245, + "learning_rate": 0.0001330817610062893, + "epoch": 0.339375 + }, + { + "step": 1144, + "timestamp": "2025-12-28T10:25:49.536049", + "elapsed_time": 5263.821138858795, + "loss": 0.1585, + "grad_norm": 0.12994243204593658, + "learning_rate": 0.0001329559748427673, + "epoch": 0.34 + }, + { + "step": 1145, + "timestamp": "2025-12-28T10:25:56.165948", + "elapsed_time": 5270.451038122177, + "loss": 0.3481, + "grad_norm": 0.17362722754478455, + "learning_rate": 0.00013283018867924528, + "epoch": 0.340625 + }, + { + "step": 1146, + "timestamp": "2025-12-28T10:26:07.059344", + "elapsed_time": 5281.344434261322, + "loss": 0.1996, + "grad_norm": 0.11975681781768799, + "learning_rate": 0.00013270440251572326, + "epoch": 0.34125 + }, + { + "step": 1147, + "timestamp": "2025-12-28T10:26:27.952012", + "elapsed_time": 5302.237102270126, + "loss": 0.1627, + "grad_norm": 0.08679784089326859, + "learning_rate": 0.00013257861635220127, + "epoch": 0.341875 + }, + { + "step": 1148, + "timestamp": "2025-12-28T10:26:36.021702", + "elapsed_time": 5310.306792974472, + "loss": 0.1613, + "grad_norm": 0.14012403786182404, + "learning_rate": 0.00013245283018867925, + "epoch": 0.3425 + }, + { + "step": 1149, + "timestamp": "2025-12-28T10:26:53.963469", + "elapsed_time": 5328.248558998108, + "loss": 0.1234, + "grad_norm": 0.08561190962791443, + "learning_rate": 0.00013232704402515723, + "epoch": 0.343125 + }, + { + "step": 1150, + "timestamp": "2025-12-28T10:27:05.423036", + "elapsed_time": 5339.708126306534, + "loss": 0.1275, + "grad_norm": 0.10564465820789337, + "learning_rate": 0.00013220125786163523, + "epoch": 0.34375 + }, + { + "step": 1151, + "timestamp": "2025-12-28T10:27:13.370347", + "elapsed_time": 5347.655437231064, + "loss": 0.171, + "grad_norm": 0.13151898980140686, + "learning_rate": 0.0001320754716981132, + "epoch": 0.344375 + }, + { + "step": 1152, + "timestamp": "2025-12-28T10:27:26.513582", + "elapsed_time": 5360.798672437668, + "loss": 0.1066, + "grad_norm": 0.08146792650222778, + "learning_rate": 0.0001319496855345912, + "epoch": 0.345 + }, + { + "step": 1153, + "timestamp": "2025-12-28T10:27:37.983479", + "elapsed_time": 5372.268569231033, + "loss": 0.14, + "grad_norm": 0.11052241921424866, + "learning_rate": 0.00013182389937106917, + "epoch": 0.345625 + }, + { + "step": 1154, + "timestamp": "2025-12-28T10:27:49.248781", + "elapsed_time": 5383.533871412277, + "loss": 0.2192, + "grad_norm": 0.11441831290721893, + "learning_rate": 0.00013169811320754718, + "epoch": 0.34625 + }, + { + "step": 1155, + "timestamp": "2025-12-28T10:27:58.548010", + "elapsed_time": 5392.833100557327, + "loss": 0.1996, + "grad_norm": 0.13789567351341248, + "learning_rate": 0.00013157232704402516, + "epoch": 0.346875 + }, + { + "step": 1156, + "timestamp": "2025-12-28T10:28:05.648486", + "elapsed_time": 5399.933576583862, + "loss": 0.1288, + "grad_norm": 0.12561623752117157, + "learning_rate": 0.00013144654088050313, + "epoch": 0.3475 + }, + { + "step": 1157, + "timestamp": "2025-12-28T10:28:17.278511", + "elapsed_time": 5411.563601255417, + "loss": 0.1758, + "grad_norm": 0.17324663698673248, + "learning_rate": 0.00013132075471698114, + "epoch": 0.348125 + }, + { + "step": 1158, + "timestamp": "2025-12-28T10:28:29.227502", + "elapsed_time": 5423.512593269348, + "loss": 0.2519, + "grad_norm": 0.13509972393512726, + "learning_rate": 0.00013119496855345912, + "epoch": 0.34875 + }, + { + "step": 1159, + "timestamp": "2025-12-28T10:28:41.252121", + "elapsed_time": 5435.537210941315, + "loss": 0.1828, + "grad_norm": 0.11435085535049438, + "learning_rate": 0.0001310691823899371, + "epoch": 0.349375 + }, + { + "step": 1160, + "timestamp": "2025-12-28T10:28:49.279299", + "elapsed_time": 5443.564389467239, + "loss": 0.2038, + "grad_norm": 0.13359728455543518, + "learning_rate": 0.0001309433962264151, + "epoch": 0.35 + }, + { + "step": 1161, + "timestamp": "2025-12-28T10:29:01.419851", + "elapsed_time": 5455.704941749573, + "loss": 0.1188, + "grad_norm": 0.09035109728574753, + "learning_rate": 0.00013081761006289308, + "epoch": 0.350625 + }, + { + "step": 1162, + "timestamp": "2025-12-28T10:29:07.506551", + "elapsed_time": 5461.791641712189, + "loss": 0.2134, + "grad_norm": 0.16012340784072876, + "learning_rate": 0.00013069182389937106, + "epoch": 0.35125 + }, + { + "step": 1163, + "timestamp": "2025-12-28T10:29:19.642475", + "elapsed_time": 5473.927566051483, + "loss": 0.1431, + "grad_norm": 0.10316343605518341, + "learning_rate": 0.00013056603773584907, + "epoch": 0.351875 + }, + { + "step": 1164, + "timestamp": "2025-12-28T10:29:26.703778", + "elapsed_time": 5480.988868236542, + "loss": 0.1874, + "grad_norm": 0.13070017099380493, + "learning_rate": 0.00013044025157232705, + "epoch": 0.3525 + }, + { + "step": 1165, + "timestamp": "2025-12-28T10:29:42.102895", + "elapsed_time": 5496.387985467911, + "loss": 0.1539, + "grad_norm": 0.09859879314899445, + "learning_rate": 0.00013031446540880503, + "epoch": 0.353125 + }, + { + "step": 1166, + "timestamp": "2025-12-28T10:29:58.650069", + "elapsed_time": 5512.935160160065, + "loss": 0.1449, + "grad_norm": 0.09668051451444626, + "learning_rate": 0.000130188679245283, + "epoch": 0.35375 + }, + { + "step": 1167, + "timestamp": "2025-12-28T10:30:17.397139", + "elapsed_time": 5531.682229757309, + "loss": 0.1166, + "grad_norm": 0.07741539925336838, + "learning_rate": 0.000130062893081761, + "epoch": 0.354375 + }, + { + "step": 1168, + "timestamp": "2025-12-28T10:30:25.144695", + "elapsed_time": 5539.429785490036, + "loss": 0.1554, + "grad_norm": 0.11451406031847, + "learning_rate": 0.000129937106918239, + "epoch": 0.355 + }, + { + "step": 1169, + "timestamp": "2025-12-28T10:30:37.830348", + "elapsed_time": 5552.115438699722, + "loss": 0.1541, + "grad_norm": 0.1345360428094864, + "learning_rate": 0.00012981132075471697, + "epoch": 0.355625 + }, + { + "step": 1170, + "timestamp": "2025-12-28T10:30:45.645731", + "elapsed_time": 5559.930821657181, + "loss": 0.2273, + "grad_norm": 0.15822488069534302, + "learning_rate": 0.00012968553459119498, + "epoch": 0.35625 + }, + { + "step": 1171, + "timestamp": "2025-12-28T10:30:53.952954", + "elapsed_time": 5568.238044023514, + "loss": 0.1536, + "grad_norm": 0.2116500437259674, + "learning_rate": 0.00012955974842767296, + "epoch": 0.356875 + }, + { + "step": 1172, + "timestamp": "2025-12-28T10:31:03.335531", + "elapsed_time": 5577.620621681213, + "loss": 0.1236, + "grad_norm": 0.11660629510879517, + "learning_rate": 0.00012943396226415094, + "epoch": 0.3575 + }, + { + "step": 1173, + "timestamp": "2025-12-28T10:31:10.218223", + "elapsed_time": 5584.503312826157, + "loss": 0.2065, + "grad_norm": 0.135480597615242, + "learning_rate": 0.00012930817610062894, + "epoch": 0.358125 + }, + { + "step": 1174, + "timestamp": "2025-12-28T10:31:18.995819", + "elapsed_time": 5593.2809092998505, + "loss": 0.1724, + "grad_norm": 0.23346908390522003, + "learning_rate": 0.00012918238993710692, + "epoch": 0.35875 + }, + { + "step": 1175, + "timestamp": "2025-12-28T10:31:26.934005", + "elapsed_time": 5601.2190997600555, + "loss": 0.493, + "grad_norm": 0.18534159660339355, + "learning_rate": 0.0001290566037735849, + "epoch": 0.359375 + }, + { + "step": 1176, + "timestamp": "2025-12-28T10:31:35.566298", + "elapsed_time": 5609.851388692856, + "loss": 0.18, + "grad_norm": 0.1343761831521988, + "learning_rate": 0.0001289308176100629, + "epoch": 0.36 + }, + { + "step": 1177, + "timestamp": "2025-12-28T10:31:43.785332", + "elapsed_time": 5618.070422172546, + "loss": 0.1392, + "grad_norm": 0.12244327366352081, + "learning_rate": 0.00012880503144654088, + "epoch": 0.360625 + }, + { + "step": 1178, + "timestamp": "2025-12-28T10:31:49.452840", + "elapsed_time": 5623.737930774689, + "loss": 0.2027, + "grad_norm": 0.15840069949626923, + "learning_rate": 0.00012867924528301886, + "epoch": 0.36125 + }, + { + "step": 1179, + "timestamp": "2025-12-28T10:31:58.279487", + "elapsed_time": 5632.564577817917, + "loss": 0.24, + "grad_norm": 0.17348845303058624, + "learning_rate": 0.00012855345911949684, + "epoch": 0.361875 + }, + { + "step": 1180, + "timestamp": "2025-12-28T10:32:06.750060", + "elapsed_time": 5641.035150527954, + "loss": 0.1493, + "grad_norm": 0.11318610608577728, + "learning_rate": 0.00012842767295597485, + "epoch": 0.3625 + }, + { + "step": 1181, + "timestamp": "2025-12-28T10:32:18.316743", + "elapsed_time": 5652.601833343506, + "loss": 0.1519, + "grad_norm": 0.1193518340587616, + "learning_rate": 0.00012830188679245283, + "epoch": 0.363125 + }, + { + "step": 1182, + "timestamp": "2025-12-28T10:32:24.831408", + "elapsed_time": 5659.116497993469, + "loss": 0.1801, + "grad_norm": 0.14156877994537354, + "learning_rate": 0.0001281761006289308, + "epoch": 0.36375 + }, + { + "step": 1183, + "timestamp": "2025-12-28T10:32:31.901184", + "elapsed_time": 5666.186274528503, + "loss": 0.1667, + "grad_norm": 0.13352788984775543, + "learning_rate": 0.0001280503144654088, + "epoch": 0.364375 + }, + { + "step": 1184, + "timestamp": "2025-12-28T10:32:46.767013", + "elapsed_time": 5681.052104473114, + "loss": 0.1224, + "grad_norm": 0.09199526906013489, + "learning_rate": 0.0001279245283018868, + "epoch": 0.365 + }, + { + "step": 1185, + "timestamp": "2025-12-28T10:32:54.235718", + "elapsed_time": 5688.52080821991, + "loss": 0.1942, + "grad_norm": 0.13854575157165527, + "learning_rate": 0.00012779874213836477, + "epoch": 0.365625 + }, + { + "step": 1186, + "timestamp": "2025-12-28T10:33:07.216737", + "elapsed_time": 5701.501827001572, + "loss": 0.1412, + "grad_norm": 0.10089116543531418, + "learning_rate": 0.00012767295597484278, + "epoch": 0.36625 + }, + { + "step": 1187, + "timestamp": "2025-12-28T10:33:18.169177", + "elapsed_time": 5712.454270601273, + "loss": 0.1496, + "grad_norm": 0.11052452027797699, + "learning_rate": 0.00012754716981132076, + "epoch": 0.366875 + }, + { + "step": 1188, + "timestamp": "2025-12-28T10:33:24.179712", + "elapsed_time": 5718.464802980423, + "loss": 0.1801, + "grad_norm": 0.17774660885334015, + "learning_rate": 0.00012742138364779874, + "epoch": 0.3675 + }, + { + "step": 1189, + "timestamp": "2025-12-28T10:33:30.449798", + "elapsed_time": 5724.734888315201, + "loss": 0.1707, + "grad_norm": 0.13715173304080963, + "learning_rate": 0.00012729559748427674, + "epoch": 0.368125 + }, + { + "step": 1190, + "timestamp": "2025-12-28T10:33:40.351650", + "elapsed_time": 5734.636740446091, + "loss": 0.1585, + "grad_norm": 0.13463056087493896, + "learning_rate": 0.00012716981132075472, + "epoch": 0.36875 + }, + { + "step": 1191, + "timestamp": "2025-12-28T10:33:46.518785", + "elapsed_time": 5740.803875684738, + "loss": 0.2042, + "grad_norm": 0.14503754675388336, + "learning_rate": 0.0001270440251572327, + "epoch": 0.369375 + }, + { + "step": 1192, + "timestamp": "2025-12-28T10:33:58.192196", + "elapsed_time": 5752.477287054062, + "loss": 0.1911, + "grad_norm": 0.11271210759878159, + "learning_rate": 0.00012691823899371068, + "epoch": 0.37 + }, + { + "step": 1193, + "timestamp": "2025-12-28T10:34:05.329085", + "elapsed_time": 5759.614175319672, + "loss": 0.2337, + "grad_norm": 0.14175988733768463, + "learning_rate": 0.00012679245283018869, + "epoch": 0.370625 + }, + { + "step": 1194, + "timestamp": "2025-12-28T10:34:17.155544", + "elapsed_time": 5771.440634012222, + "loss": 0.1992, + "grad_norm": 0.10490947216749191, + "learning_rate": 0.00012666666666666666, + "epoch": 0.37125 + }, + { + "step": 1195, + "timestamp": "2025-12-28T10:34:32.490446", + "elapsed_time": 5786.77553653717, + "loss": 0.1565, + "grad_norm": 0.11939844489097595, + "learning_rate": 0.00012654088050314464, + "epoch": 0.371875 + }, + { + "step": 1196, + "timestamp": "2025-12-28T10:34:41.906197", + "elapsed_time": 5796.191288232803, + "loss": 0.1493, + "grad_norm": 0.11298428475856781, + "learning_rate": 0.00012641509433962265, + "epoch": 0.3725 + }, + { + "step": 1197, + "timestamp": "2025-12-28T10:34:49.971838", + "elapsed_time": 5804.256928920746, + "loss": 0.2001, + "grad_norm": 0.13944107294082642, + "learning_rate": 0.00012628930817610063, + "epoch": 0.373125 + }, + { + "step": 1198, + "timestamp": "2025-12-28T10:34:57.032417", + "elapsed_time": 5811.317508220673, + "loss": 0.2897, + "grad_norm": 0.1779128611087799, + "learning_rate": 0.0001261635220125786, + "epoch": 0.37375 + }, + { + "step": 1199, + "timestamp": "2025-12-28T10:35:08.870827", + "elapsed_time": 5823.1559183597565, + "loss": 0.1722, + "grad_norm": 0.11074298620223999, + "learning_rate": 0.00012603773584905661, + "epoch": 0.374375 + }, + { + "step": 1200, + "timestamp": "2025-12-28T10:35:14.846064", + "elapsed_time": 5829.131155014038, + "loss": 0.1623, + "grad_norm": 0.12961973249912262, + "learning_rate": 0.0001259119496855346, + "epoch": 0.375 + }, + { + "step": 1201, + "timestamp": "2025-12-28T10:35:25.422373", + "elapsed_time": 5839.707467794418, + "loss": 0.1785, + "grad_norm": 0.14644969999790192, + "learning_rate": 0.00012578616352201257, + "epoch": 0.375625 + }, + { + "step": 1202, + "timestamp": "2025-12-28T10:35:41.289838", + "elapsed_time": 5855.574928283691, + "loss": 0.118, + "grad_norm": 0.0863247960805893, + "learning_rate": 0.00012566037735849058, + "epoch": 0.37625 + }, + { + "step": 1203, + "timestamp": "2025-12-28T10:35:54.961911", + "elapsed_time": 5869.247001171112, + "loss": 0.1602, + "grad_norm": 0.09607759863138199, + "learning_rate": 0.00012553459119496856, + "epoch": 0.376875 + }, + { + "step": 1204, + "timestamp": "2025-12-28T10:36:02.141105", + "elapsed_time": 5876.426195144653, + "loss": 0.1269, + "grad_norm": 0.12356162816286087, + "learning_rate": 0.00012540880503144654, + "epoch": 0.3775 + }, + { + "step": 1205, + "timestamp": "2025-12-28T10:36:15.945952", + "elapsed_time": 5890.23104262352, + "loss": 0.1182, + "grad_norm": 0.09206572920084, + "learning_rate": 0.00012528301886792452, + "epoch": 0.378125 + }, + { + "step": 1206, + "timestamp": "2025-12-28T10:36:23.429110", + "elapsed_time": 5897.714200735092, + "loss": 0.2265, + "grad_norm": 0.14600634574890137, + "learning_rate": 0.00012515723270440252, + "epoch": 0.37875 + }, + { + "step": 1207, + "timestamp": "2025-12-28T10:36:32.859599", + "elapsed_time": 5907.1446895599365, + "loss": 0.1366, + "grad_norm": 0.10805106908082962, + "learning_rate": 0.0001250314465408805, + "epoch": 0.379375 + }, + { + "step": 1208, + "timestamp": "2025-12-28T10:36:41.692070", + "elapsed_time": 5915.9771609306335, + "loss": 0.1532, + "grad_norm": 0.12475503236055374, + "learning_rate": 0.00012490566037735848, + "epoch": 0.38 + }, + { + "step": 1209, + "timestamp": "2025-12-28T10:36:52.893121", + "elapsed_time": 5927.178210735321, + "loss": 0.1372, + "grad_norm": 0.10797689855098724, + "learning_rate": 0.00012477987421383649, + "epoch": 0.380625 + }, + { + "step": 1210, + "timestamp": "2025-12-28T10:37:05.316752", + "elapsed_time": 5939.601842880249, + "loss": 0.1379, + "grad_norm": 0.09704222530126572, + "learning_rate": 0.00012465408805031447, + "epoch": 0.38125 + }, + { + "step": 1211, + "timestamp": "2025-12-28T10:37:10.877159", + "elapsed_time": 5945.1622495651245, + "loss": 0.2735, + "grad_norm": 0.16764011979103088, + "learning_rate": 0.00012452830188679244, + "epoch": 0.381875 + }, + { + "step": 1212, + "timestamp": "2025-12-28T10:37:19.026917", + "elapsed_time": 5953.312007665634, + "loss": 0.2679, + "grad_norm": 0.1391536146402359, + "learning_rate": 0.00012440251572327045, + "epoch": 0.3825 + }, + { + "step": 1213, + "timestamp": "2025-12-28T10:37:28.216063", + "elapsed_time": 5962.501153230667, + "loss": 0.1346, + "grad_norm": 0.1069680005311966, + "learning_rate": 0.00012427672955974843, + "epoch": 0.383125 + }, + { + "step": 1214, + "timestamp": "2025-12-28T10:37:42.338346", + "elapsed_time": 5976.623436450958, + "loss": 0.1306, + "grad_norm": 0.0998351201415062, + "learning_rate": 0.0001241509433962264, + "epoch": 0.38375 + }, + { + "step": 1215, + "timestamp": "2025-12-28T10:37:49.904441", + "elapsed_time": 5984.189530849457, + "loss": 0.1815, + "grad_norm": 0.13117118179798126, + "learning_rate": 0.00012402515723270442, + "epoch": 0.384375 + }, + { + "step": 1216, + "timestamp": "2025-12-28T10:38:03.854769", + "elapsed_time": 5998.1398594379425, + "loss": 0.3325, + "grad_norm": 0.13191911578178406, + "learning_rate": 0.0001238993710691824, + "epoch": 0.385 + }, + { + "step": 1217, + "timestamp": "2025-12-28T10:38:08.710052", + "elapsed_time": 6002.995143175125, + "loss": 0.7795, + "grad_norm": 0.23979060351848602, + "learning_rate": 0.00012377358490566037, + "epoch": 0.385625 + }, + { + "step": 1218, + "timestamp": "2025-12-28T10:38:22.340339", + "elapsed_time": 6016.625429391861, + "loss": 0.1357, + "grad_norm": 0.0968586727976799, + "learning_rate": 0.00012364779874213835, + "epoch": 0.38625 + }, + { + "step": 1219, + "timestamp": "2025-12-28T10:38:42.480374", + "elapsed_time": 6036.7654638290405, + "loss": 0.1035, + "grad_norm": 0.08069409430027008, + "learning_rate": 0.00012352201257861636, + "epoch": 0.386875 + }, + { + "step": 1220, + "timestamp": "2025-12-28T10:38:50.725844", + "elapsed_time": 6045.010933876038, + "loss": 0.1788, + "grad_norm": 0.12745045125484467, + "learning_rate": 0.00012339622641509434, + "epoch": 0.3875 + }, + { + "step": 1221, + "timestamp": "2025-12-28T10:38:56.392405", + "elapsed_time": 6050.677495241165, + "loss": 0.2797, + "grad_norm": 0.29041117429733276, + "learning_rate": 0.00012327044025157232, + "epoch": 0.388125 + }, + { + "step": 1222, + "timestamp": "2025-12-28T10:39:05.731705", + "elapsed_time": 6060.016795396805, + "loss": 0.2435, + "grad_norm": 0.12010473012924194, + "learning_rate": 0.00012314465408805032, + "epoch": 0.38875 + }, + { + "step": 1223, + "timestamp": "2025-12-28T10:39:13.957976", + "elapsed_time": 6068.243066072464, + "loss": 0.1408, + "grad_norm": 0.10792548954486847, + "learning_rate": 0.0001230188679245283, + "epoch": 0.389375 + }, + { + "step": 1224, + "timestamp": "2025-12-28T10:39:25.907397", + "elapsed_time": 6080.192487239838, + "loss": 0.15, + "grad_norm": 0.10715028643608093, + "learning_rate": 0.00012289308176100628, + "epoch": 0.39 + }, + { + "step": 1225, + "timestamp": "2025-12-28T10:39:31.137562", + "elapsed_time": 6085.422652006149, + "loss": 0.1641, + "grad_norm": 0.13656176626682281, + "learning_rate": 0.0001227672955974843, + "epoch": 0.390625 + }, + { + "step": 1226, + "timestamp": "2025-12-28T10:39:34.990548", + "elapsed_time": 6089.275638103485, + "loss": 0.2354, + "grad_norm": 0.1691398322582245, + "learning_rate": 0.00012264150943396227, + "epoch": 0.39125 + }, + { + "step": 1227, + "timestamp": "2025-12-28T10:39:47.535666", + "elapsed_time": 6101.820756912231, + "loss": 0.1597, + "grad_norm": 0.10032006353139877, + "learning_rate": 0.00012251572327044025, + "epoch": 0.391875 + }, + { + "step": 1228, + "timestamp": "2025-12-28T10:39:54.871871", + "elapsed_time": 6109.156960964203, + "loss": 0.2194, + "grad_norm": 0.1799350529909134, + "learning_rate": 0.00012238993710691825, + "epoch": 0.3925 + }, + { + "step": 1229, + "timestamp": "2025-12-28T10:40:05.715326", + "elapsed_time": 6120.000416755676, + "loss": 0.1546, + "grad_norm": 0.10520683974027634, + "learning_rate": 0.00012226415094339623, + "epoch": 0.393125 + }, + { + "step": 1230, + "timestamp": "2025-12-28T10:40:15.438855", + "elapsed_time": 6129.723945379257, + "loss": 0.182, + "grad_norm": 0.13104449212551117, + "learning_rate": 0.0001221383647798742, + "epoch": 0.39375 + }, + { + "step": 1231, + "timestamp": "2025-12-28T10:40:26.329143", + "elapsed_time": 6140.614233493805, + "loss": 0.1419, + "grad_norm": 0.09966589510440826, + "learning_rate": 0.0001220125786163522, + "epoch": 0.394375 + }, + { + "step": 1232, + "timestamp": "2025-12-28T10:40:31.971296", + "elapsed_time": 6146.256386756897, + "loss": 0.1358, + "grad_norm": 0.12772534787654877, + "learning_rate": 0.0001218867924528302, + "epoch": 0.395 + }, + { + "step": 1233, + "timestamp": "2025-12-28T10:40:39.345570", + "elapsed_time": 6153.630660057068, + "loss": 0.161, + "grad_norm": 0.11391877382993698, + "learning_rate": 0.00012176100628930817, + "epoch": 0.395625 + }, + { + "step": 1234, + "timestamp": "2025-12-28T10:40:52.265078", + "elapsed_time": 6166.550168275833, + "loss": 0.1591, + "grad_norm": 0.1011412963271141, + "learning_rate": 0.00012163522012578617, + "epoch": 0.39625 + }, + { + "step": 1235, + "timestamp": "2025-12-28T10:41:00.285752", + "elapsed_time": 6174.57084274292, + "loss": 0.1064, + "grad_norm": 0.10651546716690063, + "learning_rate": 0.00012150943396226415, + "epoch": 0.396875 + }, + { + "step": 1236, + "timestamp": "2025-12-28T10:41:08.312804", + "elapsed_time": 6182.597895145416, + "loss": 0.1547, + "grad_norm": 0.13378091156482697, + "learning_rate": 0.00012138364779874214, + "epoch": 0.3975 + }, + { + "step": 1237, + "timestamp": "2025-12-28T10:41:17.897328", + "elapsed_time": 6192.182418823242, + "loss": 0.1642, + "grad_norm": 0.1135389655828476, + "learning_rate": 0.00012125786163522013, + "epoch": 0.398125 + }, + { + "step": 1238, + "timestamp": "2025-12-28T10:41:27.960873", + "elapsed_time": 6202.245964050293, + "loss": 0.3449, + "grad_norm": 0.1613943874835968, + "learning_rate": 0.00012113207547169811, + "epoch": 0.39875 + }, + { + "step": 1239, + "timestamp": "2025-12-28T10:41:39.376307", + "elapsed_time": 6213.661397695541, + "loss": 0.1686, + "grad_norm": 0.10929451882839203, + "learning_rate": 0.0001210062893081761, + "epoch": 0.399375 + }, + { + "step": 1240, + "timestamp": "2025-12-28T10:41:49.452823", + "elapsed_time": 6223.737913370132, + "loss": 0.2091, + "grad_norm": 0.1252572238445282, + "learning_rate": 0.0001208805031446541, + "epoch": 0.4 + }, + { + "step": 1241, + "timestamp": "2025-12-28T10:41:55.431783", + "elapsed_time": 6229.716873407364, + "loss": 0.2126, + "grad_norm": 0.15598464012145996, + "learning_rate": 0.00012075471698113207, + "epoch": 0.400625 + }, + { + "step": 1242, + "timestamp": "2025-12-28T10:42:04.291713", + "elapsed_time": 6238.5768032073975, + "loss": 0.4607, + "grad_norm": 0.16947001218795776, + "learning_rate": 0.00012062893081761007, + "epoch": 0.40125 + }, + { + "step": 1243, + "timestamp": "2025-12-28T10:42:16.315554", + "elapsed_time": 6250.600644350052, + "loss": 0.1142, + "grad_norm": 0.09022119641304016, + "learning_rate": 0.00012050314465408805, + "epoch": 0.401875 + }, + { + "step": 1244, + "timestamp": "2025-12-28T10:42:24.823459", + "elapsed_time": 6259.108549118042, + "loss": 0.3034, + "grad_norm": 0.14146797358989716, + "learning_rate": 0.00012037735849056604, + "epoch": 0.4025 + }, + { + "step": 1245, + "timestamp": "2025-12-28T10:42:37.853137", + "elapsed_time": 6272.138226747513, + "loss": 0.1124, + "grad_norm": 0.11186513304710388, + "learning_rate": 0.00012025157232704403, + "epoch": 0.403125 + }, + { + "step": 1246, + "timestamp": "2025-12-28T10:42:45.407348", + "elapsed_time": 6279.6924386024475, + "loss": 0.1426, + "grad_norm": 0.11227832734584808, + "learning_rate": 0.00012012578616352201, + "epoch": 0.40375 + }, + { + "step": 1247, + "timestamp": "2025-12-28T10:42:53.466547", + "elapsed_time": 6287.751641750336, + "loss": 0.1833, + "grad_norm": 0.12571921944618225, + "learning_rate": 0.00012, + "epoch": 0.404375 + }, + { + "step": 1248, + "timestamp": "2025-12-28T10:43:02.403855", + "elapsed_time": 6296.688944816589, + "loss": 0.2413, + "grad_norm": 0.26408830285072327, + "learning_rate": 0.00011987421383647798, + "epoch": 0.405 + }, + { + "step": 1249, + "timestamp": "2025-12-28T10:43:11.586719", + "elapsed_time": 6305.871813774109, + "loss": 0.1202, + "grad_norm": 0.11835648864507675, + "learning_rate": 0.00011974842767295597, + "epoch": 0.405625 + }, + { + "step": 1250, + "timestamp": "2025-12-28T10:43:17.695080", + "elapsed_time": 6311.9801704883575, + "loss": 0.2076, + "grad_norm": 0.24628107249736786, + "learning_rate": 0.00011962264150943397, + "epoch": 0.40625 + }, + { + "step": 1251, + "timestamp": "2025-12-28T10:43:26.526357", + "elapsed_time": 6320.811446428299, + "loss": 0.3627, + "grad_norm": 0.14971527457237244, + "learning_rate": 0.00011949685534591195, + "epoch": 0.406875 + }, + { + "step": 1252, + "timestamp": "2025-12-28T10:43:32.439133", + "elapsed_time": 6326.724223136902, + "loss": 0.2245, + "grad_norm": 0.16869717836380005, + "learning_rate": 0.00011937106918238994, + "epoch": 0.4075 + }, + { + "step": 1253, + "timestamp": "2025-12-28T10:43:40.414515", + "elapsed_time": 6334.699605703354, + "loss": 0.1473, + "grad_norm": 0.12115878611803055, + "learning_rate": 0.00011924528301886793, + "epoch": 0.408125 + }, + { + "step": 1254, + "timestamp": "2025-12-28T10:43:47.555112", + "elapsed_time": 6341.840202093124, + "loss": 0.1415, + "grad_norm": 0.3590112328529358, + "learning_rate": 0.00011911949685534591, + "epoch": 0.40875 + }, + { + "step": 1255, + "timestamp": "2025-12-28T10:43:59.076934", + "elapsed_time": 6353.36202454567, + "loss": 0.166, + "grad_norm": 0.13714636862277985, + "learning_rate": 0.0001189937106918239, + "epoch": 0.409375 + }, + { + "step": 1256, + "timestamp": "2025-12-28T10:44:07.141027", + "elapsed_time": 6361.426117897034, + "loss": 0.1308, + "grad_norm": 0.1114254966378212, + "learning_rate": 0.00011886792452830188, + "epoch": 0.41 + }, + { + "step": 1257, + "timestamp": "2025-12-28T10:44:14.960681", + "elapsed_time": 6369.2457716465, + "loss": 0.1211, + "grad_norm": 0.10369876772165298, + "learning_rate": 0.00011874213836477988, + "epoch": 0.410625 + }, + { + "step": 1258, + "timestamp": "2025-12-28T10:44:21.307788", + "elapsed_time": 6375.592882871628, + "loss": 0.1538, + "grad_norm": 0.11906347423791885, + "learning_rate": 0.00011861635220125787, + "epoch": 0.41125 + }, + { + "step": 1259, + "timestamp": "2025-12-28T10:44:34.306623", + "elapsed_time": 6388.5917139053345, + "loss": 0.1692, + "grad_norm": 0.1116495355963707, + "learning_rate": 0.00011849056603773585, + "epoch": 0.411875 + }, + { + "step": 1260, + "timestamp": "2025-12-28T10:44:46.440601", + "elapsed_time": 6400.725692033768, + "loss": 0.1485, + "grad_norm": 0.1158275455236435, + "learning_rate": 0.00011836477987421384, + "epoch": 0.4125 + }, + { + "step": 1261, + "timestamp": "2025-12-28T10:44:51.167156", + "elapsed_time": 6405.452246665955, + "loss": 0.1906, + "grad_norm": 0.17199426889419556, + "learning_rate": 0.00011823899371069183, + "epoch": 0.413125 + }, + { + "step": 1262, + "timestamp": "2025-12-28T10:44:55.766324", + "elapsed_time": 6410.051414012909, + "loss": 0.5447, + "grad_norm": 0.2059682309627533, + "learning_rate": 0.00011811320754716981, + "epoch": 0.41375 + }, + { + "step": 1263, + "timestamp": "2025-12-28T10:44:59.626300", + "elapsed_time": 6413.911394357681, + "loss": 0.2163, + "grad_norm": 0.16556908190250397, + "learning_rate": 0.0001179874213836478, + "epoch": 0.414375 + }, + { + "step": 1264, + "timestamp": "2025-12-28T10:45:10.040506", + "elapsed_time": 6424.32559633255, + "loss": 0.1534, + "grad_norm": 0.10824116319417953, + "learning_rate": 0.00011786163522012578, + "epoch": 0.415 + }, + { + "step": 1265, + "timestamp": "2025-12-28T10:45:18.021995", + "elapsed_time": 6432.307085752487, + "loss": 0.2078, + "grad_norm": 0.1333610564470291, + "learning_rate": 0.00011773584905660378, + "epoch": 0.415625 + }, + { + "step": 1266, + "timestamp": "2025-12-28T10:45:27.112975", + "elapsed_time": 6441.398065567017, + "loss": 0.1441, + "grad_norm": 0.12403620034456253, + "learning_rate": 0.00011761006289308177, + "epoch": 0.41625 + }, + { + "step": 1267, + "timestamp": "2025-12-28T10:45:33.196456", + "elapsed_time": 6447.4815464019775, + "loss": 0.3569, + "grad_norm": 0.16776348650455475, + "learning_rate": 0.00011748427672955975, + "epoch": 0.416875 + }, + { + "step": 1268, + "timestamp": "2025-12-28T10:45:44.984909", + "elapsed_time": 6459.269999265671, + "loss": 0.1378, + "grad_norm": 0.10034506022930145, + "learning_rate": 0.00011735849056603774, + "epoch": 0.4175 + }, + { + "step": 1269, + "timestamp": "2025-12-28T10:45:53.809026", + "elapsed_time": 6468.0941162109375, + "loss": 0.2125, + "grad_norm": 0.1351146399974823, + "learning_rate": 0.00011723270440251572, + "epoch": 0.418125 + }, + { + "step": 1270, + "timestamp": "2025-12-28T10:45:58.659300", + "elapsed_time": 6472.944390773773, + "loss": 0.1865, + "grad_norm": 0.14262175559997559, + "learning_rate": 0.00011710691823899371, + "epoch": 0.41875 + }, + { + "step": 1271, + "timestamp": "2025-12-28T10:46:04.637730", + "elapsed_time": 6478.922821044922, + "loss": 0.194, + "grad_norm": 0.14949339628219604, + "learning_rate": 0.0001169811320754717, + "epoch": 0.419375 + }, + { + "step": 1272, + "timestamp": "2025-12-28T10:46:12.419593", + "elapsed_time": 6486.704683542252, + "loss": 0.169, + "grad_norm": 0.12632359564304352, + "learning_rate": 0.00011685534591194968, + "epoch": 0.42 + }, + { + "step": 1273, + "timestamp": "2025-12-28T10:46:28.716300", + "elapsed_time": 6503.0013909339905, + "loss": 0.115, + "grad_norm": 0.08015146851539612, + "learning_rate": 0.00011672955974842768, + "epoch": 0.420625 + }, + { + "step": 1274, + "timestamp": "2025-12-28T10:46:32.620770", + "elapsed_time": 6506.905859708786, + "loss": 0.1909, + "grad_norm": 0.16493003070354462, + "learning_rate": 0.00011660377358490567, + "epoch": 0.42125 + }, + { + "step": 1275, + "timestamp": "2025-12-28T10:46:43.229055", + "elapsed_time": 6517.5141451358795, + "loss": 0.166, + "grad_norm": 0.10563270002603531, + "learning_rate": 0.00011647798742138365, + "epoch": 0.421875 + }, + { + "step": 1276, + "timestamp": "2025-12-28T10:46:50.110258", + "elapsed_time": 6524.395348310471, + "loss": 0.2099, + "grad_norm": 0.1380920559167862, + "learning_rate": 0.00011635220125786164, + "epoch": 0.4225 + }, + { + "step": 1277, + "timestamp": "2025-12-28T10:46:58.322692", + "elapsed_time": 6532.607782125473, + "loss": 0.1598, + "grad_norm": 0.11996757984161377, + "learning_rate": 0.00011622641509433962, + "epoch": 0.423125 + }, + { + "step": 1278, + "timestamp": "2025-12-28T10:47:03.207181", + "elapsed_time": 6537.49227142334, + "loss": 0.1789, + "grad_norm": 0.14649128913879395, + "learning_rate": 0.00011610062893081761, + "epoch": 0.42375 + }, + { + "step": 1279, + "timestamp": "2025-12-28T10:47:09.522712", + "elapsed_time": 6543.807803153992, + "loss": 0.1889, + "grad_norm": 0.1377793848514557, + "learning_rate": 0.0001159748427672956, + "epoch": 0.424375 + }, + { + "step": 1280, + "timestamp": "2025-12-28T10:47:18.362641", + "elapsed_time": 6552.6477308273315, + "loss": 0.1573, + "grad_norm": 0.11395595967769623, + "learning_rate": 0.00011584905660377358, + "epoch": 0.425 + }, + { + "step": 1281, + "timestamp": "2025-12-28T10:47:24.849601", + "elapsed_time": 6559.134691953659, + "loss": 0.3999, + "grad_norm": 0.17579996585845947, + "learning_rate": 0.00011572327044025158, + "epoch": 0.425625 + }, + { + "step": 1282, + "timestamp": "2025-12-28T10:47:37.488885", + "elapsed_time": 6571.773975133896, + "loss": 0.1319, + "grad_norm": 0.09526728838682175, + "learning_rate": 0.00011559748427672956, + "epoch": 0.42625 + }, + { + "step": 1283, + "timestamp": "2025-12-28T10:47:45.629523", + "elapsed_time": 6579.914613485336, + "loss": 0.1803, + "grad_norm": 0.12247025221586227, + "learning_rate": 0.00011547169811320755, + "epoch": 0.426875 + }, + { + "step": 1284, + "timestamp": "2025-12-28T10:47:50.156209", + "elapsed_time": 6584.441299676895, + "loss": 0.2226, + "grad_norm": 0.15616583824157715, + "learning_rate": 0.00011534591194968554, + "epoch": 0.4275 + }, + { + "step": 1285, + "timestamp": "2025-12-28T10:47:58.291658", + "elapsed_time": 6592.576747894287, + "loss": 0.1435, + "grad_norm": 0.11908165365457535, + "learning_rate": 0.00011522012578616352, + "epoch": 0.428125 + }, + { + "step": 1286, + "timestamp": "2025-12-28T10:48:03.589513", + "elapsed_time": 6597.874603271484, + "loss": 0.253, + "grad_norm": 0.15733444690704346, + "learning_rate": 0.00011509433962264151, + "epoch": 0.42875 + }, + { + "step": 1287, + "timestamp": "2025-12-28T10:48:15.662909", + "elapsed_time": 6609.947999715805, + "loss": 0.1346, + "grad_norm": 0.08608326315879822, + "learning_rate": 0.0001149685534591195, + "epoch": 0.429375 + }, + { + "step": 1288, + "timestamp": "2025-12-28T10:48:26.180269", + "elapsed_time": 6620.465359449387, + "loss": 0.159, + "grad_norm": 0.10130292922258377, + "learning_rate": 0.00011484276729559748, + "epoch": 0.43 + }, + { + "step": 1289, + "timestamp": "2025-12-28T10:48:31.670014", + "elapsed_time": 6625.955104589462, + "loss": 0.1755, + "grad_norm": 0.1298661231994629, + "learning_rate": 0.00011471698113207548, + "epoch": 0.430625 + }, + { + "step": 1290, + "timestamp": "2025-12-28T10:48:38.047130", + "elapsed_time": 6632.332220554352, + "loss": 0.1663, + "grad_norm": 0.12876641750335693, + "learning_rate": 0.00011459119496855346, + "epoch": 0.43125 + }, + { + "step": 1291, + "timestamp": "2025-12-28T10:48:49.938884", + "elapsed_time": 6644.223974466324, + "loss": 0.1259, + "grad_norm": 0.08893437683582306, + "learning_rate": 0.00011446540880503145, + "epoch": 0.431875 + }, + { + "step": 1292, + "timestamp": "2025-12-28T10:48:56.281897", + "elapsed_time": 6650.566987276077, + "loss": 0.2474, + "grad_norm": 0.1418265700340271, + "learning_rate": 0.00011433962264150944, + "epoch": 0.4325 + }, + { + "step": 1293, + "timestamp": "2025-12-28T10:49:04.023046", + "elapsed_time": 6658.308136463165, + "loss": 0.1673, + "grad_norm": 0.12449798732995987, + "learning_rate": 0.00011421383647798742, + "epoch": 0.433125 + }, + { + "step": 1294, + "timestamp": "2025-12-28T10:49:18.892772", + "elapsed_time": 6673.177862167358, + "loss": 0.143, + "grad_norm": 0.10782869905233383, + "learning_rate": 0.00011408805031446541, + "epoch": 0.43375 + }, + { + "step": 1295, + "timestamp": "2025-12-28T10:49:29.168117", + "elapsed_time": 6683.45320725441, + "loss": 0.1301, + "grad_norm": 0.09740663319826126, + "learning_rate": 0.00011396226415094339, + "epoch": 0.434375 + }, + { + "step": 1296, + "timestamp": "2025-12-28T10:49:48.401036", + "elapsed_time": 6702.686126708984, + "loss": 0.1528, + "grad_norm": 0.08156444132328033, + "learning_rate": 0.00011383647798742138, + "epoch": 0.435 + }, + { + "step": 1297, + "timestamp": "2025-12-28T10:50:01.198414", + "elapsed_time": 6715.483504772186, + "loss": 0.1212, + "grad_norm": 0.09743256121873856, + "learning_rate": 0.00011371069182389938, + "epoch": 0.435625 + }, + { + "step": 1298, + "timestamp": "2025-12-28T10:50:09.499924", + "elapsed_time": 6723.785014867783, + "loss": 0.1612, + "grad_norm": 0.12712618708610535, + "learning_rate": 0.00011358490566037736, + "epoch": 0.43625 + }, + { + "step": 1299, + "timestamp": "2025-12-28T10:50:17.483321", + "elapsed_time": 6731.768411159515, + "loss": 0.1511, + "grad_norm": 0.13302955031394958, + "learning_rate": 0.00011345911949685535, + "epoch": 0.436875 + }, + { + "step": 1300, + "timestamp": "2025-12-28T10:50:27.725225", + "elapsed_time": 6742.010316371918, + "loss": 0.1615, + "grad_norm": 0.10727046430110931, + "learning_rate": 0.00011333333333333334, + "epoch": 0.4375 + }, + { + "step": 1301, + "timestamp": "2025-12-28T10:50:36.587116", + "elapsed_time": 6750.872206449509, + "loss": 0.2059, + "grad_norm": 0.15867577493190765, + "learning_rate": 0.00011320754716981132, + "epoch": 0.438125 + }, + { + "step": 1302, + "timestamp": "2025-12-28T10:50:41.926696", + "elapsed_time": 6756.21178650856, + "loss": 0.2088, + "grad_norm": 0.15406003594398499, + "learning_rate": 0.00011308176100628931, + "epoch": 0.43875 + }, + { + "step": 1303, + "timestamp": "2025-12-28T10:50:49.475802", + "elapsed_time": 6763.760892391205, + "loss": 0.2062, + "grad_norm": 0.13464148342609406, + "learning_rate": 0.00011295597484276729, + "epoch": 0.439375 + }, + { + "step": 1304, + "timestamp": "2025-12-28T10:50:53.491533", + "elapsed_time": 6767.776623249054, + "loss": 0.2728, + "grad_norm": 0.2125401347875595, + "learning_rate": 0.00011283018867924528, + "epoch": 0.44 + }, + { + "step": 1305, + "timestamp": "2025-12-28T10:51:02.825718", + "elapsed_time": 6777.110809087753, + "loss": 0.1534, + "grad_norm": 0.10923696309328079, + "learning_rate": 0.00011270440251572328, + "epoch": 0.440625 + }, + { + "step": 1306, + "timestamp": "2025-12-28T10:51:12.781667", + "elapsed_time": 6787.066757440567, + "loss": 0.4045, + "grad_norm": 0.1884467899799347, + "learning_rate": 0.00011257861635220126, + "epoch": 0.44125 + }, + { + "step": 1307, + "timestamp": "2025-12-28T10:51:21.728953", + "elapsed_time": 6796.014044284821, + "loss": 0.1939, + "grad_norm": 0.12376397848129272, + "learning_rate": 0.00011245283018867925, + "epoch": 0.441875 + }, + { + "step": 1308, + "timestamp": "2025-12-28T10:51:33.630122", + "elapsed_time": 6807.915212869644, + "loss": 0.1559, + "grad_norm": 0.09741462022066116, + "learning_rate": 0.00011232704402515724, + "epoch": 0.4425 + }, + { + "step": 1309, + "timestamp": "2025-12-28T10:51:47.370883", + "elapsed_time": 6821.65597319603, + "loss": 0.1371, + "grad_norm": 0.08514436334371567, + "learning_rate": 0.00011220125786163522, + "epoch": 0.443125 + }, + { + "step": 1310, + "timestamp": "2025-12-28T10:52:02.892235", + "elapsed_time": 6837.177325725555, + "loss": 0.109, + "grad_norm": 0.09866739064455032, + "learning_rate": 0.00011207547169811321, + "epoch": 0.44375 + }, + { + "step": 1311, + "timestamp": "2025-12-28T10:52:13.507601", + "elapsed_time": 6847.792692184448, + "loss": 0.1884, + "grad_norm": 0.10366571694612503, + "learning_rate": 0.00011194968553459119, + "epoch": 0.444375 + }, + { + "step": 1312, + "timestamp": "2025-12-28T10:52:23.266819", + "elapsed_time": 6857.551909446716, + "loss": 0.2008, + "grad_norm": 0.11828729510307312, + "learning_rate": 0.00011182389937106919, + "epoch": 0.445 + }, + { + "step": 1313, + "timestamp": "2025-12-28T10:52:33.470306", + "elapsed_time": 6867.755395889282, + "loss": 0.1863, + "grad_norm": 0.11549749970436096, + "learning_rate": 0.00011169811320754718, + "epoch": 0.445625 + }, + { + "step": 1314, + "timestamp": "2025-12-28T10:52:42.647558", + "elapsed_time": 6876.932648897171, + "loss": 0.1582, + "grad_norm": 0.11198070645332336, + "learning_rate": 0.00011157232704402516, + "epoch": 0.44625 + }, + { + "step": 1315, + "timestamp": "2025-12-28T10:52:52.551145", + "elapsed_time": 6886.8362357616425, + "loss": 0.1355, + "grad_norm": 0.10204551368951797, + "learning_rate": 0.00011144654088050315, + "epoch": 0.446875 + }, + { + "step": 1316, + "timestamp": "2025-12-28T10:53:04.736390", + "elapsed_time": 6899.021480321884, + "loss": 0.1699, + "grad_norm": 0.10276511311531067, + "learning_rate": 0.00011132075471698113, + "epoch": 0.4475 + }, + { + "step": 1317, + "timestamp": "2025-12-28T10:53:10.665741", + "elapsed_time": 6904.950830936432, + "loss": 0.2933, + "grad_norm": 0.16935305297374725, + "learning_rate": 0.00011119496855345912, + "epoch": 0.448125 + }, + { + "step": 1318, + "timestamp": "2025-12-28T10:53:15.139070", + "elapsed_time": 6909.424160718918, + "loss": 0.2412, + "grad_norm": 0.18222883343696594, + "learning_rate": 0.00011106918238993711, + "epoch": 0.44875 + }, + { + "step": 1319, + "timestamp": "2025-12-28T10:53:23.782033", + "elapsed_time": 6918.067123413086, + "loss": 0.128, + "grad_norm": 0.11177527904510498, + "learning_rate": 0.00011094339622641509, + "epoch": 0.449375 + }, + { + "step": 1320, + "timestamp": "2025-12-28T10:53:36.143000", + "elapsed_time": 6930.428090333939, + "loss": 0.149, + "grad_norm": 0.12012763321399689, + "learning_rate": 0.00011081761006289309, + "epoch": 0.45 + }, + { + "step": 1321, + "timestamp": "2025-12-28T10:53:44.449019", + "elapsed_time": 6938.734109163284, + "loss": 0.1378, + "grad_norm": 0.11260993033647537, + "learning_rate": 0.00011069182389937108, + "epoch": 0.450625 + }, + { + "step": 1322, + "timestamp": "2025-12-28T10:53:53.834895", + "elapsed_time": 6948.119985580444, + "loss": 0.2031, + "grad_norm": 0.13370606303215027, + "learning_rate": 0.00011056603773584906, + "epoch": 0.45125 + }, + { + "step": 1323, + "timestamp": "2025-12-28T10:54:03.643538", + "elapsed_time": 6957.928628444672, + "loss": 0.1721, + "grad_norm": 0.12418955564498901, + "learning_rate": 0.00011044025157232705, + "epoch": 0.451875 + }, + { + "step": 1324, + "timestamp": "2025-12-28T10:54:10.718971", + "elapsed_time": 6965.004061698914, + "loss": 0.217, + "grad_norm": 0.15134598314762115, + "learning_rate": 0.00011031446540880503, + "epoch": 0.4525 + }, + { + "step": 1325, + "timestamp": "2025-12-28T10:54:27.565308", + "elapsed_time": 6981.850398540497, + "loss": 0.1303, + "grad_norm": 0.08781035244464874, + "learning_rate": 0.00011018867924528302, + "epoch": 0.453125 + }, + { + "step": 1326, + "timestamp": "2025-12-28T10:54:47.648331", + "elapsed_time": 7001.933421134949, + "loss": 0.1125, + "grad_norm": 0.083819180727005, + "learning_rate": 0.00011006289308176101, + "epoch": 0.45375 + }, + { + "step": 1327, + "timestamp": "2025-12-28T10:55:08.535086", + "elapsed_time": 7022.820177078247, + "loss": 0.0996, + "grad_norm": 0.06516632437705994, + "learning_rate": 0.000109937106918239, + "epoch": 0.454375 + }, + { + "step": 1328, + "timestamp": "2025-12-28T10:55:16.596328", + "elapsed_time": 7030.881418466568, + "loss": 0.2083, + "grad_norm": 0.2553313672542572, + "learning_rate": 0.00010981132075471699, + "epoch": 0.455 + }, + { + "step": 1329, + "timestamp": "2025-12-28T10:55:22.137094", + "elapsed_time": 7036.4221839904785, + "loss": 0.2141, + "grad_norm": 0.16487905383110046, + "learning_rate": 0.00010968553459119497, + "epoch": 0.455625 + }, + { + "step": 1330, + "timestamp": "2025-12-28T10:55:27.891844", + "elapsed_time": 7042.176934480667, + "loss": 0.1958, + "grad_norm": 0.2868243157863617, + "learning_rate": 0.00010955974842767296, + "epoch": 0.45625 + }, + { + "step": 1331, + "timestamp": "2025-12-28T10:55:40.387369", + "elapsed_time": 7054.672459602356, + "loss": 0.1071, + "grad_norm": 0.08470939844846725, + "learning_rate": 0.00010943396226415095, + "epoch": 0.456875 + }, + { + "step": 1332, + "timestamp": "2025-12-28T10:55:46.589006", + "elapsed_time": 7060.8740956783295, + "loss": 0.2562, + "grad_norm": 0.16096508502960205, + "learning_rate": 0.00010930817610062893, + "epoch": 0.4575 + }, + { + "step": 1333, + "timestamp": "2025-12-28T10:55:59.879933", + "elapsed_time": 7074.165023565292, + "loss": 0.1492, + "grad_norm": 0.10115651786327362, + "learning_rate": 0.00010918238993710692, + "epoch": 0.458125 + }, + { + "step": 1334, + "timestamp": "2025-12-28T10:56:06.938716", + "elapsed_time": 7081.223806381226, + "loss": 0.1331, + "grad_norm": 0.12268812209367752, + "learning_rate": 0.00010905660377358491, + "epoch": 0.45875 + }, + { + "step": 1335, + "timestamp": "2025-12-28T10:56:17.194994", + "elapsed_time": 7091.4800844192505, + "loss": 0.1838, + "grad_norm": 0.1212567612528801, + "learning_rate": 0.0001089308176100629, + "epoch": 0.459375 + }, + { + "step": 1336, + "timestamp": "2025-12-28T10:56:25.631434", + "elapsed_time": 7099.916523933411, + "loss": 0.1439, + "grad_norm": 0.11041584610939026, + "learning_rate": 0.00010880503144654089, + "epoch": 0.46 + }, + { + "step": 1337, + "timestamp": "2025-12-28T10:56:32.228488", + "elapsed_time": 7106.513578653336, + "loss": 0.1623, + "grad_norm": 0.14172060787677765, + "learning_rate": 0.00010867924528301887, + "epoch": 0.460625 + }, + { + "step": 1338, + "timestamp": "2025-12-28T10:56:39.404079", + "elapsed_time": 7113.689169168472, + "loss": 0.1669, + "grad_norm": 0.1437670886516571, + "learning_rate": 0.00010855345911949686, + "epoch": 0.46125 + }, + { + "step": 1339, + "timestamp": "2025-12-28T10:56:52.538722", + "elapsed_time": 7126.823812246323, + "loss": 0.122, + "grad_norm": 0.08426967263221741, + "learning_rate": 0.00010842767295597485, + "epoch": 0.461875 + }, + { + "step": 1340, + "timestamp": "2025-12-28T10:56:58.657468", + "elapsed_time": 7132.942558288574, + "loss": 0.1871, + "grad_norm": 0.13586315512657166, + "learning_rate": 0.00010830188679245283, + "epoch": 0.4625 + }, + { + "step": 1341, + "timestamp": "2025-12-28T10:57:14.320116", + "elapsed_time": 7148.605206251144, + "loss": 0.1435, + "grad_norm": 0.09016376733779907, + "learning_rate": 0.00010817610062893082, + "epoch": 0.463125 + }, + { + "step": 1342, + "timestamp": "2025-12-28T10:57:21.801068", + "elapsed_time": 7156.086158514023, + "loss": 0.235, + "grad_norm": 0.1631210297346115, + "learning_rate": 0.00010805031446540882, + "epoch": 0.46375 + }, + { + "step": 1343, + "timestamp": "2025-12-28T10:57:42.686920", + "elapsed_time": 7176.972010374069, + "loss": 0.1637, + "grad_norm": 0.08319737762212753, + "learning_rate": 0.0001079245283018868, + "epoch": 0.464375 + }, + { + "step": 1344, + "timestamp": "2025-12-28T10:57:57.552601", + "elapsed_time": 7191.837691068649, + "loss": 0.1667, + "grad_norm": 0.10576473921537399, + "learning_rate": 0.00010779874213836479, + "epoch": 0.465 + }, + { + "step": 1345, + "timestamp": "2025-12-28T10:58:07.616516", + "elapsed_time": 7201.901606798172, + "loss": 0.2561, + "grad_norm": 0.142886221408844, + "learning_rate": 0.00010767295597484277, + "epoch": 0.465625 + }, + { + "step": 1346, + "timestamp": "2025-12-28T10:58:15.468138", + "elapsed_time": 7209.753228902817, + "loss": 0.1848, + "grad_norm": 0.14011366665363312, + "learning_rate": 0.00010754716981132076, + "epoch": 0.46625 + }, + { + "step": 1347, + "timestamp": "2025-12-28T10:58:20.930984", + "elapsed_time": 7215.216074466705, + "loss": 0.2569, + "grad_norm": 0.16688081622123718, + "learning_rate": 0.00010742138364779875, + "epoch": 0.466875 + }, + { + "step": 1348, + "timestamp": "2025-12-28T10:58:29.272767", + "elapsed_time": 7223.557857751846, + "loss": 0.1619, + "grad_norm": 0.12044026702642441, + "learning_rate": 0.00010729559748427673, + "epoch": 0.4675 + }, + { + "step": 1349, + "timestamp": "2025-12-28T10:58:35.655956", + "elapsed_time": 7229.941046714783, + "loss": 0.1515, + "grad_norm": 0.13383439183235168, + "learning_rate": 0.00010716981132075472, + "epoch": 0.468125 + }, + { + "step": 1350, + "timestamp": "2025-12-28T10:58:51.652984", + "elapsed_time": 7245.938074588776, + "loss": 0.1298, + "grad_norm": 0.08610431104898453, + "learning_rate": 0.0001070440251572327, + "epoch": 0.46875 + }, + { + "step": 1351, + "timestamp": "2025-12-28T10:59:01.082143", + "elapsed_time": 7255.367233753204, + "loss": 0.1903, + "grad_norm": 0.11925891786813736, + "learning_rate": 0.0001069182389937107, + "epoch": 0.469375 + }, + { + "step": 1352, + "timestamp": "2025-12-28T10:59:07.425183", + "elapsed_time": 7261.710273742676, + "loss": 0.1339, + "grad_norm": 0.12832607328891754, + "learning_rate": 0.00010679245283018869, + "epoch": 0.47 + }, + { + "step": 1353, + "timestamp": "2025-12-28T10:59:14.908303", + "elapsed_time": 7269.193393468857, + "loss": 0.1308, + "grad_norm": 0.10823425650596619, + "learning_rate": 0.00010666666666666667, + "epoch": 0.470625 + }, + { + "step": 1354, + "timestamp": "2025-12-28T10:59:22.938044", + "elapsed_time": 7277.223134994507, + "loss": 0.256, + "grad_norm": 0.16312338411808014, + "learning_rate": 0.00010654088050314466, + "epoch": 0.47125 + }, + { + "step": 1355, + "timestamp": "2025-12-28T10:59:32.232549", + "elapsed_time": 7286.517638921738, + "loss": 0.125, + "grad_norm": 0.10023007541894913, + "learning_rate": 0.00010641509433962265, + "epoch": 0.471875 + }, + { + "step": 1356, + "timestamp": "2025-12-28T10:59:43.645190", + "elapsed_time": 7297.930280685425, + "loss": 0.3132, + "grad_norm": 0.14320826530456543, + "learning_rate": 0.00010628930817610063, + "epoch": 0.4725 + }, + { + "step": 1357, + "timestamp": "2025-12-28T10:59:49.851471", + "elapsed_time": 7304.136561393738, + "loss": 0.229, + "grad_norm": 0.18624381721019745, + "learning_rate": 0.00010616352201257862, + "epoch": 0.473125 + }, + { + "step": 1358, + "timestamp": "2025-12-28T10:59:58.793059", + "elapsed_time": 7313.0781490802765, + "loss": 0.1375, + "grad_norm": 0.10548731684684753, + "learning_rate": 0.0001060377358490566, + "epoch": 0.47375 + }, + { + "step": 1359, + "timestamp": "2025-12-28T11:00:10.967893", + "elapsed_time": 7325.252983093262, + "loss": 0.1417, + "grad_norm": 0.10804691165685654, + "learning_rate": 0.0001059119496855346, + "epoch": 0.474375 + }, + { + "step": 1360, + "timestamp": "2025-12-28T11:00:16.238424", + "elapsed_time": 7330.523514509201, + "loss": 0.2253, + "grad_norm": 0.1500820368528366, + "learning_rate": 0.00010578616352201259, + "epoch": 0.475 + }, + { + "step": 1361, + "timestamp": "2025-12-28T11:00:24.712628", + "elapsed_time": 7338.997718095779, + "loss": 0.1785, + "grad_norm": 0.12143810838460922, + "learning_rate": 0.00010566037735849057, + "epoch": 0.475625 + }, + { + "step": 1362, + "timestamp": "2025-12-28T11:00:33.573643", + "elapsed_time": 7347.858733415604, + "loss": 0.176, + "grad_norm": 0.13725930452346802, + "learning_rate": 0.00010553459119496856, + "epoch": 0.47625 + }, + { + "step": 1363, + "timestamp": "2025-12-28T11:00:41.508324", + "elapsed_time": 7355.7934148311615, + "loss": 0.1825, + "grad_norm": 0.13526056706905365, + "learning_rate": 0.00010540880503144654, + "epoch": 0.476875 + }, + { + "step": 1364, + "timestamp": "2025-12-28T11:00:47.043992", + "elapsed_time": 7361.329082250595, + "loss": 0.2259, + "grad_norm": 0.16327698528766632, + "learning_rate": 0.00010528301886792453, + "epoch": 0.4775 + }, + { + "step": 1365, + "timestamp": "2025-12-28T11:00:57.065386", + "elapsed_time": 7371.350476741791, + "loss": 0.182, + "grad_norm": 0.12609660625457764, + "learning_rate": 0.00010515723270440252, + "epoch": 0.478125 + }, + { + "step": 1366, + "timestamp": "2025-12-28T11:01:06.163415", + "elapsed_time": 7380.4485058784485, + "loss": 0.1756, + "grad_norm": 0.1293572634458542, + "learning_rate": 0.0001050314465408805, + "epoch": 0.47875 + }, + { + "step": 1367, + "timestamp": "2025-12-28T11:01:15.379986", + "elapsed_time": 7389.665076971054, + "loss": 0.1195, + "grad_norm": 0.0980340987443924, + "learning_rate": 0.0001049056603773585, + "epoch": 0.479375 + }, + { + "step": 1368, + "timestamp": "2025-12-28T11:01:23.731574", + "elapsed_time": 7398.016664505005, + "loss": 0.2087, + "grad_norm": 0.1566362828016281, + "learning_rate": 0.00010477987421383649, + "epoch": 0.48 + }, + { + "step": 1369, + "timestamp": "2025-12-28T11:01:38.713040", + "elapsed_time": 7412.99813079834, + "loss": 0.1185, + "grad_norm": 0.09030961245298386, + "learning_rate": 0.00010465408805031447, + "epoch": 0.480625 + }, + { + "step": 1370, + "timestamp": "2025-12-28T11:01:47.053864", + "elapsed_time": 7421.3389544487, + "loss": 0.1549, + "grad_norm": 0.10715126991271973, + "learning_rate": 0.00010452830188679246, + "epoch": 0.48125 + }, + { + "step": 1371, + "timestamp": "2025-12-28T11:01:52.967016", + "elapsed_time": 7427.252106428146, + "loss": 0.253, + "grad_norm": 0.16500554978847504, + "learning_rate": 0.00010440251572327044, + "epoch": 0.481875 + }, + { + "step": 1372, + "timestamp": "2025-12-28T11:02:04.814134", + "elapsed_time": 7439.0992250442505, + "loss": 0.2054, + "grad_norm": 0.10960862785577774, + "learning_rate": 0.00010427672955974843, + "epoch": 0.4825 + }, + { + "step": 1373, + "timestamp": "2025-12-28T11:02:20.468162", + "elapsed_time": 7454.7532522678375, + "loss": 0.1554, + "grad_norm": 0.12754300236701965, + "learning_rate": 0.00010415094339622642, + "epoch": 0.483125 + }, + { + "step": 1374, + "timestamp": "2025-12-28T11:02:28.644614", + "elapsed_time": 7462.929704427719, + "loss": 0.1667, + "grad_norm": 0.12952545285224915, + "learning_rate": 0.0001040251572327044, + "epoch": 0.48375 + }, + { + "step": 1375, + "timestamp": "2025-12-28T11:02:33.987063", + "elapsed_time": 7468.2721536159515, + "loss": 0.221, + "grad_norm": 0.1964443176984787, + "learning_rate": 0.0001038993710691824, + "epoch": 0.484375 + }, + { + "step": 1376, + "timestamp": "2025-12-28T11:02:45.242830", + "elapsed_time": 7479.527920484543, + "loss": 0.1549, + "grad_norm": 0.10678889602422714, + "learning_rate": 0.00010377358490566037, + "epoch": 0.485 + }, + { + "step": 1377, + "timestamp": "2025-12-28T11:02:57.881441", + "elapsed_time": 7492.166531324387, + "loss": 0.1292, + "grad_norm": 0.1204121932387352, + "learning_rate": 0.00010364779874213837, + "epoch": 0.485625 + }, + { + "step": 1378, + "timestamp": "2025-12-28T11:03:05.127176", + "elapsed_time": 7499.412266731262, + "loss": 0.2362, + "grad_norm": 0.14369583129882812, + "learning_rate": 0.00010352201257861636, + "epoch": 0.48625 + }, + { + "step": 1379, + "timestamp": "2025-12-28T11:03:08.953753", + "elapsed_time": 7503.238843917847, + "loss": 0.3803, + "grad_norm": 0.21724280714988708, + "learning_rate": 0.00010339622641509434, + "epoch": 0.486875 + }, + { + "step": 1380, + "timestamp": "2025-12-28T11:03:18.974982", + "elapsed_time": 7513.26007270813, + "loss": 0.1546, + "grad_norm": 0.1561720371246338, + "learning_rate": 0.00010327044025157233, + "epoch": 0.4875 + }, + { + "step": 1381, + "timestamp": "2025-12-28T11:03:23.926806", + "elapsed_time": 7518.211896419525, + "loss": 0.2732, + "grad_norm": 0.17296800017356873, + "learning_rate": 0.00010314465408805032, + "epoch": 0.488125 + }, + { + "step": 1382, + "timestamp": "2025-12-28T11:03:42.230374", + "elapsed_time": 7536.5154638290405, + "loss": 0.122, + "grad_norm": 0.08095800131559372, + "learning_rate": 0.0001030188679245283, + "epoch": 0.48875 + }, + { + "step": 1383, + "timestamp": "2025-12-28T11:03:46.737340", + "elapsed_time": 7541.022430181503, + "loss": 0.2151, + "grad_norm": 0.17441503703594208, + "learning_rate": 0.0001028930817610063, + "epoch": 0.489375 + }, + { + "step": 1384, + "timestamp": "2025-12-28T11:03:58.400597", + "elapsed_time": 7552.685687303543, + "loss": 0.1857, + "grad_norm": 0.12047084420919418, + "learning_rate": 0.00010276729559748428, + "epoch": 0.49 + }, + { + "step": 1385, + "timestamp": "2025-12-28T11:04:04.778751", + "elapsed_time": 7559.063841819763, + "loss": 0.2076, + "grad_norm": 0.1551089733839035, + "learning_rate": 0.00010264150943396227, + "epoch": 0.490625 + }, + { + "step": 1386, + "timestamp": "2025-12-28T11:04:13.091378", + "elapsed_time": 7567.37646818161, + "loss": 0.1264, + "grad_norm": 0.10384256392717361, + "learning_rate": 0.00010251572327044026, + "epoch": 0.49125 + }, + { + "step": 1387, + "timestamp": "2025-12-28T11:04:21.309367", + "elapsed_time": 7575.594457626343, + "loss": 0.3628, + "grad_norm": 0.22657428681850433, + "learning_rate": 0.00010238993710691824, + "epoch": 0.491875 + }, + { + "step": 1388, + "timestamp": "2025-12-28T11:04:29.487914", + "elapsed_time": 7583.773004293442, + "loss": 0.2039, + "grad_norm": 0.13718171417713165, + "learning_rate": 0.00010226415094339623, + "epoch": 0.4925 + }, + { + "step": 1389, + "timestamp": "2025-12-28T11:04:42.300103", + "elapsed_time": 7596.585193157196, + "loss": 0.1706, + "grad_norm": 0.10597704350948334, + "learning_rate": 0.00010213836477987422, + "epoch": 0.493125 + }, + { + "step": 1390, + "timestamp": "2025-12-28T11:05:03.024157", + "elapsed_time": 7617.309247255325, + "loss": 0.1205, + "grad_norm": 0.07673165202140808, + "learning_rate": 0.0001020125786163522, + "epoch": 0.49375 + }, + { + "step": 1391, + "timestamp": "2025-12-28T11:05:10.494896", + "elapsed_time": 7624.779986381531, + "loss": 0.1777, + "grad_norm": 0.12185313552618027, + "learning_rate": 0.0001018867924528302, + "epoch": 0.494375 + }, + { + "step": 1392, + "timestamp": "2025-12-28T11:05:16.240771", + "elapsed_time": 7630.525861024857, + "loss": 0.1678, + "grad_norm": 0.1360418051481247, + "learning_rate": 0.00010176100628930818, + "epoch": 0.495 + }, + { + "step": 1393, + "timestamp": "2025-12-28T11:05:25.834105", + "elapsed_time": 7640.119195222855, + "loss": 0.3532, + "grad_norm": 0.14602701365947723, + "learning_rate": 0.00010163522012578617, + "epoch": 0.495625 + }, + { + "step": 1394, + "timestamp": "2025-12-28T11:05:41.365149", + "elapsed_time": 7655.650239467621, + "loss": 0.1404, + "grad_norm": 0.08702640235424042, + "learning_rate": 0.00010150943396226416, + "epoch": 0.49625 + }, + { + "step": 1395, + "timestamp": "2025-12-28T11:05:48.620666", + "elapsed_time": 7662.905756235123, + "loss": 0.222, + "grad_norm": 0.15947473049163818, + "learning_rate": 0.00010138364779874214, + "epoch": 0.496875 + }, + { + "step": 1396, + "timestamp": "2025-12-28T11:05:58.248188", + "elapsed_time": 7672.533278942108, + "loss": 0.3532, + "grad_norm": 0.15767693519592285, + "learning_rate": 0.00010125786163522013, + "epoch": 0.4975 + }, + { + "step": 1397, + "timestamp": "2025-12-28T11:06:04.588958", + "elapsed_time": 7678.874048471451, + "loss": 0.2166, + "grad_norm": 0.14681337773799896, + "learning_rate": 0.00010113207547169811, + "epoch": 0.498125 + }, + { + "step": 1398, + "timestamp": "2025-12-28T11:06:08.944856", + "elapsed_time": 7683.22994685173, + "loss": 0.2352, + "grad_norm": 0.19252383708953857, + "learning_rate": 0.0001010062893081761, + "epoch": 0.49875 + }, + { + "step": 1399, + "timestamp": "2025-12-28T11:06:18.169645", + "elapsed_time": 7692.454735279083, + "loss": 0.2846, + "grad_norm": 0.14431574940681458, + "learning_rate": 0.0001008805031446541, + "epoch": 0.499375 + }, + { + "step": 1400, + "timestamp": "2025-12-28T11:06:23.381124", + "elapsed_time": 7697.666215181351, + "loss": 0.2615, + "grad_norm": 0.18521860241889954, + "learning_rate": 0.00010075471698113208, + "epoch": 0.5 + }, + { + "step": 1401, + "timestamp": "2025-12-28T11:06:31.966311", + "elapsed_time": 7706.2514016628265, + "loss": 0.2161, + "grad_norm": 0.12801428139209747, + "learning_rate": 0.00010062893081761007, + "epoch": 0.500625 + }, + { + "step": 1402, + "timestamp": "2025-12-28T11:06:36.665411", + "elapsed_time": 7710.950501441956, + "loss": 0.2801, + "grad_norm": 0.18982771039009094, + "learning_rate": 0.00010050314465408806, + "epoch": 0.50125 + }, + { + "step": 1403, + "timestamp": "2025-12-28T11:06:40.414131", + "elapsed_time": 7714.699221611023, + "loss": 0.2644, + "grad_norm": 0.2565673589706421, + "learning_rate": 0.00010037735849056604, + "epoch": 0.501875 + }, + { + "step": 1404, + "timestamp": "2025-12-28T11:06:51.418661", + "elapsed_time": 7725.70375084877, + "loss": 0.1786, + "grad_norm": 0.10981456935405731, + "learning_rate": 0.00010025157232704403, + "epoch": 0.5025 + }, + { + "step": 1405, + "timestamp": "2025-12-28T11:06:57.172405", + "elapsed_time": 7731.4574954509735, + "loss": 0.4434, + "grad_norm": 0.20104235410690308, + "learning_rate": 0.00010012578616352201, + "epoch": 0.503125 + }, + { + "step": 1406, + "timestamp": "2025-12-28T11:07:05.960076", + "elapsed_time": 7740.245167016983, + "loss": 0.1505, + "grad_norm": 0.11026592552661896, + "learning_rate": 0.0001, + "epoch": 0.50375 + }, + { + "step": 1407, + "timestamp": "2025-12-28T11:07:17.486947", + "elapsed_time": 7751.7720375061035, + "loss": 0.1486, + "grad_norm": 0.09958239644765854, + "learning_rate": 9.9874213836478e-05, + "epoch": 0.504375 + }, + { + "step": 1408, + "timestamp": "2025-12-28T11:07:26.472895", + "elapsed_time": 7760.757986068726, + "loss": 0.1752, + "grad_norm": 0.11857740581035614, + "learning_rate": 9.974842767295598e-05, + "epoch": 0.505 + }, + { + "step": 1409, + "timestamp": "2025-12-28T11:07:33.007795", + "elapsed_time": 7767.292885303497, + "loss": 0.2939, + "grad_norm": 0.15093620121479034, + "learning_rate": 9.962264150943397e-05, + "epoch": 0.505625 + }, + { + "step": 1410, + "timestamp": "2025-12-28T11:07:41.150713", + "elapsed_time": 7775.43580365181, + "loss": 0.1838, + "grad_norm": 0.1252412497997284, + "learning_rate": 9.949685534591195e-05, + "epoch": 0.50625 + }, + { + "step": 1411, + "timestamp": "2025-12-28T11:07:53.703346", + "elapsed_time": 7787.988436460495, + "loss": 0.1411, + "grad_norm": 0.0989488959312439, + "learning_rate": 9.937106918238994e-05, + "epoch": 0.506875 + }, + { + "step": 1412, + "timestamp": "2025-12-28T11:08:02.343988", + "elapsed_time": 7796.629077911377, + "loss": 0.1611, + "grad_norm": 0.11869240552186966, + "learning_rate": 9.924528301886793e-05, + "epoch": 0.5075 + }, + { + "step": 1413, + "timestamp": "2025-12-28T11:08:12.956190", + "elapsed_time": 7807.2412803173065, + "loss": 0.1394, + "grad_norm": 0.10440147668123245, + "learning_rate": 9.911949685534591e-05, + "epoch": 0.508125 + }, + { + "step": 1414, + "timestamp": "2025-12-28T11:08:18.967906", + "elapsed_time": 7813.252996206284, + "loss": 0.2214, + "grad_norm": 0.16440127789974213, + "learning_rate": 9.89937106918239e-05, + "epoch": 0.50875 + }, + { + "step": 1415, + "timestamp": "2025-12-28T11:08:27.441443", + "elapsed_time": 7821.726533174515, + "loss": 0.1799, + "grad_norm": 0.16303229331970215, + "learning_rate": 9.88679245283019e-05, + "epoch": 0.509375 + }, + { + "step": 1416, + "timestamp": "2025-12-28T11:08:38.770337", + "elapsed_time": 7833.055427074432, + "loss": 0.1423, + "grad_norm": 0.12514905631542206, + "learning_rate": 9.874213836477988e-05, + "epoch": 0.51 + }, + { + "step": 1417, + "timestamp": "2025-12-28T11:08:49.045039", + "elapsed_time": 7843.330129623413, + "loss": 0.128, + "grad_norm": 0.41818127036094666, + "learning_rate": 9.861635220125787e-05, + "epoch": 0.510625 + }, + { + "step": 1418, + "timestamp": "2025-12-28T11:08:56.563729", + "elapsed_time": 7850.848820209503, + "loss": 0.384, + "grad_norm": 0.1607155203819275, + "learning_rate": 9.849056603773585e-05, + "epoch": 0.51125 + }, + { + "step": 1419, + "timestamp": "2025-12-28T11:09:04.998828", + "elapsed_time": 7859.2839179039, + "loss": 0.1517, + "grad_norm": 0.11149600148200989, + "learning_rate": 9.836477987421384e-05, + "epoch": 0.511875 + }, + { + "step": 1420, + "timestamp": "2025-12-28T11:09:15.250027", + "elapsed_time": 7869.53511762619, + "loss": 0.16, + "grad_norm": 0.10607406497001648, + "learning_rate": 9.823899371069183e-05, + "epoch": 0.5125 + }, + { + "step": 1421, + "timestamp": "2025-12-28T11:09:24.383519", + "elapsed_time": 7878.668609380722, + "loss": 0.206, + "grad_norm": 0.12708930671215057, + "learning_rate": 9.811320754716981e-05, + "epoch": 0.513125 + }, + { + "step": 1422, + "timestamp": "2025-12-28T11:09:39.780418", + "elapsed_time": 7894.06550860405, + "loss": 0.3202, + "grad_norm": 0.12644296884536743, + "learning_rate": 9.79874213836478e-05, + "epoch": 0.51375 + }, + { + "step": 1423, + "timestamp": "2025-12-28T11:09:52.468194", + "elapsed_time": 7906.753284931183, + "loss": 0.1625, + "grad_norm": 0.1026865616440773, + "learning_rate": 9.786163522012578e-05, + "epoch": 0.514375 + }, + { + "step": 1424, + "timestamp": "2025-12-28T11:10:07.620418", + "elapsed_time": 7921.905508518219, + "loss": 0.1569, + "grad_norm": 0.08125265687704086, + "learning_rate": 9.773584905660378e-05, + "epoch": 0.515 + }, + { + "step": 1425, + "timestamp": "2025-12-28T11:10:19.799493", + "elapsed_time": 7934.084583282471, + "loss": 0.1262, + "grad_norm": 0.09111649543046951, + "learning_rate": 9.761006289308177e-05, + "epoch": 0.515625 + }, + { + "step": 1426, + "timestamp": "2025-12-28T11:10:25.818900", + "elapsed_time": 7940.1039955616, + "loss": 0.1587, + "grad_norm": 0.1336372345685959, + "learning_rate": 9.748427672955975e-05, + "epoch": 0.51625 + }, + { + "step": 1427, + "timestamp": "2025-12-28T11:10:35.455757", + "elapsed_time": 7949.740847349167, + "loss": 0.1667, + "grad_norm": 0.11319567263126373, + "learning_rate": 9.735849056603774e-05, + "epoch": 0.516875 + }, + { + "step": 1428, + "timestamp": "2025-12-28T11:10:40.995568", + "elapsed_time": 7955.280659198761, + "loss": 0.1724, + "grad_norm": 0.13806995749473572, + "learning_rate": 9.723270440251573e-05, + "epoch": 0.5175 + }, + { + "step": 1429, + "timestamp": "2025-12-28T11:10:51.368410", + "elapsed_time": 7965.653500318527, + "loss": 0.2761, + "grad_norm": 0.12941741943359375, + "learning_rate": 9.710691823899371e-05, + "epoch": 0.518125 + }, + { + "step": 1430, + "timestamp": "2025-12-28T11:10:57.451776", + "elapsed_time": 7971.736865758896, + "loss": 0.1955, + "grad_norm": 0.13250701129436493, + "learning_rate": 9.69811320754717e-05, + "epoch": 0.51875 + }, + { + "step": 1431, + "timestamp": "2025-12-28T11:11:06.229749", + "elapsed_time": 7980.514839172363, + "loss": 0.1667, + "grad_norm": 0.11635927855968475, + "learning_rate": 9.685534591194969e-05, + "epoch": 0.519375 + }, + { + "step": 1432, + "timestamp": "2025-12-28T11:11:15.100261", + "elapsed_time": 7989.385351657867, + "loss": 0.1818, + "grad_norm": 0.1113656610250473, + "learning_rate": 9.672955974842768e-05, + "epoch": 0.52 + }, + { + "step": 1433, + "timestamp": "2025-12-28T11:11:35.992872", + "elapsed_time": 8010.277962684631, + "loss": 0.1138, + "grad_norm": 0.06887345761060715, + "learning_rate": 9.660377358490567e-05, + "epoch": 0.520625 + }, + { + "step": 1434, + "timestamp": "2025-12-28T11:11:46.465445", + "elapsed_time": 8020.75053524971, + "loss": 0.1542, + "grad_norm": 0.1157265156507492, + "learning_rate": 9.647798742138365e-05, + "epoch": 0.52125 + }, + { + "step": 1435, + "timestamp": "2025-12-28T11:11:56.976986", + "elapsed_time": 8031.26207613945, + "loss": 0.1418, + "grad_norm": 0.10130833089351654, + "learning_rate": 9.635220125786164e-05, + "epoch": 0.521875 + }, + { + "step": 1436, + "timestamp": "2025-12-28T11:12:01.707156", + "elapsed_time": 8035.9922461509705, + "loss": 0.3286, + "grad_norm": 0.18273454904556274, + "learning_rate": 9.622641509433963e-05, + "epoch": 0.5225 + }, + { + "step": 1437, + "timestamp": "2025-12-28T11:12:07.714931", + "elapsed_time": 8042.000021696091, + "loss": 0.4137, + "grad_norm": 0.19872736930847168, + "learning_rate": 9.610062893081761e-05, + "epoch": 0.523125 + }, + { + "step": 1438, + "timestamp": "2025-12-28T11:12:19.337217", + "elapsed_time": 8053.622307062149, + "loss": 0.1493, + "grad_norm": 0.1055750697851181, + "learning_rate": 9.59748427672956e-05, + "epoch": 0.52375 + }, + { + "step": 1439, + "timestamp": "2025-12-28T11:12:26.407821", + "elapsed_time": 8060.692911624908, + "loss": 0.1795, + "grad_norm": 0.25722602009773254, + "learning_rate": 9.584905660377359e-05, + "epoch": 0.524375 + }, + { + "step": 1440, + "timestamp": "2025-12-28T11:12:38.538641", + "elapsed_time": 8072.823730945587, + "loss": 0.1517, + "grad_norm": 0.10837842524051666, + "learning_rate": 9.572327044025158e-05, + "epoch": 0.525 + }, + { + "step": 1441, + "timestamp": "2025-12-28T11:12:53.364199", + "elapsed_time": 8087.649289131165, + "loss": 0.0904, + "grad_norm": 0.10741348564624786, + "learning_rate": 9.559748427672957e-05, + "epoch": 0.525625 + }, + { + "step": 1442, + "timestamp": "2025-12-28T11:13:14.254540", + "elapsed_time": 8108.539630651474, + "loss": 0.0746, + "grad_norm": 0.07680816948413849, + "learning_rate": 9.547169811320755e-05, + "epoch": 0.52625 + }, + { + "step": 1443, + "timestamp": "2025-12-28T11:13:29.340440", + "elapsed_time": 8123.625530004501, + "loss": 0.1088, + "grad_norm": 0.07666955888271332, + "learning_rate": 9.534591194968554e-05, + "epoch": 0.526875 + }, + { + "step": 1444, + "timestamp": "2025-12-28T11:13:39.098916", + "elapsed_time": 8133.384006500244, + "loss": 0.2508, + "grad_norm": 0.1188189759850502, + "learning_rate": 9.522012578616352e-05, + "epoch": 0.5275 + }, + { + "step": 1445, + "timestamp": "2025-12-28T11:13:44.861179", + "elapsed_time": 8139.146269083023, + "loss": 0.2474, + "grad_norm": 0.15990658104419708, + "learning_rate": 9.509433962264151e-05, + "epoch": 0.528125 + }, + { + "step": 1446, + "timestamp": "2025-12-28T11:13:52.613787", + "elapsed_time": 8146.898877620697, + "loss": 0.32, + "grad_norm": 0.15008442103862762, + "learning_rate": 9.496855345911951e-05, + "epoch": 0.52875 + }, + { + "step": 1447, + "timestamp": "2025-12-28T11:14:05.480355", + "elapsed_time": 8159.765445947647, + "loss": 0.1453, + "grad_norm": 0.0898258164525032, + "learning_rate": 9.484276729559749e-05, + "epoch": 0.529375 + }, + { + "step": 1448, + "timestamp": "2025-12-28T11:14:15.110555", + "elapsed_time": 8169.395644903183, + "loss": 0.187, + "grad_norm": 0.1340561956167221, + "learning_rate": 9.471698113207548e-05, + "epoch": 0.53 + }, + { + "step": 1449, + "timestamp": "2025-12-28T11:14:22.202495", + "elapsed_time": 8176.487585544586, + "loss": 0.176, + "grad_norm": 0.16083924472332, + "learning_rate": 9.459119496855347e-05, + "epoch": 0.530625 + }, + { + "step": 1450, + "timestamp": "2025-12-28T11:14:28.754429", + "elapsed_time": 8183.039519309998, + "loss": 0.2644, + "grad_norm": 0.15739032626152039, + "learning_rate": 9.446540880503145e-05, + "epoch": 0.53125 + }, + { + "step": 1451, + "timestamp": "2025-12-28T11:14:38.517478", + "elapsed_time": 8192.802568435669, + "loss": 0.1586, + "grad_norm": 0.11289030313491821, + "learning_rate": 9.433962264150944e-05, + "epoch": 0.531875 + }, + { + "step": 1452, + "timestamp": "2025-12-28T11:14:44.085834", + "elapsed_time": 8198.370924711227, + "loss": 0.1873, + "grad_norm": 0.1545426845550537, + "learning_rate": 9.421383647798742e-05, + "epoch": 0.5325 + }, + { + "step": 1453, + "timestamp": "2025-12-28T11:14:49.388857", + "elapsed_time": 8203.673947095871, + "loss": 0.4183, + "grad_norm": 0.2863948941230774, + "learning_rate": 9.408805031446541e-05, + "epoch": 0.533125 + }, + { + "step": 1454, + "timestamp": "2025-12-28T11:14:59.422428", + "elapsed_time": 8213.707518577576, + "loss": 0.1046, + "grad_norm": 0.0934741273522377, + "learning_rate": 9.396226415094341e-05, + "epoch": 0.53375 + }, + { + "step": 1455, + "timestamp": "2025-12-28T11:15:05.324002", + "elapsed_time": 8219.6090965271, + "loss": 0.2274, + "grad_norm": 0.150177463889122, + "learning_rate": 9.383647798742139e-05, + "epoch": 0.534375 + }, + { + "step": 1456, + "timestamp": "2025-12-28T11:15:09.916095", + "elapsed_time": 8224.201185464859, + "loss": 0.3107, + "grad_norm": 0.19902926683425903, + "learning_rate": 9.371069182389938e-05, + "epoch": 0.535 + }, + { + "step": 1457, + "timestamp": "2025-12-28T11:15:17.157963", + "elapsed_time": 8231.443053722382, + "loss": 0.2108, + "grad_norm": 0.14318348467350006, + "learning_rate": 9.358490566037736e-05, + "epoch": 0.535625 + }, + { + "step": 1458, + "timestamp": "2025-12-28T11:15:23.456852", + "elapsed_time": 8237.741942882538, + "loss": 0.1734, + "grad_norm": 0.1470271497964859, + "learning_rate": 9.345911949685535e-05, + "epoch": 0.53625 + }, + { + "step": 1459, + "timestamp": "2025-12-28T11:15:34.335466", + "elapsed_time": 8248.620555877686, + "loss": 0.1041, + "grad_norm": 0.08832447230815887, + "learning_rate": 9.333333333333334e-05, + "epoch": 0.536875 + }, + { + "step": 1460, + "timestamp": "2025-12-28T11:15:42.518003", + "elapsed_time": 8256.803106307983, + "loss": 0.333, + "grad_norm": 0.17225605249404907, + "learning_rate": 9.320754716981132e-05, + "epoch": 0.5375 + }, + { + "step": 1461, + "timestamp": "2025-12-28T11:15:52.109549", + "elapsed_time": 8266.394639015198, + "loss": 0.1827, + "grad_norm": 0.11171706020832062, + "learning_rate": 9.308176100628931e-05, + "epoch": 0.538125 + }, + { + "step": 1462, + "timestamp": "2025-12-28T11:15:59.068953", + "elapsed_time": 8273.354043006897, + "loss": 0.1926, + "grad_norm": 0.13213799893856049, + "learning_rate": 9.295597484276731e-05, + "epoch": 0.53875 + }, + { + "step": 1463, + "timestamp": "2025-12-28T11:16:12.804617", + "elapsed_time": 8287.089706897736, + "loss": 0.1089, + "grad_norm": 0.08467783033847809, + "learning_rate": 9.283018867924529e-05, + "epoch": 0.539375 + }, + { + "step": 1464, + "timestamp": "2025-12-28T11:16:18.706671", + "elapsed_time": 8292.991761922836, + "loss": 0.4806, + "grad_norm": 0.20453688502311707, + "learning_rate": 9.270440251572328e-05, + "epoch": 0.54 + }, + { + "step": 1465, + "timestamp": "2025-12-28T11:16:28.734388", + "elapsed_time": 8303.01947760582, + "loss": 0.1556, + "grad_norm": 0.11547412723302841, + "learning_rate": 9.257861635220126e-05, + "epoch": 0.540625 + }, + { + "step": 1466, + "timestamp": "2025-12-28T11:16:38.462518", + "elapsed_time": 8312.74760890007, + "loss": 0.1464, + "grad_norm": 0.10674792528152466, + "learning_rate": 9.245283018867925e-05, + "epoch": 0.54125 + }, + { + "step": 1467, + "timestamp": "2025-12-28T11:16:49.934266", + "elapsed_time": 8324.219356536865, + "loss": 0.1205, + "grad_norm": 0.10482378304004669, + "learning_rate": 9.232704402515724e-05, + "epoch": 0.541875 + }, + { + "step": 1468, + "timestamp": "2025-12-28T11:16:59.067362", + "elapsed_time": 8333.3524518013, + "loss": 0.1575, + "grad_norm": 0.18603090941905975, + "learning_rate": 9.220125786163522e-05, + "epoch": 0.5425 + }, + { + "step": 1469, + "timestamp": "2025-12-28T11:17:06.628602", + "elapsed_time": 8340.913692951202, + "loss": 0.2441, + "grad_norm": 0.16684553027153015, + "learning_rate": 9.207547169811322e-05, + "epoch": 0.543125 + }, + { + "step": 1470, + "timestamp": "2025-12-28T11:17:16.426551", + "elapsed_time": 8350.711641550064, + "loss": 0.3306, + "grad_norm": 0.14771923422813416, + "learning_rate": 9.19496855345912e-05, + "epoch": 0.54375 + }, + { + "step": 1471, + "timestamp": "2025-12-28T11:17:29.569642", + "elapsed_time": 8363.85473227501, + "loss": 0.1172, + "grad_norm": 0.08360818773508072, + "learning_rate": 9.182389937106919e-05, + "epoch": 0.544375 + }, + { + "step": 1472, + "timestamp": "2025-12-28T11:17:35.910254", + "elapsed_time": 8370.195344924927, + "loss": 0.2702, + "grad_norm": 0.1648559272289276, + "learning_rate": 9.169811320754718e-05, + "epoch": 0.545 + }, + { + "step": 1473, + "timestamp": "2025-12-28T11:17:43.053273", + "elapsed_time": 8377.338364124298, + "loss": 0.207, + "grad_norm": 0.1394606977701187, + "learning_rate": 9.157232704402516e-05, + "epoch": 0.545625 + }, + { + "step": 1474, + "timestamp": "2025-12-28T11:17:49.068090", + "elapsed_time": 8383.353180408478, + "loss": 0.148, + "grad_norm": 0.1300075501203537, + "learning_rate": 9.144654088050315e-05, + "epoch": 0.54625 + }, + { + "step": 1475, + "timestamp": "2025-12-28T11:17:56.519082", + "elapsed_time": 8390.804172039032, + "loss": 0.1676, + "grad_norm": 0.12073966860771179, + "learning_rate": 9.132075471698114e-05, + "epoch": 0.546875 + }, + { + "step": 1476, + "timestamp": "2025-12-28T11:18:08.644421", + "elapsed_time": 8402.92951130867, + "loss": 0.1639, + "grad_norm": 0.10669399052858353, + "learning_rate": 9.119496855345912e-05, + "epoch": 0.5475 + }, + { + "step": 1477, + "timestamp": "2025-12-28T11:18:17.423433", + "elapsed_time": 8411.708523750305, + "loss": 0.1742, + "grad_norm": 0.12219968438148499, + "learning_rate": 9.106918238993712e-05, + "epoch": 0.548125 + }, + { + "step": 1478, + "timestamp": "2025-12-28T11:18:36.177009", + "elapsed_time": 8430.462099313736, + "loss": 0.1255, + "grad_norm": 0.08146153390407562, + "learning_rate": 9.09433962264151e-05, + "epoch": 0.54875 + }, + { + "step": 1479, + "timestamp": "2025-12-28T11:18:46.115780", + "elapsed_time": 8440.400870800018, + "loss": 0.1592, + "grad_norm": 0.1143009215593338, + "learning_rate": 9.081761006289309e-05, + "epoch": 0.549375 + }, + { + "step": 1480, + "timestamp": "2025-12-28T11:18:51.646790", + "elapsed_time": 8445.931880950928, + "loss": 0.1368, + "grad_norm": 0.1322580873966217, + "learning_rate": 9.069182389937108e-05, + "epoch": 0.55 + }, + { + "step": 1481, + "timestamp": "2025-12-28T11:19:03.435806", + "elapsed_time": 8457.72089600563, + "loss": 0.184, + "grad_norm": 0.11221913248300552, + "learning_rate": 9.056603773584906e-05, + "epoch": 0.550625 + }, + { + "step": 1482, + "timestamp": "2025-12-28T11:19:11.463661", + "elapsed_time": 8465.74875164032, + "loss": 0.2432, + "grad_norm": 0.13333527743816376, + "learning_rate": 9.044025157232705e-05, + "epoch": 0.55125 + }, + { + "step": 1483, + "timestamp": "2025-12-28T11:19:20.808185", + "elapsed_time": 8475.093275308609, + "loss": 0.1595, + "grad_norm": 0.11253681033849716, + "learning_rate": 9.031446540880504e-05, + "epoch": 0.551875 + }, + { + "step": 1484, + "timestamp": "2025-12-28T11:19:26.372578", + "elapsed_time": 8480.657668352127, + "loss": 0.2285, + "grad_norm": 0.163836270570755, + "learning_rate": 9.018867924528302e-05, + "epoch": 0.5525 + }, + { + "step": 1485, + "timestamp": "2025-12-28T11:19:37.048053", + "elapsed_time": 8491.333143234253, + "loss": 0.1388, + "grad_norm": 0.1059766411781311, + "learning_rate": 9.006289308176102e-05, + "epoch": 0.553125 + }, + { + "step": 1486, + "timestamp": "2025-12-28T11:19:46.279936", + "elapsed_time": 8500.565026283264, + "loss": 0.1649, + "grad_norm": 0.11681363731622696, + "learning_rate": 8.9937106918239e-05, + "epoch": 0.55375 + }, + { + "step": 1487, + "timestamp": "2025-12-28T11:19:53.538665", + "elapsed_time": 8507.823755979538, + "loss": 0.2236, + "grad_norm": 0.1437806636095047, + "learning_rate": 8.981132075471699e-05, + "epoch": 0.554375 + }, + { + "step": 1488, + "timestamp": "2025-12-28T11:20:00.062395", + "elapsed_time": 8514.347485303879, + "loss": 0.1624, + "grad_norm": 0.13652074337005615, + "learning_rate": 8.968553459119498e-05, + "epoch": 0.555 + }, + { + "step": 1489, + "timestamp": "2025-12-28T11:20:12.801806", + "elapsed_time": 8527.08689570427, + "loss": 0.1095, + "grad_norm": 0.08968634903430939, + "learning_rate": 8.955974842767296e-05, + "epoch": 0.555625 + }, + { + "step": 1490, + "timestamp": "2025-12-28T11:20:19.013698", + "elapsed_time": 8533.298788785934, + "loss": 0.1724, + "grad_norm": 0.16458983719348907, + "learning_rate": 8.943396226415095e-05, + "epoch": 0.55625 + }, + { + "step": 1491, + "timestamp": "2025-12-28T11:20:30.809395", + "elapsed_time": 8545.094485759735, + "loss": 0.1667, + "grad_norm": 0.10606341063976288, + "learning_rate": 8.930817610062893e-05, + "epoch": 0.556875 + }, + { + "step": 1492, + "timestamp": "2025-12-28T11:20:44.762591", + "elapsed_time": 8559.047681331635, + "loss": 0.1019, + "grad_norm": 0.07538938522338867, + "learning_rate": 8.918238993710692e-05, + "epoch": 0.5575 + }, + { + "step": 1493, + "timestamp": "2025-12-28T11:21:01.644690", + "elapsed_time": 8575.92978143692, + "loss": 0.1099, + "grad_norm": 0.08097642660140991, + "learning_rate": 8.905660377358492e-05, + "epoch": 0.558125 + }, + { + "step": 1494, + "timestamp": "2025-12-28T11:21:10.267153", + "elapsed_time": 8584.55224275589, + "loss": 0.1285, + "grad_norm": 0.11686515063047409, + "learning_rate": 8.89308176100629e-05, + "epoch": 0.55875 + }, + { + "step": 1495, + "timestamp": "2025-12-28T11:21:16.174120", + "elapsed_time": 8590.459214687347, + "loss": 0.143, + "grad_norm": 0.1381654292345047, + "learning_rate": 8.880503144654089e-05, + "epoch": 0.559375 + }, + { + "step": 1496, + "timestamp": "2025-12-28T11:21:26.924445", + "elapsed_time": 8601.209535360336, + "loss": 0.1798, + "grad_norm": 0.12096145004034042, + "learning_rate": 8.867924528301888e-05, + "epoch": 0.56 + }, + { + "step": 1497, + "timestamp": "2025-12-28T11:21:34.078375", + "elapsed_time": 8608.363465070724, + "loss": 0.1584, + "grad_norm": 0.13140904903411865, + "learning_rate": 8.855345911949686e-05, + "epoch": 0.560625 + }, + { + "step": 1498, + "timestamp": "2025-12-28T11:21:44.697298", + "elapsed_time": 8618.982388973236, + "loss": 0.2015, + "grad_norm": 0.13744622468948364, + "learning_rate": 8.842767295597485e-05, + "epoch": 0.56125 + }, + { + "step": 1499, + "timestamp": "2025-12-28T11:21:53.321142", + "elapsed_time": 8627.60623216629, + "loss": 0.1702, + "grad_norm": 0.10851988196372986, + "learning_rate": 8.830188679245283e-05, + "epoch": 0.561875 + }, + { + "step": 1500, + "timestamp": "2025-12-28T11:22:01.457244", + "elapsed_time": 8635.7423350811, + "loss": 0.1723, + "grad_norm": 0.123291976749897, + "learning_rate": 8.817610062893082e-05, + "epoch": 0.5625 + }, + { + "step": 1501, + "timestamp": "2025-12-28T11:22:14.727687", + "elapsed_time": 8649.012777328491, + "loss": 0.1281, + "grad_norm": 0.09212938696146011, + "learning_rate": 8.805031446540882e-05, + "epoch": 0.563125 + }, + { + "step": 1502, + "timestamp": "2025-12-28T11:22:31.623445", + "elapsed_time": 8665.90853524208, + "loss": 0.1026, + "grad_norm": 0.0903933122754097, + "learning_rate": 8.79245283018868e-05, + "epoch": 0.56375 + }, + { + "step": 1503, + "timestamp": "2025-12-28T11:22:44.799315", + "elapsed_time": 8679.084406137466, + "loss": 0.1257, + "grad_norm": 0.10182217508554459, + "learning_rate": 8.779874213836479e-05, + "epoch": 0.564375 + }, + { + "step": 1504, + "timestamp": "2025-12-28T11:22:49.619397", + "elapsed_time": 8683.904487848282, + "loss": 0.3112, + "grad_norm": 0.19652864336967468, + "learning_rate": 8.767295597484277e-05, + "epoch": 0.565 + }, + { + "step": 1505, + "timestamp": "2025-12-28T11:22:55.523465", + "elapsed_time": 8689.808555364609, + "loss": 0.1635, + "grad_norm": 0.12541894614696503, + "learning_rate": 8.754716981132076e-05, + "epoch": 0.565625 + }, + { + "step": 1506, + "timestamp": "2025-12-28T11:23:00.308337", + "elapsed_time": 8694.593427419662, + "loss": 0.2257, + "grad_norm": 0.18083442747592926, + "learning_rate": 8.742138364779875e-05, + "epoch": 0.56625 + }, + { + "step": 1507, + "timestamp": "2025-12-28T11:23:05.013413", + "elapsed_time": 8699.298503875732, + "loss": 0.2438, + "grad_norm": 0.19925068318843842, + "learning_rate": 8.729559748427673e-05, + "epoch": 0.566875 + }, + { + "step": 1508, + "timestamp": "2025-12-28T11:23:15.771520", + "elapsed_time": 8710.056610822678, + "loss": 0.1307, + "grad_norm": 0.0917578861117363, + "learning_rate": 8.716981132075472e-05, + "epoch": 0.5675 + }, + { + "step": 1509, + "timestamp": "2025-12-28T11:23:35.259930", + "elapsed_time": 8729.545020341873, + "loss": 0.1047, + "grad_norm": 0.0767156183719635, + "learning_rate": 8.704402515723272e-05, + "epoch": 0.568125 + }, + { + "step": 1510, + "timestamp": "2025-12-28T11:23:53.128246", + "elapsed_time": 8747.413336277008, + "loss": 0.1, + "grad_norm": 0.08321381360292435, + "learning_rate": 8.69182389937107e-05, + "epoch": 0.56875 + }, + { + "step": 1511, + "timestamp": "2025-12-28T11:24:06.430650", + "elapsed_time": 8760.715740919113, + "loss": 0.1469, + "grad_norm": 0.09777943044900894, + "learning_rate": 8.679245283018869e-05, + "epoch": 0.569375 + }, + { + "step": 1512, + "timestamp": "2025-12-28T11:24:17.892332", + "elapsed_time": 8772.17742228508, + "loss": 0.1326, + "grad_norm": 0.10049686580896378, + "learning_rate": 8.666666666666667e-05, + "epoch": 0.57 + }, + { + "step": 1513, + "timestamp": "2025-12-28T11:24:25.872236", + "elapsed_time": 8780.157325983047, + "loss": 0.1542, + "grad_norm": 0.11362040787935257, + "learning_rate": 8.654088050314466e-05, + "epoch": 0.570625 + }, + { + "step": 1514, + "timestamp": "2025-12-28T11:24:30.494130", + "elapsed_time": 8784.779220819473, + "loss": 0.2623, + "grad_norm": 0.1871953308582306, + "learning_rate": 8.641509433962265e-05, + "epoch": 0.57125 + }, + { + "step": 1515, + "timestamp": "2025-12-28T11:24:36.507144", + "elapsed_time": 8790.792234420776, + "loss": 0.1441, + "grad_norm": 0.1314094513654709, + "learning_rate": 8.628930817610063e-05, + "epoch": 0.571875 + }, + { + "step": 1516, + "timestamp": "2025-12-28T11:24:49.493922", + "elapsed_time": 8803.779012680054, + "loss": 0.1849, + "grad_norm": 0.10825859010219574, + "learning_rate": 8.616352201257863e-05, + "epoch": 0.5725 + }, + { + "step": 1517, + "timestamp": "2025-12-28T11:24:56.586838", + "elapsed_time": 8810.871928453445, + "loss": 0.223, + "grad_norm": 0.13489221036434174, + "learning_rate": 8.603773584905662e-05, + "epoch": 0.573125 + }, + { + "step": 1518, + "timestamp": "2025-12-28T11:25:07.311983", + "elapsed_time": 8821.597073554993, + "loss": 0.1414, + "grad_norm": 0.1013222485780716, + "learning_rate": 8.59119496855346e-05, + "epoch": 0.57375 + }, + { + "step": 1519, + "timestamp": "2025-12-28T11:25:19.598691", + "elapsed_time": 8833.883781433105, + "loss": 0.1759, + "grad_norm": 0.10948710888624191, + "learning_rate": 8.578616352201259e-05, + "epoch": 0.574375 + }, + { + "step": 1520, + "timestamp": "2025-12-28T11:25:24.829791", + "elapsed_time": 8839.114881277084, + "loss": 0.2123, + "grad_norm": 0.1585264950990677, + "learning_rate": 8.566037735849057e-05, + "epoch": 0.575 + }, + { + "step": 1521, + "timestamp": "2025-12-28T11:25:31.895164", + "elapsed_time": 8846.180253505707, + "loss": 0.2303, + "grad_norm": 0.15961548686027527, + "learning_rate": 8.553459119496856e-05, + "epoch": 0.575625 + }, + { + "step": 1522, + "timestamp": "2025-12-28T11:25:41.238568", + "elapsed_time": 8855.523658752441, + "loss": 0.2164, + "grad_norm": 0.12455689162015915, + "learning_rate": 8.540880503144655e-05, + "epoch": 0.57625 + }, + { + "step": 1523, + "timestamp": "2025-12-28T11:26:00.476163", + "elapsed_time": 8874.76125407219, + "loss": 0.0883, + "grad_norm": 0.06583801656961441, + "learning_rate": 8.528301886792453e-05, + "epoch": 0.576875 + }, + { + "step": 1524, + "timestamp": "2025-12-28T11:26:06.460351", + "elapsed_time": 8880.745441198349, + "loss": 0.196, + "grad_norm": 0.14068111777305603, + "learning_rate": 8.515723270440253e-05, + "epoch": 0.5775 + }, + { + "step": 1525, + "timestamp": "2025-12-28T11:26:20.362662", + "elapsed_time": 8894.647752046585, + "loss": 0.125, + "grad_norm": 0.08721217513084412, + "learning_rate": 8.50314465408805e-05, + "epoch": 0.578125 + }, + { + "step": 1526, + "timestamp": "2025-12-28T11:26:32.991352", + "elapsed_time": 8907.276442289352, + "loss": 0.1126, + "grad_norm": 0.08798250555992126, + "learning_rate": 8.49056603773585e-05, + "epoch": 0.57875 + }, + { + "step": 1527, + "timestamp": "2025-12-28T11:26:40.116742", + "elapsed_time": 8914.401833057404, + "loss": 0.1558, + "grad_norm": 0.12924781441688538, + "learning_rate": 8.477987421383649e-05, + "epoch": 0.579375 + }, + { + "step": 1528, + "timestamp": "2025-12-28T11:26:53.096204", + "elapsed_time": 8927.381293773651, + "loss": 0.1997, + "grad_norm": 0.11501215398311615, + "learning_rate": 8.465408805031447e-05, + "epoch": 0.58 + }, + { + "step": 1529, + "timestamp": "2025-12-28T11:27:01.938294", + "elapsed_time": 8936.223383903503, + "loss": 0.1264, + "grad_norm": 0.1118590384721756, + "learning_rate": 8.452830188679246e-05, + "epoch": 0.580625 + }, + { + "step": 1530, + "timestamp": "2025-12-28T11:27:13.567330", + "elapsed_time": 8947.852420091629, + "loss": 0.1195, + "grad_norm": 0.08885247260332108, + "learning_rate": 8.440251572327045e-05, + "epoch": 0.58125 + }, + { + "step": 1531, + "timestamp": "2025-12-28T11:27:28.081330", + "elapsed_time": 8962.366420507431, + "loss": 0.1682, + "grad_norm": 0.10244069248437881, + "learning_rate": 8.427672955974843e-05, + "epoch": 0.581875 + }, + { + "step": 1532, + "timestamp": "2025-12-28T11:27:35.182126", + "elapsed_time": 8969.46721625328, + "loss": 0.2695, + "grad_norm": 0.14979572594165802, + "learning_rate": 8.415094339622643e-05, + "epoch": 0.5825 + }, + { + "step": 1533, + "timestamp": "2025-12-28T11:27:43.626751", + "elapsed_time": 8977.911841392517, + "loss": 0.3527, + "grad_norm": 0.2613980174064636, + "learning_rate": 8.40251572327044e-05, + "epoch": 0.583125 + }, + { + "step": 1534, + "timestamp": "2025-12-28T11:28:04.513663", + "elapsed_time": 8998.798753499985, + "loss": 0.1256, + "grad_norm": 0.09623493254184723, + "learning_rate": 8.38993710691824e-05, + "epoch": 0.58375 + }, + { + "step": 1535, + "timestamp": "2025-12-28T11:28:10.502220", + "elapsed_time": 9004.787314653397, + "loss": 0.1629, + "grad_norm": 0.19102250039577484, + "learning_rate": 8.377358490566039e-05, + "epoch": 0.584375 + }, + { + "step": 1536, + "timestamp": "2025-12-28T11:28:22.123196", + "elapsed_time": 9016.40828704834, + "loss": 0.1249, + "grad_norm": 0.2857312262058258, + "learning_rate": 8.364779874213837e-05, + "epoch": 0.585 + }, + { + "step": 1537, + "timestamp": "2025-12-28T11:28:33.969444", + "elapsed_time": 9028.254534721375, + "loss": 0.1436, + "grad_norm": 0.0983673706650734, + "learning_rate": 8.352201257861636e-05, + "epoch": 0.585625 + }, + { + "step": 1538, + "timestamp": "2025-12-28T11:28:40.851432", + "elapsed_time": 9035.136521816254, + "loss": 0.2335, + "grad_norm": 0.15554088354110718, + "learning_rate": 8.339622641509434e-05, + "epoch": 0.58625 + }, + { + "step": 1539, + "timestamp": "2025-12-28T11:28:49.945462", + "elapsed_time": 9044.230551958084, + "loss": 0.1401, + "grad_norm": 0.11018933355808258, + "learning_rate": 8.327044025157233e-05, + "epoch": 0.586875 + }, + { + "step": 1540, + "timestamp": "2025-12-28T11:28:58.168763", + "elapsed_time": 9052.45385313034, + "loss": 0.1303, + "grad_norm": 0.1073099821805954, + "learning_rate": 8.314465408805033e-05, + "epoch": 0.5875 + }, + { + "step": 1541, + "timestamp": "2025-12-28T11:29:12.414620", + "elapsed_time": 9066.699710607529, + "loss": 0.1614, + "grad_norm": 0.10494954884052277, + "learning_rate": 8.30188679245283e-05, + "epoch": 0.588125 + }, + { + "step": 1542, + "timestamp": "2025-12-28T11:29:20.721392", + "elapsed_time": 9075.006482601166, + "loss": 0.1503, + "grad_norm": 0.11659257858991623, + "learning_rate": 8.28930817610063e-05, + "epoch": 0.58875 + }, + { + "step": 1543, + "timestamp": "2025-12-28T11:29:33.420507", + "elapsed_time": 9087.705597639084, + "loss": 0.1244, + "grad_norm": 0.09270057827234268, + "learning_rate": 8.276729559748429e-05, + "epoch": 0.589375 + }, + { + "step": 1544, + "timestamp": "2025-12-28T11:29:42.359077", + "elapsed_time": 9096.64416718483, + "loss": 0.1409, + "grad_norm": 0.11551465094089508, + "learning_rate": 8.264150943396227e-05, + "epoch": 0.59 + }, + { + "step": 1545, + "timestamp": "2025-12-28T11:29:58.755264", + "elapsed_time": 9113.040354728699, + "loss": 0.1204, + "grad_norm": 0.09296566992998123, + "learning_rate": 8.251572327044026e-05, + "epoch": 0.590625 + }, + { + "step": 1546, + "timestamp": "2025-12-28T11:30:04.541006", + "elapsed_time": 9118.826096773148, + "loss": 0.1537, + "grad_norm": 0.33984237909317017, + "learning_rate": 8.238993710691824e-05, + "epoch": 0.59125 + }, + { + "step": 1547, + "timestamp": "2025-12-28T11:30:15.055486", + "elapsed_time": 9129.340576410294, + "loss": 0.1195, + "grad_norm": 0.11354348808526993, + "learning_rate": 8.226415094339623e-05, + "epoch": 0.591875 + }, + { + "step": 1548, + "timestamp": "2025-12-28T11:30:20.580712", + "elapsed_time": 9134.865801811218, + "loss": 0.2554, + "grad_norm": 0.17498965561389923, + "learning_rate": 8.213836477987423e-05, + "epoch": 0.5925 + }, + { + "step": 1549, + "timestamp": "2025-12-28T11:30:25.852317", + "elapsed_time": 9140.137407064438, + "loss": 0.2915, + "grad_norm": 0.17898206412792206, + "learning_rate": 8.20125786163522e-05, + "epoch": 0.593125 + }, + { + "step": 1550, + "timestamp": "2025-12-28T11:30:37.662920", + "elapsed_time": 9151.948010444641, + "loss": 0.2806, + "grad_norm": 0.14019358158111572, + "learning_rate": 8.18867924528302e-05, + "epoch": 0.59375 + }, + { + "step": 1551, + "timestamp": "2025-12-28T11:30:45.929046", + "elapsed_time": 9160.214137077332, + "loss": 0.171, + "grad_norm": 0.12632305920124054, + "learning_rate": 8.176100628930818e-05, + "epoch": 0.594375 + }, + { + "step": 1552, + "timestamp": "2025-12-28T11:30:56.181321", + "elapsed_time": 9170.466411352158, + "loss": 0.1839, + "grad_norm": 0.11256668716669083, + "learning_rate": 8.163522012578617e-05, + "epoch": 0.595 + }, + { + "step": 1553, + "timestamp": "2025-12-28T11:31:07.963905", + "elapsed_time": 9182.248995542526, + "loss": 0.325, + "grad_norm": 0.1411469578742981, + "learning_rate": 8.150943396226416e-05, + "epoch": 0.595625 + }, + { + "step": 1554, + "timestamp": "2025-12-28T11:31:15.818950", + "elapsed_time": 9190.104040145874, + "loss": 0.2134, + "grad_norm": 0.157893106341362, + "learning_rate": 8.138364779874214e-05, + "epoch": 0.59625 + }, + { + "step": 1555, + "timestamp": "2025-12-28T11:31:22.342648", + "elapsed_time": 9196.627738714218, + "loss": 0.1857, + "grad_norm": 0.12139203399419785, + "learning_rate": 8.125786163522013e-05, + "epoch": 0.596875 + }, + { + "step": 1556, + "timestamp": "2025-12-28T11:31:32.092816", + "elapsed_time": 9206.37790632248, + "loss": 0.1616, + "grad_norm": 0.10829068720340729, + "learning_rate": 8.113207547169813e-05, + "epoch": 0.5975 + }, + { + "step": 1557, + "timestamp": "2025-12-28T11:31:38.204155", + "elapsed_time": 9212.489244699478, + "loss": 0.2388, + "grad_norm": 0.15463301539421082, + "learning_rate": 8.10062893081761e-05, + "epoch": 0.598125 + }, + { + "step": 1558, + "timestamp": "2025-12-28T11:31:46.978385", + "elapsed_time": 9221.263475179672, + "loss": 0.1431, + "grad_norm": 0.1963123232126236, + "learning_rate": 8.08805031446541e-05, + "epoch": 0.59875 + }, + { + "step": 1559, + "timestamp": "2025-12-28T11:31:51.937096", + "elapsed_time": 9226.222190618515, + "loss": 0.2025, + "grad_norm": 0.15821681916713715, + "learning_rate": 8.075471698113208e-05, + "epoch": 0.599375 + }, + { + "step": 1560, + "timestamp": "2025-12-28T11:32:04.000430", + "elapsed_time": 9238.285520553589, + "loss": 0.3795, + "grad_norm": 0.1718837171792984, + "learning_rate": 8.062893081761007e-05, + "epoch": 0.6 + }, + { + "step": 1561, + "timestamp": "2025-12-28T11:32:08.448180", + "elapsed_time": 9242.73327088356, + "loss": 0.2619, + "grad_norm": 0.19040848314762115, + "learning_rate": 8.050314465408806e-05, + "epoch": 0.600625 + }, + { + "step": 1562, + "timestamp": "2025-12-28T11:32:15.075715", + "elapsed_time": 9249.360805511475, + "loss": 0.1528, + "grad_norm": 0.14181895554065704, + "learning_rate": 8.037735849056604e-05, + "epoch": 0.60125 + }, + { + "step": 1563, + "timestamp": "2025-12-28T11:32:24.698558", + "elapsed_time": 9258.983648777008, + "loss": 0.1461, + "grad_norm": 0.11014454066753387, + "learning_rate": 8.025157232704403e-05, + "epoch": 0.601875 + }, + { + "step": 1564, + "timestamp": "2025-12-28T11:32:29.643873", + "elapsed_time": 9263.928963661194, + "loss": 0.3451, + "grad_norm": 0.19026674330234528, + "learning_rate": 8.012578616352203e-05, + "epoch": 0.6025 + }, + { + "step": 1565, + "timestamp": "2025-12-28T11:32:36.118502", + "elapsed_time": 9270.403592348099, + "loss": 0.2698, + "grad_norm": 0.15855702757835388, + "learning_rate": 8e-05, + "epoch": 0.603125 + }, + { + "step": 1566, + "timestamp": "2025-12-28T11:32:47.584312", + "elapsed_time": 9281.869402885437, + "loss": 0.1317, + "grad_norm": 0.10985270142555237, + "learning_rate": 7.9874213836478e-05, + "epoch": 0.60375 + }, + { + "step": 1567, + "timestamp": "2025-12-28T11:32:57.951733", + "elapsed_time": 9292.236823320389, + "loss": 0.1344, + "grad_norm": 0.09886456280946732, + "learning_rate": 7.974842767295598e-05, + "epoch": 0.604375 + }, + { + "step": 1568, + "timestamp": "2025-12-28T11:33:08.832307", + "elapsed_time": 9303.117396831512, + "loss": 0.3551, + "grad_norm": 0.13379919528961182, + "learning_rate": 7.962264150943397e-05, + "epoch": 0.605 + }, + { + "step": 1569, + "timestamp": "2025-12-28T11:33:15.380680", + "elapsed_time": 9309.665770292282, + "loss": 0.4387, + "grad_norm": 0.20473594963550568, + "learning_rate": 7.949685534591196e-05, + "epoch": 0.605625 + }, + { + "step": 1570, + "timestamp": "2025-12-28T11:33:28.420603", + "elapsed_time": 9322.705693244934, + "loss": 0.1534, + "grad_norm": 0.09838785231113434, + "learning_rate": 7.937106918238994e-05, + "epoch": 0.60625 + }, + { + "step": 1571, + "timestamp": "2025-12-28T11:33:36.492152", + "elapsed_time": 9330.777242422104, + "loss": 0.4969, + "grad_norm": 0.19386343657970428, + "learning_rate": 7.924528301886794e-05, + "epoch": 0.606875 + }, + { + "step": 1572, + "timestamp": "2025-12-28T11:33:44.673340", + "elapsed_time": 9338.958430051804, + "loss": 0.1638, + "grad_norm": 0.1306898593902588, + "learning_rate": 7.911949685534591e-05, + "epoch": 0.6075 + }, + { + "step": 1573, + "timestamp": "2025-12-28T11:33:49.969537", + "elapsed_time": 9344.254627227783, + "loss": 0.1831, + "grad_norm": 0.1826959103345871, + "learning_rate": 7.899371069182391e-05, + "epoch": 0.608125 + }, + { + "step": 1574, + "timestamp": "2025-12-28T11:33:57.344461", + "elapsed_time": 9351.629550933838, + "loss": 0.1919, + "grad_norm": 0.1459428369998932, + "learning_rate": 7.88679245283019e-05, + "epoch": 0.60875 + }, + { + "step": 1575, + "timestamp": "2025-12-28T11:34:07.601020", + "elapsed_time": 9361.886110544205, + "loss": 0.1914, + "grad_norm": 0.11683019995689392, + "learning_rate": 7.874213836477988e-05, + "epoch": 0.609375 + }, + { + "step": 1576, + "timestamp": "2025-12-28T11:34:12.734692", + "elapsed_time": 9367.019781827927, + "loss": 0.162, + "grad_norm": 0.14056500792503357, + "learning_rate": 7.861635220125787e-05, + "epoch": 0.61 + }, + { + "step": 1577, + "timestamp": "2025-12-28T11:34:30.402188", + "elapsed_time": 9384.687278032303, + "loss": 0.1201, + "grad_norm": 0.08820690959692001, + "learning_rate": 7.849056603773586e-05, + "epoch": 0.610625 + }, + { + "step": 1578, + "timestamp": "2025-12-28T11:34:39.188114", + "elapsed_time": 9393.473204135895, + "loss": 0.2578, + "grad_norm": 0.14672920107841492, + "learning_rate": 7.836477987421384e-05, + "epoch": 0.61125 + }, + { + "step": 1579, + "timestamp": "2025-12-28T11:34:50.013659", + "elapsed_time": 9404.298749446869, + "loss": 0.1506, + "grad_norm": 0.11653705686330795, + "learning_rate": 7.823899371069184e-05, + "epoch": 0.611875 + }, + { + "step": 1580, + "timestamp": "2025-12-28T11:34:54.799485", + "elapsed_time": 9409.084575176239, + "loss": 0.1957, + "grad_norm": 0.1690455675125122, + "learning_rate": 7.811320754716981e-05, + "epoch": 0.6125 + }, + { + "step": 1581, + "timestamp": "2025-12-28T11:34:58.274219", + "elapsed_time": 9412.559309482574, + "loss": 0.3539, + "grad_norm": 0.24397805333137512, + "learning_rate": 7.798742138364781e-05, + "epoch": 0.613125 + }, + { + "step": 1582, + "timestamp": "2025-12-28T11:35:05.235925", + "elapsed_time": 9419.521015405655, + "loss": 0.3798, + "grad_norm": 0.22178561985492706, + "learning_rate": 7.78616352201258e-05, + "epoch": 0.61375 + }, + { + "step": 1583, + "timestamp": "2025-12-28T11:35:13.675308", + "elapsed_time": 9427.960398435593, + "loss": 0.2782, + "grad_norm": 0.14682307839393616, + "learning_rate": 7.773584905660378e-05, + "epoch": 0.614375 + }, + { + "step": 1584, + "timestamp": "2025-12-28T11:35:27.622590", + "elapsed_time": 9441.907680273056, + "loss": 0.1162, + "grad_norm": 0.08615633100271225, + "learning_rate": 7.761006289308177e-05, + "epoch": 0.615 + }, + { + "step": 1585, + "timestamp": "2025-12-28T11:35:44.742170", + "elapsed_time": 9459.02725982666, + "loss": 0.0976, + "grad_norm": 0.07224409282207489, + "learning_rate": 7.748427672955975e-05, + "epoch": 0.615625 + }, + { + "step": 1586, + "timestamp": "2025-12-28T11:35:56.023997", + "elapsed_time": 9470.30908703804, + "loss": 0.1516, + "grad_norm": 0.1125619113445282, + "learning_rate": 7.735849056603774e-05, + "epoch": 0.61625 + }, + { + "step": 1587, + "timestamp": "2025-12-28T11:36:07.646063", + "elapsed_time": 9481.931153059006, + "loss": 0.1575, + "grad_norm": 0.12028831243515015, + "learning_rate": 7.723270440251574e-05, + "epoch": 0.616875 + }, + { + "step": 1588, + "timestamp": "2025-12-28T11:36:25.636674", + "elapsed_time": 9499.92176437378, + "loss": 0.1307, + "grad_norm": 0.08736097812652588, + "learning_rate": 7.710691823899372e-05, + "epoch": 0.6175 + }, + { + "step": 1589, + "timestamp": "2025-12-28T11:36:35.410727", + "elapsed_time": 9509.69581747055, + "loss": 0.2209, + "grad_norm": 0.21880212426185608, + "learning_rate": 7.698113207547171e-05, + "epoch": 0.618125 + }, + { + "step": 1590, + "timestamp": "2025-12-28T11:36:47.358344", + "elapsed_time": 9521.643434047699, + "loss": 0.3212, + "grad_norm": 0.13422144949436188, + "learning_rate": 7.68553459119497e-05, + "epoch": 0.61875 + }, + { + "step": 1591, + "timestamp": "2025-12-28T11:36:56.658149", + "elapsed_time": 9530.943238973618, + "loss": 0.3681, + "grad_norm": 0.2638307511806488, + "learning_rate": 7.672955974842768e-05, + "epoch": 0.619375 + }, + { + "step": 1592, + "timestamp": "2025-12-28T11:37:13.127742", + "elapsed_time": 9547.41283249855, + "loss": 0.0943, + "grad_norm": 0.08068544417619705, + "learning_rate": 7.660377358490567e-05, + "epoch": 0.62 + }, + { + "step": 1593, + "timestamp": "2025-12-28T11:37:21.962911", + "elapsed_time": 9556.248000860214, + "loss": 0.183, + "grad_norm": 0.1172947883605957, + "learning_rate": 7.647798742138365e-05, + "epoch": 0.620625 + }, + { + "step": 1594, + "timestamp": "2025-12-28T11:37:32.441106", + "elapsed_time": 9566.726195812225, + "loss": 0.2204, + "grad_norm": 0.09986839443445206, + "learning_rate": 7.635220125786164e-05, + "epoch": 0.62125 + }, + { + "step": 1595, + "timestamp": "2025-12-28T11:37:38.220326", + "elapsed_time": 9572.505415916443, + "loss": 0.2831, + "grad_norm": 0.1995793581008911, + "learning_rate": 7.622641509433964e-05, + "epoch": 0.621875 + }, + { + "step": 1596, + "timestamp": "2025-12-28T11:37:48.926748", + "elapsed_time": 9583.21183848381, + "loss": 0.1441, + "grad_norm": 0.09849265217781067, + "learning_rate": 7.610062893081762e-05, + "epoch": 0.6225 + }, + { + "step": 1597, + "timestamp": "2025-12-28T11:37:59.937984", + "elapsed_time": 9594.223074674606, + "loss": 0.1331, + "grad_norm": 0.11908537149429321, + "learning_rate": 7.597484276729561e-05, + "epoch": 0.623125 + }, + { + "step": 1598, + "timestamp": "2025-12-28T11:38:08.883234", + "elapsed_time": 9603.168324947357, + "loss": 0.1916, + "grad_norm": 0.12535853683948517, + "learning_rate": 7.584905660377359e-05, + "epoch": 0.62375 + }, + { + "step": 1599, + "timestamp": "2025-12-28T11:38:17.822928", + "elapsed_time": 9612.108018875122, + "loss": 0.4302, + "grad_norm": 0.21333205699920654, + "learning_rate": 7.572327044025158e-05, + "epoch": 0.624375 + }, + { + "step": 1600, + "timestamp": "2025-12-28T11:38:28.592143", + "elapsed_time": 9622.877233028412, + "loss": 0.1339, + "grad_norm": 0.10051511973142624, + "learning_rate": 7.559748427672957e-05, + "epoch": 0.625 + }, + { + "step": 1601, + "timestamp": "2025-12-28T11:38:39.199999", + "elapsed_time": 9633.485090017319, + "loss": 0.1352, + "grad_norm": 0.11261122673749924, + "learning_rate": 7.547169811320755e-05, + "epoch": 0.625625 + }, + { + "step": 1602, + "timestamp": "2025-12-28T11:38:46.300157", + "elapsed_time": 9640.58524775505, + "loss": 0.1886, + "grad_norm": 0.12923336029052734, + "learning_rate": 7.534591194968554e-05, + "epoch": 0.62625 + }, + { + "step": 1603, + "timestamp": "2025-12-28T11:38:55.288169", + "elapsed_time": 9649.5732588768, + "loss": 0.1616, + "grad_norm": 0.11907650530338287, + "learning_rate": 7.522012578616354e-05, + "epoch": 0.626875 + }, + { + "step": 1604, + "timestamp": "2025-12-28T11:39:04.668887", + "elapsed_time": 9658.953977823257, + "loss": 0.1443, + "grad_norm": 0.11075244098901749, + "learning_rate": 7.509433962264152e-05, + "epoch": 0.6275 + }, + { + "step": 1605, + "timestamp": "2025-12-28T11:39:10.754759", + "elapsed_time": 9665.039849996567, + "loss": 0.2587, + "grad_norm": 0.2336777150630951, + "learning_rate": 7.49685534591195e-05, + "epoch": 0.628125 + }, + { + "step": 1606, + "timestamp": "2025-12-28T11:39:18.045720", + "elapsed_time": 9672.330810546875, + "loss": 0.2099, + "grad_norm": 0.14048390090465546, + "learning_rate": 7.484276729559749e-05, + "epoch": 0.62875 + }, + { + "step": 1607, + "timestamp": "2025-12-28T11:39:26.880911", + "elapsed_time": 9681.166000843048, + "loss": 0.1913, + "grad_norm": 0.12840668857097626, + "learning_rate": 7.471698113207547e-05, + "epoch": 0.629375 + }, + { + "step": 1608, + "timestamp": "2025-12-28T11:39:38.404229", + "elapsed_time": 9692.689319133759, + "loss": 0.1631, + "grad_norm": 0.17724475264549255, + "learning_rate": 7.459119496855346e-05, + "epoch": 0.63 + }, + { + "step": 1609, + "timestamp": "2025-12-28T11:39:46.668636", + "elapsed_time": 9700.95373082161, + "loss": 0.1678, + "grad_norm": 0.12339440733194351, + "learning_rate": 7.446540880503144e-05, + "epoch": 0.630625 + }, + { + "step": 1610, + "timestamp": "2025-12-28T11:39:55.140506", + "elapsed_time": 9709.425596952438, + "loss": 0.3796, + "grad_norm": 0.15686391294002533, + "learning_rate": 7.433962264150943e-05, + "epoch": 0.63125 + }, + { + "step": 1611, + "timestamp": "2025-12-28T11:40:03.202851", + "elapsed_time": 9717.487941265106, + "loss": 0.1415, + "grad_norm": 0.1224021390080452, + "learning_rate": 7.421383647798742e-05, + "epoch": 0.631875 + }, + { + "step": 1612, + "timestamp": "2025-12-28T11:40:10.163533", + "elapsed_time": 9724.44862318039, + "loss": 0.1966, + "grad_norm": 0.17393508553504944, + "learning_rate": 7.40880503144654e-05, + "epoch": 0.6325 + }, + { + "step": 1613, + "timestamp": "2025-12-28T11:40:24.152425", + "elapsed_time": 9738.437515258789, + "loss": 0.1186, + "grad_norm": 0.08472223579883575, + "learning_rate": 7.39622641509434e-05, + "epoch": 0.633125 + }, + { + "step": 1614, + "timestamp": "2025-12-28T11:40:45.041966", + "elapsed_time": 9759.327056407928, + "loss": 0.1331, + "grad_norm": 0.08091577142477036, + "learning_rate": 7.383647798742139e-05, + "epoch": 0.63375 + }, + { + "step": 1615, + "timestamp": "2025-12-28T11:40:52.221602", + "elapsed_time": 9766.506692886353, + "loss": 0.1571, + "grad_norm": 0.12333718687295914, + "learning_rate": 7.371069182389937e-05, + "epoch": 0.634375 + }, + { + "step": 1616, + "timestamp": "2025-12-28T11:41:02.877788", + "elapsed_time": 9777.162879228592, + "loss": 0.1733, + "grad_norm": 0.12430380284786224, + "learning_rate": 7.358490566037736e-05, + "epoch": 0.635 + }, + { + "step": 1617, + "timestamp": "2025-12-28T11:41:11.012938", + "elapsed_time": 9785.298028707504, + "loss": 0.1672, + "grad_norm": 0.12017210572957993, + "learning_rate": 7.345911949685534e-05, + "epoch": 0.635625 + }, + { + "step": 1618, + "timestamp": "2025-12-28T11:41:19.447939", + "elapsed_time": 9793.7330327034, + "loss": 0.1954, + "grad_norm": 0.14666104316711426, + "learning_rate": 7.333333333333333e-05, + "epoch": 0.63625 + }, + { + "step": 1619, + "timestamp": "2025-12-28T11:41:30.321982", + "elapsed_time": 9804.607072114944, + "loss": 0.1235, + "grad_norm": 0.0968768298625946, + "learning_rate": 7.320754716981132e-05, + "epoch": 0.636875 + }, + { + "step": 1620, + "timestamp": "2025-12-28T11:41:43.576219", + "elapsed_time": 9817.86130952835, + "loss": 0.1528, + "grad_norm": 0.11526031047105789, + "learning_rate": 7.30817610062893e-05, + "epoch": 0.6375 + }, + { + "step": 1621, + "timestamp": "2025-12-28T11:41:51.883719", + "elapsed_time": 9826.16880941391, + "loss": 0.2519, + "grad_norm": 0.13182447850704193, + "learning_rate": 7.29559748427673e-05, + "epoch": 0.638125 + }, + { + "step": 1622, + "timestamp": "2025-12-28T11:42:01.783503", + "elapsed_time": 9836.068593025208, + "loss": 0.18, + "grad_norm": 0.11508210748434067, + "learning_rate": 7.283018867924527e-05, + "epoch": 0.63875 + }, + { + "step": 1623, + "timestamp": "2025-12-28T11:42:08.222492", + "elapsed_time": 9842.507582426071, + "loss": 0.1862, + "grad_norm": 0.14239539206027985, + "learning_rate": 7.270440251572327e-05, + "epoch": 0.639375 + }, + { + "step": 1624, + "timestamp": "2025-12-28T11:42:16.155646", + "elapsed_time": 9850.440736532211, + "loss": 0.3164, + "grad_norm": 0.16672523319721222, + "learning_rate": 7.257861635220126e-05, + "epoch": 0.64 + }, + { + "step": 1625, + "timestamp": "2025-12-28T11:42:22.465387", + "elapsed_time": 9856.750477075577, + "loss": 0.1875, + "grad_norm": 0.13851501047611237, + "learning_rate": 7.245283018867924e-05, + "epoch": 0.640625 + }, + { + "step": 1626, + "timestamp": "2025-12-28T11:42:43.343959", + "elapsed_time": 9877.629049301147, + "loss": 0.1101, + "grad_norm": 0.07382085174322128, + "learning_rate": 7.232704402515723e-05, + "epoch": 0.64125 + }, + { + "step": 1627, + "timestamp": "2025-12-28T11:42:48.836245", + "elapsed_time": 9883.121335983276, + "loss": 0.3851, + "grad_norm": 0.19532591104507446, + "learning_rate": 7.220125786163522e-05, + "epoch": 0.641875 + }, + { + "step": 1628, + "timestamp": "2025-12-28T11:42:54.575703", + "elapsed_time": 9888.860793590546, + "loss": 0.226, + "grad_norm": 0.1721881628036499, + "learning_rate": 7.20754716981132e-05, + "epoch": 0.6425 + }, + { + "step": 1629, + "timestamp": "2025-12-28T11:43:02.309918", + "elapsed_time": 9896.595008134842, + "loss": 0.1805, + "grad_norm": 0.12694233655929565, + "learning_rate": 7.19496855345912e-05, + "epoch": 0.643125 + }, + { + "step": 1630, + "timestamp": "2025-12-28T11:43:10.240464", + "elapsed_time": 9904.52555346489, + "loss": 0.1923, + "grad_norm": 0.137771874666214, + "learning_rate": 7.182389937106918e-05, + "epoch": 0.64375 + }, + { + "step": 1631, + "timestamp": "2025-12-28T11:43:14.942263", + "elapsed_time": 9909.227353334427, + "loss": 0.3536, + "grad_norm": 0.19589699804782867, + "learning_rate": 7.169811320754717e-05, + "epoch": 0.644375 + }, + { + "step": 1632, + "timestamp": "2025-12-28T11:43:20.961561", + "elapsed_time": 9915.2466506958, + "loss": 0.1823, + "grad_norm": 0.14660844206809998, + "learning_rate": 7.157232704402516e-05, + "epoch": 0.645 + }, + { + "step": 1633, + "timestamp": "2025-12-28T11:43:25.350571", + "elapsed_time": 9919.635661840439, + "loss": 0.22, + "grad_norm": 0.18052536249160767, + "learning_rate": 7.144654088050314e-05, + "epoch": 0.645625 + }, + { + "step": 1634, + "timestamp": "2025-12-28T11:43:34.930070", + "elapsed_time": 9929.215160131454, + "loss": 0.1316, + "grad_norm": 0.10462430864572525, + "learning_rate": 7.132075471698113e-05, + "epoch": 0.64625 + }, + { + "step": 1635, + "timestamp": "2025-12-28T11:43:43.559647", + "elapsed_time": 9937.844737291336, + "loss": 0.1756, + "grad_norm": 0.126973494887352, + "learning_rate": 7.119496855345912e-05, + "epoch": 0.646875 + }, + { + "step": 1636, + "timestamp": "2025-12-28T11:43:51.704679", + "elapsed_time": 9945.989769220352, + "loss": 0.1654, + "grad_norm": 0.14970456063747406, + "learning_rate": 7.10691823899371e-05, + "epoch": 0.6475 + }, + { + "step": 1637, + "timestamp": "2025-12-28T11:44:10.864475", + "elapsed_time": 9965.149565935135, + "loss": 0.1098, + "grad_norm": 0.08534051477909088, + "learning_rate": 7.09433962264151e-05, + "epoch": 0.648125 + }, + { + "step": 1638, + "timestamp": "2025-12-28T11:44:24.042231", + "elapsed_time": 9978.327321767807, + "loss": 0.1609, + "grad_norm": 0.10151031613349915, + "learning_rate": 7.081761006289308e-05, + "epoch": 0.64875 + }, + { + "step": 1639, + "timestamp": "2025-12-28T11:44:33.412608", + "elapsed_time": 9987.697702884674, + "loss": 0.1008, + "grad_norm": 0.09468650072813034, + "learning_rate": 7.069182389937107e-05, + "epoch": 0.649375 + }, + { + "step": 1640, + "timestamp": "2025-12-28T11:44:42.835568", + "elapsed_time": 9997.120658397675, + "loss": 0.1202, + "grad_norm": 0.1058143749833107, + "learning_rate": 7.056603773584906e-05, + "epoch": 0.65 + }, + { + "step": 1641, + "timestamp": "2025-12-28T11:44:51.467264", + "elapsed_time": 10005.752353906631, + "loss": 0.1627, + "grad_norm": 0.12414304912090302, + "learning_rate": 7.044025157232704e-05, + "epoch": 0.650625 + }, + { + "step": 1642, + "timestamp": "2025-12-28T11:45:00.450823", + "elapsed_time": 10014.735912799835, + "loss": 0.1949, + "grad_norm": 0.13407202064990997, + "learning_rate": 7.031446540880503e-05, + "epoch": 0.65125 + }, + { + "step": 1643, + "timestamp": "2025-12-28T11:45:09.542076", + "elapsed_time": 10023.827166318893, + "loss": 0.1587, + "grad_norm": 0.19232605397701263, + "learning_rate": 7.018867924528301e-05, + "epoch": 0.651875 + }, + { + "step": 1644, + "timestamp": "2025-12-28T11:45:17.892732", + "elapsed_time": 10032.177821874619, + "loss": 0.1543, + "grad_norm": 0.12963691353797913, + "learning_rate": 7.0062893081761e-05, + "epoch": 0.6525 + }, + { + "step": 1645, + "timestamp": "2025-12-28T11:45:29.559165", + "elapsed_time": 10043.844255447388, + "loss": 0.1364, + "grad_norm": 0.09772875905036926, + "learning_rate": 6.9937106918239e-05, + "epoch": 0.653125 + }, + { + "step": 1646, + "timestamp": "2025-12-28T11:45:34.381174", + "elapsed_time": 10048.66626906395, + "loss": 0.2831, + "grad_norm": 0.20503878593444824, + "learning_rate": 6.981132075471698e-05, + "epoch": 0.65375 + }, + { + "step": 1647, + "timestamp": "2025-12-28T11:45:38.491344", + "elapsed_time": 10052.77643442154, + "loss": 0.4374, + "grad_norm": 0.20627865195274353, + "learning_rate": 6.968553459119497e-05, + "epoch": 0.654375 + }, + { + "step": 1648, + "timestamp": "2025-12-28T11:45:44.518475", + "elapsed_time": 10058.803565263748, + "loss": 0.1858, + "grad_norm": 0.16573020815849304, + "learning_rate": 6.955974842767296e-05, + "epoch": 0.655 + }, + { + "step": 1649, + "timestamp": "2025-12-28T11:46:05.413855", + "elapsed_time": 10079.698945760727, + "loss": 0.0942, + "grad_norm": 0.07246656715869904, + "learning_rate": 6.943396226415094e-05, + "epoch": 0.655625 + }, + { + "step": 1650, + "timestamp": "2025-12-28T11:46:24.467732", + "elapsed_time": 10098.752822637558, + "loss": 0.1091, + "grad_norm": 0.09009367972612381, + "learning_rate": 6.930817610062893e-05, + "epoch": 0.65625 + }, + { + "step": 1651, + "timestamp": "2025-12-28T11:46:33.886132", + "elapsed_time": 10108.17122220993, + "loss": 0.2347, + "grad_norm": 0.12859852612018585, + "learning_rate": 6.918238993710691e-05, + "epoch": 0.656875 + }, + { + "step": 1652, + "timestamp": "2025-12-28T11:46:42.823278", + "elapsed_time": 10117.108368635178, + "loss": 0.1737, + "grad_norm": 0.11088800430297852, + "learning_rate": 6.90566037735849e-05, + "epoch": 0.6575 + }, + { + "step": 1653, + "timestamp": "2025-12-28T11:46:54.658395", + "elapsed_time": 10128.943485021591, + "loss": 0.1427, + "grad_norm": 0.10402551293373108, + "learning_rate": 6.89308176100629e-05, + "epoch": 0.658125 + }, + { + "step": 1654, + "timestamp": "2025-12-28T11:47:03.630914", + "elapsed_time": 10137.916004419327, + "loss": 0.1378, + "grad_norm": 0.11617520451545715, + "learning_rate": 6.880503144654088e-05, + "epoch": 0.65875 + }, + { + "step": 1655, + "timestamp": "2025-12-28T11:47:13.389780", + "elapsed_time": 10147.67487025261, + "loss": 0.2234, + "grad_norm": 0.11303507536649704, + "learning_rate": 6.867924528301887e-05, + "epoch": 0.659375 + }, + { + "step": 1656, + "timestamp": "2025-12-28T11:47:26.010568", + "elapsed_time": 10160.29565834999, + "loss": 0.2006, + "grad_norm": 0.11115586757659912, + "learning_rate": 6.855345911949685e-05, + "epoch": 0.66 + }, + { + "step": 1657, + "timestamp": "2025-12-28T11:47:33.149623", + "elapsed_time": 10167.434713602066, + "loss": 0.4581, + "grad_norm": 0.19294337928295135, + "learning_rate": 6.842767295597484e-05, + "epoch": 0.660625 + }, + { + "step": 1658, + "timestamp": "2025-12-28T11:47:43.773293", + "elapsed_time": 10178.058383464813, + "loss": 0.197, + "grad_norm": 0.1200195699930191, + "learning_rate": 6.830188679245283e-05, + "epoch": 0.66125 + }, + { + "step": 1659, + "timestamp": "2025-12-28T11:47:58.867153", + "elapsed_time": 10193.152243375778, + "loss": 0.1369, + "grad_norm": 0.09150442481040955, + "learning_rate": 6.817610062893081e-05, + "epoch": 0.661875 + }, + { + "step": 1660, + "timestamp": "2025-12-28T11:48:11.905738", + "elapsed_time": 10206.190828084946, + "loss": 0.1244, + "grad_norm": 0.09038378298282623, + "learning_rate": 6.80503144654088e-05, + "epoch": 0.6625 + }, + { + "step": 1661, + "timestamp": "2025-12-28T11:48:31.524146", + "elapsed_time": 10225.80923986435, + "loss": 0.1345, + "grad_norm": 0.08619663864374161, + "learning_rate": 6.79245283018868e-05, + "epoch": 0.663125 + }, + { + "step": 1662, + "timestamp": "2025-12-28T11:48:46.868927", + "elapsed_time": 10241.154017448425, + "loss": 0.1112, + "grad_norm": 0.08081649988889694, + "learning_rate": 6.779874213836478e-05, + "epoch": 0.66375 + }, + { + "step": 1663, + "timestamp": "2025-12-28T11:48:56.461538", + "elapsed_time": 10250.746627807617, + "loss": 0.1567, + "grad_norm": 0.12073066085577011, + "learning_rate": 6.767295597484277e-05, + "epoch": 0.664375 + }, + { + "step": 1664, + "timestamp": "2025-12-28T11:49:01.469922", + "elapsed_time": 10255.755012512207, + "loss": 0.2881, + "grad_norm": 0.17294885218143463, + "learning_rate": 6.754716981132075e-05, + "epoch": 0.665 + }, + { + "step": 1665, + "timestamp": "2025-12-28T11:49:14.018002", + "elapsed_time": 10268.303092956543, + "loss": 0.1356, + "grad_norm": 0.09254828095436096, + "learning_rate": 6.742138364779874e-05, + "epoch": 0.665625 + }, + { + "step": 1666, + "timestamp": "2025-12-28T11:49:34.915088", + "elapsed_time": 10289.20017838478, + "loss": 0.0919, + "grad_norm": 0.06759099662303925, + "learning_rate": 6.729559748427673e-05, + "epoch": 0.66625 + }, + { + "step": 1667, + "timestamp": "2025-12-28T11:49:43.859625", + "elapsed_time": 10298.144714593887, + "loss": 0.1992, + "grad_norm": 0.11738283932209015, + "learning_rate": 6.716981132075471e-05, + "epoch": 0.666875 + }, + { + "step": 1668, + "timestamp": "2025-12-28T11:49:54.744691", + "elapsed_time": 10309.029781341553, + "loss": 0.3147, + "grad_norm": 0.15712329745292664, + "learning_rate": 6.70440251572327e-05, + "epoch": 0.6675 + }, + { + "step": 1669, + "timestamp": "2025-12-28T11:50:06.585145", + "elapsed_time": 10320.870235919952, + "loss": 0.1362, + "grad_norm": 0.09576017409563065, + "learning_rate": 6.691823899371068e-05, + "epoch": 0.668125 + }, + { + "step": 1670, + "timestamp": "2025-12-28T11:50:12.699422", + "elapsed_time": 10326.984511852264, + "loss": 0.3545, + "grad_norm": 0.17530383169651031, + "learning_rate": 6.679245283018868e-05, + "epoch": 0.66875 + }, + { + "step": 1671, + "timestamp": "2025-12-28T11:50:20.519400", + "elapsed_time": 10334.804490327835, + "loss": 0.1705, + "grad_norm": 0.17078684270381927, + "learning_rate": 6.666666666666667e-05, + "epoch": 0.669375 + }, + { + "step": 1672, + "timestamp": "2025-12-28T11:50:27.666519", + "elapsed_time": 10341.951608896255, + "loss": 0.1515, + "grad_norm": 0.13205629587173462, + "learning_rate": 6.654088050314465e-05, + "epoch": 0.67 + }, + { + "step": 1673, + "timestamp": "2025-12-28T11:50:36.092046", + "elapsed_time": 10350.37713599205, + "loss": 0.1247, + "grad_norm": 0.1089896485209465, + "learning_rate": 6.641509433962264e-05, + "epoch": 0.670625 + }, + { + "step": 1674, + "timestamp": "2025-12-28T11:50:44.269347", + "elapsed_time": 10358.55443739891, + "loss": 0.2136, + "grad_norm": 0.2696709930896759, + "learning_rate": 6.628930817610063e-05, + "epoch": 0.67125 + }, + { + "step": 1675, + "timestamp": "2025-12-28T11:50:54.025458", + "elapsed_time": 10368.310548067093, + "loss": 0.0958, + "grad_norm": 0.09596393257379532, + "learning_rate": 6.616352201257861e-05, + "epoch": 0.671875 + }, + { + "step": 1676, + "timestamp": "2025-12-28T11:51:02.324646", + "elapsed_time": 10376.609736204147, + "loss": 0.3834, + "grad_norm": 0.1570800244808197, + "learning_rate": 6.60377358490566e-05, + "epoch": 0.6725 + }, + { + "step": 1677, + "timestamp": "2025-12-28T11:51:08.949728", + "elapsed_time": 10383.234818220139, + "loss": 0.2527, + "grad_norm": 0.17659151554107666, + "learning_rate": 6.591194968553458e-05, + "epoch": 0.673125 + }, + { + "step": 1678, + "timestamp": "2025-12-28T11:51:16.808478", + "elapsed_time": 10391.093568086624, + "loss": 0.1488, + "grad_norm": 0.13421952724456787, + "learning_rate": 6.578616352201258e-05, + "epoch": 0.67375 + }, + { + "step": 1679, + "timestamp": "2025-12-28T11:51:23.951873", + "elapsed_time": 10398.236963272095, + "loss": 0.1324, + "grad_norm": 0.10407258570194244, + "learning_rate": 6.566037735849057e-05, + "epoch": 0.674375 + }, + { + "step": 1680, + "timestamp": "2025-12-28T11:51:44.838057", + "elapsed_time": 10419.123147726059, + "loss": 0.111, + "grad_norm": 0.08704216778278351, + "learning_rate": 6.553459119496855e-05, + "epoch": 0.675 + }, + { + "step": 1681, + "timestamp": "2025-12-28T11:51:54.548567", + "elapsed_time": 10428.83365702629, + "loss": 0.149, + "grad_norm": 0.11608418822288513, + "learning_rate": 6.540880503144654e-05, + "epoch": 0.675625 + }, + { + "step": 1682, + "timestamp": "2025-12-28T11:52:03.641934", + "elapsed_time": 10437.927023887634, + "loss": 0.2173, + "grad_norm": 0.1506926417350769, + "learning_rate": 6.528301886792453e-05, + "epoch": 0.67625 + }, + { + "step": 1683, + "timestamp": "2025-12-28T11:52:12.314341", + "elapsed_time": 10446.599431276321, + "loss": 0.192, + "grad_norm": 0.17497746646404266, + "learning_rate": 6.515723270440251e-05, + "epoch": 0.676875 + }, + { + "step": 1684, + "timestamp": "2025-12-28T11:52:24.992727", + "elapsed_time": 10459.277817249298, + "loss": 0.1565, + "grad_norm": 0.09996242821216583, + "learning_rate": 6.50314465408805e-05, + "epoch": 0.6775 + }, + { + "step": 1685, + "timestamp": "2025-12-28T11:52:35.596215", + "elapsed_time": 10469.881305456161, + "loss": 0.1227, + "grad_norm": 0.10592522472143173, + "learning_rate": 6.490566037735849e-05, + "epoch": 0.678125 + }, + { + "step": 1686, + "timestamp": "2025-12-28T11:52:43.075463", + "elapsed_time": 10477.360553264618, + "loss": 0.1698, + "grad_norm": 0.1294756829738617, + "learning_rate": 6.477987421383648e-05, + "epoch": 0.67875 + }, + { + "step": 1687, + "timestamp": "2025-12-28T11:52:52.975805", + "elapsed_time": 10487.260895729065, + "loss": 0.155, + "grad_norm": 0.12220288068056107, + "learning_rate": 6.465408805031447e-05, + "epoch": 0.679375 + }, + { + "step": 1688, + "timestamp": "2025-12-28T11:53:00.065512", + "elapsed_time": 10494.350602388382, + "loss": 0.3612, + "grad_norm": 0.18684455752372742, + "learning_rate": 6.452830188679245e-05, + "epoch": 0.68 + }, + { + "step": 1689, + "timestamp": "2025-12-28T11:53:07.317682", + "elapsed_time": 10501.602772951126, + "loss": 0.1816, + "grad_norm": 0.13791623711585999, + "learning_rate": 6.440251572327044e-05, + "epoch": 0.680625 + }, + { + "step": 1690, + "timestamp": "2025-12-28T11:53:17.843498", + "elapsed_time": 10512.128588676453, + "loss": 0.1549, + "grad_norm": 0.11356908082962036, + "learning_rate": 6.427672955974842e-05, + "epoch": 0.68125 + }, + { + "step": 1691, + "timestamp": "2025-12-28T11:53:38.715052", + "elapsed_time": 10533.00014257431, + "loss": 0.0906, + "grad_norm": 0.07176671177148819, + "learning_rate": 6.415094339622641e-05, + "epoch": 0.681875 + }, + { + "step": 1692, + "timestamp": "2025-12-28T11:53:46.531119", + "elapsed_time": 10540.816209077835, + "loss": 0.1907, + "grad_norm": 0.141318216919899, + "learning_rate": 6.40251572327044e-05, + "epoch": 0.6825 + }, + { + "step": 1693, + "timestamp": "2025-12-28T11:54:00.485420", + "elapsed_time": 10554.770510435104, + "loss": 0.1189, + "grad_norm": 0.08776037395000458, + "learning_rate": 6.389937106918239e-05, + "epoch": 0.683125 + }, + { + "step": 1694, + "timestamp": "2025-12-28T11:54:17.313372", + "elapsed_time": 10571.598462820053, + "loss": 0.1334, + "grad_norm": 0.09080228954553604, + "learning_rate": 6.377358490566038e-05, + "epoch": 0.68375 + }, + { + "step": 1695, + "timestamp": "2025-12-28T11:54:35.244713", + "elapsed_time": 10589.529803276062, + "loss": 0.117, + "grad_norm": 0.09272570163011551, + "learning_rate": 6.364779874213837e-05, + "epoch": 0.684375 + }, + { + "step": 1696, + "timestamp": "2025-12-28T11:54:42.409774", + "elapsed_time": 10596.69486451149, + "loss": 0.1424, + "grad_norm": 0.21328891813755035, + "learning_rate": 6.352201257861635e-05, + "epoch": 0.685 + }, + { + "step": 1697, + "timestamp": "2025-12-28T11:54:47.771967", + "elapsed_time": 10602.057057142258, + "loss": 0.1881, + "grad_norm": 0.14298084378242493, + "learning_rate": 6.339622641509434e-05, + "epoch": 0.685625 + }, + { + "step": 1698, + "timestamp": "2025-12-28T11:54:54.653860", + "elapsed_time": 10608.938950777054, + "loss": 0.2189, + "grad_norm": 0.1579611450433731, + "learning_rate": 6.327044025157232e-05, + "epoch": 0.68625 + }, + { + "step": 1699, + "timestamp": "2025-12-28T11:55:08.547726", + "elapsed_time": 10622.832817077637, + "loss": 0.1117, + "grad_norm": 0.13192813098430634, + "learning_rate": 6.314465408805031e-05, + "epoch": 0.686875 + }, + { + "step": 1700, + "timestamp": "2025-12-28T11:55:14.487705", + "elapsed_time": 10628.77279496193, + "loss": 0.1748, + "grad_norm": 0.13700881600379944, + "learning_rate": 6.301886792452831e-05, + "epoch": 0.6875 + }, + { + "step": 1701, + "timestamp": "2025-12-28T11:55:22.517878", + "elapsed_time": 10636.802968740463, + "loss": 0.1818, + "grad_norm": 0.13351568579673767, + "learning_rate": 6.289308176100629e-05, + "epoch": 0.688125 + }, + { + "step": 1702, + "timestamp": "2025-12-28T11:55:32.797402", + "elapsed_time": 10647.082492351532, + "loss": 0.1418, + "grad_norm": 0.11062666773796082, + "learning_rate": 6.276729559748428e-05, + "epoch": 0.68875 + }, + { + "step": 1703, + "timestamp": "2025-12-28T11:55:41.627529", + "elapsed_time": 10655.912618637085, + "loss": 0.138, + "grad_norm": 0.12265567481517792, + "learning_rate": 6.264150943396226e-05, + "epoch": 0.689375 + }, + { + "step": 1704, + "timestamp": "2025-12-28T11:55:48.617990", + "elapsed_time": 10662.903080940247, + "loss": 0.231, + "grad_norm": 0.14810243248939514, + "learning_rate": 6.251572327044025e-05, + "epoch": 0.69 + }, + { + "step": 1705, + "timestamp": "2025-12-28T11:55:56.917497", + "elapsed_time": 10671.202587604523, + "loss": 0.1909, + "grad_norm": 0.1277126967906952, + "learning_rate": 6.238993710691824e-05, + "epoch": 0.690625 + }, + { + "step": 1706, + "timestamp": "2025-12-28T11:56:11.042753", + "elapsed_time": 10685.327842950821, + "loss": 0.1558, + "grad_norm": 0.10240671783685684, + "learning_rate": 6.226415094339622e-05, + "epoch": 0.69125 + }, + { + "step": 1707, + "timestamp": "2025-12-28T11:56:19.175090", + "elapsed_time": 10693.460180997849, + "loss": 0.1653, + "grad_norm": 0.13144554197788239, + "learning_rate": 6.213836477987421e-05, + "epoch": 0.691875 + }, + { + "step": 1708, + "timestamp": "2025-12-28T11:56:28.590060", + "elapsed_time": 10702.875150203705, + "loss": 0.4414, + "grad_norm": 0.15143659710884094, + "learning_rate": 6.201257861635221e-05, + "epoch": 0.6925 + }, + { + "step": 1709, + "timestamp": "2025-12-28T11:56:35.593767", + "elapsed_time": 10709.87885761261, + "loss": 0.1623, + "grad_norm": 0.1259068101644516, + "learning_rate": 6.188679245283019e-05, + "epoch": 0.693125 + }, + { + "step": 1710, + "timestamp": "2025-12-28T11:56:48.621945", + "elapsed_time": 10722.907035589218, + "loss": 0.1455, + "grad_norm": 0.09466191381216049, + "learning_rate": 6.176100628930818e-05, + "epoch": 0.69375 + }, + { + "step": 1711, + "timestamp": "2025-12-28T11:57:02.125457", + "elapsed_time": 10736.41054725647, + "loss": 0.1517, + "grad_norm": 0.11332697421312332, + "learning_rate": 6.163522012578616e-05, + "epoch": 0.694375 + }, + { + "step": 1712, + "timestamp": "2025-12-28T11:57:17.576247", + "elapsed_time": 10751.861337184906, + "loss": 0.1169, + "grad_norm": 0.08637837320566177, + "learning_rate": 6.150943396226415e-05, + "epoch": 0.695 + }, + { + "step": 1713, + "timestamp": "2025-12-28T11:57:27.606626", + "elapsed_time": 10761.891716957092, + "loss": 0.2205, + "grad_norm": 0.220438614487648, + "learning_rate": 6.138364779874214e-05, + "epoch": 0.695625 + }, + { + "step": 1714, + "timestamp": "2025-12-28T11:57:44.152720", + "elapsed_time": 10778.437810659409, + "loss": 0.107, + "grad_norm": 0.07576917856931686, + "learning_rate": 6.125786163522012e-05, + "epoch": 0.69625 + }, + { + "step": 1715, + "timestamp": "2025-12-28T11:58:02.072391", + "elapsed_time": 10796.357481956482, + "loss": 0.1151, + "grad_norm": 0.08082325756549835, + "learning_rate": 6.113207547169812e-05, + "epoch": 0.696875 + }, + { + "step": 1716, + "timestamp": "2025-12-28T11:58:06.544039", + "elapsed_time": 10800.829129457474, + "loss": 0.253, + "grad_norm": 0.17654751241207123, + "learning_rate": 6.10062893081761e-05, + "epoch": 0.6975 + }, + { + "step": 1717, + "timestamp": "2025-12-28T11:58:12.629577", + "elapsed_time": 10806.914667367935, + "loss": 0.225, + "grad_norm": 0.15153177082538605, + "learning_rate": 6.088050314465409e-05, + "epoch": 0.698125 + }, + { + "step": 1718, + "timestamp": "2025-12-28T11:58:20.697858", + "elapsed_time": 10814.982948064804, + "loss": 0.1918, + "grad_norm": 0.14196348190307617, + "learning_rate": 6.075471698113207e-05, + "epoch": 0.69875 + }, + { + "step": 1719, + "timestamp": "2025-12-28T11:58:35.776435", + "elapsed_time": 10830.061525821686, + "loss": 0.1034, + "grad_norm": 0.08372589945793152, + "learning_rate": 6.0628930817610065e-05, + "epoch": 0.699375 + }, + { + "step": 1720, + "timestamp": "2025-12-28T11:58:41.937607", + "elapsed_time": 10836.222697019577, + "loss": 0.2077, + "grad_norm": 0.15489476919174194, + "learning_rate": 6.050314465408805e-05, + "epoch": 0.7 + }, + { + "step": 1721, + "timestamp": "2025-12-28T11:58:49.416543", + "elapsed_time": 10843.70163345337, + "loss": 0.1679, + "grad_norm": 0.13079725205898285, + "learning_rate": 6.037735849056604e-05, + "epoch": 0.700625 + }, + { + "step": 1722, + "timestamp": "2025-12-28T11:58:57.300619", + "elapsed_time": 10851.58570933342, + "loss": 0.232, + "grad_norm": 0.1388118714094162, + "learning_rate": 6.025157232704402e-05, + "epoch": 0.70125 + }, + { + "step": 1723, + "timestamp": "2025-12-28T11:59:01.515497", + "elapsed_time": 10855.800586938858, + "loss": 0.3026, + "grad_norm": 0.21141380071640015, + "learning_rate": 6.0125786163522016e-05, + "epoch": 0.701875 + }, + { + "step": 1724, + "timestamp": "2025-12-28T11:59:09.331565", + "elapsed_time": 10863.61665558815, + "loss": 0.2376, + "grad_norm": 0.14707203209400177, + "learning_rate": 6e-05, + "epoch": 0.7025 + }, + { + "step": 1725, + "timestamp": "2025-12-28T11:59:15.412136", + "elapsed_time": 10869.697226524353, + "loss": 0.2456, + "grad_norm": 0.16581028699874878, + "learning_rate": 5.987421383647799e-05, + "epoch": 0.703125 + }, + { + "step": 1726, + "timestamp": "2025-12-28T11:59:22.041531", + "elapsed_time": 10876.326621294022, + "loss": 0.2671, + "grad_norm": 0.15748703479766846, + "learning_rate": 5.974842767295597e-05, + "epoch": 0.70375 + }, + { + "step": 1727, + "timestamp": "2025-12-28T11:59:36.715440", + "elapsed_time": 10891.00053024292, + "loss": 0.0921, + "grad_norm": 0.083674855530262, + "learning_rate": 5.9622641509433966e-05, + "epoch": 0.704375 + }, + { + "step": 1728, + "timestamp": "2025-12-28T11:59:51.882192", + "elapsed_time": 10906.167282104492, + "loss": 0.126, + "grad_norm": 0.0893712192773819, + "learning_rate": 5.949685534591195e-05, + "epoch": 0.705 + }, + { + "step": 1729, + "timestamp": "2025-12-28T11:59:59.217349", + "elapsed_time": 10913.502439022064, + "loss": 0.1477, + "grad_norm": 0.15378941595554352, + "learning_rate": 5.937106918238994e-05, + "epoch": 0.705625 + }, + { + "step": 1730, + "timestamp": "2025-12-28T12:00:07.029866", + "elapsed_time": 10921.31495642662, + "loss": 0.1585, + "grad_norm": 0.5633273124694824, + "learning_rate": 5.9245283018867923e-05, + "epoch": 0.70625 + }, + { + "step": 1731, + "timestamp": "2025-12-28T12:00:15.817213", + "elapsed_time": 10930.102303981781, + "loss": 0.141, + "grad_norm": 0.11738086491823196, + "learning_rate": 5.9119496855345916e-05, + "epoch": 0.706875 + }, + { + "step": 1732, + "timestamp": "2025-12-28T12:00:31.339033", + "elapsed_time": 10945.62412405014, + "loss": 0.1483, + "grad_norm": 0.09447266906499863, + "learning_rate": 5.89937106918239e-05, + "epoch": 0.7075 + }, + { + "step": 1733, + "timestamp": "2025-12-28T12:00:45.283079", + "elapsed_time": 10959.568170070648, + "loss": 0.1092, + "grad_norm": 0.09043192118406296, + "learning_rate": 5.886792452830189e-05, + "epoch": 0.708125 + }, + { + "step": 1734, + "timestamp": "2025-12-28T12:00:54.651343", + "elapsed_time": 10968.936433792114, + "loss": 0.2226, + "grad_norm": 0.1368798464536667, + "learning_rate": 5.8742138364779874e-05, + "epoch": 0.70875 + }, + { + "step": 1735, + "timestamp": "2025-12-28T12:00:59.476546", + "elapsed_time": 10973.76163649559, + "loss": 0.23, + "grad_norm": 0.18571457266807556, + "learning_rate": 5.861635220125786e-05, + "epoch": 0.709375 + }, + { + "step": 1736, + "timestamp": "2025-12-28T12:01:18.837352", + "elapsed_time": 10993.12244272232, + "loss": 0.132, + "grad_norm": 0.07873135060071945, + "learning_rate": 5.849056603773585e-05, + "epoch": 0.71 + }, + { + "step": 1737, + "timestamp": "2025-12-28T12:01:36.420927", + "elapsed_time": 11010.70601773262, + "loss": 0.0795, + "grad_norm": 0.06837231665849686, + "learning_rate": 5.836477987421384e-05, + "epoch": 0.710625 + }, + { + "step": 1738, + "timestamp": "2025-12-28T12:01:44.490699", + "elapsed_time": 11018.77578997612, + "loss": 0.1778, + "grad_norm": 0.13413724303245544, + "learning_rate": 5.8238993710691824e-05, + "epoch": 0.71125 + }, + { + "step": 1739, + "timestamp": "2025-12-28T12:02:00.532185", + "elapsed_time": 11034.81727552414, + "loss": 0.1339, + "grad_norm": 0.092499740421772, + "learning_rate": 5.811320754716981e-05, + "epoch": 0.711875 + }, + { + "step": 1740, + "timestamp": "2025-12-28T12:02:08.705472", + "elapsed_time": 11042.990562677383, + "loss": 0.1989, + "grad_norm": 0.14828531444072723, + "learning_rate": 5.79874213836478e-05, + "epoch": 0.7125 + }, + { + "step": 1741, + "timestamp": "2025-12-28T12:02:21.495006", + "elapsed_time": 11055.780095815659, + "loss": 0.1429, + "grad_norm": 0.10832472145557404, + "learning_rate": 5.786163522012579e-05, + "epoch": 0.713125 + }, + { + "step": 1742, + "timestamp": "2025-12-28T12:02:27.508870", + "elapsed_time": 11061.793960094452, + "loss": 0.1582, + "grad_norm": 0.1331390142440796, + "learning_rate": 5.7735849056603774e-05, + "epoch": 0.71375 + }, + { + "step": 1743, + "timestamp": "2025-12-28T12:02:32.397381", + "elapsed_time": 11066.682476043701, + "loss": 0.3914, + "grad_norm": 0.20057356357574463, + "learning_rate": 5.761006289308176e-05, + "epoch": 0.714375 + }, + { + "step": 1744, + "timestamp": "2025-12-28T12:02:41.828268", + "elapsed_time": 11076.113358259201, + "loss": 0.1488, + "grad_norm": 0.1079302653670311, + "learning_rate": 5.748427672955975e-05, + "epoch": 0.715 + }, + { + "step": 1745, + "timestamp": "2025-12-28T12:02:48.746835", + "elapsed_time": 11083.03192615509, + "loss": 0.2745, + "grad_norm": 0.15143828094005585, + "learning_rate": 5.735849056603774e-05, + "epoch": 0.715625 + }, + { + "step": 1746, + "timestamp": "2025-12-28T12:02:54.122330", + "elapsed_time": 11088.407420396805, + "loss": 0.432, + "grad_norm": 0.1899183988571167, + "learning_rate": 5.7232704402515724e-05, + "epoch": 0.71625 + }, + { + "step": 1747, + "timestamp": "2025-12-28T12:03:04.601026", + "elapsed_time": 11098.88611626625, + "loss": 0.1694, + "grad_norm": 0.1249026283621788, + "learning_rate": 5.710691823899371e-05, + "epoch": 0.716875 + }, + { + "step": 1748, + "timestamp": "2025-12-28T12:03:24.883654", + "elapsed_time": 11119.168744325638, + "loss": 0.1206, + "grad_norm": 0.07808022201061249, + "learning_rate": 5.6981132075471696e-05, + "epoch": 0.7175 + }, + { + "step": 1749, + "timestamp": "2025-12-28T12:03:34.253119", + "elapsed_time": 11128.538208961487, + "loss": 0.1954, + "grad_norm": 0.12799711525440216, + "learning_rate": 5.685534591194969e-05, + "epoch": 0.718125 + }, + { + "step": 1750, + "timestamp": "2025-12-28T12:03:41.136923", + "elapsed_time": 11135.422013521194, + "loss": 0.1752, + "grad_norm": 0.15330050885677338, + "learning_rate": 5.6729559748427674e-05, + "epoch": 0.71875 + }, + { + "step": 1751, + "timestamp": "2025-12-28T12:03:58.075443", + "elapsed_time": 11152.360533714294, + "loss": 0.1457, + "grad_norm": 0.11428968608379364, + "learning_rate": 5.660377358490566e-05, + "epoch": 0.719375 + }, + { + "step": 1752, + "timestamp": "2025-12-28T12:04:06.337992", + "elapsed_time": 11160.623083114624, + "loss": 0.14, + "grad_norm": 0.1099119707942009, + "learning_rate": 5.6477987421383646e-05, + "epoch": 0.72 + }, + { + "step": 1753, + "timestamp": "2025-12-28T12:04:27.219359", + "elapsed_time": 11181.504449605942, + "loss": 0.192, + "grad_norm": 0.08993933349847794, + "learning_rate": 5.635220125786164e-05, + "epoch": 0.720625 + }, + { + "step": 1754, + "timestamp": "2025-12-28T12:04:32.971069", + "elapsed_time": 11187.256159305573, + "loss": 0.2048, + "grad_norm": 0.1937875747680664, + "learning_rate": 5.6226415094339625e-05, + "epoch": 0.72125 + }, + { + "step": 1755, + "timestamp": "2025-12-28T12:04:50.508927", + "elapsed_time": 11204.79401755333, + "loss": 0.1391, + "grad_norm": 0.10435943305492401, + "learning_rate": 5.610062893081761e-05, + "epoch": 0.721875 + }, + { + "step": 1756, + "timestamp": "2025-12-28T12:05:03.301495", + "elapsed_time": 11217.586585998535, + "loss": 0.1312, + "grad_norm": 0.10173836350440979, + "learning_rate": 5.5974842767295596e-05, + "epoch": 0.7225 + }, + { + "step": 1757, + "timestamp": "2025-12-28T12:05:09.379646", + "elapsed_time": 11223.66473698616, + "loss": 0.1685, + "grad_norm": 0.2740840017795563, + "learning_rate": 5.584905660377359e-05, + "epoch": 0.723125 + }, + { + "step": 1758, + "timestamp": "2025-12-28T12:05:16.331503", + "elapsed_time": 11230.616593122482, + "loss": 0.1821, + "grad_norm": 0.13564808666706085, + "learning_rate": 5.5723270440251575e-05, + "epoch": 0.72375 + }, + { + "step": 1759, + "timestamp": "2025-12-28T12:05:25.908391", + "elapsed_time": 11240.193481445312, + "loss": 0.1539, + "grad_norm": 0.11152060329914093, + "learning_rate": 5.559748427672956e-05, + "epoch": 0.724375 + }, + { + "step": 1760, + "timestamp": "2025-12-28T12:05:32.102932", + "elapsed_time": 11246.388021945953, + "loss": 0.2534, + "grad_norm": 0.35456421971321106, + "learning_rate": 5.5471698113207547e-05, + "epoch": 0.725 + }, + { + "step": 1761, + "timestamp": "2025-12-28T12:05:38.035109", + "elapsed_time": 11252.320199251175, + "loss": 0.2746, + "grad_norm": 0.20745328068733215, + "learning_rate": 5.534591194968554e-05, + "epoch": 0.725625 + }, + { + "step": 1762, + "timestamp": "2025-12-28T12:05:45.470001", + "elapsed_time": 11259.755092144012, + "loss": 0.1958, + "grad_norm": 0.1335449367761612, + "learning_rate": 5.5220125786163525e-05, + "epoch": 0.72625 + }, + { + "step": 1763, + "timestamp": "2025-12-28T12:05:51.035573", + "elapsed_time": 11265.320663452148, + "loss": 0.2487, + "grad_norm": 0.17504768073558807, + "learning_rate": 5.509433962264151e-05, + "epoch": 0.726875 + }, + { + "step": 1764, + "timestamp": "2025-12-28T12:06:07.328667", + "elapsed_time": 11281.613757371902, + "loss": 0.1174, + "grad_norm": 0.15383565425872803, + "learning_rate": 5.49685534591195e-05, + "epoch": 0.7275 + }, + { + "step": 1765, + "timestamp": "2025-12-28T12:06:13.760643", + "elapsed_time": 11288.045733213425, + "loss": 0.1973, + "grad_norm": 0.31960293650627136, + "learning_rate": 5.484276729559748e-05, + "epoch": 0.728125 + }, + { + "step": 1766, + "timestamp": "2025-12-28T12:06:20.069410", + "elapsed_time": 11294.354505062103, + "loss": 0.2468, + "grad_norm": 0.16368219256401062, + "learning_rate": 5.4716981132075475e-05, + "epoch": 0.72875 + }, + { + "step": 1767, + "timestamp": "2025-12-28T12:06:32.245904", + "elapsed_time": 11306.530994415283, + "loss": 0.0958, + "grad_norm": 0.08179421722888947, + "learning_rate": 5.459119496855346e-05, + "epoch": 0.729375 + }, + { + "step": 1768, + "timestamp": "2025-12-28T12:06:42.619417", + "elapsed_time": 11316.904507875443, + "loss": 0.1315, + "grad_norm": 0.10045388340950012, + "learning_rate": 5.446540880503145e-05, + "epoch": 0.73 + }, + { + "step": 1769, + "timestamp": "2025-12-28T12:06:51.947947", + "elapsed_time": 11326.233037233353, + "loss": 0.211, + "grad_norm": 0.13236618041992188, + "learning_rate": 5.433962264150943e-05, + "epoch": 0.730625 + }, + { + "step": 1770, + "timestamp": "2025-12-28T12:07:03.166704", + "elapsed_time": 11337.451794624329, + "loss": 0.1187, + "grad_norm": 0.08745797723531723, + "learning_rate": 5.4213836477987425e-05, + "epoch": 0.73125 + }, + { + "step": 1771, + "timestamp": "2025-12-28T12:07:08.268446", + "elapsed_time": 11342.553536176682, + "loss": 0.261, + "grad_norm": 0.1820874959230423, + "learning_rate": 5.408805031446541e-05, + "epoch": 0.731875 + }, + { + "step": 1772, + "timestamp": "2025-12-28T12:07:18.742417", + "elapsed_time": 11353.027507543564, + "loss": 0.1424, + "grad_norm": 0.10937105119228363, + "learning_rate": 5.39622641509434e-05, + "epoch": 0.7325 + }, + { + "step": 1773, + "timestamp": "2025-12-28T12:07:35.988111", + "elapsed_time": 11370.273201227188, + "loss": 0.131, + "grad_norm": 0.09035047143697739, + "learning_rate": 5.383647798742138e-05, + "epoch": 0.733125 + }, + { + "step": 1774, + "timestamp": "2025-12-28T12:07:42.368541", + "elapsed_time": 11376.653632164001, + "loss": 0.2366, + "grad_norm": 0.1836661696434021, + "learning_rate": 5.3710691823899376e-05, + "epoch": 0.73375 + }, + { + "step": 1775, + "timestamp": "2025-12-28T12:07:51.102768", + "elapsed_time": 11385.387857913971, + "loss": 0.1211, + "grad_norm": 0.10112607479095459, + "learning_rate": 5.358490566037736e-05, + "epoch": 0.734375 + }, + { + "step": 1776, + "timestamp": "2025-12-28T12:08:07.843256", + "elapsed_time": 11402.128346443176, + "loss": 0.1377, + "grad_norm": 0.08925390243530273, + "learning_rate": 5.345911949685535e-05, + "epoch": 0.735 + }, + { + "step": 1777, + "timestamp": "2025-12-28T12:08:15.286123", + "elapsed_time": 11409.571213960648, + "loss": 0.1673, + "grad_norm": 0.13005472719669342, + "learning_rate": 5.333333333333333e-05, + "epoch": 0.735625 + }, + { + "step": 1778, + "timestamp": "2025-12-28T12:08:26.521285", + "elapsed_time": 11420.806375265121, + "loss": 0.2347, + "grad_norm": 0.11882436275482178, + "learning_rate": 5.3207547169811326e-05, + "epoch": 0.73625 + }, + { + "step": 1779, + "timestamp": "2025-12-28T12:08:33.112874", + "elapsed_time": 11427.397964477539, + "loss": 0.1772, + "grad_norm": 0.14445851743221283, + "learning_rate": 5.308176100628931e-05, + "epoch": 0.736875 + }, + { + "step": 1780, + "timestamp": "2025-12-28T12:08:43.367927", + "elapsed_time": 11437.653017282486, + "loss": 0.1138, + "grad_norm": 0.10857395082712173, + "learning_rate": 5.29559748427673e-05, + "epoch": 0.7375 + }, + { + "step": 1781, + "timestamp": "2025-12-28T12:08:50.894299", + "elapsed_time": 11445.179389238358, + "loss": 0.1847, + "grad_norm": 0.13043537735939026, + "learning_rate": 5.283018867924528e-05, + "epoch": 0.738125 + }, + { + "step": 1782, + "timestamp": "2025-12-28T12:08:59.518528", + "elapsed_time": 11453.803642749786, + "loss": 0.1546, + "grad_norm": 0.16742828488349915, + "learning_rate": 5.270440251572327e-05, + "epoch": 0.73875 + }, + { + "step": 1783, + "timestamp": "2025-12-28T12:09:07.688718", + "elapsed_time": 11461.973808765411, + "loss": 0.137, + "grad_norm": 0.1259349137544632, + "learning_rate": 5.257861635220126e-05, + "epoch": 0.739375 + }, + { + "step": 1784, + "timestamp": "2025-12-28T12:09:14.061669", + "elapsed_time": 11468.346759557724, + "loss": 0.2945, + "grad_norm": 0.1729360669851303, + "learning_rate": 5.245283018867925e-05, + "epoch": 0.74 + }, + { + "step": 1785, + "timestamp": "2025-12-28T12:09:24.669063", + "elapsed_time": 11478.95415353775, + "loss": 0.143, + "grad_norm": 0.10868491232395172, + "learning_rate": 5.2327044025157234e-05, + "epoch": 0.740625 + }, + { + "step": 1786, + "timestamp": "2025-12-28T12:09:29.943561", + "elapsed_time": 11484.228651285172, + "loss": 0.2229, + "grad_norm": 0.1582539975643158, + "learning_rate": 5.220125786163522e-05, + "epoch": 0.74125 + }, + { + "step": 1787, + "timestamp": "2025-12-28T12:09:44.863706", + "elapsed_time": 11499.14879655838, + "loss": 0.1276, + "grad_norm": 0.09422967582941055, + "learning_rate": 5.207547169811321e-05, + "epoch": 0.741875 + }, + { + "step": 1788, + "timestamp": "2025-12-28T12:09:52.629311", + "elapsed_time": 11506.91440153122, + "loss": 0.3331, + "grad_norm": 0.15312416851520538, + "learning_rate": 5.19496855345912e-05, + "epoch": 0.7425 + }, + { + "step": 1789, + "timestamp": "2025-12-28T12:10:00.979750", + "elapsed_time": 11515.264840602875, + "loss": 0.1471, + "grad_norm": 0.14243608713150024, + "learning_rate": 5.1823899371069184e-05, + "epoch": 0.743125 + }, + { + "step": 1790, + "timestamp": "2025-12-28T12:10:21.855558", + "elapsed_time": 11536.140648126602, + "loss": 0.0733, + "grad_norm": 0.06610149145126343, + "learning_rate": 5.169811320754717e-05, + "epoch": 0.74375 + }, + { + "step": 1791, + "timestamp": "2025-12-28T12:10:37.078731", + "elapsed_time": 11551.363821744919, + "loss": 0.0946, + "grad_norm": 0.07790885865688324, + "learning_rate": 5.157232704402516e-05, + "epoch": 0.744375 + }, + { + "step": 1792, + "timestamp": "2025-12-28T12:10:42.615191", + "elapsed_time": 11556.90028142929, + "loss": 0.2814, + "grad_norm": 0.21737056970596313, + "learning_rate": 5.144654088050315e-05, + "epoch": 0.745 + }, + { + "step": 1793, + "timestamp": "2025-12-28T12:10:51.083715", + "elapsed_time": 11565.36880493164, + "loss": 0.1001, + "grad_norm": 0.10370247066020966, + "learning_rate": 5.1320754716981134e-05, + "epoch": 0.745625 + }, + { + "step": 1794, + "timestamp": "2025-12-28T12:11:01.500012", + "elapsed_time": 11575.785102844238, + "loss": 0.158, + "grad_norm": 0.10645274817943573, + "learning_rate": 5.119496855345912e-05, + "epoch": 0.74625 + }, + { + "step": 1795, + "timestamp": "2025-12-28T12:11:15.700384", + "elapsed_time": 11589.985474824905, + "loss": 0.1245, + "grad_norm": 0.09372659027576447, + "learning_rate": 5.106918238993711e-05, + "epoch": 0.746875 + }, + { + "step": 1796, + "timestamp": "2025-12-28T12:11:23.832555", + "elapsed_time": 11598.117645263672, + "loss": 0.1891, + "grad_norm": 0.2279994636774063, + "learning_rate": 5.09433962264151e-05, + "epoch": 0.7475 + }, + { + "step": 1797, + "timestamp": "2025-12-28T12:11:28.965281", + "elapsed_time": 11603.250371217728, + "loss": 0.242, + "grad_norm": 0.17063166201114655, + "learning_rate": 5.0817610062893084e-05, + "epoch": 0.748125 + }, + { + "step": 1798, + "timestamp": "2025-12-28T12:11:49.020815", + "elapsed_time": 11623.30590581894, + "loss": 0.1164, + "grad_norm": 0.07605966180562973, + "learning_rate": 5.069182389937107e-05, + "epoch": 0.74875 + }, + { + "step": 1799, + "timestamp": "2025-12-28T12:11:57.236498", + "elapsed_time": 11631.521588563919, + "loss": 0.145, + "grad_norm": 0.11617890745401382, + "learning_rate": 5.0566037735849056e-05, + "epoch": 0.749375 + }, + { + "step": 1800, + "timestamp": "2025-12-28T12:12:05.263297", + "elapsed_time": 11639.548387527466, + "loss": 0.1271, + "grad_norm": 0.12031132727861404, + "learning_rate": 5.044025157232705e-05, + "epoch": 0.75 + }, + { + "step": 1801, + "timestamp": "2025-12-28T12:12:13.459492", + "elapsed_time": 11647.744582653046, + "loss": 0.1386, + "grad_norm": 0.12550540268421173, + "learning_rate": 5.0314465408805034e-05, + "epoch": 0.750625 + }, + { + "step": 1802, + "timestamp": "2025-12-28T12:12:20.087338", + "elapsed_time": 11654.372427940369, + "loss": 0.2102, + "grad_norm": 0.14700697362422943, + "learning_rate": 5.018867924528302e-05, + "epoch": 0.75125 + }, + { + "step": 1803, + "timestamp": "2025-12-28T12:12:27.258226", + "elapsed_time": 11661.543317079544, + "loss": 0.1896, + "grad_norm": 0.1357898861169815, + "learning_rate": 5.0062893081761006e-05, + "epoch": 0.751875 + }, + { + "step": 1804, + "timestamp": "2025-12-28T12:12:36.128775", + "elapsed_time": 11670.413865566254, + "loss": 0.2121, + "grad_norm": 0.170795738697052, + "learning_rate": 4.9937106918239e-05, + "epoch": 0.7525 + }, + { + "step": 1805, + "timestamp": "2025-12-28T12:12:47.640738", + "elapsed_time": 11681.925828456879, + "loss": 0.1038, + "grad_norm": 0.08710363507270813, + "learning_rate": 4.9811320754716985e-05, + "epoch": 0.753125 + }, + { + "step": 1806, + "timestamp": "2025-12-28T12:12:59.703031", + "elapsed_time": 11693.988121509552, + "loss": 0.1155, + "grad_norm": 0.09086853265762329, + "learning_rate": 4.968553459119497e-05, + "epoch": 0.75375 + }, + { + "step": 1807, + "timestamp": "2025-12-28T12:13:07.973124", + "elapsed_time": 11702.258214712143, + "loss": 0.1217, + "grad_norm": 0.09986895322799683, + "learning_rate": 4.9559748427672956e-05, + "epoch": 0.754375 + }, + { + "step": 1808, + "timestamp": "2025-12-28T12:13:15.956985", + "elapsed_time": 11710.242075920105, + "loss": 0.2275, + "grad_norm": 0.2224908024072647, + "learning_rate": 4.943396226415095e-05, + "epoch": 0.755 + }, + { + "step": 1809, + "timestamp": "2025-12-28T12:13:26.842250", + "elapsed_time": 11721.127341032028, + "loss": 0.1621, + "grad_norm": 0.13384607434272766, + "learning_rate": 4.9308176100628935e-05, + "epoch": 0.755625 + }, + { + "step": 1810, + "timestamp": "2025-12-28T12:13:33.836759", + "elapsed_time": 11728.121849298477, + "loss": 0.1827, + "grad_norm": 0.12781156599521637, + "learning_rate": 4.918238993710692e-05, + "epoch": 0.75625 + }, + { + "step": 1811, + "timestamp": "2025-12-28T12:13:45.020366", + "elapsed_time": 11739.305456399918, + "loss": 0.1141, + "grad_norm": 0.08918755501508713, + "learning_rate": 4.9056603773584906e-05, + "epoch": 0.756875 + }, + { + "step": 1812, + "timestamp": "2025-12-28T12:13:53.188101", + "elapsed_time": 11747.473192214966, + "loss": 0.2376, + "grad_norm": 0.1457865685224533, + "learning_rate": 4.893081761006289e-05, + "epoch": 0.7575 + }, + { + "step": 1813, + "timestamp": "2025-12-28T12:14:02.152041", + "elapsed_time": 11756.437131166458, + "loss": 0.1533, + "grad_norm": 0.18399164080619812, + "learning_rate": 4.8805031446540885e-05, + "epoch": 0.758125 + }, + { + "step": 1814, + "timestamp": "2025-12-28T12:14:09.888429", + "elapsed_time": 11764.173519134521, + "loss": 0.2152, + "grad_norm": 0.14015917479991913, + "learning_rate": 4.867924528301887e-05, + "epoch": 0.75875 + }, + { + "step": 1815, + "timestamp": "2025-12-28T12:14:15.353774", + "elapsed_time": 11769.638864517212, + "loss": 0.1697, + "grad_norm": 0.14405082166194916, + "learning_rate": 4.855345911949686e-05, + "epoch": 0.759375 + }, + { + "step": 1816, + "timestamp": "2025-12-28T12:14:20.680001", + "elapsed_time": 11774.965090990067, + "loss": 0.3188, + "grad_norm": 0.23127539455890656, + "learning_rate": 4.842767295597484e-05, + "epoch": 0.76 + }, + { + "step": 1817, + "timestamp": "2025-12-28T12:14:26.352096", + "elapsed_time": 11780.637186527252, + "loss": 0.2125, + "grad_norm": 0.16115504503250122, + "learning_rate": 4.8301886792452835e-05, + "epoch": 0.760625 + }, + { + "step": 1818, + "timestamp": "2025-12-28T12:14:37.336389", + "elapsed_time": 11791.621479988098, + "loss": 0.3284, + "grad_norm": 0.14183737337589264, + "learning_rate": 4.817610062893082e-05, + "epoch": 0.76125 + }, + { + "step": 1819, + "timestamp": "2025-12-28T12:14:44.254425", + "elapsed_time": 11798.539515256882, + "loss": 0.2209, + "grad_norm": 0.14606201648712158, + "learning_rate": 4.805031446540881e-05, + "epoch": 0.761875 + }, + { + "step": 1820, + "timestamp": "2025-12-28T12:14:53.530877", + "elapsed_time": 11807.815967798233, + "loss": 0.1198, + "grad_norm": 0.18284346163272858, + "learning_rate": 4.792452830188679e-05, + "epoch": 0.7625 + }, + { + "step": 1821, + "timestamp": "2025-12-28T12:15:02.736188", + "elapsed_time": 11817.021278858185, + "loss": 0.1288, + "grad_norm": 0.1111404076218605, + "learning_rate": 4.7798742138364785e-05, + "epoch": 0.763125 + }, + { + "step": 1822, + "timestamp": "2025-12-28T12:15:10.672655", + "elapsed_time": 11824.957744836807, + "loss": 0.3206, + "grad_norm": 0.1543583869934082, + "learning_rate": 4.767295597484277e-05, + "epoch": 0.76375 + }, + { + "step": 1823, + "timestamp": "2025-12-28T12:15:24.962444", + "elapsed_time": 11839.247534513474, + "loss": 0.1522, + "grad_norm": 0.11686399579048157, + "learning_rate": 4.754716981132076e-05, + "epoch": 0.764375 + }, + { + "step": 1824, + "timestamp": "2025-12-28T12:15:32.700314", + "elapsed_time": 11846.985404253006, + "loss": 0.2664, + "grad_norm": 0.1384664922952652, + "learning_rate": 4.742138364779874e-05, + "epoch": 0.765 + }, + { + "step": 1825, + "timestamp": "2025-12-28T12:15:51.808174", + "elapsed_time": 11866.093264579773, + "loss": 0.2736, + "grad_norm": 0.11269756406545639, + "learning_rate": 4.7295597484276736e-05, + "epoch": 0.765625 + }, + { + "step": 1826, + "timestamp": "2025-12-28T12:15:57.370013", + "elapsed_time": 11871.655103206635, + "loss": 0.1664, + "grad_norm": 0.1405743509531021, + "learning_rate": 4.716981132075472e-05, + "epoch": 0.76625 + }, + { + "step": 1827, + "timestamp": "2025-12-28T12:16:10.153089", + "elapsed_time": 11884.43817949295, + "loss": 0.1049, + "grad_norm": 0.08988852053880692, + "learning_rate": 4.704402515723271e-05, + "epoch": 0.766875 + }, + { + "step": 1828, + "timestamp": "2025-12-28T12:16:18.898682", + "elapsed_time": 11893.183772325516, + "loss": 0.1607, + "grad_norm": 0.22954370081424713, + "learning_rate": 4.691823899371069e-05, + "epoch": 0.7675 + }, + { + "step": 1829, + "timestamp": "2025-12-28T12:16:26.036369", + "elapsed_time": 11900.321459770203, + "loss": 0.2229, + "grad_norm": 0.15082332491874695, + "learning_rate": 4.679245283018868e-05, + "epoch": 0.768125 + }, + { + "step": 1830, + "timestamp": "2025-12-28T12:16:35.324020", + "elapsed_time": 11909.609110355377, + "loss": 0.0898, + "grad_norm": 0.08692368119955063, + "learning_rate": 4.666666666666667e-05, + "epoch": 0.76875 + }, + { + "step": 1831, + "timestamp": "2025-12-28T12:16:41.705976", + "elapsed_time": 11915.991065740585, + "loss": 0.1725, + "grad_norm": 0.13606931269168854, + "learning_rate": 4.654088050314466e-05, + "epoch": 0.769375 + }, + { + "step": 1832, + "timestamp": "2025-12-28T12:16:53.044860", + "elapsed_time": 11927.32995057106, + "loss": 0.2069, + "grad_norm": 0.44243958592414856, + "learning_rate": 4.641509433962264e-05, + "epoch": 0.77 + }, + { + "step": 1833, + "timestamp": "2025-12-28T12:17:10.459005", + "elapsed_time": 11944.744095563889, + "loss": 0.1064, + "grad_norm": 0.07558456808328629, + "learning_rate": 4.628930817610063e-05, + "epoch": 0.770625 + }, + { + "step": 1834, + "timestamp": "2025-12-28T12:17:22.401241", + "elapsed_time": 11956.686330795288, + "loss": 0.0909, + "grad_norm": 0.09237058460712433, + "learning_rate": 4.616352201257862e-05, + "epoch": 0.77125 + }, + { + "step": 1835, + "timestamp": "2025-12-28T12:17:36.526898", + "elapsed_time": 11970.811988592148, + "loss": 0.1281, + "grad_norm": 0.09217442572116852, + "learning_rate": 4.603773584905661e-05, + "epoch": 0.771875 + }, + { + "step": 1836, + "timestamp": "2025-12-28T12:17:48.704766", + "elapsed_time": 11982.989856481552, + "loss": 0.1197, + "grad_norm": 0.09364216774702072, + "learning_rate": 4.5911949685534594e-05, + "epoch": 0.7725 + }, + { + "step": 1837, + "timestamp": "2025-12-28T12:17:54.175712", + "elapsed_time": 11988.460802555084, + "loss": 0.1996, + "grad_norm": 0.15755294263362885, + "learning_rate": 4.578616352201258e-05, + "epoch": 0.773125 + }, + { + "step": 1838, + "timestamp": "2025-12-28T12:18:03.161139", + "elapsed_time": 11997.446228981018, + "loss": 0.3362, + "grad_norm": 0.1641501635313034, + "learning_rate": 4.566037735849057e-05, + "epoch": 0.77375 + }, + { + "step": 1839, + "timestamp": "2025-12-28T12:18:15.104307", + "elapsed_time": 12009.389397144318, + "loss": 0.1722, + "grad_norm": 0.12171639502048492, + "learning_rate": 4.553459119496856e-05, + "epoch": 0.774375 + }, + { + "step": 1840, + "timestamp": "2025-12-28T12:18:19.637046", + "elapsed_time": 12013.922136545181, + "loss": 0.2978, + "grad_norm": 0.19682510197162628, + "learning_rate": 4.5408805031446544e-05, + "epoch": 0.775 + }, + { + "step": 1841, + "timestamp": "2025-12-28T12:18:31.203708", + "elapsed_time": 12025.48879814148, + "loss": 0.1497, + "grad_norm": 0.10714510083198547, + "learning_rate": 4.528301886792453e-05, + "epoch": 0.775625 + }, + { + "step": 1842, + "timestamp": "2025-12-28T12:18:41.667974", + "elapsed_time": 12035.95306444168, + "loss": 0.3087, + "grad_norm": 0.1449776291847229, + "learning_rate": 4.515723270440252e-05, + "epoch": 0.77625 + }, + { + "step": 1843, + "timestamp": "2025-12-28T12:18:48.114191", + "elapsed_time": 12042.399285078049, + "loss": 0.1493, + "grad_norm": 0.13305744528770447, + "learning_rate": 4.503144654088051e-05, + "epoch": 0.776875 + }, + { + "step": 1844, + "timestamp": "2025-12-28T12:18:55.887349", + "elapsed_time": 12050.172439575195, + "loss": 0.3034, + "grad_norm": 0.15352778136730194, + "learning_rate": 4.4905660377358494e-05, + "epoch": 0.7775 + }, + { + "step": 1845, + "timestamp": "2025-12-28T12:19:08.313052", + "elapsed_time": 12062.598142147064, + "loss": 0.1209, + "grad_norm": 0.08751270920038223, + "learning_rate": 4.477987421383648e-05, + "epoch": 0.778125 + }, + { + "step": 1846, + "timestamp": "2025-12-28T12:19:27.969503", + "elapsed_time": 12082.254593133926, + "loss": 0.1342, + "grad_norm": 0.07397376745939255, + "learning_rate": 4.4654088050314466e-05, + "epoch": 0.77875 + }, + { + "step": 1847, + "timestamp": "2025-12-28T12:19:34.562787", + "elapsed_time": 12088.847877264023, + "loss": 0.2321, + "grad_norm": 0.15114973485469818, + "learning_rate": 4.452830188679246e-05, + "epoch": 0.779375 + }, + { + "step": 1848, + "timestamp": "2025-12-28T12:19:40.318451", + "elapsed_time": 12094.603546380997, + "loss": 0.1495, + "grad_norm": 0.13792107999324799, + "learning_rate": 4.4402515723270444e-05, + "epoch": 0.78 + }, + { + "step": 1849, + "timestamp": "2025-12-28T12:19:50.399687", + "elapsed_time": 12104.68477678299, + "loss": 0.1054, + "grad_norm": 0.10309217870235443, + "learning_rate": 4.427672955974843e-05, + "epoch": 0.780625 + }, + { + "step": 1850, + "timestamp": "2025-12-28T12:20:05.387318", + "elapsed_time": 12119.672408342361, + "loss": 0.1211, + "grad_norm": 0.08788999915122986, + "learning_rate": 4.4150943396226416e-05, + "epoch": 0.78125 + }, + { + "step": 1851, + "timestamp": "2025-12-28T12:20:15.522711", + "elapsed_time": 12129.807801246643, + "loss": 0.1387, + "grad_norm": 0.11078450828790665, + "learning_rate": 4.402515723270441e-05, + "epoch": 0.781875 + }, + { + "step": 1852, + "timestamp": "2025-12-28T12:20:19.994887", + "elapsed_time": 12134.279977321625, + "loss": 0.3038, + "grad_norm": 0.19242431223392487, + "learning_rate": 4.3899371069182394e-05, + "epoch": 0.7825 + }, + { + "step": 1853, + "timestamp": "2025-12-28T12:20:37.161220", + "elapsed_time": 12151.446311235428, + "loss": 0.0975, + "grad_norm": 0.0753704383969307, + "learning_rate": 4.377358490566038e-05, + "epoch": 0.783125 + }, + { + "step": 1854, + "timestamp": "2025-12-28T12:20:45.622228", + "elapsed_time": 12159.907318115234, + "loss": 0.1389, + "grad_norm": 0.11638470739126205, + "learning_rate": 4.3647798742138366e-05, + "epoch": 0.78375 + }, + { + "step": 1855, + "timestamp": "2025-12-28T12:20:51.601755", + "elapsed_time": 12165.886845588684, + "loss": 0.1841, + "grad_norm": 0.14550043642520905, + "learning_rate": 4.352201257861636e-05, + "epoch": 0.784375 + }, + { + "step": 1856, + "timestamp": "2025-12-28T12:21:02.010335", + "elapsed_time": 12176.295424938202, + "loss": 0.1331, + "grad_norm": 0.10679540038108826, + "learning_rate": 4.3396226415094345e-05, + "epoch": 0.785 + }, + { + "step": 1857, + "timestamp": "2025-12-28T12:21:07.989370", + "elapsed_time": 12182.274460554123, + "loss": 0.147, + "grad_norm": 0.136283740401268, + "learning_rate": 4.327044025157233e-05, + "epoch": 0.785625 + }, + { + "step": 1858, + "timestamp": "2025-12-28T12:21:13.524178", + "elapsed_time": 12187.809268712997, + "loss": 0.2294, + "grad_norm": 0.16854384541511536, + "learning_rate": 4.3144654088050316e-05, + "epoch": 0.78625 + }, + { + "step": 1859, + "timestamp": "2025-12-28T12:21:22.939585", + "elapsed_time": 12197.22467494011, + "loss": 0.1837, + "grad_norm": 0.1425347775220871, + "learning_rate": 4.301886792452831e-05, + "epoch": 0.786875 + }, + { + "step": 1860, + "timestamp": "2025-12-28T12:21:29.373097", + "elapsed_time": 12203.658187150955, + "loss": 0.3369, + "grad_norm": 0.1832302361726761, + "learning_rate": 4.2893081761006295e-05, + "epoch": 0.7875 + }, + { + "step": 1861, + "timestamp": "2025-12-28T12:21:33.991433", + "elapsed_time": 12208.27652335167, + "loss": 0.1674, + "grad_norm": 0.16159717738628387, + "learning_rate": 4.276729559748428e-05, + "epoch": 0.788125 + }, + { + "step": 1862, + "timestamp": "2025-12-28T12:21:41.508287", + "elapsed_time": 12215.793377637863, + "loss": 0.182, + "grad_norm": 0.13854670524597168, + "learning_rate": 4.2641509433962266e-05, + "epoch": 0.78875 + }, + { + "step": 1863, + "timestamp": "2025-12-28T12:21:47.953231", + "elapsed_time": 12222.238321304321, + "loss": 0.2442, + "grad_norm": 0.13430240750312805, + "learning_rate": 4.251572327044025e-05, + "epoch": 0.789375 + }, + { + "step": 1864, + "timestamp": "2025-12-28T12:22:00.040777", + "elapsed_time": 12234.325867414474, + "loss": 0.1518, + "grad_norm": 0.10589438676834106, + "learning_rate": 4.2389937106918245e-05, + "epoch": 0.79 + }, + { + "step": 1865, + "timestamp": "2025-12-28T12:22:05.336070", + "elapsed_time": 12239.621160030365, + "loss": 0.2583, + "grad_norm": 0.1759846806526184, + "learning_rate": 4.226415094339623e-05, + "epoch": 0.790625 + }, + { + "step": 1866, + "timestamp": "2025-12-28T12:22:13.399786", + "elapsed_time": 12247.684876441956, + "loss": 0.1389, + "grad_norm": 0.12064716964960098, + "learning_rate": 4.213836477987422e-05, + "epoch": 0.79125 + }, + { + "step": 1867, + "timestamp": "2025-12-28T12:22:34.272638", + "elapsed_time": 12268.557728767395, + "loss": 0.0743, + "grad_norm": 0.06062662601470947, + "learning_rate": 4.20125786163522e-05, + "epoch": 0.791875 + }, + { + "step": 1868, + "timestamp": "2025-12-28T12:22:42.738728", + "elapsed_time": 12277.02381849289, + "loss": 0.1641, + "grad_norm": 0.12611140310764313, + "learning_rate": 4.1886792452830195e-05, + "epoch": 0.7925 + }, + { + "step": 1869, + "timestamp": "2025-12-28T12:22:49.875677", + "elapsed_time": 12284.160766839981, + "loss": 0.1267, + "grad_norm": 0.10889780521392822, + "learning_rate": 4.176100628930818e-05, + "epoch": 0.793125 + }, + { + "step": 1870, + "timestamp": "2025-12-28T12:22:58.497093", + "elapsed_time": 12292.782183408737, + "loss": 0.4324, + "grad_norm": 0.1732441633939743, + "learning_rate": 4.163522012578617e-05, + "epoch": 0.79375 + }, + { + "step": 1871, + "timestamp": "2025-12-28T12:23:04.803488", + "elapsed_time": 12299.08857870102, + "loss": 0.4409, + "grad_norm": 0.19805917143821716, + "learning_rate": 4.150943396226415e-05, + "epoch": 0.794375 + }, + { + "step": 1872, + "timestamp": "2025-12-28T12:23:10.710661", + "elapsed_time": 12304.995751619339, + "loss": 0.4087, + "grad_norm": 0.1881469041109085, + "learning_rate": 4.1383647798742145e-05, + "epoch": 0.795 + }, + { + "step": 1873, + "timestamp": "2025-12-28T12:23:18.265312", + "elapsed_time": 12312.55040216446, + "loss": 0.1252, + "grad_norm": 0.11210913211107254, + "learning_rate": 4.125786163522013e-05, + "epoch": 0.795625 + }, + { + "step": 1874, + "timestamp": "2025-12-28T12:23:34.658542", + "elapsed_time": 12328.943633079529, + "loss": 0.116, + "grad_norm": 0.10739357024431229, + "learning_rate": 4.113207547169812e-05, + "epoch": 0.79625 + }, + { + "step": 1875, + "timestamp": "2025-12-28T12:23:40.821995", + "elapsed_time": 12335.107085466385, + "loss": 0.1966, + "grad_norm": 0.14850841462612152, + "learning_rate": 4.10062893081761e-05, + "epoch": 0.796875 + }, + { + "step": 1876, + "timestamp": "2025-12-28T12:23:47.096816", + "elapsed_time": 12341.381906032562, + "loss": 0.1838, + "grad_norm": 0.1477261185646057, + "learning_rate": 4.088050314465409e-05, + "epoch": 0.7975 + }, + { + "step": 1877, + "timestamp": "2025-12-28T12:24:07.963691", + "elapsed_time": 12362.248781204224, + "loss": 0.0783, + "grad_norm": 0.06663120537996292, + "learning_rate": 4.075471698113208e-05, + "epoch": 0.798125 + }, + { + "step": 1878, + "timestamp": "2025-12-28T12:24:25.074044", + "elapsed_time": 12379.35913491249, + "loss": 0.1794, + "grad_norm": 0.10132871568202972, + "learning_rate": 4.062893081761007e-05, + "epoch": 0.79875 + }, + { + "step": 1879, + "timestamp": "2025-12-28T12:24:35.826343", + "elapsed_time": 12390.111433267593, + "loss": 0.1471, + "grad_norm": 0.11134599149227142, + "learning_rate": 4.050314465408805e-05, + "epoch": 0.799375 + }, + { + "step": 1880, + "timestamp": "2025-12-28T12:24:42.929007", + "elapsed_time": 12397.21409702301, + "loss": 0.2019, + "grad_norm": 0.1353609710931778, + "learning_rate": 4.037735849056604e-05, + "epoch": 0.8 + }, + { + "step": 1881, + "timestamp": "2025-12-28T12:24:55.553077", + "elapsed_time": 12409.83816742897, + "loss": 0.1102, + "grad_norm": 0.09974975883960724, + "learning_rate": 4.025157232704403e-05, + "epoch": 0.800625 + }, + { + "step": 1882, + "timestamp": "2025-12-28T12:25:09.292309", + "elapsed_time": 12423.577399730682, + "loss": 0.1936, + "grad_norm": 0.10728470981121063, + "learning_rate": 4.012578616352202e-05, + "epoch": 0.80125 + }, + { + "step": 1883, + "timestamp": "2025-12-28T12:25:29.788806", + "elapsed_time": 12444.073896169662, + "loss": 0.1056, + "grad_norm": 0.08063426613807678, + "learning_rate": 4e-05, + "epoch": 0.801875 + }, + { + "step": 1884, + "timestamp": "2025-12-28T12:25:37.645916", + "elapsed_time": 12451.931006908417, + "loss": 0.1324, + "grad_norm": 0.1093749925494194, + "learning_rate": 3.987421383647799e-05, + "epoch": 0.8025 + }, + { + "step": 1885, + "timestamp": "2025-12-28T12:25:50.396263", + "elapsed_time": 12464.681353330612, + "loss": 0.11, + "grad_norm": 0.0938941091299057, + "learning_rate": 3.974842767295598e-05, + "epoch": 0.803125 + }, + { + "step": 1886, + "timestamp": "2025-12-28T12:26:07.508041", + "elapsed_time": 12481.793131351471, + "loss": 0.1111, + "grad_norm": 0.08618809282779694, + "learning_rate": 3.962264150943397e-05, + "epoch": 0.80375 + }, + { + "step": 1887, + "timestamp": "2025-12-28T12:26:12.131185", + "elapsed_time": 12486.416275262833, + "loss": 0.1774, + "grad_norm": 0.16178780794143677, + "learning_rate": 3.9496855345911953e-05, + "epoch": 0.804375 + }, + { + "step": 1888, + "timestamp": "2025-12-28T12:26:22.840063", + "elapsed_time": 12497.125153303146, + "loss": 0.1668, + "grad_norm": 0.11834202706813812, + "learning_rate": 3.937106918238994e-05, + "epoch": 0.805 + }, + { + "step": 1889, + "timestamp": "2025-12-28T12:26:33.452731", + "elapsed_time": 12507.737821102142, + "loss": 0.1647, + "grad_norm": 0.10877163708209991, + "learning_rate": 3.924528301886793e-05, + "epoch": 0.805625 + }, + { + "step": 1890, + "timestamp": "2025-12-28T12:26:43.963908", + "elapsed_time": 12518.248998880386, + "loss": 0.3662, + "grad_norm": 0.34827324748039246, + "learning_rate": 3.911949685534592e-05, + "epoch": 0.80625 + }, + { + "step": 1891, + "timestamp": "2025-12-28T12:26:51.062721", + "elapsed_time": 12525.347811460495, + "loss": 0.1494, + "grad_norm": 0.15114177763462067, + "learning_rate": 3.8993710691823904e-05, + "epoch": 0.806875 + }, + { + "step": 1892, + "timestamp": "2025-12-28T12:27:00.778145", + "elapsed_time": 12535.063235282898, + "loss": 0.2763, + "grad_norm": 0.14429102838039398, + "learning_rate": 3.886792452830189e-05, + "epoch": 0.8075 + }, + { + "step": 1893, + "timestamp": "2025-12-28T12:27:07.766029", + "elapsed_time": 12542.051119327545, + "loss": 0.1599, + "grad_norm": 0.14175190031528473, + "learning_rate": 3.8742138364779875e-05, + "epoch": 0.808125 + }, + { + "step": 1894, + "timestamp": "2025-12-28T12:27:18.126378", + "elapsed_time": 12552.411469221115, + "loss": 0.1061, + "grad_norm": 0.08977079391479492, + "learning_rate": 3.861635220125787e-05, + "epoch": 0.80875 + }, + { + "step": 1895, + "timestamp": "2025-12-28T12:27:24.648082", + "elapsed_time": 12558.93317270279, + "loss": 0.2233, + "grad_norm": 0.15675166249275208, + "learning_rate": 3.8490566037735854e-05, + "epoch": 0.809375 + }, + { + "step": 1896, + "timestamp": "2025-12-28T12:27:38.818514", + "elapsed_time": 12573.10360455513, + "loss": 0.1142, + "grad_norm": 0.08880900591611862, + "learning_rate": 3.836477987421384e-05, + "epoch": 0.81 + }, + { + "step": 1897, + "timestamp": "2025-12-28T12:27:45.255238", + "elapsed_time": 12579.540328502655, + "loss": 0.1721, + "grad_norm": 0.14262078702449799, + "learning_rate": 3.8238993710691826e-05, + "epoch": 0.810625 + }, + { + "step": 1898, + "timestamp": "2025-12-28T12:27:57.156162", + "elapsed_time": 12591.441256284714, + "loss": 0.1236, + "grad_norm": 0.09527979791164398, + "learning_rate": 3.811320754716982e-05, + "epoch": 0.81125 + }, + { + "step": 1899, + "timestamp": "2025-12-28T12:28:18.026839", + "elapsed_time": 12612.31192946434, + "loss": 0.1045, + "grad_norm": 0.08572285622358322, + "learning_rate": 3.7987421383647804e-05, + "epoch": 0.811875 + }, + { + "step": 1900, + "timestamp": "2025-12-28T12:28:32.684984", + "elapsed_time": 12626.970074653625, + "loss": 0.1285, + "grad_norm": 0.09535647183656693, + "learning_rate": 3.786163522012579e-05, + "epoch": 0.8125 + }, + { + "step": 1901, + "timestamp": "2025-12-28T12:28:47.846051", + "elapsed_time": 12642.13114118576, + "loss": 0.1224, + "grad_norm": 0.09724906086921692, + "learning_rate": 3.7735849056603776e-05, + "epoch": 0.813125 + }, + { + "step": 1902, + "timestamp": "2025-12-28T12:28:56.318374", + "elapsed_time": 12650.603464603424, + "loss": 0.25, + "grad_norm": 0.1405816376209259, + "learning_rate": 3.761006289308177e-05, + "epoch": 0.81375 + }, + { + "step": 1903, + "timestamp": "2025-12-28T12:29:07.320674", + "elapsed_time": 12661.605763912201, + "loss": 0.1659, + "grad_norm": 0.1113525927066803, + "learning_rate": 3.748427672955975e-05, + "epoch": 0.814375 + }, + { + "step": 1904, + "timestamp": "2025-12-28T12:29:16.620685", + "elapsed_time": 12670.905775785446, + "loss": 0.1158, + "grad_norm": 0.11509151756763458, + "learning_rate": 3.735849056603773e-05, + "epoch": 0.815 + }, + { + "step": 1905, + "timestamp": "2025-12-28T12:29:34.296146", + "elapsed_time": 12688.581236124039, + "loss": 0.1305, + "grad_norm": 0.10004525631666183, + "learning_rate": 3.723270440251572e-05, + "epoch": 0.815625 + }, + { + "step": 1906, + "timestamp": "2025-12-28T12:29:44.763574", + "elapsed_time": 12699.048664331436, + "loss": 0.1535, + "grad_norm": 0.10531044751405716, + "learning_rate": 3.710691823899371e-05, + "epoch": 0.81625 + }, + { + "step": 1907, + "timestamp": "2025-12-28T12:29:52.278280", + "elapsed_time": 12706.56337094307, + "loss": 0.1443, + "grad_norm": 0.13411474227905273, + "learning_rate": 3.69811320754717e-05, + "epoch": 0.816875 + }, + { + "step": 1908, + "timestamp": "2025-12-28T12:30:05.896343", + "elapsed_time": 12720.181433439255, + "loss": 0.1701, + "grad_norm": 0.11000078171491623, + "learning_rate": 3.6855345911949684e-05, + "epoch": 0.8175 + }, + { + "step": 1909, + "timestamp": "2025-12-28T12:30:14.677199", + "elapsed_time": 12728.962289571762, + "loss": 0.3532, + "grad_norm": 0.1655169278383255, + "learning_rate": 3.672955974842767e-05, + "epoch": 0.818125 + }, + { + "step": 1910, + "timestamp": "2025-12-28T12:30:25.150670", + "elapsed_time": 12739.435760736465, + "loss": 0.1252, + "grad_norm": 0.1451990157365799, + "learning_rate": 3.660377358490566e-05, + "epoch": 0.81875 + }, + { + "step": 1911, + "timestamp": "2025-12-28T12:30:33.004276", + "elapsed_time": 12747.289366722107, + "loss": 0.2257, + "grad_norm": 0.15634727478027344, + "learning_rate": 3.647798742138365e-05, + "epoch": 0.819375 + }, + { + "step": 1912, + "timestamp": "2025-12-28T12:30:40.480022", + "elapsed_time": 12754.765112161636, + "loss": 0.3694, + "grad_norm": 0.17593269050121307, + "learning_rate": 3.6352201257861634e-05, + "epoch": 0.82 + }, + { + "step": 1913, + "timestamp": "2025-12-28T12:30:48.029511", + "elapsed_time": 12762.314601421356, + "loss": 0.2491, + "grad_norm": 0.14427348971366882, + "learning_rate": 3.622641509433962e-05, + "epoch": 0.820625 + }, + { + "step": 1914, + "timestamp": "2025-12-28T12:30:54.103421", + "elapsed_time": 12768.388511657715, + "loss": 0.2834, + "grad_norm": 0.21829214692115784, + "learning_rate": 3.610062893081761e-05, + "epoch": 0.82125 + }, + { + "step": 1915, + "timestamp": "2025-12-28T12:31:01.382574", + "elapsed_time": 12775.667664289474, + "loss": 0.168, + "grad_norm": 0.15699121356010437, + "learning_rate": 3.59748427672956e-05, + "epoch": 0.821875 + }, + { + "step": 1916, + "timestamp": "2025-12-28T12:31:09.694802", + "elapsed_time": 12783.979892253876, + "loss": 0.2832, + "grad_norm": 0.16049239039421082, + "learning_rate": 3.5849056603773584e-05, + "epoch": 0.8225 + }, + { + "step": 1917, + "timestamp": "2025-12-28T12:31:19.119223", + "elapsed_time": 12793.404314041138, + "loss": 0.196, + "grad_norm": 0.12521395087242126, + "learning_rate": 3.572327044025157e-05, + "epoch": 0.823125 + }, + { + "step": 1918, + "timestamp": "2025-12-28T12:31:25.059477", + "elapsed_time": 12799.344567537308, + "loss": 0.144, + "grad_norm": 0.12810324132442474, + "learning_rate": 3.559748427672956e-05, + "epoch": 0.82375 + }, + { + "step": 1919, + "timestamp": "2025-12-28T12:31:31.502266", + "elapsed_time": 12805.787356376648, + "loss": 0.2127, + "grad_norm": 0.14652138948440552, + "learning_rate": 3.547169811320755e-05, + "epoch": 0.824375 + }, + { + "step": 1920, + "timestamp": "2025-12-28T12:31:39.355931", + "elapsed_time": 12813.641021966934, + "loss": 0.2024, + "grad_norm": 0.13335974514484406, + "learning_rate": 3.5345911949685534e-05, + "epoch": 0.825 + }, + { + "step": 1921, + "timestamp": "2025-12-28T12:31:45.989318", + "elapsed_time": 12820.274408578873, + "loss": 0.1712, + "grad_norm": 0.13532021641731262, + "learning_rate": 3.522012578616352e-05, + "epoch": 0.825625 + }, + { + "step": 1922, + "timestamp": "2025-12-28T12:31:55.415620", + "elapsed_time": 12829.70071029663, + "loss": 0.1493, + "grad_norm": 0.11901037395000458, + "learning_rate": 3.5094339622641506e-05, + "epoch": 0.82625 + }, + { + "step": 1923, + "timestamp": "2025-12-28T12:32:06.926802", + "elapsed_time": 12841.211893081665, + "loss": 0.1154, + "grad_norm": 0.08895418792963028, + "learning_rate": 3.49685534591195e-05, + "epoch": 0.826875 + }, + { + "step": 1924, + "timestamp": "2025-12-28T12:32:13.518398", + "elapsed_time": 12847.8034927845, + "loss": 0.1603, + "grad_norm": 0.133051335811615, + "learning_rate": 3.4842767295597484e-05, + "epoch": 0.8275 + }, + { + "step": 1925, + "timestamp": "2025-12-28T12:32:24.513980", + "elapsed_time": 12858.799070358276, + "loss": 0.1377, + "grad_norm": 0.10832136869430542, + "learning_rate": 3.471698113207547e-05, + "epoch": 0.828125 + }, + { + "step": 1926, + "timestamp": "2025-12-28T12:32:30.547900", + "elapsed_time": 12864.83299088478, + "loss": 0.393, + "grad_norm": 0.16918057203292847, + "learning_rate": 3.4591194968553456e-05, + "epoch": 0.82875 + }, + { + "step": 1927, + "timestamp": "2025-12-28T12:32:37.613818", + "elapsed_time": 12871.89890909195, + "loss": 0.1779, + "grad_norm": 0.13324803113937378, + "learning_rate": 3.446540880503145e-05, + "epoch": 0.829375 + }, + { + "step": 1928, + "timestamp": "2025-12-28T12:32:44.130396", + "elapsed_time": 12878.415486097336, + "loss": 0.1886, + "grad_norm": 0.22868554294109344, + "learning_rate": 3.4339622641509435e-05, + "epoch": 0.83 + }, + { + "step": 1929, + "timestamp": "2025-12-28T12:32:50.719516", + "elapsed_time": 12885.00460600853, + "loss": 0.3735, + "grad_norm": 0.16795550286769867, + "learning_rate": 3.421383647798742e-05, + "epoch": 0.830625 + }, + { + "step": 1930, + "timestamp": "2025-12-28T12:33:00.614246", + "elapsed_time": 12894.899336338043, + "loss": 0.1517, + "grad_norm": 0.1097177118062973, + "learning_rate": 3.4088050314465406e-05, + "epoch": 0.83125 + }, + { + "step": 1931, + "timestamp": "2025-12-28T12:33:11.076050", + "elapsed_time": 12905.361140727997, + "loss": 0.1361, + "grad_norm": 0.11156441271305084, + "learning_rate": 3.39622641509434e-05, + "epoch": 0.831875 + }, + { + "step": 1932, + "timestamp": "2025-12-28T12:33:22.857950", + "elapsed_time": 12917.143040180206, + "loss": 0.1602, + "grad_norm": 0.11632666736841202, + "learning_rate": 3.3836477987421385e-05, + "epoch": 0.8325 + }, + { + "step": 1933, + "timestamp": "2025-12-28T12:33:43.115463", + "elapsed_time": 12937.400553941727, + "loss": 0.1039, + "grad_norm": 0.07940562069416046, + "learning_rate": 3.371069182389937e-05, + "epoch": 0.833125 + }, + { + "step": 1934, + "timestamp": "2025-12-28T12:33:56.721853", + "elapsed_time": 12951.006942987442, + "loss": 0.0978, + "grad_norm": 0.27172061800956726, + "learning_rate": 3.3584905660377356e-05, + "epoch": 0.83375 + }, + { + "step": 1935, + "timestamp": "2025-12-28T12:34:11.804848", + "elapsed_time": 12966.089939117432, + "loss": 0.1315, + "grad_norm": 0.09464891999959946, + "learning_rate": 3.345911949685534e-05, + "epoch": 0.834375 + }, + { + "step": 1936, + "timestamp": "2025-12-28T12:34:17.303351", + "elapsed_time": 12971.58844089508, + "loss": 0.1868, + "grad_norm": 0.15971067547798157, + "learning_rate": 3.3333333333333335e-05, + "epoch": 0.835 + }, + { + "step": 1937, + "timestamp": "2025-12-28T12:34:38.344428", + "elapsed_time": 12992.629518508911, + "loss": 0.0888, + "grad_norm": 0.07462675869464874, + "learning_rate": 3.320754716981132e-05, + "epoch": 0.835625 + }, + { + "step": 1938, + "timestamp": "2025-12-28T12:34:44.610775", + "elapsed_time": 12998.89586520195, + "loss": 0.1803, + "grad_norm": 0.18845033645629883, + "learning_rate": 3.308176100628931e-05, + "epoch": 0.83625 + }, + { + "step": 1939, + "timestamp": "2025-12-28T12:34:50.913733", + "elapsed_time": 13005.198822975159, + "loss": 0.2887, + "grad_norm": 0.17362096905708313, + "learning_rate": 3.295597484276729e-05, + "epoch": 0.836875 + }, + { + "step": 1940, + "timestamp": "2025-12-28T12:34:57.357092", + "elapsed_time": 13011.642182588577, + "loss": 0.1972, + "grad_norm": 0.1999269723892212, + "learning_rate": 3.2830188679245285e-05, + "epoch": 0.8375 + }, + { + "step": 1941, + "timestamp": "2025-12-28T12:35:02.217109", + "elapsed_time": 13016.502199172974, + "loss": 0.215, + "grad_norm": 0.16465437412261963, + "learning_rate": 3.270440251572327e-05, + "epoch": 0.838125 + }, + { + "step": 1942, + "timestamp": "2025-12-28T12:35:10.992022", + "elapsed_time": 13025.27711224556, + "loss": 0.1497, + "grad_norm": 0.12079225480556488, + "learning_rate": 3.257861635220126e-05, + "epoch": 0.83875 + }, + { + "step": 1943, + "timestamp": "2025-12-28T12:35:14.991163", + "elapsed_time": 13029.276253938675, + "loss": 0.2926, + "grad_norm": 0.23038972914218903, + "learning_rate": 3.245283018867924e-05, + "epoch": 0.839375 + }, + { + "step": 1944, + "timestamp": "2025-12-28T12:35:24.077690", + "elapsed_time": 13038.362780094147, + "loss": 0.5596, + "grad_norm": 0.17396830022335052, + "learning_rate": 3.2327044025157235e-05, + "epoch": 0.84 + }, + { + "step": 1945, + "timestamp": "2025-12-28T12:35:31.821806", + "elapsed_time": 13046.106900453568, + "loss": 0.1473, + "grad_norm": 0.12451894581317902, + "learning_rate": 3.220125786163522e-05, + "epoch": 0.840625 + }, + { + "step": 1946, + "timestamp": "2025-12-28T12:35:41.040337", + "elapsed_time": 13055.325427770615, + "loss": 0.0972, + "grad_norm": 0.09150367975234985, + "learning_rate": 3.207547169811321e-05, + "epoch": 0.84125 + }, + { + "step": 1947, + "timestamp": "2025-12-28T12:35:54.543075", + "elapsed_time": 13068.828165531158, + "loss": 0.1249, + "grad_norm": 0.09334319829940796, + "learning_rate": 3.194968553459119e-05, + "epoch": 0.841875 + }, + { + "step": 1948, + "timestamp": "2025-12-28T12:36:05.426685", + "elapsed_time": 13079.711775302887, + "loss": 0.1417, + "grad_norm": 0.10683770477771759, + "learning_rate": 3.1823899371069186e-05, + "epoch": 0.8425 + }, + { + "step": 1949, + "timestamp": "2025-12-28T12:36:18.168592", + "elapsed_time": 13092.453682422638, + "loss": 0.1475, + "grad_norm": 0.0970892384648323, + "learning_rate": 3.169811320754717e-05, + "epoch": 0.843125 + }, + { + "step": 1950, + "timestamp": "2025-12-28T12:36:34.081832", + "elapsed_time": 13108.366922616959, + "loss": 0.1373, + "grad_norm": 0.09424092620611191, + "learning_rate": 3.157232704402516e-05, + "epoch": 0.84375 + }, + { + "step": 1951, + "timestamp": "2025-12-28T12:36:44.441007", + "elapsed_time": 13118.72609782219, + "loss": 0.1229, + "grad_norm": 0.11297397315502167, + "learning_rate": 3.144654088050314e-05, + "epoch": 0.844375 + }, + { + "step": 1952, + "timestamp": "2025-12-28T12:36:54.066744", + "elapsed_time": 13128.351834058762, + "loss": 0.1486, + "grad_norm": 0.11981320381164551, + "learning_rate": 3.132075471698113e-05, + "epoch": 0.845 + }, + { + "step": 1953, + "timestamp": "2025-12-28T12:37:04.580382", + "elapsed_time": 13138.86547279358, + "loss": 0.1323, + "grad_norm": 0.10277368128299713, + "learning_rate": 3.119496855345912e-05, + "epoch": 0.845625 + }, + { + "step": 1954, + "timestamp": "2025-12-28T12:37:14.942990", + "elapsed_time": 13149.228080272675, + "loss": 0.2132, + "grad_norm": 0.13679690659046173, + "learning_rate": 3.106918238993711e-05, + "epoch": 0.84625 + }, + { + "step": 1955, + "timestamp": "2025-12-28T12:37:21.894712", + "elapsed_time": 13156.179806470871, + "loss": 0.1818, + "grad_norm": 0.14240722358226776, + "learning_rate": 3.094339622641509e-05, + "epoch": 0.846875 + }, + { + "step": 1956, + "timestamp": "2025-12-28T12:37:26.431086", + "elapsed_time": 13160.716176271439, + "loss": 0.2607, + "grad_norm": 0.1875351518392563, + "learning_rate": 3.081761006289308e-05, + "epoch": 0.8475 + }, + { + "step": 1957, + "timestamp": "2025-12-28T12:37:39.981919", + "elapsed_time": 13174.267012834549, + "loss": 0.181, + "grad_norm": 0.12086289376020432, + "learning_rate": 3.069182389937107e-05, + "epoch": 0.848125 + }, + { + "step": 1958, + "timestamp": "2025-12-28T12:37:50.822168", + "elapsed_time": 13185.107258558273, + "loss": 0.1471, + "grad_norm": 0.11906228959560394, + "learning_rate": 3.056603773584906e-05, + "epoch": 0.84875 + }, + { + "step": 1959, + "timestamp": "2025-12-28T12:38:08.057968", + "elapsed_time": 13202.34305858612, + "loss": 0.1174, + "grad_norm": 0.07859740406274796, + "learning_rate": 3.0440251572327043e-05, + "epoch": 0.849375 + }, + { + "step": 1960, + "timestamp": "2025-12-28T12:38:15.115247", + "elapsed_time": 13209.400336742401, + "loss": 0.1584, + "grad_norm": 0.22734029591083527, + "learning_rate": 3.0314465408805033e-05, + "epoch": 0.85 + }, + { + "step": 1961, + "timestamp": "2025-12-28T12:38:19.840730", + "elapsed_time": 13214.12582039833, + "loss": 0.2167, + "grad_norm": 0.18263642489910126, + "learning_rate": 3.018867924528302e-05, + "epoch": 0.850625 + }, + { + "step": 1962, + "timestamp": "2025-12-28T12:38:28.595786", + "elapsed_time": 13222.880876779556, + "loss": 0.1756, + "grad_norm": 0.12433876842260361, + "learning_rate": 3.0062893081761008e-05, + "epoch": 0.85125 + }, + { + "step": 1963, + "timestamp": "2025-12-28T12:38:40.961363", + "elapsed_time": 13235.246453523636, + "loss": 0.1387, + "grad_norm": 0.10110918432474136, + "learning_rate": 2.9937106918238994e-05, + "epoch": 0.851875 + }, + { + "step": 1964, + "timestamp": "2025-12-28T12:38:52.419176", + "elapsed_time": 13246.704270601273, + "loss": 0.1096, + "grad_norm": 0.0898265689611435, + "learning_rate": 2.9811320754716983e-05, + "epoch": 0.8525 + }, + { + "step": 1965, + "timestamp": "2025-12-28T12:38:58.899414", + "elapsed_time": 13253.18450474739, + "loss": 0.194, + "grad_norm": 0.15123769640922546, + "learning_rate": 2.968553459119497e-05, + "epoch": 0.853125 + }, + { + "step": 1966, + "timestamp": "2025-12-28T12:39:06.139908", + "elapsed_time": 13260.425002336502, + "loss": 0.147, + "grad_norm": 0.12028482556343079, + "learning_rate": 2.9559748427672958e-05, + "epoch": 0.85375 + }, + { + "step": 1967, + "timestamp": "2025-12-28T12:39:14.272168", + "elapsed_time": 13268.55725812912, + "loss": 0.1709, + "grad_norm": 0.13067594170570374, + "learning_rate": 2.9433962264150944e-05, + "epoch": 0.854375 + }, + { + "step": 1968, + "timestamp": "2025-12-28T12:39:20.178108", + "elapsed_time": 13274.463198184967, + "loss": 0.1636, + "grad_norm": 0.15121272206306458, + "learning_rate": 2.930817610062893e-05, + "epoch": 0.855 + }, + { + "step": 1969, + "timestamp": "2025-12-28T12:39:27.555839", + "elapsed_time": 13281.84092926979, + "loss": 0.1912, + "grad_norm": 0.18086270987987518, + "learning_rate": 2.918238993710692e-05, + "epoch": 0.855625 + }, + { + "step": 1970, + "timestamp": "2025-12-28T12:39:37.363912", + "elapsed_time": 13291.649002075195, + "loss": 0.1739, + "grad_norm": 0.12668268382549286, + "learning_rate": 2.9056603773584905e-05, + "epoch": 0.85625 + }, + { + "step": 1971, + "timestamp": "2025-12-28T12:39:45.421058", + "elapsed_time": 13299.706147909164, + "loss": 0.1524, + "grad_norm": 0.12265957146883011, + "learning_rate": 2.8930817610062894e-05, + "epoch": 0.856875 + }, + { + "step": 1972, + "timestamp": "2025-12-28T12:40:02.948650", + "elapsed_time": 13317.233740568161, + "loss": 0.1257, + "grad_norm": 0.08345521986484528, + "learning_rate": 2.880503144654088e-05, + "epoch": 0.8575 + }, + { + "step": 1973, + "timestamp": "2025-12-28T12:40:12.078552", + "elapsed_time": 13326.363642930984, + "loss": 0.1756, + "grad_norm": 0.11556252837181091, + "learning_rate": 2.867924528301887e-05, + "epoch": 0.858125 + }, + { + "step": 1974, + "timestamp": "2025-12-28T12:40:25.168584", + "elapsed_time": 13339.453674077988, + "loss": 0.1235, + "grad_norm": 0.09028176963329315, + "learning_rate": 2.8553459119496855e-05, + "epoch": 0.85875 + }, + { + "step": 1975, + "timestamp": "2025-12-28T12:40:33.147283", + "elapsed_time": 13347.432373523712, + "loss": 0.1337, + "grad_norm": 0.12279459834098816, + "learning_rate": 2.8427672955974844e-05, + "epoch": 0.859375 + }, + { + "step": 1976, + "timestamp": "2025-12-28T12:40:44.760775", + "elapsed_time": 13359.045865058899, + "loss": 0.1334, + "grad_norm": 0.09873386472463608, + "learning_rate": 2.830188679245283e-05, + "epoch": 0.86 + }, + { + "step": 1977, + "timestamp": "2025-12-28T12:40:55.871028", + "elapsed_time": 13370.156118869781, + "loss": 0.122, + "grad_norm": 0.10663529485464096, + "learning_rate": 2.817610062893082e-05, + "epoch": 0.860625 + }, + { + "step": 1978, + "timestamp": "2025-12-28T12:41:03.235317", + "elapsed_time": 13377.520410776138, + "loss": 0.1669, + "grad_norm": 0.12646640837192535, + "learning_rate": 2.8050314465408805e-05, + "epoch": 0.86125 + }, + { + "step": 1979, + "timestamp": "2025-12-28T12:41:11.043014", + "elapsed_time": 13385.32810497284, + "loss": 0.1255, + "grad_norm": 0.12962931394577026, + "learning_rate": 2.7924528301886794e-05, + "epoch": 0.861875 + }, + { + "step": 1980, + "timestamp": "2025-12-28T12:41:16.966780", + "elapsed_time": 13391.251874685287, + "loss": 0.1866, + "grad_norm": 0.14886367321014404, + "learning_rate": 2.779874213836478e-05, + "epoch": 0.8625 + }, + { + "step": 1981, + "timestamp": "2025-12-28T12:41:28.910216", + "elapsed_time": 13403.195305585861, + "loss": 0.1053, + "grad_norm": 0.08952232450246811, + "learning_rate": 2.767295597484277e-05, + "epoch": 0.863125 + }, + { + "step": 1982, + "timestamp": "2025-12-28T12:41:37.341955", + "elapsed_time": 13411.627045869827, + "loss": 0.2185, + "grad_norm": 0.1354040801525116, + "learning_rate": 2.7547169811320755e-05, + "epoch": 0.86375 + }, + { + "step": 1983, + "timestamp": "2025-12-28T12:41:43.511317", + "elapsed_time": 13417.796411514282, + "loss": 0.1658, + "grad_norm": 0.1327390819787979, + "learning_rate": 2.742138364779874e-05, + "epoch": 0.864375 + }, + { + "step": 1984, + "timestamp": "2025-12-28T12:41:54.923595", + "elapsed_time": 13429.208688497543, + "loss": 0.1531, + "grad_norm": 0.13857761025428772, + "learning_rate": 2.729559748427673e-05, + "epoch": 0.865 + }, + { + "step": 1985, + "timestamp": "2025-12-28T12:42:07.782059", + "elapsed_time": 13442.06714963913, + "loss": 0.1701, + "grad_norm": 0.1057012677192688, + "learning_rate": 2.7169811320754716e-05, + "epoch": 0.865625 + }, + { + "step": 1986, + "timestamp": "2025-12-28T12:42:13.794560", + "elapsed_time": 13448.07965040207, + "loss": 0.2544, + "grad_norm": 0.16066089272499084, + "learning_rate": 2.7044025157232706e-05, + "epoch": 0.86625 + }, + { + "step": 1987, + "timestamp": "2025-12-28T12:42:24.972624", + "elapsed_time": 13459.257714748383, + "loss": 0.1063, + "grad_norm": 0.10727685689926147, + "learning_rate": 2.691823899371069e-05, + "epoch": 0.866875 + }, + { + "step": 1988, + "timestamp": "2025-12-28T12:42:35.333485", + "elapsed_time": 13469.618575811386, + "loss": 0.1744, + "grad_norm": 0.10637471824884415, + "learning_rate": 2.679245283018868e-05, + "epoch": 0.8675 + }, + { + "step": 1989, + "timestamp": "2025-12-28T12:42:44.268707", + "elapsed_time": 13478.553797006607, + "loss": 0.3446, + "grad_norm": 0.18917757272720337, + "learning_rate": 2.6666666666666667e-05, + "epoch": 0.868125 + }, + { + "step": 1990, + "timestamp": "2025-12-28T12:42:53.119626", + "elapsed_time": 13487.404715776443, + "loss": 0.1694, + "grad_norm": 0.16625277698040009, + "learning_rate": 2.6540880503144656e-05, + "epoch": 0.86875 + }, + { + "step": 1991, + "timestamp": "2025-12-28T12:42:56.594548", + "elapsed_time": 13490.879638910294, + "loss": 0.3027, + "grad_norm": 0.21782198548316956, + "learning_rate": 2.641509433962264e-05, + "epoch": 0.869375 + }, + { + "step": 1992, + "timestamp": "2025-12-28T12:43:05.768847", + "elapsed_time": 13500.05393743515, + "loss": 0.1083, + "grad_norm": 0.09708056598901749, + "learning_rate": 2.628930817610063e-05, + "epoch": 0.87 + }, + { + "step": 1993, + "timestamp": "2025-12-28T12:43:15.670772", + "elapsed_time": 13509.955861568451, + "loss": 0.13, + "grad_norm": 0.12269952893257141, + "learning_rate": 2.6163522012578617e-05, + "epoch": 0.870625 + }, + { + "step": 1994, + "timestamp": "2025-12-28T12:43:21.608461", + "elapsed_time": 13515.893551588058, + "loss": 0.2237, + "grad_norm": 0.17270702123641968, + "learning_rate": 2.6037735849056606e-05, + "epoch": 0.87125 + }, + { + "step": 1995, + "timestamp": "2025-12-28T12:43:28.693932", + "elapsed_time": 13522.9790225029, + "loss": 0.3098, + "grad_norm": 0.17161986231803894, + "learning_rate": 2.5911949685534592e-05, + "epoch": 0.871875 + }, + { + "step": 1996, + "timestamp": "2025-12-28T12:43:35.216183", + "elapsed_time": 13529.501273870468, + "loss": 0.4953, + "grad_norm": 0.18305501341819763, + "learning_rate": 2.578616352201258e-05, + "epoch": 0.8725 + }, + { + "step": 1997, + "timestamp": "2025-12-28T12:43:40.964345", + "elapsed_time": 13535.249434709549, + "loss": 0.3349, + "grad_norm": 0.1995151937007904, + "learning_rate": 2.5660377358490567e-05, + "epoch": 0.873125 + }, + { + "step": 1998, + "timestamp": "2025-12-28T12:43:50.213890", + "elapsed_time": 13544.498980283737, + "loss": 0.1256, + "grad_norm": 0.11612726747989655, + "learning_rate": 2.5534591194968556e-05, + "epoch": 0.87375 + }, + { + "step": 1999, + "timestamp": "2025-12-28T12:44:01.831854", + "elapsed_time": 13556.11694407463, + "loss": 0.1089, + "grad_norm": 0.09675383567810059, + "learning_rate": 2.5408805031446542e-05, + "epoch": 0.874375 + }, + { + "step": 2000, + "timestamp": "2025-12-28T12:44:08.303416", + "elapsed_time": 13562.588506937027, + "loss": 0.4029, + "grad_norm": 0.19399595260620117, + "learning_rate": 2.5283018867924528e-05, + "epoch": 0.875 + }, + { + "step": 2001, + "timestamp": "2025-12-28T12:44:16.958567", + "elapsed_time": 13571.243657827377, + "loss": 0.1523, + "grad_norm": 0.1444273293018341, + "learning_rate": 2.5157232704402517e-05, + "epoch": 0.875625 + }, + { + "step": 2002, + "timestamp": "2025-12-28T12:44:24.980330", + "elapsed_time": 13579.26542043686, + "loss": 0.4692, + "grad_norm": 0.18777810037136078, + "learning_rate": 2.5031446540880503e-05, + "epoch": 0.87625 + }, + { + "step": 2003, + "timestamp": "2025-12-28T12:44:32.077927", + "elapsed_time": 13586.36301779747, + "loss": 0.1718, + "grad_norm": 0.1408010870218277, + "learning_rate": 2.4905660377358492e-05, + "epoch": 0.876875 + }, + { + "step": 2004, + "timestamp": "2025-12-28T12:44:37.283086", + "elapsed_time": 13591.568180322647, + "loss": 0.1881, + "grad_norm": 0.1654125154018402, + "learning_rate": 2.4779874213836478e-05, + "epoch": 0.8775 + }, + { + "step": 2005, + "timestamp": "2025-12-28T12:44:44.801095", + "elapsed_time": 13599.08618569374, + "loss": 0.1881, + "grad_norm": 0.15793928503990173, + "learning_rate": 2.4654088050314467e-05, + "epoch": 0.878125 + }, + { + "step": 2006, + "timestamp": "2025-12-28T12:44:50.482390", + "elapsed_time": 13604.767484664917, + "loss": 0.185, + "grad_norm": 0.1794230043888092, + "learning_rate": 2.4528301886792453e-05, + "epoch": 0.87875 + }, + { + "step": 2007, + "timestamp": "2025-12-28T12:45:06.649005", + "elapsed_time": 13620.934096097946, + "loss": 0.0961, + "grad_norm": 0.07328634709119797, + "learning_rate": 2.4402515723270442e-05, + "epoch": 0.879375 + }, + { + "step": 2008, + "timestamp": "2025-12-28T12:45:10.656329", + "elapsed_time": 13624.941419363022, + "loss": 0.2643, + "grad_norm": 0.2130497395992279, + "learning_rate": 2.427672955974843e-05, + "epoch": 0.88 + }, + { + "step": 2009, + "timestamp": "2025-12-28T12:45:26.129250", + "elapsed_time": 13640.414340496063, + "loss": 0.146, + "grad_norm": 0.09115441888570786, + "learning_rate": 2.4150943396226418e-05, + "epoch": 0.880625 + }, + { + "step": 2010, + "timestamp": "2025-12-28T12:45:35.552234", + "elapsed_time": 13649.837324380875, + "loss": 0.1519, + "grad_norm": 0.11384664475917816, + "learning_rate": 2.4025157232704403e-05, + "epoch": 0.88125 + }, + { + "step": 2011, + "timestamp": "2025-12-28T12:45:42.432010", + "elapsed_time": 13656.71710062027, + "loss": 0.1838, + "grad_norm": 0.13385522365570068, + "learning_rate": 2.3899371069182393e-05, + "epoch": 0.881875 + }, + { + "step": 2012, + "timestamp": "2025-12-28T12:45:48.338311", + "elapsed_time": 13662.623405456543, + "loss": 0.3394, + "grad_norm": 0.18050479888916016, + "learning_rate": 2.377358490566038e-05, + "epoch": 0.8825 + }, + { + "step": 2013, + "timestamp": "2025-12-28T12:45:57.763912", + "elapsed_time": 13672.049002170563, + "loss": 0.1075, + "grad_norm": 0.094784677028656, + "learning_rate": 2.3647798742138368e-05, + "epoch": 0.883125 + }, + { + "step": 2014, + "timestamp": "2025-12-28T12:46:03.098688", + "elapsed_time": 13677.383778333664, + "loss": 0.2244, + "grad_norm": 0.19391919672489166, + "learning_rate": 2.3522012578616354e-05, + "epoch": 0.88375 + }, + { + "step": 2015, + "timestamp": "2025-12-28T12:46:09.212719", + "elapsed_time": 13683.49780869484, + "loss": 0.1827, + "grad_norm": 0.1965276449918747, + "learning_rate": 2.339622641509434e-05, + "epoch": 0.884375 + }, + { + "step": 2016, + "timestamp": "2025-12-28T12:46:19.627349", + "elapsed_time": 13693.912438869476, + "loss": 0.2289, + "grad_norm": 0.2740084230899811, + "learning_rate": 2.327044025157233e-05, + "epoch": 0.885 + }, + { + "step": 2017, + "timestamp": "2025-12-28T12:46:31.416162", + "elapsed_time": 13705.70125246048, + "loss": 0.2902, + "grad_norm": 0.12946578860282898, + "learning_rate": 2.3144654088050315e-05, + "epoch": 0.885625 + }, + { + "step": 2018, + "timestamp": "2025-12-28T12:46:41.666259", + "elapsed_time": 13715.951349258423, + "loss": 0.1345, + "grad_norm": 0.11686120927333832, + "learning_rate": 2.3018867924528304e-05, + "epoch": 0.88625 + }, + { + "step": 2019, + "timestamp": "2025-12-28T12:46:57.304742", + "elapsed_time": 13731.589831829071, + "loss": 0.1551, + "grad_norm": 0.10054396092891693, + "learning_rate": 2.289308176100629e-05, + "epoch": 0.886875 + }, + { + "step": 2020, + "timestamp": "2025-12-28T12:47:12.450934", + "elapsed_time": 13746.736023902893, + "loss": 0.1416, + "grad_norm": 0.11809642612934113, + "learning_rate": 2.276729559748428e-05, + "epoch": 0.8875 + }, + { + "step": 2021, + "timestamp": "2025-12-28T12:47:22.163353", + "elapsed_time": 13756.44844341278, + "loss": 0.1301, + "grad_norm": 0.1024162694811821, + "learning_rate": 2.2641509433962265e-05, + "epoch": 0.888125 + }, + { + "step": 2022, + "timestamp": "2025-12-28T12:47:33.984884", + "elapsed_time": 13768.269978284836, + "loss": 0.1243, + "grad_norm": 0.09722079336643219, + "learning_rate": 2.2515723270440254e-05, + "epoch": 0.88875 + }, + { + "step": 2023, + "timestamp": "2025-12-28T12:47:44.991859", + "elapsed_time": 13779.276949644089, + "loss": 0.1849, + "grad_norm": 0.11634030938148499, + "learning_rate": 2.238993710691824e-05, + "epoch": 0.889375 + }, + { + "step": 2024, + "timestamp": "2025-12-28T12:47:52.178131", + "elapsed_time": 13786.463220596313, + "loss": 0.3668, + "grad_norm": 0.2070102095603943, + "learning_rate": 2.226415094339623e-05, + "epoch": 0.89 + }, + { + "step": 2025, + "timestamp": "2025-12-28T12:47:58.522744", + "elapsed_time": 13792.807834386826, + "loss": 0.1465, + "grad_norm": 0.11943688988685608, + "learning_rate": 2.2138364779874215e-05, + "epoch": 0.890625 + }, + { + "step": 2026, + "timestamp": "2025-12-28T12:48:06.267378", + "elapsed_time": 13800.5524725914, + "loss": 0.3664, + "grad_norm": 0.18064796924591064, + "learning_rate": 2.2012578616352204e-05, + "epoch": 0.89125 + }, + { + "step": 2027, + "timestamp": "2025-12-28T12:48:16.684863", + "elapsed_time": 13810.969953536987, + "loss": 0.1507, + "grad_norm": 0.10812822729349136, + "learning_rate": 2.188679245283019e-05, + "epoch": 0.891875 + }, + { + "step": 2028, + "timestamp": "2025-12-28T12:48:26.633930", + "elapsed_time": 13820.919019937515, + "loss": 0.1116, + "grad_norm": 0.0921606570482254, + "learning_rate": 2.176100628930818e-05, + "epoch": 0.8925 + }, + { + "step": 2029, + "timestamp": "2025-12-28T12:48:34.803507", + "elapsed_time": 13829.08859705925, + "loss": 0.2362, + "grad_norm": 0.14584888517856598, + "learning_rate": 2.1635220125786165e-05, + "epoch": 0.893125 + }, + { + "step": 2030, + "timestamp": "2025-12-28T12:48:41.721190", + "elapsed_time": 13836.006280899048, + "loss": 0.3648, + "grad_norm": 0.1669163852930069, + "learning_rate": 2.1509433962264154e-05, + "epoch": 0.89375 + }, + { + "step": 2031, + "timestamp": "2025-12-28T12:48:51.532480", + "elapsed_time": 13845.817570209503, + "loss": 0.1496, + "grad_norm": 0.11350235342979431, + "learning_rate": 2.138364779874214e-05, + "epoch": 0.894375 + }, + { + "step": 2032, + "timestamp": "2025-12-28T12:49:01.120902", + "elapsed_time": 13855.405995368958, + "loss": 0.1385, + "grad_norm": 0.10483425110578537, + "learning_rate": 2.1257861635220126e-05, + "epoch": 0.895 + }, + { + "step": 2033, + "timestamp": "2025-12-28T12:49:10.447301", + "elapsed_time": 13864.732391834259, + "loss": 0.1473, + "grad_norm": 0.11778236925601959, + "learning_rate": 2.1132075471698115e-05, + "epoch": 0.895625 + }, + { + "step": 2034, + "timestamp": "2025-12-28T12:49:24.060029", + "elapsed_time": 13878.3451192379, + "loss": 0.1106, + "grad_norm": 0.09687533229589462, + "learning_rate": 2.10062893081761e-05, + "epoch": 0.89625 + }, + { + "step": 2035, + "timestamp": "2025-12-28T12:49:29.037036", + "elapsed_time": 13883.322126865387, + "loss": 0.4332, + "grad_norm": 0.2040807008743286, + "learning_rate": 2.088050314465409e-05, + "epoch": 0.896875 + }, + { + "step": 2036, + "timestamp": "2025-12-28T12:49:45.341778", + "elapsed_time": 13899.626868963242, + "loss": 0.2711, + "grad_norm": 0.12027235329151154, + "learning_rate": 2.0754716981132076e-05, + "epoch": 0.8975 + }, + { + "step": 2037, + "timestamp": "2025-12-28T12:49:55.608164", + "elapsed_time": 13909.893257856369, + "loss": 0.1105, + "grad_norm": 0.10322773456573486, + "learning_rate": 2.0628930817610066e-05, + "epoch": 0.898125 + }, + { + "step": 2038, + "timestamp": "2025-12-28T12:50:02.286113", + "elapsed_time": 13916.571203231812, + "loss": 0.1892, + "grad_norm": 0.15024706721305847, + "learning_rate": 2.050314465408805e-05, + "epoch": 0.89875 + }, + { + "step": 2039, + "timestamp": "2025-12-28T12:50:22.839318", + "elapsed_time": 13937.124408245087, + "loss": 0.1057, + "grad_norm": 0.07259613275527954, + "learning_rate": 2.037735849056604e-05, + "epoch": 0.899375 + }, + { + "step": 2040, + "timestamp": "2025-12-28T12:50:30.370013", + "elapsed_time": 13944.655103683472, + "loss": 0.1901, + "grad_norm": 0.14556270837783813, + "learning_rate": 2.0251572327044027e-05, + "epoch": 0.9 + }, + { + "step": 2041, + "timestamp": "2025-12-28T12:50:40.280173", + "elapsed_time": 13954.565263748169, + "loss": 0.1534, + "grad_norm": 0.2072232961654663, + "learning_rate": 2.0125786163522016e-05, + "epoch": 0.900625 + }, + { + "step": 2042, + "timestamp": "2025-12-28T12:50:48.582155", + "elapsed_time": 13962.867245435715, + "loss": 0.3279, + "grad_norm": 0.2846614122390747, + "learning_rate": 2e-05, + "epoch": 0.90125 + }, + { + "step": 2043, + "timestamp": "2025-12-28T12:50:55.464925", + "elapsed_time": 13969.750015258789, + "loss": 0.2373, + "grad_norm": 0.14941410720348358, + "learning_rate": 1.987421383647799e-05, + "epoch": 0.901875 + }, + { + "step": 2044, + "timestamp": "2025-12-28T12:51:03.935715", + "elapsed_time": 13978.220804929733, + "loss": 0.2142, + "grad_norm": 0.13745370507240295, + "learning_rate": 1.9748427672955977e-05, + "epoch": 0.9025 + }, + { + "step": 2045, + "timestamp": "2025-12-28T12:51:14.650669", + "elapsed_time": 13988.935759544373, + "loss": 0.1392, + "grad_norm": 0.100920669734478, + "learning_rate": 1.9622641509433966e-05, + "epoch": 0.903125 + }, + { + "step": 2046, + "timestamp": "2025-12-28T12:51:22.402615", + "elapsed_time": 13996.687705039978, + "loss": 0.1752, + "grad_norm": 0.13003851473331451, + "learning_rate": 1.9496855345911952e-05, + "epoch": 0.90375 + }, + { + "step": 2047, + "timestamp": "2025-12-28T12:51:41.001338", + "elapsed_time": 14015.286432504654, + "loss": 0.2414, + "grad_norm": 0.12125887721776962, + "learning_rate": 1.9371069182389938e-05, + "epoch": 0.904375 + }, + { + "step": 2048, + "timestamp": "2025-12-28T12:51:50.001884", + "elapsed_time": 14024.286974191666, + "loss": 0.1899, + "grad_norm": 0.1845715045928955, + "learning_rate": 1.9245283018867927e-05, + "epoch": 0.905 + }, + { + "step": 2049, + "timestamp": "2025-12-28T12:52:10.709887", + "elapsed_time": 14044.994977474213, + "loss": 0.1702, + "grad_norm": 0.09026765078306198, + "learning_rate": 1.9119496855345913e-05, + "epoch": 0.905625 + }, + { + "step": 2050, + "timestamp": "2025-12-28T12:52:17.185077", + "elapsed_time": 14051.470167636871, + "loss": 0.1794, + "grad_norm": 0.1455916464328766, + "learning_rate": 1.8993710691823902e-05, + "epoch": 0.90625 + }, + { + "step": 2051, + "timestamp": "2025-12-28T12:52:28.177130", + "elapsed_time": 14062.462219953537, + "loss": 0.1146, + "grad_norm": 0.11119363456964493, + "learning_rate": 1.8867924528301888e-05, + "epoch": 0.906875 + }, + { + "step": 2052, + "timestamp": "2025-12-28T12:52:36.238218", + "elapsed_time": 14070.523307800293, + "loss": 0.2618, + "grad_norm": 0.17123495042324066, + "learning_rate": 1.8742138364779874e-05, + "epoch": 0.9075 + }, + { + "step": 2053, + "timestamp": "2025-12-28T12:52:46.104010", + "elapsed_time": 14080.389100313187, + "loss": 0.2253, + "grad_norm": 0.1355234980583191, + "learning_rate": 1.861635220125786e-05, + "epoch": 0.908125 + }, + { + "step": 2054, + "timestamp": "2025-12-28T12:52:54.928314", + "elapsed_time": 14089.213404417038, + "loss": 0.1234, + "grad_norm": 0.10494759678840637, + "learning_rate": 1.849056603773585e-05, + "epoch": 0.90875 + }, + { + "step": 2055, + "timestamp": "2025-12-28T12:53:04.303552", + "elapsed_time": 14098.588641881943, + "loss": 0.1178, + "grad_norm": 0.11895407736301422, + "learning_rate": 1.8364779874213835e-05, + "epoch": 0.909375 + }, + { + "step": 2056, + "timestamp": "2025-12-28T12:53:10.684555", + "elapsed_time": 14104.969645500183, + "loss": 0.3704, + "grad_norm": 0.20144037902355194, + "learning_rate": 1.8238993710691824e-05, + "epoch": 0.91 + }, + { + "step": 2057, + "timestamp": "2025-12-28T12:53:24.571026", + "elapsed_time": 14118.856116056442, + "loss": 0.1448, + "grad_norm": 0.10675830394029617, + "learning_rate": 1.811320754716981e-05, + "epoch": 0.910625 + }, + { + "step": 2058, + "timestamp": "2025-12-28T12:53:35.228851", + "elapsed_time": 14129.513941764832, + "loss": 0.1201, + "grad_norm": 0.09952869266271591, + "learning_rate": 1.79874213836478e-05, + "epoch": 0.91125 + }, + { + "step": 2059, + "timestamp": "2025-12-28T12:53:44.722006", + "elapsed_time": 14139.007096767426, + "loss": 0.2014, + "grad_norm": 0.15313848853111267, + "learning_rate": 1.7861635220125785e-05, + "epoch": 0.911875 + }, + { + "step": 2060, + "timestamp": "2025-12-28T12:53:55.235495", + "elapsed_time": 14149.520585536957, + "loss": 0.156, + "grad_norm": 0.16458632051944733, + "learning_rate": 1.7735849056603774e-05, + "epoch": 0.9125 + }, + { + "step": 2061, + "timestamp": "2025-12-28T12:54:04.167911", + "elapsed_time": 14158.453001022339, + "loss": 0.1428, + "grad_norm": 0.11248766630887985, + "learning_rate": 1.761006289308176e-05, + "epoch": 0.913125 + }, + { + "step": 2062, + "timestamp": "2025-12-28T12:54:14.179512", + "elapsed_time": 14168.464602470398, + "loss": 0.1162, + "grad_norm": 0.10351016372442245, + "learning_rate": 1.748427672955975e-05, + "epoch": 0.91375 + }, + { + "step": 2063, + "timestamp": "2025-12-28T12:54:19.161273", + "elapsed_time": 14173.446363210678, + "loss": 0.2882, + "grad_norm": 0.19298550486564636, + "learning_rate": 1.7358490566037735e-05, + "epoch": 0.914375 + }, + { + "step": 2064, + "timestamp": "2025-12-28T12:54:30.239615", + "elapsed_time": 14184.524705171585, + "loss": 0.1304, + "grad_norm": 0.10329542309045792, + "learning_rate": 1.7232704402515724e-05, + "epoch": 0.915 + }, + { + "step": 2065, + "timestamp": "2025-12-28T12:54:36.398946", + "elapsed_time": 14190.684037208557, + "loss": 0.2199, + "grad_norm": 0.15773223340511322, + "learning_rate": 1.710691823899371e-05, + "epoch": 0.915625 + }, + { + "step": 2066, + "timestamp": "2025-12-28T12:54:42.958616", + "elapsed_time": 14197.243706703186, + "loss": 0.2395, + "grad_norm": 0.176762193441391, + "learning_rate": 1.69811320754717e-05, + "epoch": 0.91625 + }, + { + "step": 2067, + "timestamp": "2025-12-28T12:54:54.130810", + "elapsed_time": 14208.41589999199, + "loss": 0.0974, + "grad_norm": 0.08783324062824249, + "learning_rate": 1.6855345911949685e-05, + "epoch": 0.916875 + }, + { + "step": 2068, + "timestamp": "2025-12-28T12:55:01.944477", + "elapsed_time": 14216.22956776619, + "loss": 0.196, + "grad_norm": 0.14528080821037292, + "learning_rate": 1.672955974842767e-05, + "epoch": 0.9175 + }, + { + "step": 2069, + "timestamp": "2025-12-28T12:55:13.899335", + "elapsed_time": 14228.184425115585, + "loss": 0.205, + "grad_norm": 0.12957097589969635, + "learning_rate": 1.660377358490566e-05, + "epoch": 0.918125 + }, + { + "step": 2070, + "timestamp": "2025-12-28T12:55:23.618615", + "elapsed_time": 14237.903705358505, + "loss": 0.1474, + "grad_norm": 0.11285874992609024, + "learning_rate": 1.6477987421383646e-05, + "epoch": 0.91875 + }, + { + "step": 2071, + "timestamp": "2025-12-28T12:55:37.163652", + "elapsed_time": 14251.44874215126, + "loss": 0.1786, + "grad_norm": 0.09955456852912903, + "learning_rate": 1.6352201257861635e-05, + "epoch": 0.919375 + }, + { + "step": 2072, + "timestamp": "2025-12-28T12:55:44.637996", + "elapsed_time": 14258.923086166382, + "loss": 0.1564, + "grad_norm": 0.14337117969989777, + "learning_rate": 1.622641509433962e-05, + "epoch": 0.92 + }, + { + "step": 2073, + "timestamp": "2025-12-28T12:55:53.757282", + "elapsed_time": 14268.042372465134, + "loss": 0.2802, + "grad_norm": 0.1456514596939087, + "learning_rate": 1.610062893081761e-05, + "epoch": 0.920625 + }, + { + "step": 2074, + "timestamp": "2025-12-28T12:56:00.273054", + "elapsed_time": 14274.558144569397, + "loss": 0.4306, + "grad_norm": 0.18445518612861633, + "learning_rate": 1.5974842767295596e-05, + "epoch": 0.92125 + }, + { + "step": 2075, + "timestamp": "2025-12-28T12:56:07.521121", + "elapsed_time": 14281.806211948395, + "loss": 0.1628, + "grad_norm": 0.1335839480161667, + "learning_rate": 1.5849056603773586e-05, + "epoch": 0.921875 + }, + { + "step": 2076, + "timestamp": "2025-12-28T12:56:15.072953", + "elapsed_time": 14289.358043909073, + "loss": 0.1945, + "grad_norm": 0.13724136352539062, + "learning_rate": 1.572327044025157e-05, + "epoch": 0.9225 + }, + { + "step": 2077, + "timestamp": "2025-12-28T12:56:26.969569", + "elapsed_time": 14301.254658937454, + "loss": 0.1434, + "grad_norm": 0.10332413017749786, + "learning_rate": 1.559748427672956e-05, + "epoch": 0.923125 + }, + { + "step": 2078, + "timestamp": "2025-12-28T12:56:40.909120", + "elapsed_time": 14315.194210767746, + "loss": 0.1229, + "grad_norm": 0.0949297845363617, + "learning_rate": 1.5471698113207547e-05, + "epoch": 0.92375 + }, + { + "step": 2079, + "timestamp": "2025-12-28T12:56:46.573585", + "elapsed_time": 14320.858675479889, + "loss": 0.1417, + "grad_norm": 0.1971912533044815, + "learning_rate": 1.5345911949685536e-05, + "epoch": 0.924375 + }, + { + "step": 2080, + "timestamp": "2025-12-28T12:56:55.214575", + "elapsed_time": 14329.499665021896, + "loss": 0.1695, + "grad_norm": 0.13136085867881775, + "learning_rate": 1.5220125786163522e-05, + "epoch": 0.925 + }, + { + "step": 2081, + "timestamp": "2025-12-28T12:57:16.087282", + "elapsed_time": 14350.372372865677, + "loss": 0.0893, + "grad_norm": 0.07915375381708145, + "learning_rate": 1.509433962264151e-05, + "epoch": 0.925625 + }, + { + "step": 2082, + "timestamp": "2025-12-28T12:57:33.493431", + "elapsed_time": 14367.778520822525, + "loss": 0.1024, + "grad_norm": 0.10471498221158981, + "learning_rate": 1.4968553459119497e-05, + "epoch": 0.92625 + }, + { + "step": 2083, + "timestamp": "2025-12-28T12:57:42.614440", + "elapsed_time": 14376.899530172348, + "loss": 0.1551, + "grad_norm": 0.11656136065721512, + "learning_rate": 1.4842767295597484e-05, + "epoch": 0.926875 + }, + { + "step": 2084, + "timestamp": "2025-12-28T12:57:47.407488", + "elapsed_time": 14381.692578554153, + "loss": 0.1693, + "grad_norm": 0.1702698916196823, + "learning_rate": 1.4716981132075472e-05, + "epoch": 0.9275 + }, + { + "step": 2085, + "timestamp": "2025-12-28T12:58:02.399763", + "elapsed_time": 14396.684853076935, + "loss": 0.1079, + "grad_norm": 0.09625794738531113, + "learning_rate": 1.459119496855346e-05, + "epoch": 0.928125 + }, + { + "step": 2086, + "timestamp": "2025-12-28T12:58:15.434870", + "elapsed_time": 14409.719960689545, + "loss": 0.1103, + "grad_norm": 0.09237074851989746, + "learning_rate": 1.4465408805031447e-05, + "epoch": 0.92875 + }, + { + "step": 2087, + "timestamp": "2025-12-28T12:58:25.288608", + "elapsed_time": 14419.573698043823, + "loss": 0.1244, + "grad_norm": 0.1188262552022934, + "learning_rate": 1.4339622641509435e-05, + "epoch": 0.929375 + }, + { + "step": 2088, + "timestamp": "2025-12-28T12:58:32.277959", + "elapsed_time": 14426.563049316406, + "loss": 0.2027, + "grad_norm": 0.2063203752040863, + "learning_rate": 1.4213836477987422e-05, + "epoch": 0.93 + }, + { + "step": 2089, + "timestamp": "2025-12-28T12:58:43.511867", + "elapsed_time": 14437.796962022781, + "loss": 0.1312, + "grad_norm": 0.10580072551965714, + "learning_rate": 1.408805031446541e-05, + "epoch": 0.930625 + }, + { + "step": 2090, + "timestamp": "2025-12-28T12:58:56.223461", + "elapsed_time": 14450.508551120758, + "loss": 0.1399, + "grad_norm": 0.10505926609039307, + "learning_rate": 1.3962264150943397e-05, + "epoch": 0.93125 + }, + { + "step": 2091, + "timestamp": "2025-12-28T12:59:05.563465", + "elapsed_time": 14459.848555326462, + "loss": 0.2831, + "grad_norm": 0.15598870813846588, + "learning_rate": 1.3836477987421385e-05, + "epoch": 0.931875 + }, + { + "step": 2092, + "timestamp": "2025-12-28T12:59:15.934233", + "elapsed_time": 14470.219323635101, + "loss": 0.127, + "grad_norm": 0.10266047716140747, + "learning_rate": 1.371069182389937e-05, + "epoch": 0.9325 + }, + { + "step": 2093, + "timestamp": "2025-12-28T12:59:28.065574", + "elapsed_time": 14482.350664615631, + "loss": 0.1606, + "grad_norm": 0.1118762195110321, + "learning_rate": 1.3584905660377358e-05, + "epoch": 0.933125 + }, + { + "step": 2094, + "timestamp": "2025-12-28T12:59:41.619545", + "elapsed_time": 14495.90463590622, + "loss": 0.1627, + "grad_norm": 0.1268414705991745, + "learning_rate": 1.3459119496855346e-05, + "epoch": 0.93375 + }, + { + "step": 2095, + "timestamp": "2025-12-28T12:59:57.135947", + "elapsed_time": 14511.421037197113, + "loss": 0.1509, + "grad_norm": 0.09595310688018799, + "learning_rate": 1.3333333333333333e-05, + "epoch": 0.934375 + }, + { + "step": 2096, + "timestamp": "2025-12-28T13:00:07.077230", + "elapsed_time": 14521.362320899963, + "loss": 0.1116, + "grad_norm": 0.09903395920991898, + "learning_rate": 1.320754716981132e-05, + "epoch": 0.935 + }, + { + "step": 2097, + "timestamp": "2025-12-28T13:00:17.748745", + "elapsed_time": 14532.033835411072, + "loss": 0.1372, + "grad_norm": 0.10799795389175415, + "learning_rate": 1.3081761006289308e-05, + "epoch": 0.935625 + }, + { + "step": 2098, + "timestamp": "2025-12-28T13:00:34.507777", + "elapsed_time": 14548.792867422104, + "loss": 0.0991, + "grad_norm": 0.0850529670715332, + "learning_rate": 1.2955974842767296e-05, + "epoch": 0.93625 + }, + { + "step": 2099, + "timestamp": "2025-12-28T13:00:49.895456", + "elapsed_time": 14564.180546045303, + "loss": 0.1757, + "grad_norm": 0.09990602731704712, + "learning_rate": 1.2830188679245283e-05, + "epoch": 0.936875 + }, + { + "step": 2100, + "timestamp": "2025-12-28T13:00:57.176345", + "elapsed_time": 14571.46143579483, + "loss": 0.2252, + "grad_norm": 0.1569076031446457, + "learning_rate": 1.2704402515723271e-05, + "epoch": 0.9375 + }, + { + "step": 2101, + "timestamp": "2025-12-28T13:01:00.897935", + "elapsed_time": 14575.183025836945, + "loss": 0.2693, + "grad_norm": 0.2882073223590851, + "learning_rate": 1.2578616352201259e-05, + "epoch": 0.938125 + }, + { + "step": 2102, + "timestamp": "2025-12-28T13:01:09.059749", + "elapsed_time": 14583.344839334488, + "loss": 0.076, + "grad_norm": 0.09046997874975204, + "learning_rate": 1.2452830188679246e-05, + "epoch": 0.93875 + }, + { + "step": 2103, + "timestamp": "2025-12-28T13:01:13.882199", + "elapsed_time": 14588.167289018631, + "loss": 0.1864, + "grad_norm": 0.18029439449310303, + "learning_rate": 1.2327044025157234e-05, + "epoch": 0.939375 + }, + { + "step": 2104, + "timestamp": "2025-12-28T13:01:26.361765", + "elapsed_time": 14600.646855354309, + "loss": 0.1504, + "grad_norm": 0.09953170269727707, + "learning_rate": 1.2201257861635221e-05, + "epoch": 0.94 + }, + { + "step": 2105, + "timestamp": "2025-12-28T13:01:33.533065", + "elapsed_time": 14607.818155527115, + "loss": 0.1704, + "grad_norm": 0.14062711596488953, + "learning_rate": 1.2075471698113209e-05, + "epoch": 0.940625 + }, + { + "step": 2106, + "timestamp": "2025-12-28T13:01:40.874776", + "elapsed_time": 14615.159866333008, + "loss": 0.3254, + "grad_norm": 0.19320929050445557, + "learning_rate": 1.1949685534591196e-05, + "epoch": 0.94125 + }, + { + "step": 2107, + "timestamp": "2025-12-28T13:01:45.352972", + "elapsed_time": 14619.63806271553, + "loss": 0.3774, + "grad_norm": 0.19888830184936523, + "learning_rate": 1.1823899371069184e-05, + "epoch": 0.941875 + }, + { + "step": 2108, + "timestamp": "2025-12-28T13:01:54.645732", + "elapsed_time": 14628.930822610855, + "loss": 0.1445, + "grad_norm": 0.11683964729309082, + "learning_rate": 1.169811320754717e-05, + "epoch": 0.9425 + }, + { + "step": 2109, + "timestamp": "2025-12-28T13:02:12.993090", + "elapsed_time": 14647.278180122375, + "loss": 0.1005, + "grad_norm": 0.08152023702859879, + "learning_rate": 1.1572327044025157e-05, + "epoch": 0.943125 + }, + { + "step": 2110, + "timestamp": "2025-12-28T13:02:20.465761", + "elapsed_time": 14654.750851392746, + "loss": 0.1488, + "grad_norm": 0.1277417093515396, + "learning_rate": 1.1446540880503145e-05, + "epoch": 0.94375 + }, + { + "step": 2111, + "timestamp": "2025-12-28T13:02:26.990394", + "elapsed_time": 14661.27548456192, + "loss": 0.2444, + "grad_norm": 0.49488744139671326, + "learning_rate": 1.1320754716981132e-05, + "epoch": 0.944375 + }, + { + "step": 2112, + "timestamp": "2025-12-28T13:02:36.374549", + "elapsed_time": 14670.659639120102, + "loss": 0.1365, + "grad_norm": 0.11476011574268341, + "learning_rate": 1.119496855345912e-05, + "epoch": 0.945 + }, + { + "step": 2113, + "timestamp": "2025-12-28T13:02:45.111270", + "elapsed_time": 14679.396360874176, + "loss": 0.1309, + "grad_norm": 0.11950299888849258, + "learning_rate": 1.1069182389937107e-05, + "epoch": 0.945625 + }, + { + "step": 2114, + "timestamp": "2025-12-28T13:02:51.591249", + "elapsed_time": 14685.876343011856, + "loss": 0.2217, + "grad_norm": 0.16578605771064758, + "learning_rate": 1.0943396226415095e-05, + "epoch": 0.94625 + }, + { + "step": 2115, + "timestamp": "2025-12-28T13:03:00.576464", + "elapsed_time": 14694.861553907394, + "loss": 0.3675, + "grad_norm": 0.16877128183841705, + "learning_rate": 1.0817610062893083e-05, + "epoch": 0.946875 + }, + { + "step": 2116, + "timestamp": "2025-12-28T13:03:18.311130", + "elapsed_time": 14712.596220493317, + "loss": 0.1257, + "grad_norm": 0.08800987899303436, + "learning_rate": 1.069182389937107e-05, + "epoch": 0.9475 + }, + { + "step": 2117, + "timestamp": "2025-12-28T13:03:27.242185", + "elapsed_time": 14721.527275800705, + "loss": 0.1195, + "grad_norm": 0.11122479289770126, + "learning_rate": 1.0566037735849058e-05, + "epoch": 0.948125 + }, + { + "step": 2118, + "timestamp": "2025-12-28T13:03:37.954344", + "elapsed_time": 14732.239434480667, + "loss": 0.1142, + "grad_norm": 0.0907776802778244, + "learning_rate": 1.0440251572327045e-05, + "epoch": 0.94875 + }, + { + "step": 2119, + "timestamp": "2025-12-28T13:03:55.267284", + "elapsed_time": 14749.552374362946, + "loss": 0.0934, + "grad_norm": 0.07522504776716232, + "learning_rate": 1.0314465408805033e-05, + "epoch": 0.949375 + }, + { + "step": 2120, + "timestamp": "2025-12-28T13:04:02.452456", + "elapsed_time": 14756.73754644394, + "loss": 0.1678, + "grad_norm": 0.1397898644208908, + "learning_rate": 1.018867924528302e-05, + "epoch": 0.95 + }, + { + "step": 2121, + "timestamp": "2025-12-28T13:04:13.404991", + "elapsed_time": 14767.690082073212, + "loss": 0.1685, + "grad_norm": 0.1206582635641098, + "learning_rate": 1.0062893081761008e-05, + "epoch": 0.950625 + }, + { + "step": 2122, + "timestamp": "2025-12-28T13:04:21.537710", + "elapsed_time": 14775.822799921036, + "loss": 0.4038, + "grad_norm": 0.17276889085769653, + "learning_rate": 9.937106918238995e-06, + "epoch": 0.95125 + }, + { + "step": 2123, + "timestamp": "2025-12-28T13:04:29.361970", + "elapsed_time": 14783.647060155869, + "loss": 0.1303, + "grad_norm": 0.12048753350973129, + "learning_rate": 9.811320754716983e-06, + "epoch": 0.951875 + }, + { + "step": 2124, + "timestamp": "2025-12-28T13:04:36.912129", + "elapsed_time": 14791.197219133377, + "loss": 0.1316, + "grad_norm": 0.1113729476928711, + "learning_rate": 9.685534591194969e-06, + "epoch": 0.9525 + }, + { + "step": 2125, + "timestamp": "2025-12-28T13:04:42.123245", + "elapsed_time": 14796.408335924149, + "loss": 0.2985, + "grad_norm": 0.1864505261182785, + "learning_rate": 9.559748427672956e-06, + "epoch": 0.953125 + }, + { + "step": 2126, + "timestamp": "2025-12-28T13:04:59.705245", + "elapsed_time": 14813.990335941315, + "loss": 0.1241, + "grad_norm": 0.08124972134828568, + "learning_rate": 9.433962264150944e-06, + "epoch": 0.95375 + }, + { + "step": 2127, + "timestamp": "2025-12-28T13:05:06.621313", + "elapsed_time": 14820.906403064728, + "loss": 0.261, + "grad_norm": 0.19667741656303406, + "learning_rate": 9.30817610062893e-06, + "epoch": 0.954375 + }, + { + "step": 2128, + "timestamp": "2025-12-28T13:05:18.124603", + "elapsed_time": 14832.409697771072, + "loss": 0.2653, + "grad_norm": 0.12561364471912384, + "learning_rate": 9.182389937106917e-06, + "epoch": 0.955 + }, + { + "step": 2129, + "timestamp": "2025-12-28T13:05:22.177655", + "elapsed_time": 14836.46274471283, + "loss": 0.2647, + "grad_norm": 0.21830357611179352, + "learning_rate": 9.056603773584905e-06, + "epoch": 0.955625 + }, + { + "step": 2130, + "timestamp": "2025-12-28T13:05:31.039317", + "elapsed_time": 14845.324407577515, + "loss": 0.3741, + "grad_norm": 0.14887583255767822, + "learning_rate": 8.930817610062892e-06, + "epoch": 0.95625 + }, + { + "step": 2131, + "timestamp": "2025-12-28T13:05:42.990517", + "elapsed_time": 14857.275606870651, + "loss": 0.1244, + "grad_norm": 0.11540309339761734, + "learning_rate": 8.80503144654088e-06, + "epoch": 0.956875 + }, + { + "step": 2132, + "timestamp": "2025-12-28T13:05:48.518364", + "elapsed_time": 14862.803453683853, + "loss": 0.2524, + "grad_norm": 0.17561720311641693, + "learning_rate": 8.679245283018868e-06, + "epoch": 0.9575 + }, + { + "step": 2133, + "timestamp": "2025-12-28T13:05:54.626380", + "elapsed_time": 14868.911471128464, + "loss": 0.205, + "grad_norm": 0.14403176307678223, + "learning_rate": 8.553459119496855e-06, + "epoch": 0.958125 + }, + { + "step": 2134, + "timestamp": "2025-12-28T13:06:03.304009", + "elapsed_time": 14877.589098930359, + "loss": 0.1677, + "grad_norm": 0.1289132833480835, + "learning_rate": 8.427672955974843e-06, + "epoch": 0.95875 + }, + { + "step": 2135, + "timestamp": "2025-12-28T13:06:19.305306", + "elapsed_time": 14893.590396642685, + "loss": 0.1706, + "grad_norm": 0.11218154430389404, + "learning_rate": 8.30188679245283e-06, + "epoch": 0.959375 + }, + { + "step": 2136, + "timestamp": "2025-12-28T13:06:34.028175", + "elapsed_time": 14908.313265562057, + "loss": 0.1002, + "grad_norm": 0.09118315577507019, + "learning_rate": 8.176100628930818e-06, + "epoch": 0.96 + }, + { + "step": 2137, + "timestamp": "2025-12-28T13:06:44.744421", + "elapsed_time": 14919.029515028, + "loss": 0.1318, + "grad_norm": 0.11322548240423203, + "learning_rate": 8.050314465408805e-06, + "epoch": 0.960625 + }, + { + "step": 2138, + "timestamp": "2025-12-28T13:07:05.626666", + "elapsed_time": 14939.911756277084, + "loss": 0.1157, + "grad_norm": 0.07905202358961105, + "learning_rate": 7.924528301886793e-06, + "epoch": 0.96125 + }, + { + "step": 2139, + "timestamp": "2025-12-28T13:07:12.917071", + "elapsed_time": 14947.202164649963, + "loss": 0.193, + "grad_norm": 0.14137513935565948, + "learning_rate": 7.79874213836478e-06, + "epoch": 0.961875 + }, + { + "step": 2140, + "timestamp": "2025-12-28T13:07:20.296717", + "elapsed_time": 14954.581811666489, + "loss": 0.1775, + "grad_norm": 0.13806195557117462, + "learning_rate": 7.672955974842768e-06, + "epoch": 0.9625 + }, + { + "step": 2141, + "timestamp": "2025-12-28T13:07:33.172898", + "elapsed_time": 14967.457988500595, + "loss": 0.1039, + "grad_norm": 0.09371381998062134, + "learning_rate": 7.547169811320755e-06, + "epoch": 0.963125 + }, + { + "step": 2142, + "timestamp": "2025-12-28T13:07:49.768333", + "elapsed_time": 14984.053423166275, + "loss": 0.1143, + "grad_norm": 0.09071200340986252, + "learning_rate": 7.421383647798742e-06, + "epoch": 0.96375 + }, + { + "step": 2143, + "timestamp": "2025-12-28T13:08:00.029288", + "elapsed_time": 14994.314378499985, + "loss": 0.113, + "grad_norm": 0.0975324884057045, + "learning_rate": 7.29559748427673e-06, + "epoch": 0.964375 + }, + { + "step": 2144, + "timestamp": "2025-12-28T13:08:07.277118", + "elapsed_time": 15001.56220817566, + "loss": 0.1588, + "grad_norm": 0.1530754715204239, + "learning_rate": 7.169811320754717e-06, + "epoch": 0.965 + }, + { + "step": 2145, + "timestamp": "2025-12-28T13:08:11.788620", + "elapsed_time": 15006.07371020317, + "loss": 0.2424, + "grad_norm": 0.17115284502506256, + "learning_rate": 7.044025157232705e-06, + "epoch": 0.965625 + }, + { + "step": 2146, + "timestamp": "2025-12-28T13:08:24.581324", + "elapsed_time": 15018.866414546967, + "loss": 0.0938, + "grad_norm": 0.08514872193336487, + "learning_rate": 6.918238993710692e-06, + "epoch": 0.96625 + }, + { + "step": 2147, + "timestamp": "2025-12-28T13:08:35.277645", + "elapsed_time": 15029.562735319138, + "loss": 0.16, + "grad_norm": 0.11149827390909195, + "learning_rate": 6.792452830188679e-06, + "epoch": 0.966875 + }, + { + "step": 2148, + "timestamp": "2025-12-28T13:08:44.659727", + "elapsed_time": 15038.944817781448, + "loss": 0.1451, + "grad_norm": 0.1102396696805954, + "learning_rate": 6.666666666666667e-06, + "epoch": 0.9675 + }, + { + "step": 2149, + "timestamp": "2025-12-28T13:08:56.168291", + "elapsed_time": 15050.453381538391, + "loss": 0.1014, + "grad_norm": 0.11420729011297226, + "learning_rate": 6.540880503144654e-06, + "epoch": 0.968125 + }, + { + "step": 2150, + "timestamp": "2025-12-28T13:09:05.395277", + "elapsed_time": 15059.68036699295, + "loss": 0.1707, + "grad_norm": 0.13086120784282684, + "learning_rate": 6.415094339622642e-06, + "epoch": 0.96875 + }, + { + "step": 2151, + "timestamp": "2025-12-28T13:09:13.567665", + "elapsed_time": 15067.852755784988, + "loss": 0.1435, + "grad_norm": 0.12494708597660065, + "learning_rate": 6.289308176100629e-06, + "epoch": 0.969375 + }, + { + "step": 2152, + "timestamp": "2025-12-28T13:09:22.026165", + "elapsed_time": 15076.311259746552, + "loss": 0.2231, + "grad_norm": 0.15418364107608795, + "learning_rate": 6.163522012578617e-06, + "epoch": 0.97 + }, + { + "step": 2153, + "timestamp": "2025-12-28T13:09:28.234531", + "elapsed_time": 15082.519621133804, + "loss": 0.1911, + "grad_norm": 0.15970326960086823, + "learning_rate": 6.037735849056604e-06, + "epoch": 0.970625 + }, + { + "step": 2154, + "timestamp": "2025-12-28T13:09:41.850657", + "elapsed_time": 15096.135746717453, + "loss": 0.1166, + "grad_norm": 0.08712539076805115, + "learning_rate": 5.911949685534592e-06, + "epoch": 0.97125 + }, + { + "step": 2155, + "timestamp": "2025-12-28T13:09:49.022333", + "elapsed_time": 15103.307422876358, + "loss": 0.2125, + "grad_norm": 0.15600165724754333, + "learning_rate": 5.786163522012579e-06, + "epoch": 0.971875 + }, + { + "step": 2156, + "timestamp": "2025-12-28T13:09:55.267040", + "elapsed_time": 15109.552130699158, + "loss": 0.1704, + "grad_norm": 0.1566295623779297, + "learning_rate": 5.660377358490566e-06, + "epoch": 0.9725 + }, + { + "step": 2157, + "timestamp": "2025-12-28T13:10:07.050404", + "elapsed_time": 15121.33549451828, + "loss": 0.122, + "grad_norm": 0.09626911580562592, + "learning_rate": 5.534591194968554e-06, + "epoch": 0.973125 + }, + { + "step": 2158, + "timestamp": "2025-12-28T13:10:13.971436", + "elapsed_time": 15128.256526470184, + "loss": 0.2565, + "grad_norm": 0.14205630123615265, + "learning_rate": 5.408805031446541e-06, + "epoch": 0.97375 + }, + { + "step": 2159, + "timestamp": "2025-12-28T13:10:23.606334", + "elapsed_time": 15137.891424417496, + "loss": 0.1477, + "grad_norm": 0.12025802582502365, + "learning_rate": 5.283018867924529e-06, + "epoch": 0.974375 + }, + { + "step": 2160, + "timestamp": "2025-12-28T13:10:38.587449", + "elapsed_time": 15152.872539758682, + "loss": 0.122, + "grad_norm": 0.0904950201511383, + "learning_rate": 5.157232704402516e-06, + "epoch": 0.975 + }, + { + "step": 2161, + "timestamp": "2025-12-28T13:10:48.211282", + "elapsed_time": 15162.496372699738, + "loss": 0.1241, + "grad_norm": 0.1013362929224968, + "learning_rate": 5.031446540880504e-06, + "epoch": 0.975625 + }, + { + "step": 2162, + "timestamp": "2025-12-28T13:11:00.090939", + "elapsed_time": 15174.376032590866, + "loss": 0.3714, + "grad_norm": 0.14272800087928772, + "learning_rate": 4.9056603773584915e-06, + "epoch": 0.97625 + }, + { + "step": 2163, + "timestamp": "2025-12-28T13:11:03.768510", + "elapsed_time": 15178.05360007286, + "loss": 0.3031, + "grad_norm": 0.21242351830005646, + "learning_rate": 4.779874213836478e-06, + "epoch": 0.976875 + }, + { + "step": 2164, + "timestamp": "2025-12-28T13:11:15.372898", + "elapsed_time": 15189.65798830986, + "loss": 0.1319, + "grad_norm": 0.10468777269124985, + "learning_rate": 4.654088050314465e-06, + "epoch": 0.9775 + }, + { + "step": 2165, + "timestamp": "2025-12-28T13:11:22.360971", + "elapsed_time": 15196.64606142044, + "loss": 0.2542, + "grad_norm": 0.1459261178970337, + "learning_rate": 4.5283018867924524e-06, + "epoch": 0.978125 + }, + { + "step": 2166, + "timestamp": "2025-12-28T13:11:27.698671", + "elapsed_time": 15201.983761548996, + "loss": 0.1664, + "grad_norm": 0.16032001376152039, + "learning_rate": 4.40251572327044e-06, + "epoch": 0.97875 + }, + { + "step": 2167, + "timestamp": "2025-12-28T13:11:35.312824", + "elapsed_time": 15209.597914457321, + "loss": 0.14, + "grad_norm": 0.12672804296016693, + "learning_rate": 4.2767295597484275e-06, + "epoch": 0.979375 + }, + { + "step": 2168, + "timestamp": "2025-12-28T13:11:42.449067", + "elapsed_time": 15216.734157085419, + "loss": 0.1409, + "grad_norm": 0.21911774575710297, + "learning_rate": 4.150943396226415e-06, + "epoch": 0.98 + }, + { + "step": 2169, + "timestamp": "2025-12-28T13:11:52.968471", + "elapsed_time": 15227.253565788269, + "loss": 0.1652, + "grad_norm": 0.10740287601947784, + "learning_rate": 4.025157232704403e-06, + "epoch": 0.980625 + }, + { + "step": 2170, + "timestamp": "2025-12-28T13:12:14.028363", + "elapsed_time": 15248.31345319748, + "loss": 0.0946, + "grad_norm": 0.107594795525074, + "learning_rate": 3.89937106918239e-06, + "epoch": 0.98125 + }, + { + "step": 2171, + "timestamp": "2025-12-28T13:12:26.958057", + "elapsed_time": 15261.243147850037, + "loss": 0.1402, + "grad_norm": 0.09502803534269333, + "learning_rate": 3.7735849056603773e-06, + "epoch": 0.981875 + }, + { + "step": 2172, + "timestamp": "2025-12-28T13:12:41.825213", + "elapsed_time": 15276.110303640366, + "loss": 0.1656, + "grad_norm": 0.10404454171657562, + "learning_rate": 3.647798742138365e-06, + "epoch": 0.9825 + }, + { + "step": 2173, + "timestamp": "2025-12-28T13:12:54.237611", + "elapsed_time": 15288.522701501846, + "loss": 0.1088, + "grad_norm": 0.09469737857580185, + "learning_rate": 3.5220125786163524e-06, + "epoch": 0.983125 + }, + { + "step": 2174, + "timestamp": "2025-12-28T13:13:14.320573", + "elapsed_time": 15308.60566353798, + "loss": 0.1108, + "grad_norm": 0.08538592606782913, + "learning_rate": 3.3962264150943395e-06, + "epoch": 0.98375 + }, + { + "step": 2175, + "timestamp": "2025-12-28T13:13:31.983656", + "elapsed_time": 15326.2687458992, + "loss": 0.113, + "grad_norm": 0.07896923273801804, + "learning_rate": 3.270440251572327e-06, + "epoch": 0.984375 + }, + { + "step": 2176, + "timestamp": "2025-12-28T13:13:36.117474", + "elapsed_time": 15330.402564287186, + "loss": 0.2856, + "grad_norm": 0.20000196993350983, + "learning_rate": 3.1446540880503146e-06, + "epoch": 0.985 + }, + { + "step": 2177, + "timestamp": "2025-12-28T13:13:40.749900", + "elapsed_time": 15335.03499007225, + "loss": 0.2019, + "grad_norm": 0.17783957719802856, + "learning_rate": 3.018867924528302e-06, + "epoch": 0.985625 + }, + { + "step": 2178, + "timestamp": "2025-12-28T13:13:50.081194", + "elapsed_time": 15344.366284370422, + "loss": 0.124, + "grad_norm": 0.10862841457128525, + "learning_rate": 2.8930817610062893e-06, + "epoch": 0.98625 + }, + { + "step": 2179, + "timestamp": "2025-12-28T13:13:57.182561", + "elapsed_time": 15351.467651605606, + "loss": 0.3846, + "grad_norm": 0.1680181324481964, + "learning_rate": 2.767295597484277e-06, + "epoch": 0.986875 + }, + { + "step": 2180, + "timestamp": "2025-12-28T13:14:18.069949", + "elapsed_time": 15372.355040073395, + "loss": 0.1295, + "grad_norm": 0.10930982232093811, + "learning_rate": 2.6415094339622644e-06, + "epoch": 0.9875 + }, + { + "step": 2181, + "timestamp": "2025-12-28T13:14:25.840440", + "elapsed_time": 15380.125534534454, + "loss": 0.1287, + "grad_norm": 0.11625991761684418, + "learning_rate": 2.515723270440252e-06, + "epoch": 0.988125 + }, + { + "step": 2182, + "timestamp": "2025-12-28T13:14:32.900696", + "elapsed_time": 15387.185786247253, + "loss": 0.2032, + "grad_norm": 0.13967153429985046, + "learning_rate": 2.389937106918239e-06, + "epoch": 0.98875 + }, + { + "step": 2183, + "timestamp": "2025-12-28T13:14:38.813341", + "elapsed_time": 15393.098430871964, + "loss": 0.1665, + "grad_norm": 0.16941803693771362, + "learning_rate": 2.2641509433962262e-06, + "epoch": 0.989375 + }, + { + "step": 2184, + "timestamp": "2025-12-28T13:14:48.082713", + "elapsed_time": 15402.36780333519, + "loss": 0.1239, + "grad_norm": 0.1080615222454071, + "learning_rate": 2.1383647798742138e-06, + "epoch": 0.99 + }, + { + "step": 2185, + "timestamp": "2025-12-28T13:14:59.031848", + "elapsed_time": 15413.316938877106, + "loss": 0.1657, + "grad_norm": 0.1154465302824974, + "learning_rate": 2.0125786163522013e-06, + "epoch": 0.990625 + }, + { + "step": 2186, + "timestamp": "2025-12-28T13:15:09.288405", + "elapsed_time": 15423.573495388031, + "loss": 0.1329, + "grad_norm": 0.10635515302419662, + "learning_rate": 1.8867924528301887e-06, + "epoch": 0.99125 + }, + { + "step": 2187, + "timestamp": "2025-12-28T13:15:19.358530", + "elapsed_time": 15433.64362025261, + "loss": 0.1496, + "grad_norm": 0.10568325221538544, + "learning_rate": 1.7610062893081762e-06, + "epoch": 0.991875 + }, + { + "step": 2188, + "timestamp": "2025-12-28T13:15:27.696443", + "elapsed_time": 15441.981533288956, + "loss": 0.1668, + "grad_norm": 0.11821103096008301, + "learning_rate": 1.6352201257861635e-06, + "epoch": 0.9925 + }, + { + "step": 2189, + "timestamp": "2025-12-28T13:15:32.319517", + "elapsed_time": 15446.60460782051, + "loss": 0.2601, + "grad_norm": 0.18787573277950287, + "learning_rate": 1.509433962264151e-06, + "epoch": 0.993125 + }, + { + "step": 2190, + "timestamp": "2025-12-28T13:15:42.782764", + "elapsed_time": 15457.067858695984, + "loss": 0.1287, + "grad_norm": 0.11284953355789185, + "learning_rate": 1.3836477987421384e-06, + "epoch": 0.99375 + }, + { + "step": 2191, + "timestamp": "2025-12-28T13:15:48.893842", + "elapsed_time": 15463.178936243057, + "loss": 0.1804, + "grad_norm": 0.15460120141506195, + "learning_rate": 1.257861635220126e-06, + "epoch": 0.994375 + }, + { + "step": 2192, + "timestamp": "2025-12-28T13:15:56.849546", + "elapsed_time": 15471.134636163712, + "loss": 0.166, + "grad_norm": 0.13522419333457947, + "learning_rate": 1.1320754716981131e-06, + "epoch": 0.995 + }, + { + "step": 2193, + "timestamp": "2025-12-28T13:16:09.481063", + "elapsed_time": 15483.766153335571, + "loss": 0.1317, + "grad_norm": 0.11077344417572021, + "learning_rate": 1.0062893081761007e-06, + "epoch": 0.995625 + }, + { + "step": 2194, + "timestamp": "2025-12-28T13:16:17.328514", + "elapsed_time": 15491.613604068756, + "loss": 0.4237, + "grad_norm": 0.1749534010887146, + "learning_rate": 8.805031446540881e-07, + "epoch": 0.99625 + }, + { + "step": 2195, + "timestamp": "2025-12-28T13:16:23.345606", + "elapsed_time": 15497.63069653511, + "loss": 0.1748, + "grad_norm": 0.1672588586807251, + "learning_rate": 7.547169811320755e-07, + "epoch": 0.996875 + }, + { + "step": 2196, + "timestamp": "2025-12-28T13:16:31.823739", + "elapsed_time": 15506.108829021454, + "loss": 0.2156, + "grad_norm": 0.1420753449201584, + "learning_rate": 6.28930817610063e-07, + "epoch": 0.9975 + }, + { + "step": 2197, + "timestamp": "2025-12-28T13:16:40.998256", + "elapsed_time": 15515.283346652985, + "loss": 0.1277, + "grad_norm": 0.10503512620925903, + "learning_rate": 5.031446540880503e-07, + "epoch": 0.998125 + }, + { + "step": 2198, + "timestamp": "2025-12-28T13:16:49.844562", + "elapsed_time": 15524.12965297699, + "loss": 0.1375, + "grad_norm": 0.1124568060040474, + "learning_rate": 3.773584905660378e-07, + "epoch": 0.99875 + }, + { + "step": 2199, + "timestamp": "2025-12-28T13:16:58.714665", + "elapsed_time": 15532.9997549057, + "loss": 0.1504, + "grad_norm": 0.11710984259843826, + "learning_rate": 2.5157232704402517e-07, + "epoch": 0.999375 + }, + { + "step": 2200, + "timestamp": "2025-12-28T13:17:05.804184", + "elapsed_time": 15540.089273929596, + "loss": 0.1503, + "grad_norm": 0.12923410534858704, + "learning_rate": 1.2578616352201258e-07, + "epoch": 1.0 + }, + { + "step": 2200, + "timestamp": "2025-12-28T13:17:06.653219", + "elapsed_time": 15540.938311100006, + "train_runtime": 15528.6205, + "train_samples_per_second": 0.103, + "train_steps_per_second": 0.103, + "total_flos": 5.392201841787095e+17, + "train_loss": 0.19463951266836374, + "epoch": 1.0 + } + ], + "loss_summary": { + "min": 0.0733, + "max": 0.7795, + "final": 0.1503, + "average": 0.1946379375 + }, + "grad_norm_summary": { + "min": 0.06062662601470947, + "max": 2.4628686904907227, + "final": 0.12923410534858704, + "average": 0.14187599109020085 + } +} \ No newline at end of file