{
  "best_global_step": 188,
  "best_metric": 0.28944254,
  "best_model_checkpoint": "/group/40143/hongzhuyi/ms-swift/output/v4-20250916-174722/checkpoint-188",
  "epoch": 2.0,
  "eval_steps": 100.0,
  "global_step": 376,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0053404539385847796,
      "grad_norm": 1.4680844344295192,
      "learning_rate": 5.2631578947368416e-08,
      "loss": 0.28618815541267395,
      "step": 1,
      "token_acc": 0.8993438482284546
    },
    {
      "epoch": 0.010680907877169559,
      "grad_norm": 1.6685681306773734,
      "learning_rate": 1.0526315789473683e-07,
      "loss": 0.3127783238887787,
      "step": 2,
      "token_acc": 0.8937526345252991
    },
    {
      "epoch": 0.01602136181575434,
      "grad_norm": 1.4357694002934198,
      "learning_rate": 1.5789473684210525e-07,
      "loss": 0.27423810958862305,
      "step": 3,
      "token_acc": 0.9052796959877014
    },
    {
      "epoch": 0.021361815754339118,
      "grad_norm": 1.4730832359618218,
      "learning_rate": 2.1052631578947366e-07,
      "loss": 0.29357409477233887,
      "step": 4,
      "token_acc": 0.8994738459587097
    },
    {
      "epoch": 0.0267022696929239,
      "grad_norm": 1.5683917577404938,
      "learning_rate": 2.631578947368421e-07,
      "loss": 0.3035960793495178,
      "step": 5,
      "token_acc": 0.8966883420944214
    },
    {
      "epoch": 0.03204272363150868,
      "grad_norm": 1.4633956584310943,
      "learning_rate": 3.157894736842105e-07,
      "loss": 0.2846561670303345,
      "step": 6,
      "token_acc": 0.8998727202415466
    },
    {
      "epoch": 0.037383177570093455,
      "grad_norm": 1.5193698521980548,
      "learning_rate": 3.684210526315789e-07,
      "loss": 0.27327555418014526,
      "step": 7,
      "token_acc": 0.9057236909866333
    },
    {
      "epoch": 0.042723631508678236,
      "grad_norm": 1.5085350745441748,
      "learning_rate": 4.2105263157894733e-07,
      "loss": 0.29018205404281616,
      "step": 8,
      "token_acc": 0.9011104702949524
    },
    {
      "epoch": 0.04806408544726302,
      "grad_norm": 1.5689240737231291,
      "learning_rate": 4.7368421052631574e-07,
      "loss": 0.2944909334182739,
      "step": 9,
      "token_acc": 0.9000617265701294
    },
    {
      "epoch": 0.0534045393858478,
      "grad_norm": 1.3525300533751718,
      "learning_rate": 5.263157894736842e-07,
      "loss": 0.25412237644195557,
      "step": 10,
      "token_acc": 0.913280725479126
    },
    {
      "epoch": 0.05874499332443257,
      "grad_norm": 1.5332051047815112,
      "learning_rate": 5.789473684210526e-07,
      "loss": 0.2714199125766754,
      "step": 11,
      "token_acc": 0.9061141014099121
    },
    {
      "epoch": 0.06408544726301736,
      "grad_norm": 1.6675024272172654,
      "learning_rate": 6.31578947368421e-07,
      "loss": 0.2993066906929016,
      "step": 12,
      "token_acc": 0.8960835933685303
    },
    {
      "epoch": 0.06942590120160214,
      "grad_norm": 1.5387437343480326,
      "learning_rate": 6.842105263157895e-07,
      "loss": 0.3007085919380188,
      "step": 13,
      "token_acc": 0.8965851664543152
    },
    {
      "epoch": 0.07476635514018691,
      "grad_norm": 1.5009599762006036,
      "learning_rate": 7.368421052631578e-07,
      "loss": 0.2959010601043701,
      "step": 14,
      "token_acc": 0.8978061676025391
    },
    {
      "epoch": 0.0801068090787717,
      "grad_norm": 1.5918906035265024,
      "learning_rate": 7.894736842105263e-07,
      "loss": 0.2931970953941345,
      "step": 15,
      "token_acc": 0.8976789712905884
    },
    {
      "epoch": 0.08544726301735647,
      "grad_norm": 1.3571249600452637,
      "learning_rate": 8.421052631578947e-07,
      "loss": 0.25758588314056396,
      "step": 16,
      "token_acc": 0.9122288823127747
    },
    {
      "epoch": 0.09078771695594126,
      "grad_norm": 1.4960297904157556,
      "learning_rate": 8.947368421052631e-07,
      "loss": 0.311839759349823,
      "step": 17,
      "token_acc": 0.8928477168083191
    },
    {
      "epoch": 0.09612817089452604,
      "grad_norm": 1.5167929236403785,
      "learning_rate": 9.473684210526315e-07,
      "loss": 0.317889928817749,
      "step": 18,
      "token_acc": 0.889501690864563
    },
    {
      "epoch": 0.10146862483311081,
      "grad_norm": 1.440565799039953,
      "learning_rate": 1e-06,
      "loss": 0.2831297516822815,
      "step": 19,
      "token_acc": 0.9045619964599609
    },
    {
      "epoch": 0.1068090787716956,
      "grad_norm": 1.406346333791705,
      "learning_rate": 9.999806402154789e-07,
      "loss": 0.2887258231639862,
      "step": 20,
      "token_acc": 0.9011761546134949
    },
    {
      "epoch": 0.11214953271028037,
      "grad_norm": 1.5111427184007904,
      "learning_rate": 9.999225623611207e-07,
      "loss": 0.31958216428756714,
      "step": 21,
      "token_acc": 0.8933359384536743
    },
    {
      "epoch": 0.11748998664886515,
      "grad_norm": 1.380820258533298,
      "learning_rate": 9.998257709344243e-07,
      "loss": 0.287494033575058,
      "step": 22,
      "token_acc": 0.9029217958450317
    },
    {
      "epoch": 0.12283044058744993,
      "grad_norm": 1.501227290349748,
      "learning_rate": 9.996902734308345e-07,
      "loss": 0.3120245337486267,
      "step": 23,
      "token_acc": 0.8955358266830444
    },
    {
      "epoch": 0.12817089452603472,
      "grad_norm": 1.3907620713631643,
      "learning_rate": 9.995160803431612e-07,
      "loss": 0.2652924954891205,
      "step": 24,
      "token_acc": 0.911475658416748
    },
    {
      "epoch": 0.13351134846461948,
      "grad_norm": 1.4894374866926008,
      "learning_rate": 9.993032051607668e-07,
      "loss": 0.3017966151237488,
      "step": 25,
      "token_acc": 0.8967162370681763
    },
    {
      "epoch": 0.13885180240320427,
      "grad_norm": 1.4377257049445678,
      "learning_rate": 9.990516643685221e-07,
      "loss": 0.30019575357437134,
      "step": 26,
      "token_acc": 0.8983671069145203
    },
    {
      "epoch": 0.14419225634178906,
      "grad_norm": 1.444789517167666,
      "learning_rate": 9.98761477445529e-07,
      "loss": 0.29359400272369385,
      "step": 27,
      "token_acc": 0.898292064666748
    },
    {
      "epoch": 0.14953271028037382,
      "grad_norm": 1.5166558298180315,
      "learning_rate": 9.98432666863613e-07,
      "loss": 0.28964900970458984,
      "step": 28,
      "token_acc": 0.9022803902626038
    },
    {
      "epoch": 0.1548731642189586,
      "grad_norm": 1.4267991552660044,
      "learning_rate": 9.980652580855819e-07,
      "loss": 0.280508816242218,
      "step": 29,
      "token_acc": 0.9031826257705688
    },
    {
      "epoch": 0.1602136181575434,
      "grad_norm": 1.5835260406220442,
      "learning_rate": 9.97659279563255e-07,
      "loss": 0.32461118698120117,
      "step": 30,
      "token_acc": 0.890064001083374
    },
    {
      "epoch": 0.16555407209612816,
      "grad_norm": 1.458595332392139,
      "learning_rate": 9.972147627352593e-07,
      "loss": 0.2812092900276184,
      "step": 31,
      "token_acc": 0.9043583273887634
    },
    {
      "epoch": 0.17089452603471295,
      "grad_norm": 1.4247084503881582,
      "learning_rate": 9.96731742024594e-07,
      "loss": 0.29723116755485535,
      "step": 32,
      "token_acc": 0.898239016532898
    },
    {
      "epoch": 0.17623497997329773,
      "grad_norm": 1.4162726844062017,
      "learning_rate": 9.96210254835968e-07,
      "loss": 0.2897300720214844,
      "step": 33,
      "token_acc": 0.9001203179359436
    },
    {
      "epoch": 0.18157543391188252,
      "grad_norm": 1.5506961166890605,
      "learning_rate": 9.956503415528982e-07,
      "loss": 0.25957155227661133,
      "step": 34,
      "token_acc": 0.9100760817527771
    },
    {
      "epoch": 0.18691588785046728,
      "grad_norm": 1.440608304077442,
      "learning_rate": 9.95052045534588e-07,
      "loss": 0.2758601903915405,
      "step": 35,
      "token_acc": 0.9061575531959534
    },
    {
      "epoch": 0.19225634178905207,
      "grad_norm": 1.5192839960137057,
      "learning_rate": 9.944154131125642e-07,
      "loss": 0.2823089063167572,
      "step": 36,
      "token_acc": 0.9057112336158752
    },
    {
      "epoch": 0.19759679572763686,
      "grad_norm": 1.4943292164165989,
      "learning_rate": 9.937404935870937e-07,
      "loss": 0.2794153690338135,
      "step": 37,
      "token_acc": 0.9042868614196777
    },
    {
      "epoch": 0.20293724966622162,
      "grad_norm": 1.4635503317305278,
      "learning_rate": 9.930273392233624e-07,
      "loss": 0.28065863251686096,
      "step": 38,
      "token_acc": 0.9040310978889465
    },
    {
      "epoch": 0.2082777036048064,
      "grad_norm": 1.4609796175621217,
      "learning_rate": 9.922760052474294e-07,
      "loss": 0.27919909358024597,
      "step": 39,
      "token_acc": 0.9046768546104431
    },
    {
      "epoch": 0.2136181575433912,
      "grad_norm": 1.4677584371951407,
      "learning_rate": 9.91486549841951e-07,
      "loss": 0.2925993502140045,
      "step": 40,
      "token_acc": 0.9009308218955994
    },
    {
      "epoch": 0.21895861148197596,
      "grad_norm": 1.4405005579476842,
      "learning_rate": 9.906590341416723e-07,
      "loss": 0.27640053629875183,
      "step": 41,
      "token_acc": 0.9062628746032715
    },
    {
      "epoch": 0.22429906542056074,
      "grad_norm": 1.5453992146200137,
      "learning_rate": 9.897935222286966e-07,
      "loss": 0.29461127519607544,
      "step": 42,
      "token_acc": 0.8993297815322876
    },
    {
      "epoch": 0.22963951935914553,
      "grad_norm": 1.5735314554728557,
      "learning_rate": 9.888900811275203e-07,
      "loss": 0.314420223236084,
      "step": 43,
      "token_acc": 0.8900313377380371
    },
    {
      "epoch": 0.2349799732977303,
      "grad_norm": 1.480951808194191,
      "learning_rate": 9.879487807998435e-07,
      "loss": 0.29261118173599243,
      "step": 44,
      "token_acc": 0.901530385017395
    },
    {
      "epoch": 0.24032042723631508,
      "grad_norm": 1.4386926707775394,
      "learning_rate": 9.869696941391523e-07,
      "loss": 0.28782376646995544,
      "step": 45,
      "token_acc": 0.9029006958007812
    },
    {
      "epoch": 0.24566088117489987,
      "grad_norm": 1.533482422206995,
      "learning_rate": 9.859528969650737e-07,
      "loss": 0.3115203082561493,
      "step": 46,
      "token_acc": 0.8934023380279541
    },
    {
      "epoch": 0.25100133511348466,
      "grad_norm": 1.5248490732703248,
      "learning_rate": 9.848984680175048e-07,
      "loss": 0.30711060762405396,
      "step": 47,
      "token_acc": 0.8952121734619141
    },
    {
      "epoch": 0.25634178905206945,
      "grad_norm": 1.4301160521804694,
      "learning_rate": 9.83806488950514e-07,
      "loss": 0.27855733036994934,
      "step": 48,
      "token_acc": 0.9025441408157349
    },
    {
      "epoch": 0.2616822429906542,
      "grad_norm": 1.4439078814307809,
      "learning_rate": 9.826770443260193e-07,
      "loss": 0.268754780292511,
      "step": 49,
      "token_acc": 0.9062784910202026
    },
    {
      "epoch": 0.26702269692923897,
      "grad_norm": 1.3756290389467116,
      "learning_rate": 9.81510221607239e-07,
      "loss": 0.2641981542110443,
      "step": 50,
      "token_acc": 0.9079115986824036
    },
    {
      "epoch": 0.27236315086782376,
      "grad_norm": 1.3801487007930435,
      "learning_rate": 9.803061111519185e-07,
      "loss": 0.2629387378692627,
      "step": 51,
      "token_acc": 0.9080008268356323
    },
    {
      "epoch": 0.27770360480640854,
      "grad_norm": 1.4769272869669696,
      "learning_rate": 9.79064806205334e-07,
      "loss": 0.2927890419960022,
      "step": 52,
      "token_acc": 0.8967737555503845
    },
    {
      "epoch": 0.28304405874499333,
      "grad_norm": 1.44443609487884,
      "learning_rate": 9.777864028930705e-07,
      "loss": 0.2803707718849182,
      "step": 53,
      "token_acc": 0.9024505615234375
    },
    {
      "epoch": 0.2883845126835781,
      "grad_norm": 1.4098880500394106,
      "learning_rate": 9.764710002135782e-07,
      "loss": 0.2643783986568451,
      "step": 54,
      "token_acc": 0.9092001914978027
    },
    {
      "epoch": 0.2937249666221629,
      "grad_norm": 1.5158998686252954,
      "learning_rate": 9.751187000305074e-07,
      "loss": 0.3155965209007263,
      "step": 55,
      "token_acc": 0.8920438885688782
    },
    {
      "epoch": 0.29906542056074764,
      "grad_norm": 1.4283142529712562,
      "learning_rate": 9.737296070648186e-07,
      "loss": 0.2711144685745239,
      "step": 56,
      "token_acc": 0.9078071117401123
    },
    {
      "epoch": 0.30440587449933243,
      "grad_norm": 1.445103036518831,
      "learning_rate": 9.723038288866736e-07,
      "loss": 0.2946472764015198,
      "step": 57,
      "token_acc": 0.9006671905517578
    },
    {
      "epoch": 0.3097463284379172,
      "grad_norm": 1.5469319594722661,
      "learning_rate": 9.708414759071057e-07,
      "loss": 0.2815186381340027,
      "step": 58,
      "token_acc": 0.9045448303222656
    },
    {
      "epoch": 0.315086782376502,
      "grad_norm": 1.6171152135871012,
      "learning_rate": 9.693426613694692e-07,
      "loss": 0.31847983598709106,
      "step": 59,
      "token_acc": 0.8905124664306641
    },
    {
      "epoch": 0.3204272363150868,
      "grad_norm": 1.4706442684944392,
      "learning_rate": 9.678075013406702e-07,
      "loss": 0.2880111336708069,
      "step": 60,
      "token_acc": 0.899748682975769
    },
    {
      "epoch": 0.3257676902536716,
      "grad_norm": 1.5860771109565805,
      "learning_rate": 9.66236114702178e-07,
      "loss": 0.29920926690101624,
      "step": 61,
      "token_acc": 0.897007942199707
    },
    {
      "epoch": 0.3311081441922563,
      "grad_norm": 1.5010123249171283,
      "learning_rate": 9.646286231408192e-07,
      "loss": 0.297206312417984,
      "step": 62,
      "token_acc": 0.8984295129776001
    },
    {
      "epoch": 0.3364485981308411,
      "grad_norm": 1.4938645899870184,
      "learning_rate": 9.629851511393555e-07,
      "loss": 0.29847198724746704,
      "step": 63,
      "token_acc": 0.8973211050033569
    },
    {
      "epoch": 0.3417890520694259,
      "grad_norm": 1.5454968109005192,
      "learning_rate": 9.613058259668414e-07,
      "loss": 0.2826240658760071,
      "step": 64,
      "token_acc": 0.9042453169822693
    },
    {
      "epoch": 0.3471295060080107,
      "grad_norm": 1.4274151779113933,
      "learning_rate": 9.595907776687713e-07,
      "loss": 0.28468042612075806,
      "step": 65,
      "token_acc": 0.9014208912849426
    },
    {
      "epoch": 0.35246995994659547,
      "grad_norm": 1.5919794599897976,
      "learning_rate": 9.57840139057007e-07,
      "loss": 0.3108655512332916,
      "step": 66,
      "token_acc": 0.8931441903114319
    },
    {
      "epoch": 0.35781041388518026,
      "grad_norm": 1.42662289804713,
      "learning_rate": 9.560540456994939e-07,
      "loss": 0.2712291479110718,
      "step": 67,
      "token_acc": 0.9068335890769958
    },
    {
      "epoch": 0.36315086782376504,
      "grad_norm": 1.4428041843820245,
      "learning_rate": 9.542326359097617e-07,
      "loss": 0.29337918758392334,
      "step": 68,
      "token_acc": 0.9008380770683289
    },
    {
      "epoch": 0.3684913217623498,
      "grad_norm": 1.4505913834841988,
      "learning_rate": 9.523760507362151e-07,
      "loss": 0.2790825366973877,
      "step": 69,
      "token_acc": 0.9048055410385132
    },
    {
      "epoch": 0.37383177570093457,
      "grad_norm": 1.4289939758067638,
      "learning_rate": 9.504844339512094e-07,
      "loss": 0.2657867670059204,
      "step": 70,
      "token_acc": 0.9082612991333008
    },
    {
      "epoch": 0.37917222963951935,
      "grad_norm": 1.510860982065089,
      "learning_rate": 9.485579320399183e-07,
      "loss": 0.3108121156692505,
      "step": 71,
      "token_acc": 0.8939459323883057
    },
    {
      "epoch": 0.38451268357810414,
      "grad_norm": 1.312682482794381,
      "learning_rate": 9.465966941889891e-07,
      "loss": 0.23279854655265808,
      "step": 72,
      "token_acc": 0.9207527041435242
    },
    {
      "epoch": 0.38985313751668893,
      "grad_norm": 1.4699417988775814,
      "learning_rate": 9.446008722749905e-07,
      "loss": 0.28645938634872437,
      "step": 73,
      "token_acc": 0.9035031795501709
    },
    {
      "epoch": 0.3951935914552737,
      "grad_norm": 1.3500117274900032,
      "learning_rate": 9.425706208526515e-07,
      "loss": 0.27431654930114746,
      "step": 74,
      "token_acc": 0.9042099714279175
    },
    {
      "epoch": 0.40053404539385845,
      "grad_norm": 1.4713499460053907,
      "learning_rate": 9.405060971428922e-07,
      "loss": 0.29679012298583984,
      "step": 75,
      "token_acc": 0.8999800086021423
    },
    {
      "epoch": 0.40587449933244324,
      "grad_norm": 1.4818382523702867,
      "learning_rate": 9.384074610206493e-07,
      "loss": 0.29305019974708557,
      "step": 76,
      "token_acc": 0.898842990398407
    },
    {
      "epoch": 0.411214953271028,
      "grad_norm": 1.4493370130800018,
      "learning_rate": 9.362748750024952e-07,
      "loss": 0.2696800231933594,
      "step": 77,
      "token_acc": 0.9080367088317871
    },
    {
      "epoch": 0.4165554072096128,
      "grad_norm": 1.5709905022949062,
      "learning_rate": 9.341085042340531e-07,
      "loss": 0.32400715351104736,
      "step": 78,
      "token_acc": 0.8896335363388062
    },
    {
      "epoch": 0.4218958611481976,
      "grad_norm": 1.571526565600695,
      "learning_rate": 9.31908516477208e-07,
      "loss": 0.30972063541412354,
      "step": 79,
      "token_acc": 0.8935267925262451
    },
    {
      "epoch": 0.4272363150867824,
      "grad_norm": 1.4042062660347052,
      "learning_rate": 9.296750820971157e-07,
      "loss": 0.2732260227203369,
      "step": 80,
      "token_acc": 0.9055958390235901
    },
    {
      "epoch": 0.4325767690253672,
      "grad_norm": 1.4305861561440973,
      "learning_rate": 9.274083740490096e-07,
      "loss": 0.26816362142562866,
      "step": 81,
      "token_acc": 0.9088789820671082
    },
    {
      "epoch": 0.4379172229639519,
      "grad_norm": 1.4109974033898394,
      "learning_rate": 9.251085678648071e-07,
      "loss": 0.23632025718688965,
      "step": 82,
      "token_acc": 0.9193145036697388
    },
    {
      "epoch": 0.4432576769025367,
      "grad_norm": 1.579783094360126,
      "learning_rate": 9.227758416395169e-07,
      "loss": 0.32243475317955017,
      "step": 83,
      "token_acc": 0.8915097713470459
    },
    {
      "epoch": 0.4485981308411215,
      "grad_norm": 1.444883529989335,
      "learning_rate": 9.204103760174471e-07,
      "loss": 0.27780580520629883,
      "step": 84,
      "token_acc": 0.903154730796814
    },
    {
      "epoch": 0.4539385847797063,
      "grad_norm": 1.4743610793683541,
      "learning_rate": 9.18012354178217e-07,
      "loss": 0.290909081697464,
      "step": 85,
      "token_acc": 0.8995449542999268
    },
    {
      "epoch": 0.45927903871829107,
      "grad_norm": 1.4453910878354272,
      "learning_rate": 9.155819618225706e-07,
      "loss": 0.2741404175758362,
      "step": 86,
      "token_acc": 0.9050561785697937
    },
    {
      "epoch": 0.46461949265687585,
      "grad_norm": 1.5341161775376193,
      "learning_rate": 9.131193871579974e-07,
      "loss": 0.29184362292289734,
      "step": 87,
      "token_acc": 0.8988983631134033
    },
    {
      "epoch": 0.4699599465954606,
      "grad_norm": 1.4215267618699114,
      "learning_rate": 9.106248208841568e-07,
      "loss": 0.27542126178741455,
      "step": 88,
      "token_acc": 0.906243622303009
    },
    {
      "epoch": 0.4753004005340454,
      "grad_norm": 1.4725473172620342,
      "learning_rate": 9.080984561781109e-07,
      "loss": 0.2945716977119446,
      "step": 89,
      "token_acc": 0.8966306447982788
    },
    {
      "epoch": 0.48064085447263016,
      "grad_norm": 1.4591526761766243,
      "learning_rate": 9.05540488679365e-07,
      "loss": 0.2776861786842346,
      "step": 90,
      "token_acc": 0.9028105139732361
    },
    {
      "epoch": 0.48598130841121495,
      "grad_norm": 1.431454776628444,
      "learning_rate": 9.029511164747175e-07,
      "loss": 0.28304219245910645,
      "step": 91,
      "token_acc": 0.9032279849052429
    },
    {
      "epoch": 0.49132176234979974,
      "grad_norm": 1.4748810607233322,
      "learning_rate": 9.0033054008292e-07,
      "loss": 0.27895230054855347,
      "step": 92,
      "token_acc": 0.9046729207038879
    },
    {
      "epoch": 0.49666221628838453,
      "grad_norm": 1.5007381614880584,
      "learning_rate": 8.976789624391497e-07,
      "loss": 0.28230053186416626,
      "step": 93,
      "token_acc": 0.9033601880073547
    },
    {
      "epoch": 0.5020026702269693,
      "grad_norm": 1.4839915635561016,
      "learning_rate": 8.94996588879294e-07,
      "loss": 0.27275383472442627,
      "step": 94,
      "token_acc": 0.9072387218475342
    },
    {
      "epoch": 0.507343124165554,
      "grad_norm": 1.4575359090259006,
      "learning_rate": 8.922836271240491e-07,
      "loss": 0.2795729637145996,
      "step": 95,
      "token_acc": 0.9037200808525085
    },
    {
      "epoch": 0.5126835781041389,
      "grad_norm": 1.4275482143438216,
      "learning_rate": 8.895402872628351e-07,
      "loss": 0.2748318910598755,
      "step": 96,
      "token_acc": 0.9038447737693787
    },
    {
      "epoch": 0.5180240320427236,
      "grad_norm": 1.4996164861212264,
      "learning_rate": 8.867667817375265e-07,
      "loss": 0.30395227670669556,
      "step": 97,
      "token_acc": 0.8991894125938416
    },
    {
      "epoch": 0.5233644859813084,
      "grad_norm": 1.63200655269095,
      "learning_rate": 8.839633253260005e-07,
      "loss": 0.3181121051311493,
      "step": 98,
      "token_acc": 0.8909761309623718
    },
    {
      "epoch": 0.5287049399198932,
      "grad_norm": 1.4487856510791033,
      "learning_rate": 8.811301351255052e-07,
      "loss": 0.27428990602493286,
      "step": 99,
      "token_acc": 0.9081797003746033
    },
    {
      "epoch": 0.5340453938584779,
      "grad_norm": 1.3755745306694576,
      "learning_rate": 8.782674305358479e-07,
      "loss": 0.23392319679260254,
      "step": 100,
      "token_acc": 0.9194831848144531
    },
    {
      "epoch": 0.5393858477970628,
      "grad_norm": 1.3783872170462725,
      "learning_rate": 8.753754332424046e-07,
      "loss": 0.25885897874832153,
      "step": 101,
      "token_acc": 0.9093189835548401
    },
    {
      "epoch": 0.5447263017356475,
      "grad_norm": 1.4673998061357842,
      "learning_rate": 8.724543671989529e-07,
      "loss": 0.27853184938430786,
      "step": 102,
      "token_acc": 0.9039020538330078
    },
    {
      "epoch": 0.5500667556742324,
      "grad_norm": 1.468751593365532,
      "learning_rate": 8.695044586103295e-07,
      "loss": 0.2907976508140564,
      "step": 103,
      "token_acc": 0.9023234248161316
    },
    {
      "epoch": 0.5554072096128171,
      "grad_norm": 1.3942453997655584,
      "learning_rate": 8.66525935914913e-07,
      "loss": 0.26022887229919434,
      "step": 104,
      "token_acc": 0.9076283574104309
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 1.4405529584001042,
      "learning_rate": 8.635190297669339e-07,
      "loss": 0.26023077964782715,
      "step": 105,
      "token_acc": 0.9096285700798035
    },
    {
      "epoch": 0.5660881174899867,
      "grad_norm": 1.4704608320018198,
      "learning_rate": 8.604839730186124e-07,
      "loss": 0.257341593503952,
      "step": 106,
      "token_acc": 0.9118361473083496
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 1.448708511759169,
      "learning_rate": 8.57421000702127e-07,
      "loss": 0.27755892276763916,
      "step": 107,
      "token_acc": 0.9039767384529114
    },
    {
      "epoch": 0.5767690253671562,
      "grad_norm": 1.3687283265061863,
      "learning_rate": 8.543303500114141e-07,
      "loss": 0.2541080713272095,
      "step": 108,
      "token_acc": 0.9100450873374939
    },
    {
      "epoch": 0.582109479305741,
      "grad_norm": 1.5076396646770536,
      "learning_rate": 8.512122602837992e-07,
      "loss": 0.3047584295272827,
      "step": 109,
      "token_acc": 0.8959829211235046
    },
    {
      "epoch": 0.5874499332443258,
      "grad_norm": 1.5146292336620015,
      "learning_rate": 8.480669729814633e-07,
      "loss": 0.30244219303131104,
      "step": 110,
      "token_acc": 0.8954659104347229
    },
    {
      "epoch": 0.5927903871829105,
      "grad_norm": 1.5321775985722281,
      "learning_rate": 8.448947316727442e-07,
      "loss": 0.31041330099105835,
      "step": 111,
      "token_acc": 0.894790768623352
    },
    {
      "epoch": 0.5981308411214953,
      "grad_norm": 1.5104925518676726,
      "learning_rate": 8.416957820132741e-07,
      "loss": 0.29461923241615295,
      "step": 112,
      "token_acc": 0.8996652960777283
    },
    {
      "epoch": 0.6034712950600801,
      "grad_norm": 1.5074874404548617,
      "learning_rate": 8.384703717269583e-07,
      "loss": 0.30026569962501526,
      "step": 113,
      "token_acc": 0.8964109420776367
    },
    {
      "epoch": 0.6088117489986649,
      "grad_norm": 1.430948277878645,
      "learning_rate": 8.35218750586789e-07,
      "loss": 0.27259859442710876,
      "step": 114,
      "token_acc": 0.9043173789978027
    },
    {
      "epoch": 0.6141522029372497,
      "grad_norm": 1.4730184398133797,
      "learning_rate": 8.319411703955041e-07,
      "loss": 0.28818240761756897,
      "step": 115,
      "token_acc": 0.901012659072876
    },
    {
      "epoch": 0.6194926568758344,
      "grad_norm": 1.4737839717551757,
      "learning_rate": 8.286378849660895e-07,
      "loss": 0.27523326873779297,
      "step": 116,
      "token_acc": 0.9044082164764404
    },
    {
      "epoch": 0.6248331108144193,
      "grad_norm": 1.3893191266941896,
      "learning_rate": 8.25309150102121e-07,
      "loss": 0.2681213617324829,
      "step": 117,
      "token_acc": 0.9055965542793274
    },
    {
      "epoch": 0.630173564753004,
      "grad_norm": 1.4487156196233832,
      "learning_rate": 8.219552235779578e-07,
      "loss": 0.2612762451171875,
      "step": 118,
      "token_acc": 0.9065930247306824
    },
    {
      "epoch": 0.6355140186915887,
      "grad_norm": 1.3809737797890187,
      "learning_rate": 8.185763651187789e-07,
      "loss": 0.26775848865509033,
      "step": 119,
      "token_acc": 0.9093659520149231
    },
    {
      "epoch": 0.6408544726301736,
      "grad_norm": 1.4475105084507855,
      "learning_rate": 8.151728363804713e-07,
      "loss": 0.2951820492744446,
      "step": 120,
      "token_acc": 0.8978663682937622
    },
    {
      "epoch": 0.6461949265687583,
      "grad_norm": 1.4139139859602043,
      "learning_rate": 8.117449009293668e-07,
      "loss": 0.2766643762588501,
      "step": 121,
      "token_acc": 0.9017517566680908
    },
    {
      "epoch": 0.6515353805073432,
      "grad_norm": 1.5121148872564947,
      "learning_rate": 8.082928242218321e-07,
      "loss": 0.30561524629592896,
      "step": 122,
      "token_acc": 0.897788941860199
    },
    {
      "epoch": 0.6568758344459279,
      "grad_norm": 1.4841028682972484,
      "learning_rate": 8.04816873583712e-07,
      "loss": 0.2939586043357849,
      "step": 123,
      "token_acc": 0.9001566171646118
    },
    {
      "epoch": 0.6622162883845126,
      "grad_norm": 1.4245067768897755,
      "learning_rate": 8.013173181896282e-07,
      "loss": 0.2514035105705261,
      "step": 124,
      "token_acc": 0.9155417680740356
    },
    {
      "epoch": 0.6675567423230975,
      "grad_norm": 1.3802195837048552,
      "learning_rate": 7.977944290421339e-07,
      "loss": 0.2620801329612732,
      "step": 125,
      "token_acc": 0.9096019268035889
    },
    {
      "epoch": 0.6728971962616822,
      "grad_norm": 1.461725826567453,
      "learning_rate": 7.942484789507282e-07,
      "loss": 0.27589190006256104,
      "step": 126,
      "token_acc": 0.9012653231620789
    },
    {
      "epoch": 0.678237650200267,
      "grad_norm": 1.4413519022677317,
      "learning_rate": 7.906797425107298e-07,
      "loss": 0.26920682191848755,
      "step": 127,
      "token_acc": 0.9086182713508606
    },
    {
      "epoch": 0.6835781041388518,
      "grad_norm": 1.367892107085817,
      "learning_rate": 7.870884960820129e-07,
      "loss": 0.2539078891277313,
      "step": 128,
      "token_acc": 0.9143032431602478
    },
    {
      "epoch": 0.6889185580774366,
      "grad_norm": 1.5817696641183787,
      "learning_rate": 7.834750177676057e-07,
      "loss": 0.3091282248497009,
      "step": 129,
      "token_acc": 0.891278862953186
    },
    {
      "epoch": 0.6942590120160214,
      "grad_norm": 1.5302522292243204,
      "learning_rate": 7.798395873921541e-07,
      "loss": 0.27863165736198425,
      "step": 130,
      "token_acc": 0.9007149338722229
    },
    {
      "epoch": 0.6995994659546061,
      "grad_norm": 1.4659073800662585,
      "learning_rate": 7.761824864802528e-07,
      "loss": 0.28092169761657715,
      "step": 131,
      "token_acc": 0.9041672348976135
    },
    {
      "epoch": 0.7049399198931909,
      "grad_norm": 1.5118745047717976,
      "learning_rate": 7.725039982346448e-07,
      "loss": 0.30099111795425415,
      "step": 132,
      "token_acc": 0.8950716257095337
    },
    {
      "epoch": 0.7102803738317757,
      "grad_norm": 1.457094154522222,
      "learning_rate": 7.688044075142887e-07,
      "loss": 0.29831060767173767,
      "step": 133,
      "token_acc": 0.8978042602539062
    },
    {
      "epoch": 0.7156208277703605,
      "grad_norm": 1.4662059075543796,
      "learning_rate": 7.650840008123014e-07,
      "loss": 0.27699634432792664,
      "step": 134,
      "token_acc": 0.9014725685119629
    },
    {
      "epoch": 0.7209612817089452,
      "grad_norm": 1.4413615253320124,
      "learning_rate": 7.613430662337714e-07,
      "loss": 0.28902721405029297,
      "step": 135,
      "token_acc": 0.9037944078445435
    },
    {
      "epoch": 0.7263017356475301,
      "grad_norm": 1.5447834440335302,
      "learning_rate": 7.575818934734479e-07,
      "loss": 0.30010080337524414,
      "step": 136,
      "token_acc": 0.897688627243042
    },
    {
      "epoch": 0.7316421895861148,
      "grad_norm": 1.534480828454567,
      "learning_rate": 7.538007737933079e-07,
      "loss": 0.28456270694732666,
      "step": 137,
      "token_acc": 0.9014143347740173
    },
    {
      "epoch": 0.7369826435246996,
      "grad_norm": 1.4040106816829079,
      "learning_rate": 7.5e-07,
      "loss": 0.2786479890346527,
      "step": 138,
      "token_acc": 0.9083135724067688
    },
    {
      "epoch": 0.7423230974632844,
      "grad_norm": 1.693219878226182,
      "learning_rate": 7.46179866422171e-07,
      "loss": 0.30642837285995483,
      "step": 139,
      "token_acc": 0.8953761458396912
    },
    {
      "epoch": 0.7476635514018691,
      "grad_norm": 1.5123956340013087,
      "learning_rate": 7.423406688876726e-07,
      "loss": 0.2847447991371155,
      "step": 140,
      "token_acc": 0.9026427268981934
    },
    {
      "epoch": 0.753004005340454,
      "grad_norm": 1.5165972852128844,
      "learning_rate": 7.384827047006529e-07,
      "loss": 0.28060221672058105,
      "step": 141,
      "token_acc": 0.9037265181541443
    },
    {
      "epoch": 0.7583444592790387,
      "grad_norm": 1.4884818065566605,
      "learning_rate": 7.346062726185331e-07,
      "loss": 0.29293128848075867,
      "step": 142,
      "token_acc": 0.8995508551597595
    },
    {
      "epoch": 0.7636849132176236,
      "grad_norm": 1.448783078032984,
      "learning_rate": 7.307116728288726e-07,
      "loss": 0.2762266993522644,
      "step": 143,
      "token_acc": 0.905832052230835
    },
    {
      "epoch": 0.7690253671562083,
      "grad_norm": 1.4969627454330798,
      "learning_rate": 7.267992069261221e-07,
      "loss": 0.30249935388565063,
      "step": 144,
      "token_acc": 0.8975799083709717
    },
    {
      "epoch": 0.774365821094793,
      "grad_norm": 1.4402831472902118,
      "learning_rate": 7.228691778882692e-07,
      "loss": 0.267535924911499,
      "step": 145,
      "token_acc": 0.9076101183891296
    },
    {
      "epoch": 0.7797062750333779,
      "grad_norm": 1.4908723191951325,
      "learning_rate": 7.189218900533749e-07,
      "loss": 0.3062886595726013,
      "step": 146,
      "token_acc": 0.8947438597679138
    },
    {
      "epoch": 0.7850467289719626,
      "grad_norm": 1.4437415050725992,
      "learning_rate": 7.149576490960072e-07,
      "loss": 0.2801040709018707,
      "step": 147,
      "token_acc": 0.9041796326637268
    },
    {
      "epoch": 0.7903871829105474,
      "grad_norm": 1.4899191386757624,
      "learning_rate": 7.109767620035688e-07,
      "loss": 0.28240880370140076,
      "step": 148,
      "token_acc": 0.902775764465332
    },
    {
      "epoch": 0.7957276368491322,
      "grad_norm": 1.3643095946721608,
      "learning_rate": 7.069795370525251e-07,
      "loss": 0.25867897272109985,
      "step": 149,
      "token_acc": 0.9112460613250732
    },
    {
      "epoch": 0.8010680907877169,
      "grad_norm": 1.4178796095004065,
      "learning_rate": 7.029662837845311e-07,
      "loss": 0.276249498128891,
      "step": 150,
      "token_acc": 0.9065636396408081
    },
    {
      "epoch": 0.8064085447263017,
      "grad_norm": 1.472665746353715,
      "learning_rate": 6.989373129824604e-07,
      "loss": 0.27797502279281616,
      "step": 151,
      "token_acc": 0.9020600318908691
    },
    {
      "epoch": 0.8117489986648865,
      "grad_norm": 1.2857162836865903,
      "learning_rate": 6.948929366463396e-07,
      "loss": 0.22690004110336304,
      "step": 152,
      "token_acc": 0.9221194386482239
    },
    {
      "epoch": 0.8170894526034713,
      "grad_norm": 1.5413852318610612,
      "learning_rate": 6.908334679691863e-07,
      "loss": 0.3124600648880005,
      "step": 153,
      "token_acc": 0.895993709564209
    },
    {
      "epoch": 0.822429906542056,
      "grad_norm": 1.5503364220585383,
      "learning_rate": 6.867592213127557e-07,
      "loss": 0.2983526289463043,
      "step": 154,
      "token_acc": 0.8980738520622253
    },
    {
      "epoch": 0.8277703604806409,
      "grad_norm": 1.456305481825788,
      "learning_rate": 6.826705121831976e-07,
      "loss": 0.2825569212436676,
      "step": 155,
      "token_acc": 0.9024941921234131
    },
    {
      "epoch": 0.8331108144192256,
      "grad_norm": 1.552345382355808,
      "learning_rate": 6.785676572066223e-07,
      "loss": 0.28823018074035645,
      "step": 156,
      "token_acc": 0.8996476531028748
    },
    {
      "epoch": 0.8384512683578104,
      "grad_norm": 1.463702360369313,
      "learning_rate": 6.744509741045834e-07,
      "loss": 0.2687251567840576,
      "step": 157,
      "token_acc": 0.9064703583717346
    },
    {
      "epoch": 0.8437917222963952,
      "grad_norm": 1.3516216726169812,
      "learning_rate": 6.703207816694718e-07,
      "loss": 0.26400265097618103,
      "step": 158,
      "token_acc": 0.905772864818573
    },
    {
      "epoch": 0.8491321762349799,
      "grad_norm": 1.4476139419013239,
      "learning_rate": 6.661773997398297e-07,
      "loss": 0.26589322090148926,
      "step": 159,
      "token_acc": 0.9063243269920349
    },
    {
      "epoch": 0.8544726301735648,
      "grad_norm": 1.367534189635397,
      "learning_rate": 6.62021149175583e-07,
      "loss": 0.25522464513778687,
      "step": 160,
      "token_acc": 0.9129465818405151
    },
    {
      "epoch": 0.8598130841121495,
      "grad_norm": 1.4613995712079086,
      "learning_rate": 6.578523518331926e-07,
      "loss": 0.29331648349761963,
      "step": 161,
      "token_acc": 0.8985590934753418
    },
    {
      "epoch": 0.8651535380507344,
      "grad_norm": 1.4125807144896219,
      "learning_rate": 6.536713305407314e-07,
      "loss": 0.26160115003585815,
      "step": 162,
      "token_acc": 0.9072727560997009
    },
    {
      "epoch": 0.8704939919893191,
      "grad_norm": 1.54523708977056,
      "learning_rate": 6.494784090728851e-07,
      "loss": 0.30150866508483887,
      "step": 163,
      "token_acc": 0.896626889705658
    },
    {
      "epoch": 0.8758344459279038,
      "grad_norm": 1.41010615455906,
      "learning_rate": 6.45273912125878e-07,
      "loss": 0.2659100294113159,
      "step": 164,
      "token_acc": 0.9065546989440918
    },
    {
      "epoch": 0.8811748998664887,
      "grad_norm": 1.4727634670720684,
      "learning_rate": 6.410581652923297e-07,
      "loss": 0.27308836579322815,
      "step": 165,
      "token_acc": 0.9060620665550232
    },
    {
      "epoch": 0.8865153538050734,
      "grad_norm": 1.4505380629695295,
      "learning_rate": 6.368314950360415e-07,
      "loss": 0.2644720673561096,
      "step": 166,
      "token_acc": 0.9064491391181946
    },
    {
      "epoch": 0.8918558077436582,
      "grad_norm": 1.4149838933079968,
      "learning_rate": 6.325942286667148e-07,
      "loss": 0.252743661403656,
      "step": 167,
      "token_acc": 0.9134348630905151
    },
    {
      "epoch": 0.897196261682243,
      "grad_norm": 1.4192512704646,
      "learning_rate": 6.283466943146051e-07,
      "loss": 0.2717309594154358,
      "step": 168,
      "token_acc": 0.9042630195617676
    },
    {
      "epoch": 0.9025367156208278,
      "grad_norm": 1.4874485589098323,
      "learning_rate": 6.240892209051119e-07,
      "loss": 0.2797185778617859,
      "step": 169,
      "token_acc": 0.9046774506568909
    },
    {
      "epoch": 0.9078771695594126,
      "grad_norm": 1.5220997493997237,
      "learning_rate": 6.198221381333063e-07,
      "loss": 0.3090575933456421,
      "step": 170,
      "token_acc": 0.8961625099182129
    },
    {
      "epoch": 0.9132176234979973,
      "grad_norm": 1.467678079871528,
      "learning_rate": 6.155457764384e-07,
      "loss": 0.30064499378204346,
      "step": 171,
      "token_acc": 0.8983897566795349
    },
    {
      "epoch": 0.9185580774365821,
      "grad_norm": 1.5213816256226624,
      "learning_rate": 6.112604669781572e-07,
      "loss": 0.29739877581596375,
      "step": 172,
      "token_acc": 0.8958553075790405
    },
    {
      "epoch": 0.9238985313751669,
      "grad_norm": 1.4056572448993678,
      "learning_rate": 6.069665416032486e-07,
      "loss": 0.24357512593269348,
      "step": 173,
      "token_acc": 0.9159275889396667
    },
    {
      "epoch": 0.9292389853137517,
      "grad_norm": 1.4132964934246643,
      "learning_rate": 6.026643328315545e-07,
      "loss": 0.2843455672264099,
      "step": 174,
      "token_acc": 0.8996742963790894
    },
    {
      "epoch": 0.9345794392523364,
      "grad_norm": 1.5158651145960838,
      "learning_rate": 5.98354173822414e-07,
      "loss": 0.30474793910980225,
      "step": 175,
      "token_acc": 0.8991747498512268
    },
    {
      "epoch": 0.9399198931909212,
      "grad_norm": 1.5682686186076444,
      "learning_rate": 5.940363983508256e-07,
      "loss": 0.3241081237792969,
      "step": 176,
      "token_acc": 0.8867406845092773
    },
    {
      "epoch": 0.945260347129506,
      "grad_norm": 1.488780669927335,
      "learning_rate": 5.897113407816006e-07,
      "loss": 0.27237415313720703,
      "step": 177,
      "token_acc": 0.9065018892288208
    },
    {
      "epoch": 0.9506008010680908,
      "grad_norm": 1.441648821258601,
      "learning_rate": 5.853793360434687e-07,
      "loss": 0.2751544117927551,
      "step": 178,
      "token_acc": 0.9051114320755005
    },
    {
      "epoch": 0.9559412550066756,
      "grad_norm": 1.4510757578550872,
      "learning_rate": 5.810407196031436e-07,
      "loss": 0.2876569628715515,
      "step": 179,
      "token_acc": 0.9022960662841797
    },
    {
      "epoch": 0.9612817089452603,
      "grad_norm": 1.5177733417316286,
      "learning_rate": 5.766958274393428e-07,
      "loss": 0.2696794867515564,
      "step": 180,
      "token_acc": 0.9082053303718567
    },
    {
      "epoch": 0.9666221628838452,
      "grad_norm": 1.526410901108856,
      "learning_rate": 5.723449960167703e-07,
      "loss": 0.29867053031921387,
      "step": 181,
      "token_acc": 0.8969375491142273
    },
    {
      "epoch": 0.9719626168224299,
      "grad_norm": 1.5830878812419842,
      "learning_rate": 5.679885622600616e-07,
      "loss": 0.31017428636550903,
      "step": 182,
      "token_acc": 0.8941692113876343
    },
    {
      "epoch": 0.9773030707610146,
      "grad_norm": 1.479189267754833,
      "learning_rate": 5.636268635276917e-07,
      "loss": 0.2776733636856079,
      "step": 183,
      "token_acc": 0.9059219360351562
    },
    {
      "epoch": 0.9826435246995995,
      "grad_norm": 1.5067298012627606,
      "learning_rate": 5.592602375858513e-07,
      "loss": 0.2824113667011261,
      "step": 184,
      "token_acc": 0.902838408946991
    },
    {
      "epoch": 0.9879839786381842,
      "grad_norm": 1.4272874923061691,
      "learning_rate": 5.548890225822896e-07,
      "loss": 0.23751461505889893,
      "step": 185,
      "token_acc": 0.9159970879554749
    },
    {
      "epoch": 0.9933244325767691,
      "grad_norm": 1.4812489898470391,
      "learning_rate": 5.505135570201289e-07,
      "loss": 0.2700713872909546,
      "step": 186,
      "token_acc": 0.9049946069717407
    },
    {
      "epoch": 0.9986648865153538,
      "grad_norm": 1.5086585649641973,
      "learning_rate": 5.46134179731651e-07,
      "loss": 0.277831107378006,
      "step": 187,
      "token_acc": 0.9033421874046326
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.0592845448074675,
      "learning_rate": 5.417512298520584e-07,
      "loss": 0.34686022996902466,
      "step": 188,
      "token_acc": 0.8805857300758362
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.2894425392150879,
      "eval_runtime": 4.387,
      "eval_samples_per_second": 4.787,
      "eval_steps_per_second": 0.684,
      "eval_token_acc": 0.89005047082901,
      "step": 188
    },
    {
      "epoch": 1.0053404539385848,
      "grad_norm": 1.4602282690072395,
      "learning_rate": 5.373650467932121e-07,
      "loss": 0.2740900218486786,
      "step": 189,
      "token_acc": 0.9074295163154602
    },
    {
      "epoch": 1.0106809078771695,
      "grad_norm": 1.5136301947507462,
      "learning_rate": 5.329759702173476e-07,
      "loss": 0.3051583170890808,
      "step": 190,
      "token_acc": 0.8956478834152222
    },
    {
      "epoch": 1.0160213618157543,
      "grad_norm": 1.3661146518445126,
      "learning_rate": 5.285843400107721e-07,
      "loss": 0.2452118843793869,
      "step": 191,
      "token_acc": 0.9162895679473877
    },
    {
      "epoch": 1.0213618157543392,
      "grad_norm": 1.407017074271343,
      "learning_rate": 5.241904962575434e-07,
      "loss": 0.25142550468444824,
      "step": 192,
      "token_acc": 0.9127443432807922
    },
    {
      "epoch": 1.026702269692924,
      "grad_norm": 1.5075117616213347,
      "learning_rate": 5.197947792131348e-07,
      "loss": 0.2882319986820221,
      "step": 193,
      "token_acc": 0.901009738445282
    },
    {
      "epoch": 1.0320427236315086,
      "grad_norm": 1.4145772441652307,
      "learning_rate": 5.153975292780853e-07,
      "loss": 0.26002272963523865,
      "step": 194,
      "token_acc": 0.910018265247345
    },
    {
      "epoch": 1.0373831775700935,
      "grad_norm": 1.3484605891925254,
      "learning_rate": 5.109990869716398e-07,
      "loss": 0.2504333257675171,
      "step": 195,
      "token_acc": 0.9167550802230835
    },
    {
      "epoch": 1.0427236315086783,
      "grad_norm": 1.3847645458543762,
      "learning_rate": 5.065997929053795e-07,
      "loss": 0.25030696392059326,
      "step": 196,
      "token_acc": 0.9109770059585571
    },
    {
      "epoch": 1.048064085447263,
      "grad_norm": 1.3702442162226314,
      "learning_rate": 5.021999877568451e-07,
      "loss": 0.2521418333053589,
      "step": 197,
      "token_acc": 0.9099429249763489
    },
    {
      "epoch": 1.0534045393858478,
      "grad_norm": 1.3723544867506587,
      "learning_rate": 4.97800012243155e-07,
      "loss": 0.2647276222705841,
      "step": 198,
      "token_acc": 0.9087542295455933
    },
    {
      "epoch": 1.0587449933244326,
      "grad_norm": 1.3781067722926077,
      "learning_rate": 4.934002070946206e-07,
      "loss": 0.26125913858413696,
      "step": 199,
      "token_acc": 0.9107503890991211
    },
    {
      "epoch": 1.0640854472630175,
      "grad_norm": 1.4020339885094901,
      "learning_rate": 4.890009130283603e-07,
      "loss": 0.2743861973285675,
      "step": 200,
      "token_acc": 0.9042608737945557
    },
    {
      "epoch": 1.069425901201602,
      "grad_norm": 1.3693718671474886,
      "learning_rate": 4.846024707219149e-07,
      "loss": 0.24946187436580658,
      "step": 201,
      "token_acc": 0.9140281081199646
    },
    {
      "epoch": 1.074766355140187,
      "grad_norm": 1.3787521380659007,
      "learning_rate": 4.802052207868654e-07,
      "loss": 0.25498655438423157,
      "step": 202,
      "token_acc": 0.9129205942153931
    },
    {
      "epoch": 1.0801068090787718,
      "grad_norm": 1.412877966296016,
      "learning_rate": 4.7580950374245664e-07,
      "loss": 0.25205057859420776,
      "step": 203,
      "token_acc": 0.9117180705070496
    },
    {
      "epoch": 1.0854472630173564,
      "grad_norm": 1.4117305531385913,
      "learning_rate": 4.7141565998922775e-07,
      "loss": 0.27328014373779297,
      "step": 204,
      "token_acc": 0.906479001045227
    },
    {
      "epoch": 1.0907877169559412,
      "grad_norm": 1.4211720504886196,
      "learning_rate": 4.6702402978265226e-07,
      "loss": 0.2628495991230011,
      "step": 205,
      "token_acc": 0.910480797290802
    },
    {
      "epoch": 1.096128170894526,
      "grad_norm": 1.47434612577362,
      "learning_rate": 4.626349532067879e-07,
      "loss": 0.2671458125114441,
      "step": 206,
      "token_acc": 0.9060709476470947
    },
    {
      "epoch": 1.1014686248331107,
      "grad_norm": 1.5056237856922248,
      "learning_rate": 4.582487701479416e-07,
      "loss": 0.2675018012523651,
      "step": 207,
      "token_acc": 0.905981719493866
    },
    {
      "epoch": 1.1068090787716955,
      "grad_norm": 1.4771276314121324,
      "learning_rate": 4.5386582026834904e-07,
      "loss": 0.2852442264556885,
      "step": 208,
      "token_acc": 0.9004486203193665
    },
    {
      "epoch": 1.1121495327102804,
      "grad_norm": 1.4211424546241151,
      "learning_rate": 4.4948644297987116e-07,
      "loss": 0.26674801111221313,
      "step": 209,
      "token_acc": 0.9085896611213684
    },
    {
      "epoch": 1.1174899866488652,
      "grad_norm": 1.4536878916949376,
      "learning_rate": 4.451109774177104e-07,
      "loss": 0.24601322412490845,
      "step": 210,
      "token_acc": 0.9139390587806702
    },
    {
      "epoch": 1.1228304405874499,
      "grad_norm": 1.4715781254837346,
      "learning_rate": 4.4073976241414866e-07,
      "loss": 0.27014046907424927,
      "step": 211,
      "token_acc": 0.907028079032898
    },
    {
      "epoch": 1.1281708945260347,
      "grad_norm": 1.424416966510835,
      "learning_rate": 4.3637313647230835e-07,
      "loss": 0.2506459951400757,
      "step": 212,
      "token_acc": 0.9118000864982605
    },
    {
      "epoch": 1.1335113484646195,
      "grad_norm": 1.4898081464605304,
      "learning_rate": 4.3201143773993864e-07,
      "loss": 0.2772943377494812,
      "step": 213,
      "token_acc": 0.9037544131278992
    },
    {
      "epoch": 1.1388518024032042,
      "grad_norm": 1.4256895516620778,
      "learning_rate": 4.276550039832299e-07,
      "loss": 0.2632078528404236,
      "step": 214,
      "token_acc": 0.9107373952865601
    },
    {
      "epoch": 1.144192256341789,
      "grad_norm": 1.4538979505384508,
      "learning_rate": 4.2330417256065723e-07,
      "loss": 0.255924791097641,
      "step": 215,
      "token_acc": 0.9106543064117432
    },
    {
      "epoch": 1.1495327102803738,
      "grad_norm": 1.4129976201354708,
      "learning_rate": 4.189592803968563e-07,
      "loss": 0.25921228528022766,
      "step": 216,
      "token_acc": 0.9084319472312927
    },
    {
      "epoch": 1.1548731642189587,
      "grad_norm": 1.5037036498752998,
      "learning_rate": 4.146206639565312e-07,
      "loss": 0.2904360294342041,
      "step": 217,
      "token_acc": 0.8985555768013
    },
    {
      "epoch": 1.1602136181575433,
      "grad_norm": 1.4966339093456151,
      "learning_rate": 4.102886592183995e-07,
      "loss": 0.255800724029541,
      "step": 218,
      "token_acc": 0.9113923907279968
    },
    {
      "epoch": 1.1655540720961282,
      "grad_norm": 1.4296087578106005,
      "learning_rate": 4.059636016491743e-07,
      "loss": 0.2537374794483185,
      "step": 219,
      "token_acc": 0.9147658944129944
    },
    {
      "epoch": 1.170894526034713,
      "grad_norm": 1.4754905839164394,
      "learning_rate": 4.0164582617758596e-07,
      "loss": 0.25285664200782776,
      "step": 220,
      "token_acc": 0.9140230417251587
    },
    {
      "epoch": 1.1762349799732976,
      "grad_norm": 1.4771920428465208,
      "learning_rate": 3.9733566716844546e-07,
      "loss": 0.25915923714637756,
      "step": 221,
      "token_acc": 0.907535970211029
    },
    {
      "epoch": 1.1815754339118825,
      "grad_norm": 1.4541677714934378,
      "learning_rate": 3.930334583967514e-07,
      "loss": 0.2689444124698639,
      "step": 222,
      "token_acc": 0.9067755341529846
    },
    {
      "epoch": 1.1869158878504673,
      "grad_norm": 1.4559575144856152,
      "learning_rate": 3.8873953302184283e-07,
      "loss": 0.26866400241851807,
      "step": 223,
      "token_acc": 0.9058283567428589
    },
    {
      "epoch": 1.1922563417890522,
      "grad_norm": 1.4022951210583676,
      "learning_rate": 3.844542235616e-07,
      "loss": 0.23354247212409973,
      "step": 224,
      "token_acc": 0.919279158115387
    },
    {
      "epoch": 1.1975967957276368,
      "grad_norm": 1.5285809712268623,
      "learning_rate": 3.801778618666939e-07,
      "loss": 0.2832622230052948,
      "step": 225,
      "token_acc": 0.9032950401306152
    },
    {
      "epoch": 1.2029372496662216,
      "grad_norm": 1.430275686725923,
      "learning_rate": 3.7591077909488813e-07,
      "loss": 0.27595195174217224,
      "step": 226,
      "token_acc": 0.9067350625991821
    },
    {
      "epoch": 1.2082777036048065,
      "grad_norm": 1.555897792468559,
      "learning_rate": 3.7165330568539476e-07,
      "loss": 0.300298810005188,
      "step": 227,
      "token_acc": 0.896763801574707
    },
    {
      "epoch": 1.213618157543391,
      "grad_norm": 1.3452269331461637,
      "learning_rate": 3.674057713332852e-07,
      "loss": 0.22913071513175964,
      "step": 228,
      "token_acc": 0.9232072830200195
    },
    {
      "epoch": 1.218958611481976,
      "grad_norm": 1.4295420285953415,
      "learning_rate": 3.6316850496395855e-07,
      "loss": 0.23216448724269867,
      "step": 229,
      "token_acc": 0.9208974242210388
    },
    {
      "epoch": 1.2242990654205608,
      "grad_norm": 1.5005109210982908,
      "learning_rate": 3.5894183470767024e-07,
      "loss": 0.2616773247718811,
      "step": 230,
      "token_acc": 0.9137930870056152
    },
    {
      "epoch": 1.2296395193591456,
      "grad_norm": 1.469159178335662,
      "learning_rate": 3.5472608787412196e-07,
      "loss": 0.26962506771087646,
      "step": 231,
      "token_acc": 0.9102106690406799
    },
    {
      "epoch": 1.2349799732977302,
      "grad_norm": 1.4387277452443312,
      "learning_rate": 3.5052159092711487e-07,
      "loss": 0.25072669982910156,
      "step": 232,
      "token_acc": 0.9132289290428162
    },
    {
      "epoch": 1.240320427236315,
      "grad_norm": 1.5712551751033983,
      "learning_rate": 3.463286694592685e-07,
      "loss": 0.27846434712409973,
      "step": 233,
      "token_acc": 0.905695915222168
    },
    {
      "epoch": 1.2456608811749,
      "grad_norm": 1.4683847168955748,
      "learning_rate": 3.4214764816680765e-07,
      "loss": 0.26152026653289795,
      "step": 234,
      "token_acc": 0.9105028510093689
    },
    {
      "epoch": 1.2510013351134845,
      "grad_norm": 1.362720771911512,
      "learning_rate": 3.3797885082441714e-07,
      "loss": 0.24608175456523895,
      "step": 235,
      "token_acc": 0.9162537455558777
    },
    {
      "epoch": 1.2563417890520694,
      "grad_norm": 1.5120452174304069,
      "learning_rate": 3.338226002601702e-07,
      "loss": 0.26837417483329773,
      "step": 236,
      "token_acc": 0.9068318605422974
    },
    {
      "epoch": 1.2616822429906542,
      "grad_norm": 1.5092509395624194,
      "learning_rate": 3.2967921833052824e-07,
      "loss": 0.2892254590988159,
      "step": 237,
      "token_acc": 0.9020274877548218
    },
    {
      "epoch": 1.267022696929239,
      "grad_norm": 1.4355643996176837,
      "learning_rate": 3.2554902589541664e-07,
      "loss": 0.26041290163993835,
      "step": 238,
      "token_acc": 0.9089854955673218
    },
    {
      "epoch": 1.2723631508678237,
      "grad_norm": 1.446021243644821,
      "learning_rate": 3.214323427933776e-07,
      "loss": 0.2684992551803589,
      "step": 239,
      "token_acc": 0.9068037271499634
    },
    {
      "epoch": 1.2777036048064085,
      "grad_norm": 1.4168442387057927,
      "learning_rate": 3.173294878168025e-07,
      "loss": 0.24656590819358826,
      "step": 240,
      "token_acc": 0.9142815470695496
    },
    {
      "epoch": 1.2830440587449934,
      "grad_norm": 1.4362139424394869,
      "learning_rate": 3.132407786872442e-07,
      "loss": 0.2668442726135254,
      "step": 241,
      "token_acc": 0.9060892462730408
    },
    {
      "epoch": 1.288384512683578,
      "grad_norm": 1.4293904830266082,
      "learning_rate": 3.0916653203081366e-07,
      "loss": 0.2602171301841736,
      "step": 242,
      "token_acc": 0.9075544476509094
    },
    {
      "epoch": 1.2937249666221629,
      "grad_norm": 1.4435873738245797,
      "learning_rate": 3.0510706335366034e-07,
      "loss": 0.259932279586792,
      "step": 243,
      "token_acc": 0.9109272956848145
    },
    {
      "epoch": 1.2990654205607477,
      "grad_norm": 1.559852301450218,
      "learning_rate": 3.010626870175396e-07,
      "loss": 0.26695287227630615,
      "step": 244,
      "token_acc": 0.90992671251297
    },
    {
      "epoch": 1.3044058744993325,
      "grad_norm": 1.44807428700543,
      "learning_rate": 2.97033716215469e-07,
      "loss": 0.271627813577652,
      "step": 245,
      "token_acc": 0.9043883085250854
    },
    {
      "epoch": 1.3097463284379172,
      "grad_norm": 1.4380708495361707,
      "learning_rate": 2.9302046294747496e-07,
      "loss": 0.264931321144104,
      "step": 246,
      "token_acc": 0.9069004654884338
    },
    {
      "epoch": 1.315086782376502,
      "grad_norm": 1.5047225277819458,
      "learning_rate": 2.8902323799643116e-07,
      "loss": 0.27212783694267273,
      "step": 247,
      "token_acc": 0.904800295829773
    },
    {
      "epoch": 1.3204272363150868,
      "grad_norm": 1.540729426597796,
      "learning_rate": 2.8504235090399275e-07,
      "loss": 0.29125404357910156,
      "step": 248,
      "token_acc": 0.8967776298522949
    },
    {
      "epoch": 1.3257676902536715,
      "grad_norm": 1.493945990007267,
      "learning_rate": 2.8107810994662496e-07,
      "loss": 0.28409773111343384,
      "step": 249,
      "token_acc": 0.9039379954338074
    },
    {
      "epoch": 1.3311081441922563,
      "grad_norm": 1.432138392794738,
      "learning_rate": 2.771308221117309e-07,
      "loss": 0.2491723746061325,
      "step": 250,
      "token_acc": 0.9145965576171875
    },
    {
      "epoch": 1.3364485981308412,
      "grad_norm": 1.4727787949664666,
      "learning_rate": 2.7320079307387785e-07,
      "loss": 0.28228527307510376,
      "step": 251,
      "token_acc": 0.904411256313324
    },
    {
      "epoch": 1.341789052069426,
      "grad_norm": 1.449287226237925,
      "learning_rate": 2.692883271711275e-07,
      "loss": 0.27173715829849243,
      "step": 252,
      "token_acc": 0.9059191942214966
    },
    {
      "epoch": 1.3471295060080106,
      "grad_norm": 1.3798929264973847,
      "learning_rate": 2.6539372738146694e-07,
      "loss": 0.23727157711982727,
      "step": 253,
      "token_acc": 0.9166837334632874
    },
    {
      "epoch": 1.3524699599465955,
      "grad_norm": 1.435173104583045,
      "learning_rate": 2.6151729529934717e-07,
      "loss": 0.27902302145957947,
      "step": 254,
      "token_acc": 0.9027296900749207
    },
    {
      "epoch": 1.3578104138851803,
      "grad_norm": 1.402087384499151,
      "learning_rate": 2.576593311123273e-07,
      "loss": 0.23791877925395966,
      "step": 255,
      "token_acc": 0.9171873927116394
    },
    {
      "epoch": 1.363150867823765,
      "grad_norm": 1.4093689237147342,
      "learning_rate": 2.5382013357782887e-07,
      "loss": 0.25300198793411255,
      "step": 256,
      "token_acc": 0.9124014377593994
    },
    {
      "epoch": 1.3684913217623498,
      "grad_norm": 1.4571113873033046,
      "learning_rate": 2.500000000000001e-07,
      "loss": 0.2664501965045929,
      "step": 257,
      "token_acc": 0.9091030955314636
    },
    {
      "epoch": 1.3738317757009346,
      "grad_norm": 1.3968349109932723,
      "learning_rate": 2.4619922620669215e-07,
      "loss": 0.2595187723636627,
      "step": 258,
      "token_acc": 0.9105026721954346
    },
    {
      "epoch": 1.3791722296395195,
      "grad_norm": 1.4044152527446263,
      "learning_rate": 2.424181065265519e-07,
      "loss": 0.25990092754364014,
      "step": 259,
      "token_acc": 0.9086959362030029
    },
    {
      "epoch": 1.384512683578104,
      "grad_norm": 1.3909935473869073,
      "learning_rate": 2.3865693376622837e-07,
      "loss": 0.24984635412693024,
      "step": 260,
      "token_acc": 0.9132649302482605
    },
    {
      "epoch": 1.389853137516689,
      "grad_norm": 1.4678526105359815,
      "learning_rate": 2.349159991876985e-07,
      "loss": 0.2550868093967438,
      "step": 261,
      "token_acc": 0.911477267742157
    },
    {
      "epoch": 1.3951935914552738,
      "grad_norm": 1.5206242222795416,
      "learning_rate": 2.3119559248571125e-07,
      "loss": 0.2857544720172882,
      "step": 262,
      "token_acc": 0.9032999873161316
    },
    {
      "epoch": 1.4005340453938584,
      "grad_norm": 1.5523953005488513,
      "learning_rate": 2.2749600176535533e-07,
      "loss": 0.2837677597999573,
      "step": 263,
      "token_acc": 0.9017560482025146
    },
    {
      "epoch": 1.4058744993324432,
      "grad_norm": 1.4341607986577565,
      "learning_rate": 2.238175135197471e-07,
      "loss": 0.2448330521583557,
      "step": 264,
      "token_acc": 0.9117502570152283
    },
    {
      "epoch": 1.411214953271028,
      "grad_norm": 1.3657319954662046,
      "learning_rate": 2.20160412607846e-07,
      "loss": 0.22466401755809784,
      "step": 265,
      "token_acc": 0.9228646159172058
    },
    {
      "epoch": 1.416555407209613,
      "grad_norm": 1.4134869097230374,
      "learning_rate": 2.1652498223239424e-07,
      "loss": 0.24781662225723267,
      "step": 266,
      "token_acc": 0.917634904384613
    },
    {
      "epoch": 1.4218958611481975,
      "grad_norm": 1.3722889014401347,
      "learning_rate": 2.12911503917987e-07,
      "loss": 0.25652462244033813,
      "step": 267,
      "token_acc": 0.9106440544128418
    },
    {
      "epoch": 1.4272363150867824,
      "grad_norm": 1.4396810617156974,
      "learning_rate": 2.0932025748927014e-07,
      "loss": 0.26380470395088196,
      "step": 268,
      "token_acc": 0.9076635241508484
    },
    {
      "epoch": 1.4325767690253672,
      "grad_norm": 1.463697565339671,
      "learning_rate": 2.0575152104927184e-07,
      "loss": 0.2753934860229492,
      "step": 269,
      "token_acc": 0.904605507850647
    },
    {
      "epoch": 1.4379172229639519,
      "grad_norm": 1.383758809568443,
      "learning_rate": 2.022055709578663e-07,
      "loss": 0.23397940397262573,
      "step": 270,
      "token_acc": 0.9185501337051392
    },
    {
      "epoch": 1.4432576769025367,
      "grad_norm": 1.4888548606644507,
      "learning_rate": 1.9868268181037184e-07,
      "loss": 0.2751680016517639,
      "step": 271,
      "token_acc": 0.9026899337768555
    },
    {
      "epoch": 1.4485981308411215,
      "grad_norm": 1.4714995717657386,
      "learning_rate": 1.9518312641628794e-07,
      "loss": 0.29395294189453125,
      "step": 272,
      "token_acc": 0.8981555700302124
    },
    {
      "epoch": 1.4539385847797064,
      "grad_norm": 1.4874925723897325,
      "learning_rate": 1.9170717577816786e-07,
      "loss": 0.2756873369216919,
      "step": 273,
      "token_acc": 0.9020717144012451
    },
    {
      "epoch": 1.459279038718291,
      "grad_norm": 1.500702881912016,
      "learning_rate": 1.8825509907063326e-07,
      "loss": 0.26557326316833496,
      "step": 274,
      "token_acc": 0.9083135724067688
    },
    {
      "epoch": 1.4646194926568759,
      "grad_norm": 1.3604816080833995,
      "learning_rate": 1.8482716361952866e-07,
      "loss": 0.21450334787368774,
      "step": 275,
      "token_acc": 0.9234243631362915
    },
    {
      "epoch": 1.4699599465954605,
      "grad_norm": 1.4858528642325521,
      "learning_rate": 1.814236348812211e-07,
      "loss": 0.28684794902801514,
      "step": 276,
      "token_acc": 0.9020061492919922
    },
    {
      "epoch": 1.4753004005340453,
      "grad_norm": 1.461134060884768,
      "learning_rate": 1.780447764220422e-07,
      "loss": 0.25680720806121826,
      "step": 277,
      "token_acc": 0.9088454842567444
    },
    {
      "epoch": 1.4806408544726302,
      "grad_norm": 1.5159158289627939,
      "learning_rate": 1.7469084989787908e-07,
      "loss": 0.2583525776863098,
      "step": 278,
      "token_acc": 0.9110085368156433
    },
    {
      "epoch": 1.485981308411215,
      "grad_norm": 1.3920994885118625,
      "learning_rate": 1.7136211503391064e-07,
      "loss": 0.24292919039726257,
      "step": 279,
      "token_acc": 0.9164571762084961
    },
    {
      "epoch": 1.4913217623497999,
      "grad_norm": 1.4616319858485727,
      "learning_rate": 1.6805882960449591e-07,
      "loss": 0.25447607040405273,
      "step": 280,
      "token_acc": 0.9136971831321716
    },
    {
      "epoch": 1.4966622162883845,
      "grad_norm": 1.4763779390712473,
      "learning_rate": 1.647812494132112e-07,
      "loss": 0.26822683215141296,
      "step": 281,
      "token_acc": 0.9056375622749329
    },
    {
      "epoch": 1.5020026702269693,
      "grad_norm": 1.4849032070980095,
      "learning_rate": 1.6152962827304162e-07,
      "loss": 0.2860797643661499,
      "step": 282,
      "token_acc": 0.9022382497787476
    },
    {
      "epoch": 1.507343124165554,
      "grad_norm": 1.4115351347738414,
      "learning_rate": 1.5830421798672565e-07,
      "loss": 0.2623022794723511,
      "step": 283,
      "token_acc": 0.9100821018218994
    },
    {
      "epoch": 1.5126835781041388,
      "grad_norm": 1.3813342996484363,
      "learning_rate": 1.55105268327256e-07,
      "loss": 0.24317273497581482,
      "step": 284,
      "token_acc": 0.9143872261047363
    },
    {
      "epoch": 1.5180240320427236,
      "grad_norm": 1.4375797257035856,
      "learning_rate": 1.5193302701853671e-07,
      "loss": 0.265342652797699,
      "step": 285,
      "token_acc": 0.9082852602005005
    },
    {
      "epoch": 1.5233644859813085,
      "grad_norm": 1.414526730665197,
      "learning_rate": 1.4878773971620074e-07,
      "loss": 0.25253400206565857,
      "step": 286,
      "token_acc": 0.914754331111908
    },
    {
      "epoch": 1.5287049399198933,
      "grad_norm": 1.464593317753449,
      "learning_rate": 1.456696499885859e-07,
      "loss": 0.25316375494003296,
      "step": 287,
      "token_acc": 0.912530243396759
    },
    {
      "epoch": 1.534045393858478,
      "grad_norm": 1.4487442057968352,
      "learning_rate": 1.4257899929787292e-07,
      "loss": 0.2663083076477051,
      "step": 288,
      "token_acc": 0.9098187685012817
    },
    {
      "epoch": 1.5393858477970628,
      "grad_norm": 1.4399427803065452,
      "learning_rate": 1.395160269813877e-07,
      "loss": 0.26566773653030396,
      "step": 289,
      "token_acc": 0.9072151184082031
    },
    {
      "epoch": 1.5447263017356474,
      "grad_norm": 1.3931273371764323,
      "learning_rate": 1.3648097023306605e-07,
      "loss": 0.2460891157388687,
      "step": 290,
      "token_acc": 0.9154627919197083
    },
    {
      "epoch": 1.5500667556742322,
      "grad_norm": 1.3899520186448877,
      "learning_rate": 1.3347406408508694e-07,
      "loss": 0.23878102004528046,
      "step": 291,
      "token_acc": 0.9187286496162415
    },
    {
      "epoch": 1.555407209612817,
      "grad_norm": 1.459235915904728,
      "learning_rate": 1.304955413896705e-07,
      "loss": 0.26599904894828796,
      "step": 292,
      "token_acc": 0.9076880216598511
    },
    {
      "epoch": 1.560747663551402,
      "grad_norm": 1.3796536049033186,
      "learning_rate": 1.2754563280104714e-07,
      "loss": 0.247115358710289,
      "step": 293,
      "token_acc": 0.9128190875053406
    },
    {
      "epoch": 1.5660881174899868,
      "grad_norm": 1.4850247151523317,
      "learning_rate": 1.246245667575954e-07,
      "loss": 0.27901333570480347,
      "step": 294,
      "token_acc": 0.9044761657714844
    },
    {
      "epoch": 1.5714285714285714,
      "grad_norm": 1.520453732672435,
      "learning_rate": 1.217325694641521e-07,
      "loss": 0.2759890556335449,
      "step": 295,
      "token_acc": 0.9028441309928894
    },
    {
      "epoch": 1.5767690253671562,
      "grad_norm": 1.439575846463379,
      "learning_rate": 1.1886986487449474e-07,
      "loss": 0.25484615564346313,
      "step": 296,
      "token_acc": 0.9107190370559692
    },
    {
      "epoch": 1.5821094793057409,
      "grad_norm": 1.5015420964241837,
      "learning_rate": 1.1603667467399958e-07,
      "loss": 0.2664136588573456,
      "step": 297,
      "token_acc": 0.907971978187561
    },
    {
      "epoch": 1.5874499332443257,
      "grad_norm": 1.5466384256456123,
      "learning_rate": 1.1323321826247345e-07,
      "loss": 0.26804324984550476,
      "step": 298,
      "token_acc": 0.9053434133529663
    },
    {
      "epoch": 1.5927903871829105,
      "grad_norm": 1.4997487114215713,
      "learning_rate": 1.1045971273716475e-07,
      "loss": 0.3074328303337097,
      "step": 299,
      "token_acc": 0.8962905406951904
    },
    {
      "epoch": 1.5981308411214954,
      "grad_norm": 1.4672957276128578,
      "learning_rate": 1.0771637287595093e-07,
      "loss": 0.26438483595848083,
      "step": 300,
      "token_acc": 0.9071192741394043
    },
    {
      "epoch": 1.6034712950600802,
      "grad_norm": 1.4219807693957391,
      "learning_rate": 1.0500341112070605e-07,
      "loss": 0.25495845079421997,
      "step": 301,
      "token_acc": 0.910598874092102
    },
    {
      "epoch": 1.6088117489986649,
      "grad_norm": 1.3842935411591768,
      "learning_rate": 1.0232103756085031e-07,
      "loss": 0.2260182797908783,
      "step": 302,
      "token_acc": 0.9204638004302979
    },
    {
      "epoch": 1.6141522029372497,
      "grad_norm": 1.338168304733856,
      "learning_rate": 9.966945991708003e-08,
      "loss": 0.22434785962104797,
      "step": 303,
      "token_acc": 0.9186108708381653
    },
    {
      "epoch": 1.6194926568758343,
      "grad_norm": 1.4183012454769175,
      "learning_rate": 9.704888352528257e-08,
      "loss": 0.25781041383743286,
      "step": 304,
      "token_acc": 0.9086217284202576
    },
    {
      "epoch": 1.6248331108144192,
      "grad_norm": 1.389770866870741,
      "learning_rate": 9.4459511320635e-08,
      "loss": 0.2480362057685852,
      "step": 305,
      "token_acc": 0.9142640829086304
    },
    {
      "epoch": 1.630173564753004,
      "grad_norm": 1.406383862980942,
      "learning_rate": 9.190154382188919e-08,
      "loss": 0.2541887164115906,
      "step": 306,
      "token_acc": 0.9115785360336304
    },
    {
      "epoch": 1.6355140186915889,
      "grad_norm": 1.4319487792993628,
      "learning_rate": 8.93751791158432e-08,
      "loss": 0.26790666580200195,
      "step": 307,
      "token_acc": 0.9076664447784424
    },
    {
      "epoch": 1.6408544726301737,
      "grad_norm": 1.401546988673429,
      "learning_rate": 8.688061284200265e-08,
      "loss": 0.2590189576148987,
      "step": 308,
      "token_acc": 0.9114912748336792
    },
    {
      "epoch": 1.6461949265687583,
      "grad_norm": 1.4684165295805367,
      "learning_rate": 8.441803817742937e-08,
      "loss": 0.26799529790878296,
      "step": 309,
      "token_acc": 0.9075418710708618
    },
    {
      "epoch": 1.6515353805073432,
      "grad_norm": 1.3883776742949236,
      "learning_rate": 8.198764582178303e-08,
      "loss": 0.25157350301742554,
      "step": 310,
      "token_acc": 0.9110392928123474
    },
    {
      "epoch": 1.6568758344459278,
      "grad_norm": 1.5014584285261854,
      "learning_rate": 7.958962398255292e-08,
      "loss": 0.26508602499961853,
      "step": 311,
      "token_acc": 0.9058190584182739
    },
    {
      "epoch": 1.6622162883845126,
      "grad_norm": 1.481468715143014,
      "learning_rate": 7.722415836048319e-08,
      "loss": 0.2786027193069458,
      "step": 312,
      "token_acc": 0.9030973315238953
    },
    {
      "epoch": 1.6675567423230975,
      "grad_norm": 1.4189335511856311,
      "learning_rate": 7.4891432135193e-08,
      "loss": 0.26643815636634827,
      "step": 313,
      "token_acc": 0.9056907296180725
    },
    {
      "epoch": 1.6728971962616823,
      "grad_norm": 1.47480277718732,
      "learning_rate": 7.259162595099038e-08,
      "loss": 0.26537132263183594,
      "step": 314,
      "token_acc": 0.9071456789970398
    },
    {
      "epoch": 1.6782376502002672,
      "grad_norm": 1.356507996068163,
      "learning_rate": 7.032491790288414e-08,
      "loss": 0.24253031611442566,
      "step": 315,
      "token_acc": 0.917544960975647
    },
    {
      "epoch": 1.6835781041388518,
      "grad_norm": 1.4763380360550427,
      "learning_rate": 6.809148352279182e-08,
      "loss": 0.2710064649581909,
      "step": 316,
      "token_acc": 0.907119870185852
    },
    {
      "epoch": 1.6889185580774366,
      "grad_norm": 1.4917398315673782,
      "learning_rate": 6.589149576594683e-08,
      "loss": 0.2452443689107895,
      "step": 317,
      "token_acc": 0.9166606664657593
    },
    {
      "epoch": 1.6942590120160212,
      "grad_norm": 1.5033403018878553,
      "learning_rate": 6.372512499750471e-08,
      "loss": 0.2632375657558441,
      "step": 318,
      "token_acc": 0.9092115163803101
    },
    {
      "epoch": 1.699599465954606,
      "grad_norm": 1.4432081810780555,
      "learning_rate": 6.159253897935068e-08,
      "loss": 0.27750128507614136,
      "step": 319,
      "token_acc": 0.9026854634284973
    },
    {
      "epoch": 1.704939919893191,
      "grad_norm": 1.4196352309149338,
      "learning_rate": 5.949390285710776e-08,
      "loss": 0.25379252433776855,
      "step": 320,
      "token_acc": 0.9134438037872314
    },
    {
      "epoch": 1.7102803738317758,
      "grad_norm": 1.4843608214550161,
      "learning_rate": 5.7429379147348524e-08,
      "loss": 0.27610349655151367,
      "step": 321,
      "token_acc": 0.9039999842643738
    },
    {
      "epoch": 1.7156208277703606,
      "grad_norm": 1.4257894063539942,
      "learning_rate": 5.539912772500943e-08,
      "loss": 0.25680220127105713,
      "step": 322,
      "token_acc": 0.9101070165634155
    },
    {
      "epoch": 1.7209612817089452,
      "grad_norm": 1.4362523419923912,
      "learning_rate": 5.340330581101088e-08,
      "loss": 0.24241293966770172,
      "step": 323,
      "token_acc": 0.9187228083610535
    },
    {
      "epoch": 1.72630173564753,
      "grad_norm": 1.462338984812856,
      "learning_rate": 5.144206796008171e-08,
      "loss": 0.2747628688812256,
      "step": 324,
      "token_acc": 0.9040683507919312
    },
    {
      "epoch": 1.7316421895861147,
      "grad_norm": 1.370844349885608,
      "learning_rate": 4.951556604879048e-08,
      "loss": 0.2372513860464096,
      "step": 325,
      "token_acc": 0.9169549942016602
    },
    {
      "epoch": 1.7369826435246996,
      "grad_norm": 1.4254978954280453,
      "learning_rate": 4.762394926378477e-08,
      "loss": 0.23891480267047882,
      "step": 326,
      "token_acc": 0.9169511795043945
    },
    {
      "epoch": 1.7423230974632844,
      "grad_norm": 1.3309258402811488,
      "learning_rate": 4.5767364090238125e-08,
      "loss": 0.22812946140766144,
      "step": 327,
      "token_acc": 0.9210246205329895
    },
    {
      "epoch": 1.7476635514018692,
      "grad_norm": 1.4525636268924094,
      "learning_rate": 4.394595430050613e-08,
      "loss": 0.2848016023635864,
      "step": 328,
      "token_acc": 0.9006134867668152
    },
    {
      "epoch": 1.753004005340454,
      "grad_norm": 1.4359976775542875,
      "learning_rate": 4.2159860942992896e-08,
      "loss": 0.25562846660614014,
      "step": 329,
      "token_acc": 0.9098303914070129
    },
    {
      "epoch": 1.7583444592790387,
      "grad_norm": 1.3655617783145657,
      "learning_rate": 4.040922233122867e-08,
      "loss": 0.24017946422100067,
      "step": 330,
      "token_acc": 0.9158671498298645
    },
    {
      "epoch": 1.7636849132176236,
      "grad_norm": 1.4042925240863642,
      "learning_rate": 3.869417403315855e-08,
      "loss": 0.2531910240650177,
      "step": 331,
      "token_acc": 0.9119323492050171
    },
    {
      "epoch": 1.7690253671562082,
      "grad_norm": 1.4140198338870087,
      "learning_rate": 3.7014848860644656e-08,
      "loss": 0.2664337456226349,
      "step": 332,
      "token_acc": 0.9074438214302063
    },
    {
      "epoch": 1.774365821094793,
      "grad_norm": 1.4344530764398091,
      "learning_rate": 3.537137685918074e-08,
      "loss": 0.23290452361106873,
      "step": 333,
      "token_acc": 0.9213561415672302
    },
    {
      "epoch": 1.7797062750333779,
      "grad_norm": 1.4779216750030324,
      "learning_rate": 3.376388529782215e-08,
      "loss": 0.2818738520145416,
      "step": 334,
      "token_acc": 0.9026679396629333
    },
    {
      "epoch": 1.7850467289719627,
      "grad_norm": 1.365057462635337,
      "learning_rate": 3.2192498659329825e-08,
      "loss": 0.23468706011772156,
      "step": 335,
      "token_acc": 0.9187262058258057
    },
    {
      "epoch": 1.7903871829105475,
      "grad_norm": 1.3882531080856395,
      "learning_rate": 3.0657338630530714e-08,
      "loss": 0.2264155149459839,
      "step": 336,
      "token_acc": 0.9217442274093628
    },
    {
      "epoch": 1.7957276368491322,
      "grad_norm": 1.4836028672452228,
      "learning_rate": 2.915852409289421e-08,
      "loss": 0.301832377910614,
      "step": 337,
      "token_acc": 0.8975476026535034
    },
    {
      "epoch": 1.8010680907877168,
      "grad_norm": 1.4567174679583408,
      "learning_rate": 2.7696171113326394e-08,
      "loss": 0.26500403881073,
      "step": 338,
      "token_acc": 0.9114041328430176
    },
    {
      "epoch": 1.8064085447263016,
      "grad_norm": 1.3758769332034844,
      "learning_rate": 2.627039293518141e-08,
      "loss": 0.24441155791282654,
      "step": 339,
      "token_acc": 0.916365921497345
    },
    {
      "epoch": 1.8117489986648865,
      "grad_norm": 1.4531906171284417,
      "learning_rate": 2.488129996949251e-08,
      "loss": 0.28229498863220215,
      "step": 340,
      "token_acc": 0.9021169543266296
    },
    {
      "epoch": 1.8170894526034713,
      "grad_norm": 1.381877569909683,
      "learning_rate": 2.3528999786421756e-08,
      "loss": 0.24434484541416168,
      "step": 341,
      "token_acc": 0.9159563183784485
    },
    {
      "epoch": 1.8224299065420562,
      "grad_norm": 1.5411229546858307,
      "learning_rate": 2.2213597106929605e-08,
      "loss": 0.27012330293655396,
      "step": 342,
      "token_acc": 0.9059337973594666
    },
    {
      "epoch": 1.827770360480641,
      "grad_norm": 1.4831531485158305,
      "learning_rate": 2.0935193794666016e-08,
      "loss": 0.27244606614112854,
      "step": 343,
      "token_acc": 0.90367192029953
    },
    {
      "epoch": 1.8331108144192256,
      "grad_norm": 1.5545043069986804,
      "learning_rate": 1.9693888848081374e-08,
      "loss": 0.3090653717517853,
      "step": 344,
      "token_acc": 0.8940036296844482
    },
    {
      "epoch": 1.8384512683578103,
      "grad_norm": 1.4071056028454143,
      "learning_rate": 1.848977839276106e-08,
      "loss": 0.24859920144081116,
      "step": 345,
      "token_acc": 0.914310872554779
    },
    {
      "epoch": 1.843791722296395,
      "grad_norm": 1.4290783741940223,
      "learning_rate": 1.7322955673980676e-08,
      "loss": 0.26679256558418274,
      "step": 346,
      "token_acc": 0.9072739481925964
    },
    {
      "epoch": 1.84913217623498,
      "grad_norm": 1.3767009402112278,
      "learning_rate": 1.6193511049486076e-08,
      "loss": 0.24236524105072021,
      "step": 347,
      "token_acc": 0.9140719175338745
    },
    {
      "epoch": 1.8544726301735648,
      "grad_norm": 1.580188509090287,
      "learning_rate": 1.5101531982495308e-08,
      "loss": 0.27757054567337036,
      "step": 348,
      "token_acc": 0.9031488299369812
    },
    {
      "epoch": 1.8598130841121496,
      "grad_norm": 1.43000083713732,
      "learning_rate": 1.4047103034926177e-08,
      "loss": 0.26554566621780396,
      "step": 349,
      "token_acc": 0.9084463119506836
    },
    {
      "epoch": 1.8651535380507345,
      "grad_norm": 1.3630578163347118,
      "learning_rate": 1.3030305860847634e-08,
      "loss": 0.24486608803272247,
      "step": 350,
      "token_acc": 0.9160959720611572
    },
    {
      "epoch": 1.870493991989319,
      "grad_norm": 1.4148054827921452,
      "learning_rate": 1.2051219200156394e-08,
      "loss": 0.2587983310222626,
      "step": 351,
      "token_acc": 0.9098652601242065
    },
    {
      "epoch": 1.8758344459279037,
      "grad_norm": 1.5523977007722423,
      "learning_rate": 1.110991887247964e-08,
      "loss": 0.282438188791275,
      "step": 352,
      "token_acc": 0.9023434519767761
    },
    {
      "epoch": 1.8811748998664886,
      "grad_norm": 1.3801390415938055,
      "learning_rate": 1.0206477771303234e-08,
      "loss": 0.2522791922092438,
      "step": 353,
      "token_acc": 0.9178361892700195
    },
    {
      "epoch": 1.8865153538050734,
      "grad_norm": 1.3966295330897605,
      "learning_rate": 9.34096585832761e-09,
      "loss": 0.24833403527736664,
      "step": 354,
      "token_acc": 0.9156897068023682
    },
    {
      "epoch": 1.8918558077436582,
      "grad_norm": 1.481195424551228,
      "learning_rate": 8.513450158049106e-09,
      "loss": 0.25216856598854065,
      "step": 355,
      "token_acc": 0.9140508770942688
    },
    {
      "epoch": 1.897196261682243,
      "grad_norm": 1.4551430455448862,
      "learning_rate": 7.723994752570461e-09,
      "loss": 0.2929322421550751,
      "step": 356,
      "token_acc": 0.9002907276153564
    },
    {
      "epoch": 1.902536715620828,
      "grad_norm": 1.4946637939695147,
      "learning_rate": 6.9726607766376625e-09,
      "loss": 0.2807663679122925,
      "step": 357,
      "token_acc": 0.9045160412788391
    },
    {
      "epoch": 1.9078771695594126,
      "grad_norm": 1.3924130148617424,
      "learning_rate": 6.259506412906402e-09,
      "loss": 0.2570255398750305,
      "step": 358,
      "token_acc": 0.912279486656189
    },
    {
      "epoch": 1.9132176234979972,
      "grad_norm": 1.4919850065972815,
      "learning_rate": 5.5845868874357385e-09,
      "loss": 0.2989378869533539,
      "step": 359,
      "token_acc": 0.8965189456939697
    },
    {
      "epoch": 1.918558077436582,
      "grad_norm": 1.4349317320728794,
      "learning_rate": 4.9479544654121295e-09,
      "loss": 0.270618736743927,
      "step": 360,
      "token_acc": 0.90790855884552
    },
    {
      "epoch": 1.9238985313751669,
      "grad_norm": 1.4467606012656322,
      "learning_rate": 4.349658447101612e-09,
      "loss": 0.27248671650886536,
      "step": 361,
      "token_acc": 0.9059975147247314
    },
    {
      "epoch": 1.9292389853137517,
      "grad_norm": 1.3826848485657364,
      "learning_rate": 3.7897451640321324e-09,
      "loss": 0.24656200408935547,
      "step": 362,
      "token_acc": 0.9151585102081299
    },
    {
      "epoch": 1.9345794392523366,
      "grad_norm": 1.478220080119649,
      "learning_rate": 3.268257975405697e-09,
      "loss": 0.2581631541252136,
      "step": 363,
      "token_acc": 0.9104219675064087
    },
    {
      "epoch": 1.9399198931909212,
      "grad_norm": 1.499147178861332,
      "learning_rate": 2.7852372647407805e-09,
      "loss": 0.2823525667190552,
      "step": 364,
      "token_acc": 0.9019198417663574
    },
    {
      "epoch": 1.945260347129506,
      "grad_norm": 1.3588527844662899,
      "learning_rate": 2.3407204367448806e-09,
      "loss": 0.22563135623931885,
      "step": 365,
      "token_acc": 0.9197922348976135
    },
    {
      "epoch": 1.9506008010680906,
      "grad_norm": 1.5051895476035058,
      "learning_rate": 1.9347419144180034e-09,
      "loss": 0.26220065355300903,
      "step": 366,
      "token_acc": 0.9092711806297302
    },
    {
      "epoch": 1.9559412550066755,
      "grad_norm": 1.4274692153792508,
      "learning_rate": 1.5673331363870169e-09,
      "loss": 0.25699615478515625,
      "step": 367,
      "token_acc": 0.9097827672958374
    },
    {
      "epoch": 1.9612817089452603,
      "grad_norm": 1.3804954213494365,
      "learning_rate": 1.2385225544709887e-09,
      "loss": 0.254372239112854,
      "step": 368,
      "token_acc": 0.9101460576057434
    },
    {
      "epoch": 1.9666221628838452,
      "grad_norm": 1.4953065449874243,
      "learning_rate": 9.483356314779478e-10,
      "loss": 0.29659712314605713,
      "step": 369,
      "token_acc": 0.9007611870765686
    },
    {
      "epoch": 1.97196261682243,
      "grad_norm": 1.4423339718569121,
      "learning_rate": 6.967948392331835e-10,
      "loss": 0.27328187227249146,
      "step": 370,
      "token_acc": 0.9044981598854065
    },
    {
      "epoch": 1.9773030707610146,
      "grad_norm": 1.4517352449230594,
      "learning_rate": 4.839196568388049e-10,
      "loss": 0.272581547498703,
      "step": 371,
      "token_acc": 0.9086258411407471
    },
    {
      "epoch": 1.9826435246995995,
      "grad_norm": 1.3739514757587716,
      "learning_rate": 3.097265691653916e-10,
      "loss": 0.231607586145401,
      "step": 372,
      "token_acc": 0.9167190194129944
    },
    {
      "epoch": 1.987983978638184,
      "grad_norm": 1.4160776023389043,
      "learning_rate": 1.742290655755707e-10,
      "loss": 0.26814740896224976,
      "step": 373,
      "token_acc": 0.9057011604309082
    },
    {
      "epoch": 1.993324432576769,
      "grad_norm": 1.4320277107866837,
      "learning_rate": 7.743763887924081e-11,
      "loss": 0.26162806153297424,
      "step": 374,
      "token_acc": 0.9093886613845825
    },
    {
      "epoch": 1.9986648865153538,
      "grad_norm": 1.488518242502641,
      "learning_rate": 1.9359784521055798e-11,
      "loss": 0.26544928550720215,
      "step": 375,
      "token_acc": 0.9078947305679321
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.093963088385598,
      "learning_rate": 0.0,
      "loss": 0.263308048248291,
      "step": 376,
      "token_acc": 0.914944589138031
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.29051336646080017,
      "eval_runtime": 4.255,
      "eval_samples_per_second": 4.935,
      "eval_steps_per_second": 0.705,
      "eval_token_acc": 0.8918529152870178,
      "step": 376
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.29051336646080017,
      "eval_runtime": 3.9858,
      "eval_samples_per_second": 5.269,
      "eval_steps_per_second": 0.753,
      "eval_token_acc": 0.8918529152870178,
      "step": 376
    }
  ],
  "logging_steps": 1,
  "max_steps": 376,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 270447029338112.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
