diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,154708 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 22095, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 4.525910839556461e-05, + "grad_norm": 8.66013199834246, + "learning_rate": 0.0, + "loss": 0.8106, + "step": 1 + }, + { + "epoch": 9.051821679112921e-05, + "grad_norm": 8.116963999167655, + "learning_rate": 1.5082956259426848e-08, + "loss": 0.8228, + "step": 2 + }, + { + "epoch": 0.00013577732518669383, + "grad_norm": 8.805888630344679, + "learning_rate": 3.0165912518853697e-08, + "loss": 0.8379, + "step": 3 + }, + { + "epoch": 0.00018103643358225843, + "grad_norm": 10.413908076660961, + "learning_rate": 4.524886877828055e-08, + "loss": 0.8215, + "step": 4 + }, + { + "epoch": 0.00022629554197782303, + "grad_norm": 10.270466065239447, + "learning_rate": 6.033182503770739e-08, + "loss": 0.7408, + "step": 5 + }, + { + "epoch": 0.00027155465037338765, + "grad_norm": 8.667172848979332, + "learning_rate": 7.541478129713425e-08, + "loss": 0.871, + "step": 6 + }, + { + "epoch": 0.00031681375876895223, + "grad_norm": 7.875222542229044, + "learning_rate": 9.04977375565611e-08, + "loss": 0.826, + "step": 7 + }, + { + "epoch": 0.00036207286716451686, + "grad_norm": 9.852921507702614, + "learning_rate": 1.0558069381598795e-07, + "loss": 0.7683, + "step": 8 + }, + { + "epoch": 0.0004073319755600815, + "grad_norm": 2.931646823848608, + "learning_rate": 1.2066365007541479e-07, + "loss": 0.7524, + "step": 9 + }, + { + "epoch": 0.00045259108395564606, + "grad_norm": 7.699231177320019, + "learning_rate": 1.3574660633484163e-07, + "loss": 0.8115, + "step": 10 + }, + { + "epoch": 0.0004978501923512107, + "grad_norm": 8.48382482276139, + "learning_rate": 1.508295625942685e-07, + "loss": 0.8242, + "step": 11 + }, + { + "epoch": 0.0005431093007467753, + "grad_norm": 3.0335535418133057, + "learning_rate": 1.6591251885369535e-07, + "loss": 0.7765, + "step": 12 + }, + { + "epoch": 0.0005883684091423399, + "grad_norm": 7.977093909893501, + "learning_rate": 1.809954751131222e-07, + "loss": 0.8172, + "step": 13 + }, + { + "epoch": 0.0006336275175379045, + "grad_norm": 8.497572549254802, + "learning_rate": 1.9607843137254904e-07, + "loss": 0.8067, + "step": 14 + }, + { + "epoch": 0.0006788866259334691, + "grad_norm": 2.8940758258319446, + "learning_rate": 2.111613876319759e-07, + "loss": 0.7585, + "step": 15 + }, + { + "epoch": 0.0007241457343290337, + "grad_norm": 2.855482184271788, + "learning_rate": 2.2624434389140273e-07, + "loss": 0.771, + "step": 16 + }, + { + "epoch": 0.0007694048427245983, + "grad_norm": 8.986529342997084, + "learning_rate": 2.4132730015082957e-07, + "loss": 0.8223, + "step": 17 + }, + { + "epoch": 0.000814663951120163, + "grad_norm": 6.395648167097882, + "learning_rate": 2.564102564102564e-07, + "loss": 0.7447, + "step": 18 + }, + { + "epoch": 0.0008599230595157276, + "grad_norm": 4.9798889312227725, + "learning_rate": 2.7149321266968326e-07, + "loss": 0.829, + "step": 19 + }, + { + "epoch": 0.0009051821679112921, + "grad_norm": 2.6740823934447224, + "learning_rate": 2.865761689291101e-07, + "loss": 0.7568, + "step": 20 + }, + { + "epoch": 0.0009504412763068567, + "grad_norm": 4.052800002632703, + "learning_rate": 3.01659125188537e-07, + "loss": 0.8026, + "step": 21 + }, + { + "epoch": 0.0009957003847024214, + "grad_norm": 2.9369933535648247, + "learning_rate": 3.167420814479638e-07, + "loss": 0.7492, + "step": 22 + }, + { + "epoch": 0.001040959493097986, + "grad_norm": 4.247413851142735, + "learning_rate": 3.318250377073907e-07, + "loss": 0.7575, + "step": 23 + }, + { + "epoch": 0.0010862186014935506, + "grad_norm": 3.879005288265461, + "learning_rate": 3.4690799396681754e-07, + "loss": 0.759, + "step": 24 + }, + { + "epoch": 0.0011314777098891152, + "grad_norm": 3.6440819833985922, + "learning_rate": 3.619909502262444e-07, + "loss": 0.7482, + "step": 25 + }, + { + "epoch": 0.0011767368182846799, + "grad_norm": 2.757922595973135, + "learning_rate": 3.770739064856712e-07, + "loss": 0.7626, + "step": 26 + }, + { + "epoch": 0.0012219959266802445, + "grad_norm": 3.832157008102444, + "learning_rate": 3.921568627450981e-07, + "loss": 0.7556, + "step": 27 + }, + { + "epoch": 0.001267255035075809, + "grad_norm": 3.372216966665436, + "learning_rate": 4.072398190045249e-07, + "loss": 0.7646, + "step": 28 + }, + { + "epoch": 0.0013125141434713735, + "grad_norm": 3.13813749845113, + "learning_rate": 4.223227752639518e-07, + "loss": 0.7665, + "step": 29 + }, + { + "epoch": 0.0013577732518669382, + "grad_norm": 2.8809864229440163, + "learning_rate": 4.374057315233786e-07, + "loss": 0.748, + "step": 30 + }, + { + "epoch": 0.0014030323602625028, + "grad_norm": 2.8897137086592086, + "learning_rate": 4.5248868778280546e-07, + "loss": 0.7738, + "step": 31 + }, + { + "epoch": 0.0014482914686580674, + "grad_norm": 2.942818955850594, + "learning_rate": 4.675716440422323e-07, + "loss": 0.738, + "step": 32 + }, + { + "epoch": 0.001493550577053632, + "grad_norm": 2.6309860751703624, + "learning_rate": 4.826546003016591e-07, + "loss": 0.7678, + "step": 33 + }, + { + "epoch": 0.0015388096854491967, + "grad_norm": 2.8062178485999776, + "learning_rate": 4.977375565610859e-07, + "loss": 0.7834, + "step": 34 + }, + { + "epoch": 0.0015840687938447613, + "grad_norm": 2.8268426312674646, + "learning_rate": 5.128205128205128e-07, + "loss": 0.7633, + "step": 35 + }, + { + "epoch": 0.001629327902240326, + "grad_norm": 2.4556470307247205, + "learning_rate": 5.279034690799397e-07, + "loss": 0.7394, + "step": 36 + }, + { + "epoch": 0.0016745870106358906, + "grad_norm": 2.472058064042505, + "learning_rate": 5.429864253393665e-07, + "loss": 0.7233, + "step": 37 + }, + { + "epoch": 0.0017198461190314552, + "grad_norm": 2.1758732925169255, + "learning_rate": 5.580693815987934e-07, + "loss": 0.7058, + "step": 38 + }, + { + "epoch": 0.0017651052274270196, + "grad_norm": 2.5628467709923193, + "learning_rate": 5.731523378582202e-07, + "loss": 0.7554, + "step": 39 + }, + { + "epoch": 0.0018103643358225842, + "grad_norm": 2.3023149629245503, + "learning_rate": 5.882352941176471e-07, + "loss": 0.7271, + "step": 40 + }, + { + "epoch": 0.0018556234442181488, + "grad_norm": 2.0801763787360965, + "learning_rate": 6.03318250377074e-07, + "loss": 0.6804, + "step": 41 + }, + { + "epoch": 0.0019008825526137135, + "grad_norm": 2.2400032166503094, + "learning_rate": 6.184012066365008e-07, + "loss": 0.7269, + "step": 42 + }, + { + "epoch": 0.001946141661009278, + "grad_norm": 2.074448591273433, + "learning_rate": 6.334841628959276e-07, + "loss": 0.724, + "step": 43 + }, + { + "epoch": 0.0019914007694048427, + "grad_norm": 2.052458166169399, + "learning_rate": 6.485671191553546e-07, + "loss": 0.7597, + "step": 44 + }, + { + "epoch": 0.002036659877800407, + "grad_norm": 2.3000279668851826, + "learning_rate": 6.636500754147814e-07, + "loss": 0.775, + "step": 45 + }, + { + "epoch": 0.002081918986195972, + "grad_norm": 2.040178744317726, + "learning_rate": 6.787330316742082e-07, + "loss": 0.6826, + "step": 46 + }, + { + "epoch": 0.0021271780945915364, + "grad_norm": 2.169650593616676, + "learning_rate": 6.938159879336351e-07, + "loss": 0.7196, + "step": 47 + }, + { + "epoch": 0.0021724372029871012, + "grad_norm": 2.243237967368775, + "learning_rate": 7.088989441930619e-07, + "loss": 0.6534, + "step": 48 + }, + { + "epoch": 0.0022176963113826656, + "grad_norm": 1.805594330039211, + "learning_rate": 7.239819004524888e-07, + "loss": 0.7239, + "step": 49 + }, + { + "epoch": 0.0022629554197782305, + "grad_norm": 1.95060511614219, + "learning_rate": 7.390648567119156e-07, + "loss": 0.7892, + "step": 50 + }, + { + "epoch": 0.002308214528173795, + "grad_norm": 2.0050821311562967, + "learning_rate": 7.541478129713424e-07, + "loss": 0.7541, + "step": 51 + }, + { + "epoch": 0.0023534736365693597, + "grad_norm": 1.591345649647068, + "learning_rate": 7.692307692307694e-07, + "loss": 0.653, + "step": 52 + }, + { + "epoch": 0.002398732744964924, + "grad_norm": 1.5359586585503124, + "learning_rate": 7.843137254901962e-07, + "loss": 0.7579, + "step": 53 + }, + { + "epoch": 0.002443991853360489, + "grad_norm": 1.4903440630350036, + "learning_rate": 7.993966817496229e-07, + "loss": 0.6981, + "step": 54 + }, + { + "epoch": 0.0024892509617560534, + "grad_norm": 1.576261684717775, + "learning_rate": 8.144796380090498e-07, + "loss": 0.7096, + "step": 55 + }, + { + "epoch": 0.002534510070151618, + "grad_norm": 1.4623041693607248, + "learning_rate": 8.295625942684766e-07, + "loss": 0.7056, + "step": 56 + }, + { + "epoch": 0.0025797691785471827, + "grad_norm": 1.3299793426063247, + "learning_rate": 8.446455505279036e-07, + "loss": 0.7375, + "step": 57 + }, + { + "epoch": 0.002625028286942747, + "grad_norm": 1.3229170043287524, + "learning_rate": 8.597285067873304e-07, + "loss": 0.6838, + "step": 58 + }, + { + "epoch": 0.002670287395338312, + "grad_norm": 1.3144587271599693, + "learning_rate": 8.748114630467572e-07, + "loss": 0.7115, + "step": 59 + }, + { + "epoch": 0.0027155465037338763, + "grad_norm": 1.6142307994153728, + "learning_rate": 8.898944193061841e-07, + "loss": 0.7371, + "step": 60 + }, + { + "epoch": 0.002760805612129441, + "grad_norm": 1.244580858498938, + "learning_rate": 9.049773755656109e-07, + "loss": 0.6543, + "step": 61 + }, + { + "epoch": 0.0028060647205250056, + "grad_norm": 1.2418783287357869, + "learning_rate": 9.200603318250378e-07, + "loss": 0.6459, + "step": 62 + }, + { + "epoch": 0.0028513238289205704, + "grad_norm": 1.3734658575051808, + "learning_rate": 9.351432880844646e-07, + "loss": 0.7234, + "step": 63 + }, + { + "epoch": 0.002896582937316135, + "grad_norm": 1.5154654997771089, + "learning_rate": 9.502262443438914e-07, + "loss": 0.7172, + "step": 64 + }, + { + "epoch": 0.0029418420457116997, + "grad_norm": 1.5993349941418993, + "learning_rate": 9.653092006033183e-07, + "loss": 0.7091, + "step": 65 + }, + { + "epoch": 0.002987101154107264, + "grad_norm": 1.2952398954480047, + "learning_rate": 9.80392156862745e-07, + "loss": 0.6245, + "step": 66 + }, + { + "epoch": 0.0030323602625028285, + "grad_norm": 1.3771464561355673, + "learning_rate": 9.954751131221719e-07, + "loss": 0.6454, + "step": 67 + }, + { + "epoch": 0.0030776193708983933, + "grad_norm": 1.412649273270314, + "learning_rate": 1.0105580693815989e-06, + "loss": 0.6814, + "step": 68 + }, + { + "epoch": 0.0031228784792939578, + "grad_norm": 1.3193487969255955, + "learning_rate": 1.0256410256410257e-06, + "loss": 0.682, + "step": 69 + }, + { + "epoch": 0.0031681375876895226, + "grad_norm": 1.273629024869017, + "learning_rate": 1.0407239819004527e-06, + "loss": 0.7166, + "step": 70 + }, + { + "epoch": 0.003213396696085087, + "grad_norm": 1.1381038066518965, + "learning_rate": 1.0558069381598795e-06, + "loss": 0.7045, + "step": 71 + }, + { + "epoch": 0.003258655804480652, + "grad_norm": 1.2047824209988878, + "learning_rate": 1.0708898944193063e-06, + "loss": 0.6771, + "step": 72 + }, + { + "epoch": 0.0033039149128762163, + "grad_norm": 1.2391818859284285, + "learning_rate": 1.085972850678733e-06, + "loss": 0.6741, + "step": 73 + }, + { + "epoch": 0.003349174021271781, + "grad_norm": 1.2521185180506431, + "learning_rate": 1.1010558069381598e-06, + "loss": 0.6855, + "step": 74 + }, + { + "epoch": 0.0033944331296673455, + "grad_norm": 1.3882698005588492, + "learning_rate": 1.1161387631975868e-06, + "loss": 0.7001, + "step": 75 + }, + { + "epoch": 0.0034396922380629104, + "grad_norm": 1.179314695521784, + "learning_rate": 1.1312217194570136e-06, + "loss": 0.6747, + "step": 76 + }, + { + "epoch": 0.0034849513464584748, + "grad_norm": 1.5552928150597494, + "learning_rate": 1.1463046757164404e-06, + "loss": 0.6286, + "step": 77 + }, + { + "epoch": 0.003530210454854039, + "grad_norm": 1.1734748345240251, + "learning_rate": 1.1613876319758674e-06, + "loss": 0.6742, + "step": 78 + }, + { + "epoch": 0.003575469563249604, + "grad_norm": 1.131287052435935, + "learning_rate": 1.1764705882352942e-06, + "loss": 0.6426, + "step": 79 + }, + { + "epoch": 0.0036207286716451684, + "grad_norm": 1.3295414806754717, + "learning_rate": 1.1915535444947212e-06, + "loss": 0.6618, + "step": 80 + }, + { + "epoch": 0.0036659877800407333, + "grad_norm": 1.2939253815058376, + "learning_rate": 1.206636500754148e-06, + "loss": 0.6729, + "step": 81 + }, + { + "epoch": 0.0037112468884362977, + "grad_norm": 1.059572927368594, + "learning_rate": 1.2217194570135748e-06, + "loss": 0.5791, + "step": 82 + }, + { + "epoch": 0.0037565059968318625, + "grad_norm": 1.1326662378816292, + "learning_rate": 1.2368024132730016e-06, + "loss": 0.6478, + "step": 83 + }, + { + "epoch": 0.003801765105227427, + "grad_norm": 1.4420484198243764, + "learning_rate": 1.2518853695324284e-06, + "loss": 0.7105, + "step": 84 + }, + { + "epoch": 0.003847024213622992, + "grad_norm": 1.054691744896305, + "learning_rate": 1.2669683257918552e-06, + "loss": 0.6299, + "step": 85 + }, + { + "epoch": 0.003892283322018556, + "grad_norm": 1.1882611476012201, + "learning_rate": 1.282051282051282e-06, + "loss": 0.6486, + "step": 86 + }, + { + "epoch": 0.003937542430414121, + "grad_norm": 1.059033257046168, + "learning_rate": 1.2971342383107092e-06, + "loss": 0.6162, + "step": 87 + }, + { + "epoch": 0.0039828015388096855, + "grad_norm": 1.4556419443337114, + "learning_rate": 1.312217194570136e-06, + "loss": 0.7638, + "step": 88 + }, + { + "epoch": 0.00402806064720525, + "grad_norm": 1.1239534063715333, + "learning_rate": 1.3273001508295628e-06, + "loss": 0.6258, + "step": 89 + }, + { + "epoch": 0.004073319755600814, + "grad_norm": 1.106624334983117, + "learning_rate": 1.3423831070889896e-06, + "loss": 0.6705, + "step": 90 + }, + { + "epoch": 0.004118578863996379, + "grad_norm": 1.0594735066509668, + "learning_rate": 1.3574660633484164e-06, + "loss": 0.6365, + "step": 91 + }, + { + "epoch": 0.004163837972391944, + "grad_norm": 1.1407310916855053, + "learning_rate": 1.3725490196078434e-06, + "loss": 0.6674, + "step": 92 + }, + { + "epoch": 0.004209097080787509, + "grad_norm": 1.0868736007753048, + "learning_rate": 1.3876319758672702e-06, + "loss": 0.6586, + "step": 93 + }, + { + "epoch": 0.004254356189183073, + "grad_norm": 1.0787095460749092, + "learning_rate": 1.402714932126697e-06, + "loss": 0.6344, + "step": 94 + }, + { + "epoch": 0.004299615297578638, + "grad_norm": 1.1365671650774605, + "learning_rate": 1.4177978883861237e-06, + "loss": 0.6751, + "step": 95 + }, + { + "epoch": 0.0043448744059742025, + "grad_norm": 1.0487165231214994, + "learning_rate": 1.4328808446455505e-06, + "loss": 0.657, + "step": 96 + }, + { + "epoch": 0.004390133514369767, + "grad_norm": 0.9787033497294746, + "learning_rate": 1.4479638009049775e-06, + "loss": 0.6337, + "step": 97 + }, + { + "epoch": 0.004435392622765331, + "grad_norm": 1.0254485226838135, + "learning_rate": 1.4630467571644043e-06, + "loss": 0.6081, + "step": 98 + }, + { + "epoch": 0.004480651731160896, + "grad_norm": 1.3872002279212687, + "learning_rate": 1.4781297134238311e-06, + "loss": 0.5998, + "step": 99 + }, + { + "epoch": 0.004525910839556461, + "grad_norm": 1.2984889703154123, + "learning_rate": 1.493212669683258e-06, + "loss": 0.6809, + "step": 100 + }, + { + "epoch": 0.004571169947952025, + "grad_norm": 1.0452806515562352, + "learning_rate": 1.5082956259426847e-06, + "loss": 0.6208, + "step": 101 + }, + { + "epoch": 0.00461642905634759, + "grad_norm": 1.0097601200328257, + "learning_rate": 1.5233785822021115e-06, + "loss": 0.6415, + "step": 102 + }, + { + "epoch": 0.004661688164743155, + "grad_norm": 1.269723703523461, + "learning_rate": 1.5384615384615387e-06, + "loss": 0.7088, + "step": 103 + }, + { + "epoch": 0.0047069472731387195, + "grad_norm": 1.2470497789460646, + "learning_rate": 1.5535444947209655e-06, + "loss": 0.7053, + "step": 104 + }, + { + "epoch": 0.0047522063815342835, + "grad_norm": 0.9368939620497067, + "learning_rate": 1.5686274509803923e-06, + "loss": 0.6018, + "step": 105 + }, + { + "epoch": 0.004797465489929848, + "grad_norm": 1.1604437387684339, + "learning_rate": 1.583710407239819e-06, + "loss": 0.6662, + "step": 106 + }, + { + "epoch": 0.004842724598325413, + "grad_norm": 1.0358552080502414, + "learning_rate": 1.5987933634992459e-06, + "loss": 0.6463, + "step": 107 + }, + { + "epoch": 0.004887983706720978, + "grad_norm": 1.0368493258936142, + "learning_rate": 1.6138763197586729e-06, + "loss": 0.6304, + "step": 108 + }, + { + "epoch": 0.004933242815116542, + "grad_norm": 1.1302662456729546, + "learning_rate": 1.6289592760180997e-06, + "loss": 0.668, + "step": 109 + }, + { + "epoch": 0.004978501923512107, + "grad_norm": 1.0351636351517919, + "learning_rate": 1.6440422322775265e-06, + "loss": 0.5568, + "step": 110 + }, + { + "epoch": 0.005023761031907672, + "grad_norm": 1.0501568943457311, + "learning_rate": 1.6591251885369533e-06, + "loss": 0.6857, + "step": 111 + }, + { + "epoch": 0.005069020140303236, + "grad_norm": 1.0207344968356886, + "learning_rate": 1.67420814479638e-06, + "loss": 0.6603, + "step": 112 + }, + { + "epoch": 0.0051142792486988005, + "grad_norm": 1.0310491415955947, + "learning_rate": 1.6892911010558073e-06, + "loss": 0.6509, + "step": 113 + }, + { + "epoch": 0.005159538357094365, + "grad_norm": 0.9639024601411313, + "learning_rate": 1.704374057315234e-06, + "loss": 0.7051, + "step": 114 + }, + { + "epoch": 0.00520479746548993, + "grad_norm": 0.9586419580660428, + "learning_rate": 1.7194570135746609e-06, + "loss": 0.6142, + "step": 115 + }, + { + "epoch": 0.005250056573885494, + "grad_norm": 1.04842344522062, + "learning_rate": 1.7345399698340876e-06, + "loss": 0.6466, + "step": 116 + }, + { + "epoch": 0.005295315682281059, + "grad_norm": 0.9173299249262994, + "learning_rate": 1.7496229260935144e-06, + "loss": 0.5516, + "step": 117 + }, + { + "epoch": 0.005340574790676624, + "grad_norm": 1.0831856068854298, + "learning_rate": 1.7647058823529414e-06, + "loss": 0.5931, + "step": 118 + }, + { + "epoch": 0.005385833899072189, + "grad_norm": 0.8946723582329622, + "learning_rate": 1.7797888386123682e-06, + "loss": 0.577, + "step": 119 + }, + { + "epoch": 0.005431093007467753, + "grad_norm": 1.1010913291353444, + "learning_rate": 1.794871794871795e-06, + "loss": 0.6203, + "step": 120 + }, + { + "epoch": 0.0054763521158633175, + "grad_norm": 0.9085173100560293, + "learning_rate": 1.8099547511312218e-06, + "loss": 0.6101, + "step": 121 + }, + { + "epoch": 0.005521611224258882, + "grad_norm": 0.9320802267784369, + "learning_rate": 1.8250377073906486e-06, + "loss": 0.5623, + "step": 122 + }, + { + "epoch": 0.005566870332654446, + "grad_norm": 0.8195711642520936, + "learning_rate": 1.8401206636500756e-06, + "loss": 0.6649, + "step": 123 + }, + { + "epoch": 0.005612129441050011, + "grad_norm": 1.010253752587403, + "learning_rate": 1.8552036199095024e-06, + "loss": 0.5649, + "step": 124 + }, + { + "epoch": 0.005657388549445576, + "grad_norm": 0.991330010194571, + "learning_rate": 1.8702865761689292e-06, + "loss": 0.5982, + "step": 125 + }, + { + "epoch": 0.005702647657841141, + "grad_norm": 1.1475476851088662, + "learning_rate": 1.885369532428356e-06, + "loss": 0.6453, + "step": 126 + }, + { + "epoch": 0.005747906766236705, + "grad_norm": 0.9235417511336075, + "learning_rate": 1.9004524886877828e-06, + "loss": 0.5992, + "step": 127 + }, + { + "epoch": 0.00579316587463227, + "grad_norm": 0.9847063346731131, + "learning_rate": 1.91553544494721e-06, + "loss": 0.6304, + "step": 128 + }, + { + "epoch": 0.0058384249830278345, + "grad_norm": 1.0269890471208356, + "learning_rate": 1.9306184012066366e-06, + "loss": 0.6046, + "step": 129 + }, + { + "epoch": 0.005883684091423399, + "grad_norm": 0.9258189317090748, + "learning_rate": 1.9457013574660634e-06, + "loss": 0.6118, + "step": 130 + }, + { + "epoch": 0.005928943199818963, + "grad_norm": 0.7492225521686104, + "learning_rate": 1.96078431372549e-06, + "loss": 0.6506, + "step": 131 + }, + { + "epoch": 0.005974202308214528, + "grad_norm": 0.7421506908255864, + "learning_rate": 1.975867269984917e-06, + "loss": 0.6702, + "step": 132 + }, + { + "epoch": 0.006019461416610093, + "grad_norm": 0.9545777179324334, + "learning_rate": 1.9909502262443437e-06, + "loss": 0.5685, + "step": 133 + }, + { + "epoch": 0.006064720525005657, + "grad_norm": 0.9622591564554519, + "learning_rate": 2.006033182503771e-06, + "loss": 0.5939, + "step": 134 + }, + { + "epoch": 0.006109979633401222, + "grad_norm": 0.9109690493087242, + "learning_rate": 2.0211161387631978e-06, + "loss": 0.6169, + "step": 135 + }, + { + "epoch": 0.006155238741796787, + "grad_norm": 0.6813272311762413, + "learning_rate": 2.0361990950226245e-06, + "loss": 0.6737, + "step": 136 + }, + { + "epoch": 0.0062004978501923515, + "grad_norm": 0.6720777722130816, + "learning_rate": 2.0512820512820513e-06, + "loss": 0.6571, + "step": 137 + }, + { + "epoch": 0.0062457569585879155, + "grad_norm": 0.9667588053275596, + "learning_rate": 2.066365007541478e-06, + "loss": 0.5855, + "step": 138 + }, + { + "epoch": 0.00629101606698348, + "grad_norm": 0.9344038736230162, + "learning_rate": 2.0814479638009053e-06, + "loss": 0.6125, + "step": 139 + }, + { + "epoch": 0.006336275175379045, + "grad_norm": 0.9465344342175216, + "learning_rate": 2.096530920060332e-06, + "loss": 0.5956, + "step": 140 + }, + { + "epoch": 0.00638153428377461, + "grad_norm": 1.2591164988076586, + "learning_rate": 2.111613876319759e-06, + "loss": 0.6398, + "step": 141 + }, + { + "epoch": 0.006426793392170174, + "grad_norm": 0.9677788926426463, + "learning_rate": 2.1266968325791857e-06, + "loss": 0.6047, + "step": 142 + }, + { + "epoch": 0.006472052500565739, + "grad_norm": 0.9872448306229715, + "learning_rate": 2.1417797888386125e-06, + "loss": 0.6125, + "step": 143 + }, + { + "epoch": 0.006517311608961304, + "grad_norm": 1.0035271309418503, + "learning_rate": 2.1568627450980393e-06, + "loss": 0.5664, + "step": 144 + }, + { + "epoch": 0.006562570717356868, + "grad_norm": 0.9880506990276164, + "learning_rate": 2.171945701357466e-06, + "loss": 0.5886, + "step": 145 + }, + { + "epoch": 0.0066078298257524325, + "grad_norm": 1.0103893385719878, + "learning_rate": 2.187028657616893e-06, + "loss": 0.6753, + "step": 146 + }, + { + "epoch": 0.006653088934147997, + "grad_norm": 1.011094895629863, + "learning_rate": 2.2021116138763197e-06, + "loss": 0.6041, + "step": 147 + }, + { + "epoch": 0.006698348042543562, + "grad_norm": 0.9464836309060873, + "learning_rate": 2.2171945701357465e-06, + "loss": 0.6077, + "step": 148 + }, + { + "epoch": 0.006743607150939126, + "grad_norm": 0.9495819965291539, + "learning_rate": 2.2322775263951737e-06, + "loss": 0.5735, + "step": 149 + }, + { + "epoch": 0.006788866259334691, + "grad_norm": 1.1858654132024509, + "learning_rate": 2.2473604826546005e-06, + "loss": 0.6186, + "step": 150 + }, + { + "epoch": 0.006834125367730256, + "grad_norm": 0.9009709214733077, + "learning_rate": 2.2624434389140273e-06, + "loss": 0.5791, + "step": 151 + }, + { + "epoch": 0.006879384476125821, + "grad_norm": 1.9798213247768532, + "learning_rate": 2.277526395173454e-06, + "loss": 0.5288, + "step": 152 + }, + { + "epoch": 0.006924643584521385, + "grad_norm": 0.8768656065313846, + "learning_rate": 2.292609351432881e-06, + "loss": 0.5388, + "step": 153 + }, + { + "epoch": 0.0069699026929169496, + "grad_norm": 0.9853972537936977, + "learning_rate": 2.307692307692308e-06, + "loss": 0.621, + "step": 154 + }, + { + "epoch": 0.007015161801312514, + "grad_norm": 1.183982065728492, + "learning_rate": 2.322775263951735e-06, + "loss": 0.5722, + "step": 155 + }, + { + "epoch": 0.007060420909708078, + "grad_norm": 0.9122814416114049, + "learning_rate": 2.3378582202111617e-06, + "loss": 0.5931, + "step": 156 + }, + { + "epoch": 0.007105680018103643, + "grad_norm": 0.6050120092736706, + "learning_rate": 2.3529411764705885e-06, + "loss": 0.6359, + "step": 157 + }, + { + "epoch": 0.007150939126499208, + "grad_norm": 0.8846810488876649, + "learning_rate": 2.3680241327300152e-06, + "loss": 0.5777, + "step": 158 + }, + { + "epoch": 0.007196198234894773, + "grad_norm": 0.9791346336803588, + "learning_rate": 2.3831070889894425e-06, + "loss": 0.6196, + "step": 159 + }, + { + "epoch": 0.007241457343290337, + "grad_norm": 1.0364431416834374, + "learning_rate": 2.3981900452488693e-06, + "loss": 0.5649, + "step": 160 + }, + { + "epoch": 0.007286716451685902, + "grad_norm": 1.0907811411319832, + "learning_rate": 2.413273001508296e-06, + "loss": 0.5675, + "step": 161 + }, + { + "epoch": 0.007331975560081467, + "grad_norm": 1.0702644046402912, + "learning_rate": 2.428355957767723e-06, + "loss": 0.5868, + "step": 162 + }, + { + "epoch": 0.007377234668477031, + "grad_norm": 1.1961484830707276, + "learning_rate": 2.4434389140271496e-06, + "loss": 0.6126, + "step": 163 + }, + { + "epoch": 0.007422493776872595, + "grad_norm": 0.9997853040755823, + "learning_rate": 2.4585218702865764e-06, + "loss": 0.5593, + "step": 164 + }, + { + "epoch": 0.00746775288526816, + "grad_norm": 1.5306620336532337, + "learning_rate": 2.4736048265460032e-06, + "loss": 0.5741, + "step": 165 + }, + { + "epoch": 0.007513011993663725, + "grad_norm": 1.0365389768096527, + "learning_rate": 2.48868778280543e-06, + "loss": 0.597, + "step": 166 + }, + { + "epoch": 0.007558271102059289, + "grad_norm": 0.9300028853685578, + "learning_rate": 2.503770739064857e-06, + "loss": 0.6228, + "step": 167 + }, + { + "epoch": 0.007603530210454854, + "grad_norm": 0.9730959927324633, + "learning_rate": 2.5188536953242836e-06, + "loss": 0.5888, + "step": 168 + }, + { + "epoch": 0.007648789318850419, + "grad_norm": 0.7004727910376336, + "learning_rate": 2.5339366515837104e-06, + "loss": 0.6655, + "step": 169 + }, + { + "epoch": 0.007694048427245984, + "grad_norm": 1.1770735730846689, + "learning_rate": 2.549019607843137e-06, + "loss": 0.61, + "step": 170 + }, + { + "epoch": 0.007739307535641548, + "grad_norm": 0.9265069767412805, + "learning_rate": 2.564102564102564e-06, + "loss": 0.6124, + "step": 171 + }, + { + "epoch": 0.007784566644037112, + "grad_norm": 1.0026834295632552, + "learning_rate": 2.5791855203619916e-06, + "loss": 0.5759, + "step": 172 + }, + { + "epoch": 0.007829825752432677, + "grad_norm": 0.893051784847375, + "learning_rate": 2.5942684766214184e-06, + "loss": 0.6195, + "step": 173 + }, + { + "epoch": 0.007875084860828241, + "grad_norm": 1.0294544753456212, + "learning_rate": 2.609351432880845e-06, + "loss": 0.5924, + "step": 174 + }, + { + "epoch": 0.007920343969223807, + "grad_norm": 0.983431175208823, + "learning_rate": 2.624434389140272e-06, + "loss": 0.6401, + "step": 175 + }, + { + "epoch": 0.007965603077619371, + "grad_norm": 0.9732249669160397, + "learning_rate": 2.6395173453996988e-06, + "loss": 0.5839, + "step": 176 + }, + { + "epoch": 0.008010862186014935, + "grad_norm": 1.0460819509049557, + "learning_rate": 2.6546003016591256e-06, + "loss": 0.5342, + "step": 177 + }, + { + "epoch": 0.0080561212944105, + "grad_norm": 0.6725530926649889, + "learning_rate": 2.6696832579185524e-06, + "loss": 0.6642, + "step": 178 + }, + { + "epoch": 0.008101380402806065, + "grad_norm": 0.947656576932551, + "learning_rate": 2.684766214177979e-06, + "loss": 0.5543, + "step": 179 + }, + { + "epoch": 0.008146639511201629, + "grad_norm": 0.890279138583737, + "learning_rate": 2.699849170437406e-06, + "loss": 0.5781, + "step": 180 + }, + { + "epoch": 0.008191898619597194, + "grad_norm": 0.9491950792758711, + "learning_rate": 2.7149321266968327e-06, + "loss": 0.5896, + "step": 181 + }, + { + "epoch": 0.008237157727992758, + "grad_norm": 0.9199486672240875, + "learning_rate": 2.7300150829562595e-06, + "loss": 0.5462, + "step": 182 + }, + { + "epoch": 0.008282416836388324, + "grad_norm": 4.539286243882512, + "learning_rate": 2.7450980392156867e-06, + "loss": 0.5749, + "step": 183 + }, + { + "epoch": 0.008327675944783888, + "grad_norm": 0.9266218360089107, + "learning_rate": 2.7601809954751135e-06, + "loss": 0.5577, + "step": 184 + }, + { + "epoch": 0.008372935053179452, + "grad_norm": 0.9869037300499367, + "learning_rate": 2.7752639517345403e-06, + "loss": 0.5218, + "step": 185 + }, + { + "epoch": 0.008418194161575018, + "grad_norm": 0.9711731377333721, + "learning_rate": 2.790346907993967e-06, + "loss": 0.5768, + "step": 186 + }, + { + "epoch": 0.008463453269970582, + "grad_norm": 1.2170837862401673, + "learning_rate": 2.805429864253394e-06, + "loss": 0.587, + "step": 187 + }, + { + "epoch": 0.008508712378366146, + "grad_norm": 0.8855600221161494, + "learning_rate": 2.8205128205128207e-06, + "loss": 0.5275, + "step": 188 + }, + { + "epoch": 0.008553971486761711, + "grad_norm": 0.9718908666916015, + "learning_rate": 2.8355957767722475e-06, + "loss": 0.5574, + "step": 189 + }, + { + "epoch": 0.008599230595157275, + "grad_norm": 0.9121332025706556, + "learning_rate": 2.8506787330316743e-06, + "loss": 0.5696, + "step": 190 + }, + { + "epoch": 0.00864448970355284, + "grad_norm": 0.9606521465966855, + "learning_rate": 2.865761689291101e-06, + "loss": 0.5831, + "step": 191 + }, + { + "epoch": 0.008689748811948405, + "grad_norm": 0.9248220902665218, + "learning_rate": 2.880844645550528e-06, + "loss": 0.6033, + "step": 192 + }, + { + "epoch": 0.008735007920343969, + "grad_norm": 1.245130579446091, + "learning_rate": 2.895927601809955e-06, + "loss": 0.5711, + "step": 193 + }, + { + "epoch": 0.008780267028739535, + "grad_norm": 0.6920799519601023, + "learning_rate": 2.911010558069382e-06, + "loss": 0.6706, + "step": 194 + }, + { + "epoch": 0.008825526137135099, + "grad_norm": 0.9225517446215864, + "learning_rate": 2.9260935143288087e-06, + "loss": 0.5236, + "step": 195 + }, + { + "epoch": 0.008870785245530663, + "grad_norm": 0.6084442686451063, + "learning_rate": 2.9411764705882355e-06, + "loss": 0.6724, + "step": 196 + }, + { + "epoch": 0.008916044353926228, + "grad_norm": 1.0686412628431616, + "learning_rate": 2.9562594268476623e-06, + "loss": 0.5737, + "step": 197 + }, + { + "epoch": 0.008961303462321792, + "grad_norm": 1.0296132076998055, + "learning_rate": 2.971342383107089e-06, + "loss": 0.5786, + "step": 198 + }, + { + "epoch": 0.009006562570717356, + "grad_norm": 0.6266104875319409, + "learning_rate": 2.986425339366516e-06, + "loss": 0.6535, + "step": 199 + }, + { + "epoch": 0.009051821679112922, + "grad_norm": 0.5458319005831402, + "learning_rate": 3.0015082956259426e-06, + "loss": 0.647, + "step": 200 + }, + { + "epoch": 0.009097080787508486, + "grad_norm": 0.963218116755177, + "learning_rate": 3.0165912518853694e-06, + "loss": 0.5753, + "step": 201 + }, + { + "epoch": 0.00914233989590405, + "grad_norm": 1.1478861970452678, + "learning_rate": 3.0316742081447962e-06, + "loss": 0.5516, + "step": 202 + }, + { + "epoch": 0.009187599004299616, + "grad_norm": 0.8944848254240031, + "learning_rate": 3.046757164404223e-06, + "loss": 0.597, + "step": 203 + }, + { + "epoch": 0.00923285811269518, + "grad_norm": 0.9869650281799124, + "learning_rate": 3.0618401206636506e-06, + "loss": 0.5382, + "step": 204 + }, + { + "epoch": 0.009278117221090745, + "grad_norm": 1.130476760242102, + "learning_rate": 3.0769230769230774e-06, + "loss": 0.5331, + "step": 205 + }, + { + "epoch": 0.00932337632948631, + "grad_norm": 0.6252836318965277, + "learning_rate": 3.0920060331825042e-06, + "loss": 0.618, + "step": 206 + }, + { + "epoch": 0.009368635437881873, + "grad_norm": 0.9759516513557152, + "learning_rate": 3.107088989441931e-06, + "loss": 0.5858, + "step": 207 + }, + { + "epoch": 0.009413894546277439, + "grad_norm": 0.8682518804136213, + "learning_rate": 3.122171945701358e-06, + "loss": 0.5768, + "step": 208 + }, + { + "epoch": 0.009459153654673003, + "grad_norm": 1.236201277393762, + "learning_rate": 3.1372549019607846e-06, + "loss": 0.529, + "step": 209 + }, + { + "epoch": 0.009504412763068567, + "grad_norm": 0.49697934177661585, + "learning_rate": 3.1523378582202114e-06, + "loss": 0.6492, + "step": 210 + }, + { + "epoch": 0.009549671871464133, + "grad_norm": 0.8940926607531419, + "learning_rate": 3.167420814479638e-06, + "loss": 0.563, + "step": 211 + }, + { + "epoch": 0.009594930979859697, + "grad_norm": 0.858566838909827, + "learning_rate": 3.182503770739065e-06, + "loss": 0.5136, + "step": 212 + }, + { + "epoch": 0.00964019008825526, + "grad_norm": 0.4250007793137212, + "learning_rate": 3.1975867269984918e-06, + "loss": 0.6387, + "step": 213 + }, + { + "epoch": 0.009685449196650826, + "grad_norm": 0.4830424869851058, + "learning_rate": 3.212669683257919e-06, + "loss": 0.6461, + "step": 214 + }, + { + "epoch": 0.00973070830504639, + "grad_norm": 0.9492593456229038, + "learning_rate": 3.2277526395173458e-06, + "loss": 0.5798, + "step": 215 + }, + { + "epoch": 0.009775967413441956, + "grad_norm": 0.9052606044295339, + "learning_rate": 3.2428355957767726e-06, + "loss": 0.5629, + "step": 216 + }, + { + "epoch": 0.00982122652183752, + "grad_norm": 0.46529736780797787, + "learning_rate": 3.2579185520361994e-06, + "loss": 0.6497, + "step": 217 + }, + { + "epoch": 0.009866485630233084, + "grad_norm": 0.44782130490666033, + "learning_rate": 3.273001508295626e-06, + "loss": 0.6619, + "step": 218 + }, + { + "epoch": 0.00991174473862865, + "grad_norm": 0.8659706800572665, + "learning_rate": 3.288084464555053e-06, + "loss": 0.4668, + "step": 219 + }, + { + "epoch": 0.009957003847024214, + "grad_norm": 0.8891708692268783, + "learning_rate": 3.3031674208144797e-06, + "loss": 0.517, + "step": 220 + }, + { + "epoch": 0.010002262955419778, + "grad_norm": 0.44320004826346965, + "learning_rate": 3.3182503770739065e-06, + "loss": 0.6212, + "step": 221 + }, + { + "epoch": 0.010047522063815343, + "grad_norm": 1.0023703249828462, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.612, + "step": 222 + }, + { + "epoch": 0.010092781172210907, + "grad_norm": 0.9391998913476327, + "learning_rate": 3.34841628959276e-06, + "loss": 0.5485, + "step": 223 + }, + { + "epoch": 0.010138040280606471, + "grad_norm": 0.9215626336665612, + "learning_rate": 3.3634992458521878e-06, + "loss": 0.5324, + "step": 224 + }, + { + "epoch": 0.010183299389002037, + "grad_norm": 0.9650536964226126, + "learning_rate": 3.3785822021116145e-06, + "loss": 0.5148, + "step": 225 + }, + { + "epoch": 0.010228558497397601, + "grad_norm": 0.44025580492320604, + "learning_rate": 3.3936651583710413e-06, + "loss": 0.6352, + "step": 226 + }, + { + "epoch": 0.010273817605793167, + "grad_norm": 0.9860234711060266, + "learning_rate": 3.408748114630468e-06, + "loss": 0.5221, + "step": 227 + }, + { + "epoch": 0.01031907671418873, + "grad_norm": 0.8750523725384597, + "learning_rate": 3.423831070889895e-06, + "loss": 0.4942, + "step": 228 + }, + { + "epoch": 0.010364335822584295, + "grad_norm": 0.40786929784799386, + "learning_rate": 3.4389140271493217e-06, + "loss": 0.6168, + "step": 229 + }, + { + "epoch": 0.01040959493097986, + "grad_norm": 0.9581620031963619, + "learning_rate": 3.4539969834087485e-06, + "loss": 0.5732, + "step": 230 + }, + { + "epoch": 0.010454854039375424, + "grad_norm": 1.078295738150313, + "learning_rate": 3.4690799396681753e-06, + "loss": 0.5713, + "step": 231 + }, + { + "epoch": 0.010500113147770988, + "grad_norm": 1.005658714168155, + "learning_rate": 3.484162895927602e-06, + "loss": 0.5712, + "step": 232 + }, + { + "epoch": 0.010545372256166554, + "grad_norm": 0.9349868398807674, + "learning_rate": 3.499245852187029e-06, + "loss": 0.5375, + "step": 233 + }, + { + "epoch": 0.010590631364562118, + "grad_norm": 0.8593073232849099, + "learning_rate": 3.5143288084464557e-06, + "loss": 0.4573, + "step": 234 + }, + { + "epoch": 0.010635890472957682, + "grad_norm": 0.46015794426951856, + "learning_rate": 3.529411764705883e-06, + "loss": 0.6272, + "step": 235 + }, + { + "epoch": 0.010681149581353248, + "grad_norm": 1.0336342436633565, + "learning_rate": 3.5444947209653097e-06, + "loss": 0.5125, + "step": 236 + }, + { + "epoch": 0.010726408689748812, + "grad_norm": 0.8832268250263099, + "learning_rate": 3.5595776772247365e-06, + "loss": 0.5745, + "step": 237 + }, + { + "epoch": 0.010771667798144377, + "grad_norm": 0.9516494399278698, + "learning_rate": 3.5746606334841633e-06, + "loss": 0.5466, + "step": 238 + }, + { + "epoch": 0.010816926906539941, + "grad_norm": 0.4369975511253576, + "learning_rate": 3.58974358974359e-06, + "loss": 0.6034, + "step": 239 + }, + { + "epoch": 0.010862186014935505, + "grad_norm": 0.9401986167737724, + "learning_rate": 3.604826546003017e-06, + "loss": 0.5792, + "step": 240 + }, + { + "epoch": 0.010907445123331071, + "grad_norm": 1.0006366096995205, + "learning_rate": 3.6199095022624436e-06, + "loss": 0.6009, + "step": 241 + }, + { + "epoch": 0.010952704231726635, + "grad_norm": 0.9510760230842723, + "learning_rate": 3.6349924585218704e-06, + "loss": 0.5477, + "step": 242 + }, + { + "epoch": 0.010997963340122199, + "grad_norm": 0.9288859948652094, + "learning_rate": 3.6500754147812972e-06, + "loss": 0.544, + "step": 243 + }, + { + "epoch": 0.011043222448517765, + "grad_norm": 0.9863343501078657, + "learning_rate": 3.665158371040724e-06, + "loss": 0.5416, + "step": 244 + }, + { + "epoch": 0.011088481556913329, + "grad_norm": 0.8605513996689019, + "learning_rate": 3.6802413273001512e-06, + "loss": 0.4932, + "step": 245 + }, + { + "epoch": 0.011133740665308893, + "grad_norm": 0.916463140897627, + "learning_rate": 3.695324283559578e-06, + "loss": 0.6034, + "step": 246 + }, + { + "epoch": 0.011178999773704458, + "grad_norm": 0.4956358437007129, + "learning_rate": 3.710407239819005e-06, + "loss": 0.6056, + "step": 247 + }, + { + "epoch": 0.011224258882100022, + "grad_norm": 0.9163501523417295, + "learning_rate": 3.7254901960784316e-06, + "loss": 0.5617, + "step": 248 + }, + { + "epoch": 0.011269517990495588, + "grad_norm": 0.9778185038718414, + "learning_rate": 3.7405731523378584e-06, + "loss": 0.546, + "step": 249 + }, + { + "epoch": 0.011314777098891152, + "grad_norm": 0.9520944809364426, + "learning_rate": 3.755656108597285e-06, + "loss": 0.5811, + "step": 250 + }, + { + "epoch": 0.011360036207286716, + "grad_norm": 0.8823450931650489, + "learning_rate": 3.770739064856712e-06, + "loss": 0.5386, + "step": 251 + }, + { + "epoch": 0.011405295315682282, + "grad_norm": 0.9512201225676397, + "learning_rate": 3.7858220211161388e-06, + "loss": 0.5205, + "step": 252 + }, + { + "epoch": 0.011450554424077846, + "grad_norm": 0.9448051386395275, + "learning_rate": 3.8009049773755656e-06, + "loss": 0.5838, + "step": 253 + }, + { + "epoch": 0.01149581353247341, + "grad_norm": 0.8613594278239519, + "learning_rate": 3.815987933634992e-06, + "loss": 0.5709, + "step": 254 + }, + { + "epoch": 0.011541072640868975, + "grad_norm": 0.9715804592335193, + "learning_rate": 3.83107088989442e-06, + "loss": 0.551, + "step": 255 + }, + { + "epoch": 0.01158633174926454, + "grad_norm": 0.8766071855257321, + "learning_rate": 3.846153846153847e-06, + "loss": 0.5178, + "step": 256 + }, + { + "epoch": 0.011631590857660103, + "grad_norm": 0.8675288371592743, + "learning_rate": 3.861236802413273e-06, + "loss": 0.5359, + "step": 257 + }, + { + "epoch": 0.011676849966055669, + "grad_norm": 0.9401513608490044, + "learning_rate": 3.8763197586727e-06, + "loss": 0.531, + "step": 258 + }, + { + "epoch": 0.011722109074451233, + "grad_norm": 0.8856111486307915, + "learning_rate": 3.891402714932127e-06, + "loss": 0.536, + "step": 259 + }, + { + "epoch": 0.011767368182846799, + "grad_norm": 0.6697874287839527, + "learning_rate": 3.906485671191554e-06, + "loss": 0.617, + "step": 260 + }, + { + "epoch": 0.011812627291242363, + "grad_norm": 0.9666300735640014, + "learning_rate": 3.92156862745098e-06, + "loss": 0.536, + "step": 261 + }, + { + "epoch": 0.011857886399637927, + "grad_norm": 0.5422182479370271, + "learning_rate": 3.9366515837104075e-06, + "loss": 0.6328, + "step": 262 + }, + { + "epoch": 0.011903145508033492, + "grad_norm": 0.4923913584350843, + "learning_rate": 3.951734539969834e-06, + "loss": 0.5888, + "step": 263 + }, + { + "epoch": 0.011948404616429056, + "grad_norm": 0.43537169302489886, + "learning_rate": 3.966817496229261e-06, + "loss": 0.5906, + "step": 264 + }, + { + "epoch": 0.01199366372482462, + "grad_norm": 0.495426622721109, + "learning_rate": 3.9819004524886875e-06, + "loss": 0.6193, + "step": 265 + }, + { + "epoch": 0.012038922833220186, + "grad_norm": 1.0064974142968786, + "learning_rate": 3.9969834087481156e-06, + "loss": 0.5928, + "step": 266 + }, + { + "epoch": 0.01208418194161575, + "grad_norm": 0.9836738785638621, + "learning_rate": 4.012066365007542e-06, + "loss": 0.4592, + "step": 267 + }, + { + "epoch": 0.012129441050011314, + "grad_norm": 0.6441927181482473, + "learning_rate": 4.027149321266969e-06, + "loss": 0.5993, + "step": 268 + }, + { + "epoch": 0.01217470015840688, + "grad_norm": 0.5193332967918078, + "learning_rate": 4.0422322775263955e-06, + "loss": 0.6138, + "step": 269 + }, + { + "epoch": 0.012219959266802444, + "grad_norm": 0.9500292114748106, + "learning_rate": 4.057315233785823e-06, + "loss": 0.5578, + "step": 270 + }, + { + "epoch": 0.01226521837519801, + "grad_norm": 0.4149215761090333, + "learning_rate": 4.072398190045249e-06, + "loss": 0.608, + "step": 271 + }, + { + "epoch": 0.012310477483593573, + "grad_norm": 1.0754271378193447, + "learning_rate": 4.087481146304676e-06, + "loss": 0.5623, + "step": 272 + }, + { + "epoch": 0.012355736591989137, + "grad_norm": 0.925293932357001, + "learning_rate": 4.102564102564103e-06, + "loss": 0.5349, + "step": 273 + }, + { + "epoch": 0.012400995700384703, + "grad_norm": 1.133479898251426, + "learning_rate": 4.11764705882353e-06, + "loss": 0.5174, + "step": 274 + }, + { + "epoch": 0.012446254808780267, + "grad_norm": 0.5209625544150582, + "learning_rate": 4.132730015082956e-06, + "loss": 0.6002, + "step": 275 + }, + { + "epoch": 0.012491513917175831, + "grad_norm": 1.0776330878254095, + "learning_rate": 4.1478129713423835e-06, + "loss": 0.5646, + "step": 276 + }, + { + "epoch": 0.012536773025571397, + "grad_norm": 1.1219212069602342, + "learning_rate": 4.162895927601811e-06, + "loss": 0.5618, + "step": 277 + }, + { + "epoch": 0.01258203213396696, + "grad_norm": 0.9540197245213747, + "learning_rate": 4.177978883861237e-06, + "loss": 0.5335, + "step": 278 + }, + { + "epoch": 0.012627291242362525, + "grad_norm": 1.0409140098530238, + "learning_rate": 4.193061840120664e-06, + "loss": 0.5274, + "step": 279 + }, + { + "epoch": 0.01267255035075809, + "grad_norm": 0.9479380496867869, + "learning_rate": 4.208144796380091e-06, + "loss": 0.5062, + "step": 280 + }, + { + "epoch": 0.012717809459153654, + "grad_norm": 0.8985186859100934, + "learning_rate": 4.223227752639518e-06, + "loss": 0.5108, + "step": 281 + }, + { + "epoch": 0.01276306856754922, + "grad_norm": 0.8673565799474766, + "learning_rate": 4.238310708898944e-06, + "loss": 0.5079, + "step": 282 + }, + { + "epoch": 0.012808327675944784, + "grad_norm": 0.9738525782248589, + "learning_rate": 4.2533936651583714e-06, + "loss": 0.5285, + "step": 283 + }, + { + "epoch": 0.012853586784340348, + "grad_norm": 0.9445428406253431, + "learning_rate": 4.268476621417798e-06, + "loss": 0.5087, + "step": 284 + }, + { + "epoch": 0.012898845892735914, + "grad_norm": 0.9482994825503861, + "learning_rate": 4.283559577677225e-06, + "loss": 0.5435, + "step": 285 + }, + { + "epoch": 0.012944105001131478, + "grad_norm": 0.9428865202414309, + "learning_rate": 4.298642533936652e-06, + "loss": 0.4748, + "step": 286 + }, + { + "epoch": 0.012989364109527042, + "grad_norm": 0.48671128565727295, + "learning_rate": 4.313725490196079e-06, + "loss": 0.6106, + "step": 287 + }, + { + "epoch": 0.013034623217922607, + "grad_norm": 0.8757214278700259, + "learning_rate": 4.328808446455506e-06, + "loss": 0.5244, + "step": 288 + }, + { + "epoch": 0.013079882326318171, + "grad_norm": 1.316171662310517, + "learning_rate": 4.343891402714932e-06, + "loss": 0.5239, + "step": 289 + }, + { + "epoch": 0.013125141434713735, + "grad_norm": 0.9589527483195361, + "learning_rate": 4.358974358974359e-06, + "loss": 0.549, + "step": 290 + }, + { + "epoch": 0.013170400543109301, + "grad_norm": 0.4200317179916816, + "learning_rate": 4.374057315233786e-06, + "loss": 0.6291, + "step": 291 + }, + { + "epoch": 0.013215659651504865, + "grad_norm": 0.9318035726460884, + "learning_rate": 4.389140271493213e-06, + "loss": 0.558, + "step": 292 + }, + { + "epoch": 0.01326091875990043, + "grad_norm": 1.0019818689732092, + "learning_rate": 4.404223227752639e-06, + "loss": 0.5025, + "step": 293 + }, + { + "epoch": 0.013306177868295995, + "grad_norm": 1.113539719910521, + "learning_rate": 4.419306184012067e-06, + "loss": 0.5599, + "step": 294 + }, + { + "epoch": 0.013351436976691559, + "grad_norm": 1.0569103377615405, + "learning_rate": 4.434389140271493e-06, + "loss": 0.5216, + "step": 295 + }, + { + "epoch": 0.013396696085087124, + "grad_norm": 0.9664867476522317, + "learning_rate": 4.44947209653092e-06, + "loss": 0.4954, + "step": 296 + }, + { + "epoch": 0.013441955193482688, + "grad_norm": 0.896450043739785, + "learning_rate": 4.464555052790347e-06, + "loss": 0.5027, + "step": 297 + }, + { + "epoch": 0.013487214301878252, + "grad_norm": 0.9422311268833776, + "learning_rate": 4.479638009049775e-06, + "loss": 0.5497, + "step": 298 + }, + { + "epoch": 0.013532473410273818, + "grad_norm": 1.035399348209044, + "learning_rate": 4.494720965309201e-06, + "loss": 0.56, + "step": 299 + }, + { + "epoch": 0.013577732518669382, + "grad_norm": 0.5018088786562147, + "learning_rate": 4.509803921568628e-06, + "loss": 0.5795, + "step": 300 + }, + { + "epoch": 0.013622991627064946, + "grad_norm": 0.9210544017751315, + "learning_rate": 4.5248868778280546e-06, + "loss": 0.5114, + "step": 301 + }, + { + "epoch": 0.013668250735460512, + "grad_norm": 1.291852571021736, + "learning_rate": 4.539969834087482e-06, + "loss": 0.5583, + "step": 302 + }, + { + "epoch": 0.013713509843856076, + "grad_norm": 1.0262904351383784, + "learning_rate": 4.555052790346908e-06, + "loss": 0.4886, + "step": 303 + }, + { + "epoch": 0.013758768952251641, + "grad_norm": 0.9650289716611675, + "learning_rate": 4.570135746606335e-06, + "loss": 0.5207, + "step": 304 + }, + { + "epoch": 0.013804028060647205, + "grad_norm": 0.48075664142150215, + "learning_rate": 4.585218702865762e-06, + "loss": 0.6426, + "step": 305 + }, + { + "epoch": 0.01384928716904277, + "grad_norm": 0.8763321117534213, + "learning_rate": 4.600301659125189e-06, + "loss": 0.5104, + "step": 306 + }, + { + "epoch": 0.013894546277438335, + "grad_norm": 0.941361972531803, + "learning_rate": 4.615384615384616e-06, + "loss": 0.5378, + "step": 307 + }, + { + "epoch": 0.013939805385833899, + "grad_norm": 1.5059100405196593, + "learning_rate": 4.6304675716440425e-06, + "loss": 0.5498, + "step": 308 + }, + { + "epoch": 0.013985064494229463, + "grad_norm": 0.917428534519775, + "learning_rate": 4.64555052790347e-06, + "loss": 0.5412, + "step": 309 + }, + { + "epoch": 0.014030323602625029, + "grad_norm": 0.9499732405768552, + "learning_rate": 4.660633484162896e-06, + "loss": 0.5303, + "step": 310 + }, + { + "epoch": 0.014075582711020593, + "grad_norm": 0.9636543513031476, + "learning_rate": 4.675716440422323e-06, + "loss": 0.552, + "step": 311 + }, + { + "epoch": 0.014120841819416157, + "grad_norm": 0.9242787914214284, + "learning_rate": 4.69079939668175e-06, + "loss": 0.4908, + "step": 312 + }, + { + "epoch": 0.014166100927811722, + "grad_norm": 1.081272531811085, + "learning_rate": 4.705882352941177e-06, + "loss": 0.5389, + "step": 313 + }, + { + "epoch": 0.014211360036207286, + "grad_norm": 0.9253075444801346, + "learning_rate": 4.720965309200603e-06, + "loss": 0.5807, + "step": 314 + }, + { + "epoch": 0.014256619144602852, + "grad_norm": 0.49351422143918683, + "learning_rate": 4.7360482654600305e-06, + "loss": 0.5956, + "step": 315 + }, + { + "epoch": 0.014301878252998416, + "grad_norm": 0.9741852578327058, + "learning_rate": 4.751131221719457e-06, + "loss": 0.5276, + "step": 316 + }, + { + "epoch": 0.01434713736139398, + "grad_norm": 0.42603570176596806, + "learning_rate": 4.766214177978885e-06, + "loss": 0.6086, + "step": 317 + }, + { + "epoch": 0.014392396469789546, + "grad_norm": 0.9694041654859664, + "learning_rate": 4.781297134238311e-06, + "loss": 0.5729, + "step": 318 + }, + { + "epoch": 0.01443765557818511, + "grad_norm": 0.8927176798511097, + "learning_rate": 4.7963800904977385e-06, + "loss": 0.5127, + "step": 319 + }, + { + "epoch": 0.014482914686580674, + "grad_norm": 0.504617438746344, + "learning_rate": 4.811463046757165e-06, + "loss": 0.6003, + "step": 320 + }, + { + "epoch": 0.01452817379497624, + "grad_norm": 0.4540099755279664, + "learning_rate": 4.826546003016592e-06, + "loss": 0.6335, + "step": 321 + }, + { + "epoch": 0.014573432903371803, + "grad_norm": 1.1454920132721478, + "learning_rate": 4.8416289592760185e-06, + "loss": 0.5226, + "step": 322 + }, + { + "epoch": 0.014618692011767367, + "grad_norm": 0.9128245799208095, + "learning_rate": 4.856711915535446e-06, + "loss": 0.523, + "step": 323 + }, + { + "epoch": 0.014663951120162933, + "grad_norm": 0.42641660214287797, + "learning_rate": 4.871794871794872e-06, + "loss": 0.6013, + "step": 324 + }, + { + "epoch": 0.014709210228558497, + "grad_norm": 0.9227995753110831, + "learning_rate": 4.886877828054299e-06, + "loss": 0.5273, + "step": 325 + }, + { + "epoch": 0.014754469336954063, + "grad_norm": 0.859961581416437, + "learning_rate": 4.901960784313726e-06, + "loss": 0.51, + "step": 326 + }, + { + "epoch": 0.014799728445349627, + "grad_norm": 0.9260793685617191, + "learning_rate": 4.917043740573153e-06, + "loss": 0.4988, + "step": 327 + }, + { + "epoch": 0.01484498755374519, + "grad_norm": 0.9780194052175253, + "learning_rate": 4.93212669683258e-06, + "loss": 0.5041, + "step": 328 + }, + { + "epoch": 0.014890246662140756, + "grad_norm": 0.5176024283585878, + "learning_rate": 4.9472096530920064e-06, + "loss": 0.5843, + "step": 329 + }, + { + "epoch": 0.01493550577053632, + "grad_norm": 0.4840598729523819, + "learning_rate": 4.962292609351434e-06, + "loss": 0.594, + "step": 330 + }, + { + "epoch": 0.014980764878931884, + "grad_norm": 0.38051714853312013, + "learning_rate": 4.97737556561086e-06, + "loss": 0.5979, + "step": 331 + }, + { + "epoch": 0.01502602398732745, + "grad_norm": 0.9995763964660412, + "learning_rate": 4.992458521870287e-06, + "loss": 0.5146, + "step": 332 + }, + { + "epoch": 0.015071283095723014, + "grad_norm": 0.974717356695917, + "learning_rate": 5.007541478129714e-06, + "loss": 0.5428, + "step": 333 + }, + { + "epoch": 0.015116542204118578, + "grad_norm": 0.9470024372074591, + "learning_rate": 5.022624434389141e-06, + "loss": 0.5165, + "step": 334 + }, + { + "epoch": 0.015161801312514144, + "grad_norm": 1.1126527648607103, + "learning_rate": 5.037707390648567e-06, + "loss": 0.5282, + "step": 335 + }, + { + "epoch": 0.015207060420909708, + "grad_norm": 1.0448542636444378, + "learning_rate": 5.052790346907994e-06, + "loss": 0.5556, + "step": 336 + }, + { + "epoch": 0.015252319529305274, + "grad_norm": 1.0030573569299337, + "learning_rate": 5.067873303167421e-06, + "loss": 0.4889, + "step": 337 + }, + { + "epoch": 0.015297578637700837, + "grad_norm": 1.010629182095211, + "learning_rate": 5.082956259426848e-06, + "loss": 0.5208, + "step": 338 + }, + { + "epoch": 0.015342837746096401, + "grad_norm": 0.8679356439233207, + "learning_rate": 5.098039215686274e-06, + "loss": 0.4962, + "step": 339 + }, + { + "epoch": 0.015388096854491967, + "grad_norm": 0.9161320845529888, + "learning_rate": 5.1131221719457016e-06, + "loss": 0.5067, + "step": 340 + }, + { + "epoch": 0.015433355962887531, + "grad_norm": 1.1114898863130425, + "learning_rate": 5.128205128205128e-06, + "loss": 0.5856, + "step": 341 + }, + { + "epoch": 0.015478615071283095, + "grad_norm": 0.9330607826133401, + "learning_rate": 5.143288084464555e-06, + "loss": 0.5067, + "step": 342 + }, + { + "epoch": 0.01552387417967866, + "grad_norm": 0.944407245141589, + "learning_rate": 5.158371040723983e-06, + "loss": 0.5558, + "step": 343 + }, + { + "epoch": 0.015569133288074225, + "grad_norm": 1.1333977043998347, + "learning_rate": 5.1734539969834096e-06, + "loss": 0.5319, + "step": 344 + }, + { + "epoch": 0.015614392396469789, + "grad_norm": 0.873397313220358, + "learning_rate": 5.188536953242837e-06, + "loss": 0.5263, + "step": 345 + }, + { + "epoch": 0.015659651504865355, + "grad_norm": 0.763591213995416, + "learning_rate": 5.203619909502263e-06, + "loss": 0.6024, + "step": 346 + }, + { + "epoch": 0.01570491061326092, + "grad_norm": 1.1260558397244373, + "learning_rate": 5.21870286576169e-06, + "loss": 0.5125, + "step": 347 + }, + { + "epoch": 0.015750169721656482, + "grad_norm": 0.9889199273006318, + "learning_rate": 5.233785822021117e-06, + "loss": 0.5169, + "step": 348 + }, + { + "epoch": 0.015795428830052048, + "grad_norm": 0.9146677322541702, + "learning_rate": 5.248868778280544e-06, + "loss": 0.4975, + "step": 349 + }, + { + "epoch": 0.015840687938447614, + "grad_norm": 0.8626251002848274, + "learning_rate": 5.26395173453997e-06, + "loss": 0.4895, + "step": 350 + }, + { + "epoch": 0.015885947046843176, + "grad_norm": 1.009332375677646, + "learning_rate": 5.2790346907993975e-06, + "loss": 0.4803, + "step": 351 + }, + { + "epoch": 0.015931206155238742, + "grad_norm": 1.04132783036604, + "learning_rate": 5.294117647058824e-06, + "loss": 0.5219, + "step": 352 + }, + { + "epoch": 0.015976465263634308, + "grad_norm": 0.8918609702573956, + "learning_rate": 5.309200603318251e-06, + "loss": 0.5251, + "step": 353 + }, + { + "epoch": 0.01602172437202987, + "grad_norm": 0.9777210319694408, + "learning_rate": 5.3242835595776775e-06, + "loss": 0.5295, + "step": 354 + }, + { + "epoch": 0.016066983480425436, + "grad_norm": 0.7434341418735497, + "learning_rate": 5.339366515837105e-06, + "loss": 0.5909, + "step": 355 + }, + { + "epoch": 0.016112242588821, + "grad_norm": 1.1328565365265588, + "learning_rate": 5.354449472096531e-06, + "loss": 0.5164, + "step": 356 + }, + { + "epoch": 0.016157501697216563, + "grad_norm": 0.9334067691411315, + "learning_rate": 5.369532428355958e-06, + "loss": 0.5455, + "step": 357 + }, + { + "epoch": 0.01620276080561213, + "grad_norm": 0.8700250681219853, + "learning_rate": 5.384615384615385e-06, + "loss": 0.5212, + "step": 358 + }, + { + "epoch": 0.016248019914007695, + "grad_norm": 0.8928502707857849, + "learning_rate": 5.399698340874812e-06, + "loss": 0.5332, + "step": 359 + }, + { + "epoch": 0.016293279022403257, + "grad_norm": 1.0109170289938698, + "learning_rate": 5.414781297134238e-06, + "loss": 0.5707, + "step": 360 + }, + { + "epoch": 0.016338538130798823, + "grad_norm": 0.47521107319293504, + "learning_rate": 5.4298642533936655e-06, + "loss": 0.5669, + "step": 361 + }, + { + "epoch": 0.01638379723919439, + "grad_norm": 0.9306733378237048, + "learning_rate": 5.444947209653092e-06, + "loss": 0.4959, + "step": 362 + }, + { + "epoch": 0.01642905634758995, + "grad_norm": 0.9194562783119116, + "learning_rate": 5.460030165912519e-06, + "loss": 0.5333, + "step": 363 + }, + { + "epoch": 0.016474315455985516, + "grad_norm": 1.0013647007578086, + "learning_rate": 5.475113122171946e-06, + "loss": 0.4786, + "step": 364 + }, + { + "epoch": 0.016519574564381082, + "grad_norm": 0.961270745112822, + "learning_rate": 5.4901960784313735e-06, + "loss": 0.5266, + "step": 365 + }, + { + "epoch": 0.016564833672776648, + "grad_norm": 0.9891712982691216, + "learning_rate": 5.505279034690801e-06, + "loss": 0.5228, + "step": 366 + }, + { + "epoch": 0.01661009278117221, + "grad_norm": 0.9361874821945951, + "learning_rate": 5.520361990950227e-06, + "loss": 0.532, + "step": 367 + }, + { + "epoch": 0.016655351889567776, + "grad_norm": 1.01010060145753, + "learning_rate": 5.535444947209654e-06, + "loss": 0.5114, + "step": 368 + }, + { + "epoch": 0.01670061099796334, + "grad_norm": 0.9379850817433849, + "learning_rate": 5.550527903469081e-06, + "loss": 0.5285, + "step": 369 + }, + { + "epoch": 0.016745870106358904, + "grad_norm": 1.016504623607867, + "learning_rate": 5.565610859728508e-06, + "loss": 0.4726, + "step": 370 + }, + { + "epoch": 0.01679112921475447, + "grad_norm": 1.082966973882081, + "learning_rate": 5.580693815987934e-06, + "loss": 0.5037, + "step": 371 + }, + { + "epoch": 0.016836388323150035, + "grad_norm": 0.997881502882024, + "learning_rate": 5.5957767722473614e-06, + "loss": 0.5725, + "step": 372 + }, + { + "epoch": 0.016881647431545597, + "grad_norm": 0.5965032348908443, + "learning_rate": 5.610859728506788e-06, + "loss": 0.592, + "step": 373 + }, + { + "epoch": 0.016926906539941163, + "grad_norm": 0.9535705328720748, + "learning_rate": 5.625942684766215e-06, + "loss": 0.5252, + "step": 374 + }, + { + "epoch": 0.01697216564833673, + "grad_norm": 0.9635724454974052, + "learning_rate": 5.641025641025641e-06, + "loss": 0.5908, + "step": 375 + }, + { + "epoch": 0.01701742475673229, + "grad_norm": 0.920845074054737, + "learning_rate": 5.656108597285069e-06, + "loss": 0.573, + "step": 376 + }, + { + "epoch": 0.017062683865127857, + "grad_norm": 0.8683200822495826, + "learning_rate": 5.671191553544495e-06, + "loss": 0.5179, + "step": 377 + }, + { + "epoch": 0.017107942973523423, + "grad_norm": 0.9308637166603803, + "learning_rate": 5.686274509803922e-06, + "loss": 0.4988, + "step": 378 + }, + { + "epoch": 0.017153202081918985, + "grad_norm": 0.5493833687064147, + "learning_rate": 5.7013574660633486e-06, + "loss": 0.6014, + "step": 379 + }, + { + "epoch": 0.01719846119031455, + "grad_norm": 0.9211708064016626, + "learning_rate": 5.716440422322776e-06, + "loss": 0.5174, + "step": 380 + }, + { + "epoch": 0.017243720298710116, + "grad_norm": 0.936329971148168, + "learning_rate": 5.731523378582202e-06, + "loss": 0.5162, + "step": 381 + }, + { + "epoch": 0.01728897940710568, + "grad_norm": 1.064735933806856, + "learning_rate": 5.746606334841629e-06, + "loss": 0.4509, + "step": 382 + }, + { + "epoch": 0.017334238515501244, + "grad_norm": 0.9441184309473762, + "learning_rate": 5.761689291101056e-06, + "loss": 0.531, + "step": 383 + }, + { + "epoch": 0.01737949762389681, + "grad_norm": 0.891932270187907, + "learning_rate": 5.776772247360483e-06, + "loss": 0.5829, + "step": 384 + }, + { + "epoch": 0.017424756732292372, + "grad_norm": 0.5207360378802349, + "learning_rate": 5.79185520361991e-06, + "loss": 0.5725, + "step": 385 + }, + { + "epoch": 0.017470015840687938, + "grad_norm": 0.9828487126912975, + "learning_rate": 5.806938159879337e-06, + "loss": 0.5431, + "step": 386 + }, + { + "epoch": 0.017515274949083504, + "grad_norm": 0.879686558754779, + "learning_rate": 5.822021116138764e-06, + "loss": 0.571, + "step": 387 + }, + { + "epoch": 0.01756053405747907, + "grad_norm": 0.867256262719138, + "learning_rate": 5.837104072398191e-06, + "loss": 0.4815, + "step": 388 + }, + { + "epoch": 0.01760579316587463, + "grad_norm": 0.9583720954401007, + "learning_rate": 5.852187028657617e-06, + "loss": 0.571, + "step": 389 + }, + { + "epoch": 0.017651052274270197, + "grad_norm": 0.8976973382623153, + "learning_rate": 5.8672699849170446e-06, + "loss": 0.5015, + "step": 390 + }, + { + "epoch": 0.017696311382665763, + "grad_norm": 0.8935749651867885, + "learning_rate": 5.882352941176471e-06, + "loss": 0.4882, + "step": 391 + }, + { + "epoch": 0.017741570491061325, + "grad_norm": 0.8648779834677215, + "learning_rate": 5.897435897435898e-06, + "loss": 0.5131, + "step": 392 + }, + { + "epoch": 0.01778682959945689, + "grad_norm": 0.9431042049088775, + "learning_rate": 5.9125188536953245e-06, + "loss": 0.4949, + "step": 393 + }, + { + "epoch": 0.017832088707852457, + "grad_norm": 0.5114510065663831, + "learning_rate": 5.927601809954752e-06, + "loss": 0.5519, + "step": 394 + }, + { + "epoch": 0.01787734781624802, + "grad_norm": 1.0818380636393499, + "learning_rate": 5.942684766214178e-06, + "loss": 0.4862, + "step": 395 + }, + { + "epoch": 0.017922606924643585, + "grad_norm": 0.9493273137564501, + "learning_rate": 5.957767722473605e-06, + "loss": 0.5129, + "step": 396 + }, + { + "epoch": 0.01796786603303915, + "grad_norm": 0.40185565651478844, + "learning_rate": 5.972850678733032e-06, + "loss": 0.5866, + "step": 397 + }, + { + "epoch": 0.018013125141434713, + "grad_norm": 0.8638395089291984, + "learning_rate": 5.987933634992459e-06, + "loss": 0.4719, + "step": 398 + }, + { + "epoch": 0.018058384249830278, + "grad_norm": 1.0007749499746714, + "learning_rate": 6.003016591251885e-06, + "loss": 0.5821, + "step": 399 + }, + { + "epoch": 0.018103643358225844, + "grad_norm": 0.5148875748111391, + "learning_rate": 6.0180995475113125e-06, + "loss": 0.6172, + "step": 400 + }, + { + "epoch": 0.018148902466621406, + "grad_norm": 0.4632624398180578, + "learning_rate": 6.033182503770739e-06, + "loss": 0.5892, + "step": 401 + }, + { + "epoch": 0.018194161575016972, + "grad_norm": 0.9138373595599119, + "learning_rate": 6.048265460030166e-06, + "loss": 0.5006, + "step": 402 + }, + { + "epoch": 0.018239420683412538, + "grad_norm": 1.002764279783093, + "learning_rate": 6.0633484162895924e-06, + "loss": 0.5384, + "step": 403 + }, + { + "epoch": 0.0182846797918081, + "grad_norm": 0.9330664682506379, + "learning_rate": 6.07843137254902e-06, + "loss": 0.4955, + "step": 404 + }, + { + "epoch": 0.018329938900203666, + "grad_norm": 0.8863526835691686, + "learning_rate": 6.093514328808446e-06, + "loss": 0.5018, + "step": 405 + }, + { + "epoch": 0.01837519800859923, + "grad_norm": 0.8723865491435912, + "learning_rate": 6.108597285067874e-06, + "loss": 0.5304, + "step": 406 + }, + { + "epoch": 0.018420457116994794, + "grad_norm": 0.7326950236339584, + "learning_rate": 6.123680241327301e-06, + "loss": 0.5871, + "step": 407 + }, + { + "epoch": 0.01846571622539036, + "grad_norm": 1.1416361451930475, + "learning_rate": 6.138763197586728e-06, + "loss": 0.5121, + "step": 408 + }, + { + "epoch": 0.018510975333785925, + "grad_norm": 0.9460814774142131, + "learning_rate": 6.153846153846155e-06, + "loss": 0.4931, + "step": 409 + }, + { + "epoch": 0.01855623444218149, + "grad_norm": 0.43571051461036575, + "learning_rate": 6.168929110105581e-06, + "loss": 0.577, + "step": 410 + }, + { + "epoch": 0.018601493550577053, + "grad_norm": 0.9005078431887299, + "learning_rate": 6.1840120663650085e-06, + "loss": 0.5226, + "step": 411 + }, + { + "epoch": 0.01864675265897262, + "grad_norm": 1.0887276487488817, + "learning_rate": 6.199095022624435e-06, + "loss": 0.4509, + "step": 412 + }, + { + "epoch": 0.018692011767368184, + "grad_norm": 0.8428854780274067, + "learning_rate": 6.214177978883862e-06, + "loss": 0.4858, + "step": 413 + }, + { + "epoch": 0.018737270875763747, + "grad_norm": 0.9252658031535186, + "learning_rate": 6.229260935143288e-06, + "loss": 0.4748, + "step": 414 + }, + { + "epoch": 0.018782529984159312, + "grad_norm": 0.9412040538128673, + "learning_rate": 6.244343891402716e-06, + "loss": 0.5015, + "step": 415 + }, + { + "epoch": 0.018827789092554878, + "grad_norm": 0.9138434969166862, + "learning_rate": 6.259426847662142e-06, + "loss": 0.4821, + "step": 416 + }, + { + "epoch": 0.01887304820095044, + "grad_norm": 0.8656063917318203, + "learning_rate": 6.274509803921569e-06, + "loss": 0.5217, + "step": 417 + }, + { + "epoch": 0.018918307309346006, + "grad_norm": 0.94991968312089, + "learning_rate": 6.2895927601809956e-06, + "loss": 0.5392, + "step": 418 + }, + { + "epoch": 0.01896356641774157, + "grad_norm": 0.9372279572345605, + "learning_rate": 6.304675716440423e-06, + "loss": 0.4799, + "step": 419 + }, + { + "epoch": 0.019008825526137134, + "grad_norm": 0.9533178482186907, + "learning_rate": 6.319758672699849e-06, + "loss": 0.5858, + "step": 420 + }, + { + "epoch": 0.0190540846345327, + "grad_norm": 0.9034610556331467, + "learning_rate": 6.334841628959276e-06, + "loss": 0.5204, + "step": 421 + }, + { + "epoch": 0.019099343742928265, + "grad_norm": 0.9342840894396373, + "learning_rate": 6.349924585218703e-06, + "loss": 0.5085, + "step": 422 + }, + { + "epoch": 0.019144602851323828, + "grad_norm": 0.8600603730023817, + "learning_rate": 6.36500754147813e-06, + "loss": 0.4955, + "step": 423 + }, + { + "epoch": 0.019189861959719393, + "grad_norm": 0.6397248799825535, + "learning_rate": 6.380090497737556e-06, + "loss": 0.5843, + "step": 424 + }, + { + "epoch": 0.01923512106811496, + "grad_norm": 1.0534317002502531, + "learning_rate": 6.3951734539969835e-06, + "loss": 0.512, + "step": 425 + }, + { + "epoch": 0.01928038017651052, + "grad_norm": 0.9708662444314573, + "learning_rate": 6.410256410256412e-06, + "loss": 0.5013, + "step": 426 + }, + { + "epoch": 0.019325639284906087, + "grad_norm": 0.6262308260974387, + "learning_rate": 6.425339366515838e-06, + "loss": 0.5656, + "step": 427 + }, + { + "epoch": 0.019370898393301653, + "grad_norm": 1.104132946757567, + "learning_rate": 6.440422322775265e-06, + "loss": 0.5492, + "step": 428 + }, + { + "epoch": 0.019416157501697215, + "grad_norm": 1.0780508879991042, + "learning_rate": 6.4555052790346916e-06, + "loss": 0.513, + "step": 429 + }, + { + "epoch": 0.01946141661009278, + "grad_norm": 0.8014315963924707, + "learning_rate": 6.470588235294119e-06, + "loss": 0.5113, + "step": 430 + }, + { + "epoch": 0.019506675718488346, + "grad_norm": 0.9294933497639883, + "learning_rate": 6.485671191553545e-06, + "loss": 0.5298, + "step": 431 + }, + { + "epoch": 0.019551934826883912, + "grad_norm": 1.0134726540671148, + "learning_rate": 6.500754147812972e-06, + "loss": 0.4905, + "step": 432 + }, + { + "epoch": 0.019597193935279474, + "grad_norm": 0.935200214361148, + "learning_rate": 6.515837104072399e-06, + "loss": 0.5372, + "step": 433 + }, + { + "epoch": 0.01964245304367504, + "grad_norm": 0.9332971164790316, + "learning_rate": 6.530920060331826e-06, + "loss": 0.463, + "step": 434 + }, + { + "epoch": 0.019687712152070606, + "grad_norm": 0.867110760375185, + "learning_rate": 6.546003016591252e-06, + "loss": 0.5862, + "step": 435 + }, + { + "epoch": 0.019732971260466168, + "grad_norm": 0.8950935667741385, + "learning_rate": 6.5610859728506795e-06, + "loss": 0.5442, + "step": 436 + }, + { + "epoch": 0.019778230368861734, + "grad_norm": 0.9721133660738163, + "learning_rate": 6.576168929110106e-06, + "loss": 0.4917, + "step": 437 + }, + { + "epoch": 0.0198234894772573, + "grad_norm": 0.8960955325001373, + "learning_rate": 6.591251885369533e-06, + "loss": 0.49, + "step": 438 + }, + { + "epoch": 0.01986874858565286, + "grad_norm": 0.9513126805609666, + "learning_rate": 6.6063348416289595e-06, + "loss": 0.5162, + "step": 439 + }, + { + "epoch": 0.019914007694048427, + "grad_norm": 0.9823253412027514, + "learning_rate": 6.621417797888387e-06, + "loss": 0.5152, + "step": 440 + }, + { + "epoch": 0.019959266802443993, + "grad_norm": 0.9045371589165514, + "learning_rate": 6.636500754147813e-06, + "loss": 0.527, + "step": 441 + }, + { + "epoch": 0.020004525910839555, + "grad_norm": 0.8782531559250729, + "learning_rate": 6.65158371040724e-06, + "loss": 0.5669, + "step": 442 + }, + { + "epoch": 0.02004978501923512, + "grad_norm": 0.8859706150934012, + "learning_rate": 6.666666666666667e-06, + "loss": 0.4585, + "step": 443 + }, + { + "epoch": 0.020095044127630687, + "grad_norm": 0.6991315890774688, + "learning_rate": 6.681749622926094e-06, + "loss": 0.5682, + "step": 444 + }, + { + "epoch": 0.02014030323602625, + "grad_norm": 0.9485402451951149, + "learning_rate": 6.69683257918552e-06, + "loss": 0.5208, + "step": 445 + }, + { + "epoch": 0.020185562344421815, + "grad_norm": 0.9873924486992607, + "learning_rate": 6.7119155354449474e-06, + "loss": 0.5101, + "step": 446 + }, + { + "epoch": 0.02023082145281738, + "grad_norm": 0.9276034420485871, + "learning_rate": 6.7269984917043755e-06, + "loss": 0.5268, + "step": 447 + }, + { + "epoch": 0.020276080561212943, + "grad_norm": 0.9603097707029032, + "learning_rate": 6.742081447963802e-06, + "loss": 0.5419, + "step": 448 + }, + { + "epoch": 0.02032133966960851, + "grad_norm": 0.9290487002204546, + "learning_rate": 6.757164404223229e-06, + "loss": 0.527, + "step": 449 + }, + { + "epoch": 0.020366598778004074, + "grad_norm": 1.0113822614648986, + "learning_rate": 6.7722473604826555e-06, + "loss": 0.486, + "step": 450 + }, + { + "epoch": 0.020411857886399636, + "grad_norm": 0.9560484644844321, + "learning_rate": 6.787330316742083e-06, + "loss": 0.5126, + "step": 451 + }, + { + "epoch": 0.020457116994795202, + "grad_norm": 0.9513817484526905, + "learning_rate": 6.802413273001509e-06, + "loss": 0.5092, + "step": 452 + }, + { + "epoch": 0.020502376103190768, + "grad_norm": 1.0341828220958489, + "learning_rate": 6.817496229260936e-06, + "loss": 0.4636, + "step": 453 + }, + { + "epoch": 0.020547635211586333, + "grad_norm": 0.836876325711259, + "learning_rate": 6.832579185520363e-06, + "loss": 0.4935, + "step": 454 + }, + { + "epoch": 0.020592894319981896, + "grad_norm": 0.7154257246692087, + "learning_rate": 6.84766214177979e-06, + "loss": 0.5816, + "step": 455 + }, + { + "epoch": 0.02063815342837746, + "grad_norm": 0.8883803720828303, + "learning_rate": 6.862745098039216e-06, + "loss": 0.525, + "step": 456 + }, + { + "epoch": 0.020683412536773027, + "grad_norm": 0.8812155648935334, + "learning_rate": 6.8778280542986434e-06, + "loss": 0.4803, + "step": 457 + }, + { + "epoch": 0.02072867164516859, + "grad_norm": 0.8914599212352133, + "learning_rate": 6.89291101055807e-06, + "loss": 0.5146, + "step": 458 + }, + { + "epoch": 0.020773930753564155, + "grad_norm": 0.9610108620275446, + "learning_rate": 6.907993966817497e-06, + "loss": 0.4794, + "step": 459 + }, + { + "epoch": 0.02081918986195972, + "grad_norm": 0.9236182851861297, + "learning_rate": 6.923076923076923e-06, + "loss": 0.5321, + "step": 460 + }, + { + "epoch": 0.020864448970355283, + "grad_norm": 0.7895517788163228, + "learning_rate": 6.938159879336351e-06, + "loss": 0.6144, + "step": 461 + }, + { + "epoch": 0.02090970807875085, + "grad_norm": 1.4224341444472233, + "learning_rate": 6.953242835595777e-06, + "loss": 0.558, + "step": 462 + }, + { + "epoch": 0.020954967187146414, + "grad_norm": 0.8534888875322985, + "learning_rate": 6.968325791855204e-06, + "loss": 0.5194, + "step": 463 + }, + { + "epoch": 0.021000226295541977, + "grad_norm": 0.8878493254499618, + "learning_rate": 6.9834087481146306e-06, + "loss": 0.484, + "step": 464 + }, + { + "epoch": 0.021045485403937542, + "grad_norm": 0.7689726276830536, + "learning_rate": 6.998491704374058e-06, + "loss": 0.4687, + "step": 465 + }, + { + "epoch": 0.021090744512333108, + "grad_norm": 0.8431041350728392, + "learning_rate": 7.013574660633484e-06, + "loss": 0.4896, + "step": 466 + }, + { + "epoch": 0.02113600362072867, + "grad_norm": 0.9131741584792719, + "learning_rate": 7.028657616892911e-06, + "loss": 0.5267, + "step": 467 + }, + { + "epoch": 0.021181262729124236, + "grad_norm": 0.7000414706451139, + "learning_rate": 7.0437405731523386e-06, + "loss": 0.5891, + "step": 468 + }, + { + "epoch": 0.0212265218375198, + "grad_norm": 0.5274632467798588, + "learning_rate": 7.058823529411766e-06, + "loss": 0.5661, + "step": 469 + }, + { + "epoch": 0.021271780945915364, + "grad_norm": 1.0489074712596098, + "learning_rate": 7.073906485671192e-06, + "loss": 0.5283, + "step": 470 + }, + { + "epoch": 0.02131704005431093, + "grad_norm": 0.8608740580398528, + "learning_rate": 7.088989441930619e-06, + "loss": 0.4743, + "step": 471 + }, + { + "epoch": 0.021362299162706495, + "grad_norm": 0.8993120418263926, + "learning_rate": 7.104072398190046e-06, + "loss": 0.5414, + "step": 472 + }, + { + "epoch": 0.021407558271102058, + "grad_norm": 0.8519655085775868, + "learning_rate": 7.119155354449473e-06, + "loss": 0.4785, + "step": 473 + }, + { + "epoch": 0.021452817379497623, + "grad_norm": 0.9992364234055222, + "learning_rate": 7.134238310708899e-06, + "loss": 0.4768, + "step": 474 + }, + { + "epoch": 0.02149807648789319, + "grad_norm": 0.8211878630216686, + "learning_rate": 7.1493212669683265e-06, + "loss": 0.512, + "step": 475 + }, + { + "epoch": 0.021543335596288755, + "grad_norm": 0.8673840729651399, + "learning_rate": 7.164404223227753e-06, + "loss": 0.4972, + "step": 476 + }, + { + "epoch": 0.021588594704684317, + "grad_norm": 0.9626081409011095, + "learning_rate": 7.17948717948718e-06, + "loss": 0.5009, + "step": 477 + }, + { + "epoch": 0.021633853813079883, + "grad_norm": 0.9324671863783013, + "learning_rate": 7.1945701357466065e-06, + "loss": 0.5291, + "step": 478 + }, + { + "epoch": 0.02167911292147545, + "grad_norm": 1.6119787378474646, + "learning_rate": 7.209653092006034e-06, + "loss": 0.5974, + "step": 479 + }, + { + "epoch": 0.02172437202987101, + "grad_norm": 0.8936322895814439, + "learning_rate": 7.22473604826546e-06, + "loss": 0.4819, + "step": 480 + }, + { + "epoch": 0.021769631138266576, + "grad_norm": 1.1833559492371089, + "learning_rate": 7.239819004524887e-06, + "loss": 0.5015, + "step": 481 + }, + { + "epoch": 0.021814890246662142, + "grad_norm": 0.9542089624937165, + "learning_rate": 7.2549019607843145e-06, + "loss": 0.5349, + "step": 482 + }, + { + "epoch": 0.021860149355057704, + "grad_norm": 0.8717232550801574, + "learning_rate": 7.269984917043741e-06, + "loss": 0.4842, + "step": 483 + }, + { + "epoch": 0.02190540846345327, + "grad_norm": 0.8784660417369917, + "learning_rate": 7.285067873303168e-06, + "loss": 0.4522, + "step": 484 + }, + { + "epoch": 0.021950667571848836, + "grad_norm": 0.8714297614485044, + "learning_rate": 7.3001508295625945e-06, + "loss": 0.486, + "step": 485 + }, + { + "epoch": 0.021995926680244398, + "grad_norm": 1.0117867219550658, + "learning_rate": 7.315233785822022e-06, + "loss": 0.518, + "step": 486 + }, + { + "epoch": 0.022041185788639964, + "grad_norm": 0.8551402658731775, + "learning_rate": 7.330316742081448e-06, + "loss": 0.5309, + "step": 487 + }, + { + "epoch": 0.02208644489703553, + "grad_norm": 0.8520731544633671, + "learning_rate": 7.345399698340876e-06, + "loss": 0.4849, + "step": 488 + }, + { + "epoch": 0.02213170400543109, + "grad_norm": 0.945220429406945, + "learning_rate": 7.3604826546003025e-06, + "loss": 0.5183, + "step": 489 + }, + { + "epoch": 0.022176963113826657, + "grad_norm": 0.8965594690346885, + "learning_rate": 7.37556561085973e-06, + "loss": 0.5068, + "step": 490 + }, + { + "epoch": 0.022222222222222223, + "grad_norm": 0.9383416027268775, + "learning_rate": 7.390648567119156e-06, + "loss": 0.5172, + "step": 491 + }, + { + "epoch": 0.022267481330617785, + "grad_norm": 2.8137379093185415, + "learning_rate": 7.405731523378583e-06, + "loss": 0.6656, + "step": 492 + }, + { + "epoch": 0.02231274043901335, + "grad_norm": 0.8911437812376426, + "learning_rate": 7.42081447963801e-06, + "loss": 0.517, + "step": 493 + }, + { + "epoch": 0.022357999547408917, + "grad_norm": 0.8746627228365279, + "learning_rate": 7.435897435897437e-06, + "loss": 0.476, + "step": 494 + }, + { + "epoch": 0.02240325865580448, + "grad_norm": 1.0210061302597564, + "learning_rate": 7.450980392156863e-06, + "loss": 0.4964, + "step": 495 + }, + { + "epoch": 0.022448517764200045, + "grad_norm": 1.3930772745192121, + "learning_rate": 7.4660633484162904e-06, + "loss": 0.5769, + "step": 496 + }, + { + "epoch": 0.02249377687259561, + "grad_norm": 0.9354966630918502, + "learning_rate": 7.481146304675717e-06, + "loss": 0.5273, + "step": 497 + }, + { + "epoch": 0.022539035980991176, + "grad_norm": 1.0404930623959125, + "learning_rate": 7.496229260935144e-06, + "loss": 0.5322, + "step": 498 + }, + { + "epoch": 0.02258429508938674, + "grad_norm": 0.9212989516000655, + "learning_rate": 7.51131221719457e-06, + "loss": 0.5197, + "step": 499 + }, + { + "epoch": 0.022629554197782304, + "grad_norm": 1.1837040152817129, + "learning_rate": 7.526395173453998e-06, + "loss": 0.4601, + "step": 500 + }, + { + "epoch": 0.02267481330617787, + "grad_norm": 0.9831508145374277, + "learning_rate": 7.541478129713424e-06, + "loss": 0.5082, + "step": 501 + }, + { + "epoch": 0.022720072414573432, + "grad_norm": 1.7317500998832773, + "learning_rate": 7.556561085972851e-06, + "loss": 0.6088, + "step": 502 + }, + { + "epoch": 0.022765331522968998, + "grad_norm": 1.4020978387603402, + "learning_rate": 7.5716440422322776e-06, + "loss": 0.5859, + "step": 503 + }, + { + "epoch": 0.022810590631364563, + "grad_norm": 0.9968935655490997, + "learning_rate": 7.586726998491705e-06, + "loss": 0.4931, + "step": 504 + }, + { + "epoch": 0.022855849739760126, + "grad_norm": 0.9737894931726774, + "learning_rate": 7.601809954751131e-06, + "loss": 0.5629, + "step": 505 + }, + { + "epoch": 0.02290110884815569, + "grad_norm": 0.8123883604016281, + "learning_rate": 7.616892911010558e-06, + "loss": 0.5703, + "step": 506 + }, + { + "epoch": 0.022946367956551257, + "grad_norm": 0.9854901638439508, + "learning_rate": 7.631975867269985e-06, + "loss": 0.4761, + "step": 507 + }, + { + "epoch": 0.02299162706494682, + "grad_norm": 1.4197575971354461, + "learning_rate": 7.647058823529411e-06, + "loss": 0.5448, + "step": 508 + }, + { + "epoch": 0.023036886173342385, + "grad_norm": 0.923807733562872, + "learning_rate": 7.66214177978884e-06, + "loss": 0.5432, + "step": 509 + }, + { + "epoch": 0.02308214528173795, + "grad_norm": 0.9263412579857541, + "learning_rate": 7.677224736048267e-06, + "loss": 0.507, + "step": 510 + }, + { + "epoch": 0.023127404390133513, + "grad_norm": 0.854932864691546, + "learning_rate": 7.692307692307694e-06, + "loss": 0.4931, + "step": 511 + }, + { + "epoch": 0.02317266349852908, + "grad_norm": 0.9010694714875057, + "learning_rate": 7.70739064856712e-06, + "loss": 0.5055, + "step": 512 + }, + { + "epoch": 0.023217922606924644, + "grad_norm": 0.9302825746028861, + "learning_rate": 7.722473604826546e-06, + "loss": 0.5165, + "step": 513 + }, + { + "epoch": 0.023263181715320207, + "grad_norm": 0.815629604194157, + "learning_rate": 7.737556561085974e-06, + "loss": 0.5036, + "step": 514 + }, + { + "epoch": 0.023308440823715772, + "grad_norm": 0.873286630615026, + "learning_rate": 7.7526395173454e-06, + "loss": 0.509, + "step": 515 + }, + { + "epoch": 0.023353699932111338, + "grad_norm": 1.3120833057706391, + "learning_rate": 7.767722473604827e-06, + "loss": 0.5643, + "step": 516 + }, + { + "epoch": 0.0233989590405069, + "grad_norm": 0.8369186657274733, + "learning_rate": 7.782805429864253e-06, + "loss": 0.5009, + "step": 517 + }, + { + "epoch": 0.023444218148902466, + "grad_norm": 0.8544449557940702, + "learning_rate": 7.797888386123682e-06, + "loss": 0.4528, + "step": 518 + }, + { + "epoch": 0.023489477257298032, + "grad_norm": 1.0406352425448877, + "learning_rate": 7.812971342383108e-06, + "loss": 0.5267, + "step": 519 + }, + { + "epoch": 0.023534736365693597, + "grad_norm": 0.8706975403009801, + "learning_rate": 7.828054298642534e-06, + "loss": 0.5368, + "step": 520 + }, + { + "epoch": 0.02357999547408916, + "grad_norm": 0.8513980861118772, + "learning_rate": 7.84313725490196e-06, + "loss": 0.476, + "step": 521 + }, + { + "epoch": 0.023625254582484725, + "grad_norm": 0.8850946942100224, + "learning_rate": 7.858220211161389e-06, + "loss": 0.5099, + "step": 522 + }, + { + "epoch": 0.02367051369088029, + "grad_norm": 0.9757505338538881, + "learning_rate": 7.873303167420815e-06, + "loss": 0.5034, + "step": 523 + }, + { + "epoch": 0.023715772799275853, + "grad_norm": 0.8863131755025008, + "learning_rate": 7.888386123680241e-06, + "loss": 0.5835, + "step": 524 + }, + { + "epoch": 0.02376103190767142, + "grad_norm": 0.7221129697816255, + "learning_rate": 7.903469079939668e-06, + "loss": 0.5908, + "step": 525 + }, + { + "epoch": 0.023806291016066985, + "grad_norm": 1.1644628967771642, + "learning_rate": 7.918552036199096e-06, + "loss": 0.5293, + "step": 526 + }, + { + "epoch": 0.023851550124462547, + "grad_norm": 0.9771835472342165, + "learning_rate": 7.933634992458522e-06, + "loss": 0.5114, + "step": 527 + }, + { + "epoch": 0.023896809232858113, + "grad_norm": 0.84653531874801, + "learning_rate": 7.948717948717949e-06, + "loss": 0.5591, + "step": 528 + }, + { + "epoch": 0.02394206834125368, + "grad_norm": 1.1156798418233669, + "learning_rate": 7.963800904977375e-06, + "loss": 0.4927, + "step": 529 + }, + { + "epoch": 0.02398732744964924, + "grad_norm": 0.6547282981964502, + "learning_rate": 7.978883861236803e-06, + "loss": 0.5492, + "step": 530 + }, + { + "epoch": 0.024032586558044806, + "grad_norm": 0.4978885832627992, + "learning_rate": 7.993966817496231e-06, + "loss": 0.553, + "step": 531 + }, + { + "epoch": 0.024077845666440372, + "grad_norm": 0.9638003747027917, + "learning_rate": 8.009049773755657e-06, + "loss": 0.4927, + "step": 532 + }, + { + "epoch": 0.024123104774835934, + "grad_norm": 0.9485547114845543, + "learning_rate": 8.024132730015084e-06, + "loss": 0.5389, + "step": 533 + }, + { + "epoch": 0.0241683638832315, + "grad_norm": 0.970251325477242, + "learning_rate": 8.03921568627451e-06, + "loss": 0.5218, + "step": 534 + }, + { + "epoch": 0.024213622991627066, + "grad_norm": 1.066770352433271, + "learning_rate": 8.054298642533938e-06, + "loss": 0.5245, + "step": 535 + }, + { + "epoch": 0.024258882100022628, + "grad_norm": 1.2508405584119022, + "learning_rate": 8.069381598793365e-06, + "loss": 0.5851, + "step": 536 + }, + { + "epoch": 0.024304141208418194, + "grad_norm": 0.8977303585116398, + "learning_rate": 8.084464555052791e-06, + "loss": 0.4804, + "step": 537 + }, + { + "epoch": 0.02434940031681376, + "grad_norm": 0.8550682869491487, + "learning_rate": 8.099547511312217e-06, + "loss": 0.4385, + "step": 538 + }, + { + "epoch": 0.02439465942520932, + "grad_norm": 0.5604442567533277, + "learning_rate": 8.114630467571645e-06, + "loss": 0.5873, + "step": 539 + }, + { + "epoch": 0.024439918533604887, + "grad_norm": 1.0044342170560538, + "learning_rate": 8.129713423831072e-06, + "loss": 0.4735, + "step": 540 + }, + { + "epoch": 0.024485177642000453, + "grad_norm": 0.6410626077944116, + "learning_rate": 8.144796380090498e-06, + "loss": 0.5873, + "step": 541 + }, + { + "epoch": 0.02453043675039602, + "grad_norm": 0.9384899463609325, + "learning_rate": 8.159879336349925e-06, + "loss": 0.4954, + "step": 542 + }, + { + "epoch": 0.02457569585879158, + "grad_norm": 0.9117160909143945, + "learning_rate": 8.174962292609353e-06, + "loss": 0.5302, + "step": 543 + }, + { + "epoch": 0.024620954967187147, + "grad_norm": 0.9444687825633534, + "learning_rate": 8.190045248868779e-06, + "loss": 0.5089, + "step": 544 + }, + { + "epoch": 0.024666214075582713, + "grad_norm": 0.8911153707648989, + "learning_rate": 8.205128205128205e-06, + "loss": 0.4722, + "step": 545 + }, + { + "epoch": 0.024711473183978275, + "grad_norm": 0.8783990335001564, + "learning_rate": 8.220211161387632e-06, + "loss": 0.5297, + "step": 546 + }, + { + "epoch": 0.02475673229237384, + "grad_norm": 0.9749831735866099, + "learning_rate": 8.23529411764706e-06, + "loss": 0.5089, + "step": 547 + }, + { + "epoch": 0.024801991400769406, + "grad_norm": 0.9534394716345836, + "learning_rate": 8.250377073906486e-06, + "loss": 0.4903, + "step": 548 + }, + { + "epoch": 0.02484725050916497, + "grad_norm": 0.986230968075295, + "learning_rate": 8.265460030165913e-06, + "loss": 0.5271, + "step": 549 + }, + { + "epoch": 0.024892509617560534, + "grad_norm": 0.9445929420045507, + "learning_rate": 8.280542986425339e-06, + "loss": 0.5049, + "step": 550 + }, + { + "epoch": 0.0249377687259561, + "grad_norm": 0.8445435072400197, + "learning_rate": 8.295625942684767e-06, + "loss": 0.4706, + "step": 551 + }, + { + "epoch": 0.024983027834351662, + "grad_norm": 0.8146518680172017, + "learning_rate": 8.310708898944195e-06, + "loss": 0.495, + "step": 552 + }, + { + "epoch": 0.025028286942747228, + "grad_norm": 1.2530851684753053, + "learning_rate": 8.325791855203621e-06, + "loss": 0.5775, + "step": 553 + }, + { + "epoch": 0.025073546051142794, + "grad_norm": 0.8546866880086258, + "learning_rate": 8.340874811463048e-06, + "loss": 0.6055, + "step": 554 + }, + { + "epoch": 0.025118805159538356, + "grad_norm": 1.1621279479943107, + "learning_rate": 8.355957767722474e-06, + "loss": 0.5157, + "step": 555 + }, + { + "epoch": 0.02516406426793392, + "grad_norm": 0.9994237527753784, + "learning_rate": 8.371040723981902e-06, + "loss": 0.4294, + "step": 556 + }, + { + "epoch": 0.025209323376329487, + "grad_norm": 0.8970428825907083, + "learning_rate": 8.386123680241329e-06, + "loss": 0.4898, + "step": 557 + }, + { + "epoch": 0.02525458248472505, + "grad_norm": 1.1646020881114376, + "learning_rate": 8.401206636500755e-06, + "loss": 0.5023, + "step": 558 + }, + { + "epoch": 0.025299841593120615, + "grad_norm": 1.0114259306681477, + "learning_rate": 8.416289592760181e-06, + "loss": 0.4947, + "step": 559 + }, + { + "epoch": 0.02534510070151618, + "grad_norm": 1.0786177518368725, + "learning_rate": 8.43137254901961e-06, + "loss": 0.4837, + "step": 560 + }, + { + "epoch": 0.025390359809911743, + "grad_norm": 1.1702295424811555, + "learning_rate": 8.446455505279036e-06, + "loss": 0.5464, + "step": 561 + }, + { + "epoch": 0.02543561891830731, + "grad_norm": 0.9776929278671919, + "learning_rate": 8.461538461538462e-06, + "loss": 0.5263, + "step": 562 + }, + { + "epoch": 0.025480878026702874, + "grad_norm": 2.158204647935258, + "learning_rate": 8.476621417797888e-06, + "loss": 0.5842, + "step": 563 + }, + { + "epoch": 0.02552613713509844, + "grad_norm": 1.2324208092362534, + "learning_rate": 8.491704374057317e-06, + "loss": 0.5163, + "step": 564 + }, + { + "epoch": 0.025571396243494002, + "grad_norm": 1.1914425382161538, + "learning_rate": 8.506787330316743e-06, + "loss": 0.5762, + "step": 565 + }, + { + "epoch": 0.025616655351889568, + "grad_norm": 0.9005565179098027, + "learning_rate": 8.52187028657617e-06, + "loss": 0.4902, + "step": 566 + }, + { + "epoch": 0.025661914460285134, + "grad_norm": 0.8924173594211934, + "learning_rate": 8.536953242835596e-06, + "loss": 0.5824, + "step": 567 + }, + { + "epoch": 0.025707173568680696, + "grad_norm": 1.0033799904869471, + "learning_rate": 8.552036199095024e-06, + "loss": 0.5912, + "step": 568 + }, + { + "epoch": 0.025752432677076262, + "grad_norm": 1.3324490342886244, + "learning_rate": 8.56711915535445e-06, + "loss": 0.5026, + "step": 569 + }, + { + "epoch": 0.025797691785471828, + "grad_norm": 1.013502674657888, + "learning_rate": 8.582202111613876e-06, + "loss": 0.4814, + "step": 570 + }, + { + "epoch": 0.02584295089386739, + "grad_norm": 1.0831743227926793, + "learning_rate": 8.597285067873304e-06, + "loss": 0.4891, + "step": 571 + }, + { + "epoch": 0.025888210002262955, + "grad_norm": 1.2332349946569197, + "learning_rate": 8.612368024132731e-06, + "loss": 0.4562, + "step": 572 + }, + { + "epoch": 0.02593346911065852, + "grad_norm": 0.9177188668689421, + "learning_rate": 8.627450980392157e-06, + "loss": 0.4895, + "step": 573 + }, + { + "epoch": 0.025978728219054083, + "grad_norm": 1.1644222868364908, + "learning_rate": 8.642533936651585e-06, + "loss": 0.5196, + "step": 574 + }, + { + "epoch": 0.02602398732744965, + "grad_norm": 3.131192676139937, + "learning_rate": 8.657616892911012e-06, + "loss": 0.6046, + "step": 575 + }, + { + "epoch": 0.026069246435845215, + "grad_norm": 1.5353274640022994, + "learning_rate": 8.672699849170438e-06, + "loss": 0.541, + "step": 576 + }, + { + "epoch": 0.026114505544240777, + "grad_norm": 1.0917678418917096, + "learning_rate": 8.687782805429864e-06, + "loss": 0.5182, + "step": 577 + }, + { + "epoch": 0.026159764652636343, + "grad_norm": 1.5763631164913774, + "learning_rate": 8.702865761689292e-06, + "loss": 0.5366, + "step": 578 + }, + { + "epoch": 0.02620502376103191, + "grad_norm": 1.0361965455336255, + "learning_rate": 8.717948717948719e-06, + "loss": 0.5253, + "step": 579 + }, + { + "epoch": 0.02625028286942747, + "grad_norm": 1.083518580000751, + "learning_rate": 8.733031674208145e-06, + "loss": 0.5001, + "step": 580 + }, + { + "epoch": 0.026295541977823036, + "grad_norm": 1.155423558488019, + "learning_rate": 8.748114630467572e-06, + "loss": 0.5259, + "step": 581 + }, + { + "epoch": 0.026340801086218602, + "grad_norm": 1.0320656726255666, + "learning_rate": 8.763197586727e-06, + "loss": 0.5254, + "step": 582 + }, + { + "epoch": 0.026386060194614164, + "grad_norm": 1.2339557419740619, + "learning_rate": 8.778280542986426e-06, + "loss": 0.5735, + "step": 583 + }, + { + "epoch": 0.02643131930300973, + "grad_norm": 0.9770656904444553, + "learning_rate": 8.793363499245852e-06, + "loss": 0.5164, + "step": 584 + }, + { + "epoch": 0.026476578411405296, + "grad_norm": 1.2688613297497584, + "learning_rate": 8.808446455505279e-06, + "loss": 0.487, + "step": 585 + }, + { + "epoch": 0.02652183751980086, + "grad_norm": 1.034534302426547, + "learning_rate": 8.823529411764707e-06, + "loss": 0.5501, + "step": 586 + }, + { + "epoch": 0.026567096628196424, + "grad_norm": 1.0523407452463565, + "learning_rate": 8.838612368024133e-06, + "loss": 0.5342, + "step": 587 + }, + { + "epoch": 0.02661235573659199, + "grad_norm": 1.0254835996160458, + "learning_rate": 8.85369532428356e-06, + "loss": 0.484, + "step": 588 + }, + { + "epoch": 0.026657614844987555, + "grad_norm": 0.9622678518511545, + "learning_rate": 8.868778280542986e-06, + "loss": 0.5375, + "step": 589 + }, + { + "epoch": 0.026702873953383117, + "grad_norm": 0.9677691980588815, + "learning_rate": 8.883861236802414e-06, + "loss": 0.5067, + "step": 590 + }, + { + "epoch": 0.026748133061778683, + "grad_norm": 0.9627736768818012, + "learning_rate": 8.89894419306184e-06, + "loss": 0.5187, + "step": 591 + }, + { + "epoch": 0.02679339217017425, + "grad_norm": 0.9440199612227524, + "learning_rate": 8.914027149321268e-06, + "loss": 0.4758, + "step": 592 + }, + { + "epoch": 0.02683865127856981, + "grad_norm": 0.93767786410755, + "learning_rate": 8.929110105580695e-06, + "loss": 0.5087, + "step": 593 + }, + { + "epoch": 0.026883910386965377, + "grad_norm": 1.1964384917445936, + "learning_rate": 8.944193061840121e-06, + "loss": 0.5766, + "step": 594 + }, + { + "epoch": 0.026929169495360943, + "grad_norm": 1.0117724924229, + "learning_rate": 8.95927601809955e-06, + "loss": 0.4784, + "step": 595 + }, + { + "epoch": 0.026974428603756505, + "grad_norm": 0.9925820906569525, + "learning_rate": 8.974358974358976e-06, + "loss": 0.4617, + "step": 596 + }, + { + "epoch": 0.02701968771215207, + "grad_norm": 0.6677243498813, + "learning_rate": 8.989441930618402e-06, + "loss": 0.5879, + "step": 597 + }, + { + "epoch": 0.027064946820547636, + "grad_norm": 0.9912709074709852, + "learning_rate": 9.004524886877828e-06, + "loss": 0.5333, + "step": 598 + }, + { + "epoch": 0.0271102059289432, + "grad_norm": 1.0272835433313774, + "learning_rate": 9.019607843137256e-06, + "loss": 0.521, + "step": 599 + }, + { + "epoch": 0.027155465037338764, + "grad_norm": 0.683172105248878, + "learning_rate": 9.034690799396683e-06, + "loss": 0.5702, + "step": 600 + }, + { + "epoch": 0.02720072414573433, + "grad_norm": 0.8123114594776241, + "learning_rate": 9.049773755656109e-06, + "loss": 0.4317, + "step": 601 + }, + { + "epoch": 0.027245983254129892, + "grad_norm": 0.8680303400075743, + "learning_rate": 9.064856711915535e-06, + "loss": 0.4948, + "step": 602 + }, + { + "epoch": 0.027291242362525458, + "grad_norm": 0.8525579255911381, + "learning_rate": 9.079939668174964e-06, + "loss": 0.5081, + "step": 603 + }, + { + "epoch": 0.027336501470921024, + "grad_norm": 0.8102455407975535, + "learning_rate": 9.09502262443439e-06, + "loss": 0.474, + "step": 604 + }, + { + "epoch": 0.027381760579316586, + "grad_norm": 0.8259249514304119, + "learning_rate": 9.110105580693816e-06, + "loss": 0.5591, + "step": 605 + }, + { + "epoch": 0.02742701968771215, + "grad_norm": 0.8613389960609926, + "learning_rate": 9.125188536953243e-06, + "loss": 0.4717, + "step": 606 + }, + { + "epoch": 0.027472278796107717, + "grad_norm": 0.6540097401182121, + "learning_rate": 9.14027149321267e-06, + "loss": 0.5509, + "step": 607 + }, + { + "epoch": 0.027517537904503283, + "grad_norm": 0.9461405945810982, + "learning_rate": 9.155354449472097e-06, + "loss": 0.4877, + "step": 608 + }, + { + "epoch": 0.027562797012898845, + "grad_norm": 0.9013544304168348, + "learning_rate": 9.170437405731523e-06, + "loss": 0.5194, + "step": 609 + }, + { + "epoch": 0.02760805612129441, + "grad_norm": 1.1402918439029173, + "learning_rate": 9.18552036199095e-06, + "loss": 0.4829, + "step": 610 + }, + { + "epoch": 0.027653315229689977, + "grad_norm": 0.9047977925822974, + "learning_rate": 9.200603318250378e-06, + "loss": 0.5196, + "step": 611 + }, + { + "epoch": 0.02769857433808554, + "grad_norm": 0.9207106618040918, + "learning_rate": 9.215686274509804e-06, + "loss": 0.4846, + "step": 612 + }, + { + "epoch": 0.027743833446481105, + "grad_norm": 0.8252106661434595, + "learning_rate": 9.230769230769232e-06, + "loss": 0.46, + "step": 613 + }, + { + "epoch": 0.02778909255487667, + "grad_norm": 0.9010037438995498, + "learning_rate": 9.245852187028659e-06, + "loss": 0.5667, + "step": 614 + }, + { + "epoch": 0.027834351663272233, + "grad_norm": 0.8647698992314747, + "learning_rate": 9.260935143288085e-06, + "loss": 0.4788, + "step": 615 + }, + { + "epoch": 0.027879610771667798, + "grad_norm": 0.9697514771488839, + "learning_rate": 9.276018099547513e-06, + "loss": 0.5116, + "step": 616 + }, + { + "epoch": 0.027924869880063364, + "grad_norm": 0.8975155881110004, + "learning_rate": 9.29110105580694e-06, + "loss": 0.5, + "step": 617 + }, + { + "epoch": 0.027970128988458926, + "grad_norm": 0.9331083075291978, + "learning_rate": 9.306184012066366e-06, + "loss": 0.4704, + "step": 618 + }, + { + "epoch": 0.028015388096854492, + "grad_norm": 0.8356134579308436, + "learning_rate": 9.321266968325792e-06, + "loss": 0.466, + "step": 619 + }, + { + "epoch": 0.028060647205250058, + "grad_norm": 1.0477143205408235, + "learning_rate": 9.33634992458522e-06, + "loss": 0.4672, + "step": 620 + }, + { + "epoch": 0.02810590631364562, + "grad_norm": 1.4144678930589356, + "learning_rate": 9.351432880844647e-06, + "loss": 0.5469, + "step": 621 + }, + { + "epoch": 0.028151165422041186, + "grad_norm": 0.7897408114102736, + "learning_rate": 9.366515837104073e-06, + "loss": 0.5242, + "step": 622 + }, + { + "epoch": 0.02819642453043675, + "grad_norm": 0.9966835571160938, + "learning_rate": 9.3815987933635e-06, + "loss": 0.5051, + "step": 623 + }, + { + "epoch": 0.028241683638832314, + "grad_norm": 1.0012891418820922, + "learning_rate": 9.396681749622927e-06, + "loss": 0.4974, + "step": 624 + }, + { + "epoch": 0.02828694274722788, + "grad_norm": 0.8643487579115816, + "learning_rate": 9.411764705882354e-06, + "loss": 0.478, + "step": 625 + }, + { + "epoch": 0.028332201855623445, + "grad_norm": 1.0272295174288926, + "learning_rate": 9.42684766214178e-06, + "loss": 0.4871, + "step": 626 + }, + { + "epoch": 0.028377460964019007, + "grad_norm": 0.9497133963033242, + "learning_rate": 9.441930618401207e-06, + "loss": 0.4988, + "step": 627 + }, + { + "epoch": 0.028422720072414573, + "grad_norm": 0.885996701031616, + "learning_rate": 9.457013574660635e-06, + "loss": 0.5156, + "step": 628 + }, + { + "epoch": 0.02846797918081014, + "grad_norm": 0.9061066748339962, + "learning_rate": 9.472096530920061e-06, + "loss": 0.4292, + "step": 629 + }, + { + "epoch": 0.028513238289205704, + "grad_norm": 2.3887004974429993, + "learning_rate": 9.487179487179487e-06, + "loss": 0.5426, + "step": 630 + }, + { + "epoch": 0.028558497397601267, + "grad_norm": 1.0015617083513035, + "learning_rate": 9.502262443438914e-06, + "loss": 0.5036, + "step": 631 + }, + { + "epoch": 0.028603756505996832, + "grad_norm": 1.1310600352677773, + "learning_rate": 9.517345399698342e-06, + "loss": 0.519, + "step": 632 + }, + { + "epoch": 0.028649015614392398, + "grad_norm": 1.0516032606117616, + "learning_rate": 9.53242835595777e-06, + "loss": 0.5245, + "step": 633 + }, + { + "epoch": 0.02869427472278796, + "grad_norm": 0.9627150600636653, + "learning_rate": 9.547511312217196e-06, + "loss": 0.4858, + "step": 634 + }, + { + "epoch": 0.028739533831183526, + "grad_norm": 0.9908734595681895, + "learning_rate": 9.562594268476623e-06, + "loss": 0.5066, + "step": 635 + }, + { + "epoch": 0.02878479293957909, + "grad_norm": 0.8558945955157928, + "learning_rate": 9.577677224736049e-06, + "loss": 0.4889, + "step": 636 + }, + { + "epoch": 0.028830052047974654, + "grad_norm": 1.1896758395015434, + "learning_rate": 9.592760180995477e-06, + "loss": 0.4829, + "step": 637 + }, + { + "epoch": 0.02887531115637022, + "grad_norm": 1.0550264618476963, + "learning_rate": 9.607843137254903e-06, + "loss": 0.5054, + "step": 638 + }, + { + "epoch": 0.028920570264765785, + "grad_norm": 0.9297556565489958, + "learning_rate": 9.62292609351433e-06, + "loss": 0.5131, + "step": 639 + }, + { + "epoch": 0.028965829373161348, + "grad_norm": 1.8024189426984918, + "learning_rate": 9.638009049773756e-06, + "loss": 0.5572, + "step": 640 + }, + { + "epoch": 0.029011088481556913, + "grad_norm": 1.031101602190451, + "learning_rate": 9.653092006033184e-06, + "loss": 0.4433, + "step": 641 + }, + { + "epoch": 0.02905634758995248, + "grad_norm": 0.9243046569458107, + "learning_rate": 9.66817496229261e-06, + "loss": 0.5722, + "step": 642 + }, + { + "epoch": 0.02910160669834804, + "grad_norm": 0.9572833331050026, + "learning_rate": 9.683257918552037e-06, + "loss": 0.5154, + "step": 643 + }, + { + "epoch": 0.029146865806743607, + "grad_norm": 0.9545263635012465, + "learning_rate": 9.698340874811463e-06, + "loss": 0.5023, + "step": 644 + }, + { + "epoch": 0.029192124915139173, + "grad_norm": 1.1494387901770513, + "learning_rate": 9.713423831070891e-06, + "loss": 0.4989, + "step": 645 + }, + { + "epoch": 0.029237384023534735, + "grad_norm": 0.811442686170712, + "learning_rate": 9.728506787330318e-06, + "loss": 0.4753, + "step": 646 + }, + { + "epoch": 0.0292826431319303, + "grad_norm": 0.9594851119019182, + "learning_rate": 9.743589743589744e-06, + "loss": 0.4897, + "step": 647 + }, + { + "epoch": 0.029327902240325866, + "grad_norm": 0.9577158737270751, + "learning_rate": 9.75867269984917e-06, + "loss": 0.5069, + "step": 648 + }, + { + "epoch": 0.02937316134872143, + "grad_norm": 0.9677059426724289, + "learning_rate": 9.773755656108599e-06, + "loss": 0.4707, + "step": 649 + }, + { + "epoch": 0.029418420457116994, + "grad_norm": 0.9131491960385424, + "learning_rate": 9.788838612368025e-06, + "loss": 0.5233, + "step": 650 + }, + { + "epoch": 0.02946367956551256, + "grad_norm": 0.8903706265312409, + "learning_rate": 9.803921568627451e-06, + "loss": 0.494, + "step": 651 + }, + { + "epoch": 0.029508938673908126, + "grad_norm": 0.8897802621593002, + "learning_rate": 9.819004524886878e-06, + "loss": 0.5024, + "step": 652 + }, + { + "epoch": 0.029554197782303688, + "grad_norm": 0.886489545618575, + "learning_rate": 9.834087481146306e-06, + "loss": 0.4947, + "step": 653 + }, + { + "epoch": 0.029599456890699254, + "grad_norm": 1.142183417297171, + "learning_rate": 9.849170437405732e-06, + "loss": 0.5074, + "step": 654 + }, + { + "epoch": 0.02964471599909482, + "grad_norm": 0.7920238350891496, + "learning_rate": 9.86425339366516e-06, + "loss": 0.4285, + "step": 655 + }, + { + "epoch": 0.02968997510749038, + "grad_norm": 0.7817895238319269, + "learning_rate": 9.879336349924586e-06, + "loss": 0.4933, + "step": 656 + }, + { + "epoch": 0.029735234215885947, + "grad_norm": 3.3052962392165166, + "learning_rate": 9.894419306184013e-06, + "loss": 0.6201, + "step": 657 + }, + { + "epoch": 0.029780493324281513, + "grad_norm": 1.8967947995531824, + "learning_rate": 9.90950226244344e-06, + "loss": 0.5664, + "step": 658 + }, + { + "epoch": 0.029825752432677075, + "grad_norm": 1.016181045275243, + "learning_rate": 9.924585218702867e-06, + "loss": 0.468, + "step": 659 + }, + { + "epoch": 0.02987101154107264, + "grad_norm": 1.0906829653780257, + "learning_rate": 9.939668174962294e-06, + "loss": 0.5306, + "step": 660 + }, + { + "epoch": 0.029916270649468207, + "grad_norm": 1.0567718662960948, + "learning_rate": 9.95475113122172e-06, + "loss": 0.4589, + "step": 661 + }, + { + "epoch": 0.02996152975786377, + "grad_norm": 1.155731681459567, + "learning_rate": 9.969834087481146e-06, + "loss": 0.5076, + "step": 662 + }, + { + "epoch": 0.030006788866259335, + "grad_norm": 0.9147348944501993, + "learning_rate": 9.984917043740574e-06, + "loss": 0.432, + "step": 663 + }, + { + "epoch": 0.0300520479746549, + "grad_norm": 0.9209779765845878, + "learning_rate": 1e-05, + "loss": 0.4823, + "step": 664 + }, + { + "epoch": 0.030097307083050463, + "grad_norm": 0.9271797626630768, + "learning_rate": 9.999999946282679e-06, + "loss": 0.5029, + "step": 665 + }, + { + "epoch": 0.03014256619144603, + "grad_norm": 1.039138585186304, + "learning_rate": 9.999999785130714e-06, + "loss": 0.5299, + "step": 666 + }, + { + "epoch": 0.030187825299841594, + "grad_norm": 0.969861000408359, + "learning_rate": 9.999999516544111e-06, + "loss": 0.4611, + "step": 667 + }, + { + "epoch": 0.030233084408237156, + "grad_norm": 1.077682631894601, + "learning_rate": 9.999999140522874e-06, + "loss": 0.5032, + "step": 668 + }, + { + "epoch": 0.030278343516632722, + "grad_norm": 5.948473688153386, + "learning_rate": 9.999998657067014e-06, + "loss": 0.7728, + "step": 669 + }, + { + "epoch": 0.030323602625028288, + "grad_norm": 3.676232499482188, + "learning_rate": 9.999998066176536e-06, + "loss": 0.6763, + "step": 670 + }, + { + "epoch": 0.03036886173342385, + "grad_norm": 1.2775316773570315, + "learning_rate": 9.999997367851456e-06, + "loss": 0.5174, + "step": 671 + }, + { + "epoch": 0.030414120841819416, + "grad_norm": 1.6908085425895913, + "learning_rate": 9.999996562091792e-06, + "loss": 0.5926, + "step": 672 + }, + { + "epoch": 0.03045937995021498, + "grad_norm": 1.2246977137958381, + "learning_rate": 9.999995648897555e-06, + "loss": 0.4605, + "step": 673 + }, + { + "epoch": 0.030504639058610547, + "grad_norm": 1.3528227854220647, + "learning_rate": 9.99999462826877e-06, + "loss": 0.5713, + "step": 674 + }, + { + "epoch": 0.03054989816700611, + "grad_norm": 3.362052766446197, + "learning_rate": 9.999993500205456e-06, + "loss": 0.651, + "step": 675 + }, + { + "epoch": 0.030595157275401675, + "grad_norm": 0.958564212895757, + "learning_rate": 9.999992264707636e-06, + "loss": 0.5178, + "step": 676 + }, + { + "epoch": 0.03064041638379724, + "grad_norm": 1.0395509750067198, + "learning_rate": 9.999990921775341e-06, + "loss": 0.5313, + "step": 677 + }, + { + "epoch": 0.030685675492192803, + "grad_norm": 1.120139810370774, + "learning_rate": 9.999989471408598e-06, + "loss": 0.5664, + "step": 678 + }, + { + "epoch": 0.03073093460058837, + "grad_norm": 1.032136947745328, + "learning_rate": 9.999987913607437e-06, + "loss": 0.4841, + "step": 679 + }, + { + "epoch": 0.030776193708983934, + "grad_norm": 2.467769359435823, + "learning_rate": 9.999986248371889e-06, + "loss": 0.6585, + "step": 680 + }, + { + "epoch": 0.030821452817379497, + "grad_norm": 1.0114156966604324, + "learning_rate": 9.999984475701996e-06, + "loss": 0.5235, + "step": 681 + }, + { + "epoch": 0.030866711925775062, + "grad_norm": 2.1678025103559753, + "learning_rate": 9.999982595597793e-06, + "loss": 0.6423, + "step": 682 + }, + { + "epoch": 0.030911971034170628, + "grad_norm": 1.0222023327367475, + "learning_rate": 9.99998060805932e-06, + "loss": 0.4334, + "step": 683 + }, + { + "epoch": 0.03095723014256619, + "grad_norm": 1.0264965752501367, + "learning_rate": 9.999978513086617e-06, + "loss": 0.5546, + "step": 684 + }, + { + "epoch": 0.031002489250961756, + "grad_norm": 1.2861062861613923, + "learning_rate": 9.999976310679735e-06, + "loss": 0.5974, + "step": 685 + }, + { + "epoch": 0.03104774835935732, + "grad_norm": 0.9369415311783983, + "learning_rate": 9.999974000838716e-06, + "loss": 0.4734, + "step": 686 + }, + { + "epoch": 0.031093007467752884, + "grad_norm": 0.9003760578237145, + "learning_rate": 9.999971583563615e-06, + "loss": 0.5228, + "step": 687 + }, + { + "epoch": 0.03113826657614845, + "grad_norm": 0.79968319184062, + "learning_rate": 9.99996905885448e-06, + "loss": 0.5705, + "step": 688 + }, + { + "epoch": 0.031183525684544015, + "grad_norm": 0.7660832635821488, + "learning_rate": 9.999966426711364e-06, + "loss": 0.5749, + "step": 689 + }, + { + "epoch": 0.031228784792939578, + "grad_norm": 0.6958497061532944, + "learning_rate": 9.99996368713433e-06, + "loss": 0.5822, + "step": 690 + }, + { + "epoch": 0.03127404390133515, + "grad_norm": 0.8745363662409338, + "learning_rate": 9.999960840123428e-06, + "loss": 0.6022, + "step": 691 + }, + { + "epoch": 0.03131930300973071, + "grad_norm": 1.3696394690577367, + "learning_rate": 9.999957885678725e-06, + "loss": 0.4984, + "step": 692 + }, + { + "epoch": 0.03136456211812627, + "grad_norm": 1.0403799882410576, + "learning_rate": 9.999954823800287e-06, + "loss": 0.5122, + "step": 693 + }, + { + "epoch": 0.03140982122652184, + "grad_norm": 1.2900716363078422, + "learning_rate": 9.99995165448817e-06, + "loss": 0.509, + "step": 694 + }, + { + "epoch": 0.0314550803349174, + "grad_norm": 1.0807483041647399, + "learning_rate": 9.999948377742453e-06, + "loss": 0.5161, + "step": 695 + }, + { + "epoch": 0.031500339443312965, + "grad_norm": 1.0028670752550366, + "learning_rate": 9.9999449935632e-06, + "loss": 0.5443, + "step": 696 + }, + { + "epoch": 0.031545598551708534, + "grad_norm": 0.7870478530335979, + "learning_rate": 9.999941501950484e-06, + "loss": 0.4537, + "step": 697 + }, + { + "epoch": 0.031590857660104096, + "grad_norm": 1.0264232534437348, + "learning_rate": 9.999937902904382e-06, + "loss": 0.4937, + "step": 698 + }, + { + "epoch": 0.03163611676849966, + "grad_norm": 1.0829604759779226, + "learning_rate": 9.999934196424972e-06, + "loss": 0.459, + "step": 699 + }, + { + "epoch": 0.03168137587689523, + "grad_norm": 0.966309088508198, + "learning_rate": 9.999930382512331e-06, + "loss": 0.5105, + "step": 700 + }, + { + "epoch": 0.03172663498529079, + "grad_norm": 1.0366245263100786, + "learning_rate": 9.999926461166541e-06, + "loss": 0.4671, + "step": 701 + }, + { + "epoch": 0.03177189409368635, + "grad_norm": 1.2234134756053456, + "learning_rate": 9.99992243238769e-06, + "loss": 0.4912, + "step": 702 + }, + { + "epoch": 0.03181715320208192, + "grad_norm": 0.9590494692266611, + "learning_rate": 9.99991829617586e-06, + "loss": 0.5133, + "step": 703 + }, + { + "epoch": 0.031862412310477484, + "grad_norm": 1.3428036136373722, + "learning_rate": 9.999914052531143e-06, + "loss": 0.5341, + "step": 704 + }, + { + "epoch": 0.031907671418873046, + "grad_norm": 1.4649979468611838, + "learning_rate": 9.999909701453629e-06, + "loss": 0.5954, + "step": 705 + }, + { + "epoch": 0.031952930527268615, + "grad_norm": 1.4747032246725622, + "learning_rate": 9.99990524294341e-06, + "loss": 0.5009, + "step": 706 + }, + { + "epoch": 0.03199818963566418, + "grad_norm": 0.8628686083875655, + "learning_rate": 9.999900677000584e-06, + "loss": 0.5205, + "step": 707 + }, + { + "epoch": 0.03204344874405974, + "grad_norm": 0.7491389733083518, + "learning_rate": 9.99989600362525e-06, + "loss": 0.5734, + "step": 708 + }, + { + "epoch": 0.03208870785245531, + "grad_norm": 0.5221698127186557, + "learning_rate": 9.999891222817507e-06, + "loss": 0.5635, + "step": 709 + }, + { + "epoch": 0.03213396696085087, + "grad_norm": 1.759719976659955, + "learning_rate": 9.999886334577456e-06, + "loss": 0.5399, + "step": 710 + }, + { + "epoch": 0.03217922606924643, + "grad_norm": 0.616650168587763, + "learning_rate": 9.999881338905204e-06, + "loss": 0.5543, + "step": 711 + }, + { + "epoch": 0.032224485177642, + "grad_norm": 0.9961499565001543, + "learning_rate": 9.999876235800859e-06, + "loss": 0.4935, + "step": 712 + }, + { + "epoch": 0.032269744286037565, + "grad_norm": 1.0072414232661828, + "learning_rate": 9.999871025264528e-06, + "loss": 0.4725, + "step": 713 + }, + { + "epoch": 0.03231500339443313, + "grad_norm": 1.0472179363754566, + "learning_rate": 9.999865707296326e-06, + "loss": 0.5199, + "step": 714 + }, + { + "epoch": 0.032360262502828696, + "grad_norm": 0.8728295602150761, + "learning_rate": 9.999860281896366e-06, + "loss": 0.5595, + "step": 715 + }, + { + "epoch": 0.03240552161122426, + "grad_norm": 0.8090690179012936, + "learning_rate": 9.999854749064764e-06, + "loss": 0.5402, + "step": 716 + }, + { + "epoch": 0.03245078071961982, + "grad_norm": 0.6068390919302309, + "learning_rate": 9.999849108801637e-06, + "loss": 0.5795, + "step": 717 + }, + { + "epoch": 0.03249603982801539, + "grad_norm": 0.5094045186994052, + "learning_rate": 9.999843361107111e-06, + "loss": 0.5798, + "step": 718 + }, + { + "epoch": 0.03254129893641095, + "grad_norm": 1.3118168143611104, + "learning_rate": 9.999837505981308e-06, + "loss": 0.5546, + "step": 719 + }, + { + "epoch": 0.032586558044806514, + "grad_norm": 1.0749377351159282, + "learning_rate": 9.99983154342435e-06, + "loss": 0.5243, + "step": 720 + }, + { + "epoch": 0.03263181715320208, + "grad_norm": 1.4815041547299679, + "learning_rate": 9.99982547343637e-06, + "loss": 0.4979, + "step": 721 + }, + { + "epoch": 0.032677076261597646, + "grad_norm": 1.1919403721482398, + "learning_rate": 9.999819296017496e-06, + "loss": 0.4916, + "step": 722 + }, + { + "epoch": 0.03272233536999321, + "grad_norm": 0.9116590492540936, + "learning_rate": 9.999813011167861e-06, + "loss": 0.4929, + "step": 723 + }, + { + "epoch": 0.03276759447838878, + "grad_norm": 1.2629616711327594, + "learning_rate": 9.9998066188876e-06, + "loss": 0.5855, + "step": 724 + }, + { + "epoch": 0.03281285358678434, + "grad_norm": 0.9248013179371635, + "learning_rate": 9.99980011917685e-06, + "loss": 0.5008, + "step": 725 + }, + { + "epoch": 0.0328581126951799, + "grad_norm": 1.0678999576310209, + "learning_rate": 9.999793512035751e-06, + "loss": 0.4596, + "step": 726 + }, + { + "epoch": 0.03290337180357547, + "grad_norm": 0.8909248268182276, + "learning_rate": 9.999786797464446e-06, + "loss": 0.5499, + "step": 727 + }, + { + "epoch": 0.03294863091197103, + "grad_norm": 0.7216813737007891, + "learning_rate": 9.999779975463079e-06, + "loss": 0.5702, + "step": 728 + }, + { + "epoch": 0.0329938900203666, + "grad_norm": 1.0018022162445315, + "learning_rate": 9.999773046031795e-06, + "loss": 0.5352, + "step": 729 + }, + { + "epoch": 0.033039149128762164, + "grad_norm": 0.919647573300584, + "learning_rate": 9.999766009170743e-06, + "loss": 0.4565, + "step": 730 + }, + { + "epoch": 0.03308440823715773, + "grad_norm": 0.8796753543539122, + "learning_rate": 9.999758864880078e-06, + "loss": 0.4837, + "step": 731 + }, + { + "epoch": 0.033129667345553296, + "grad_norm": 0.849129211511892, + "learning_rate": 9.999751613159947e-06, + "loss": 0.4907, + "step": 732 + }, + { + "epoch": 0.03317492645394886, + "grad_norm": 0.8562079088711478, + "learning_rate": 9.99974425401051e-06, + "loss": 0.5033, + "step": 733 + }, + { + "epoch": 0.03322018556234442, + "grad_norm": 0.8722424149204497, + "learning_rate": 9.999736787431927e-06, + "loss": 0.4548, + "step": 734 + }, + { + "epoch": 0.03326544467073999, + "grad_norm": 0.8075078284399309, + "learning_rate": 9.999729213424355e-06, + "loss": 0.4592, + "step": 735 + }, + { + "epoch": 0.03331070377913555, + "grad_norm": 0.9474371118555418, + "learning_rate": 9.999721531987958e-06, + "loss": 0.4876, + "step": 736 + }, + { + "epoch": 0.033355962887531114, + "grad_norm": 0.9740697650234126, + "learning_rate": 9.999713743122898e-06, + "loss": 0.5161, + "step": 737 + }, + { + "epoch": 0.03340122199592668, + "grad_norm": 0.7762903958059998, + "learning_rate": 9.999705846829348e-06, + "loss": 0.4738, + "step": 738 + }, + { + "epoch": 0.033446481104322245, + "grad_norm": 0.6913393805537109, + "learning_rate": 9.999697843107475e-06, + "loss": 0.5282, + "step": 739 + }, + { + "epoch": 0.03349174021271781, + "grad_norm": 0.9631280627628708, + "learning_rate": 9.99968973195745e-06, + "loss": 0.4951, + "step": 740 + }, + { + "epoch": 0.03353699932111338, + "grad_norm": 1.1401384416707492, + "learning_rate": 9.999681513379447e-06, + "loss": 0.496, + "step": 741 + }, + { + "epoch": 0.03358225842950894, + "grad_norm": 0.8209194165070912, + "learning_rate": 9.999673187373644e-06, + "loss": 0.4793, + "step": 742 + }, + { + "epoch": 0.0336275175379045, + "grad_norm": 0.43857969694489585, + "learning_rate": 9.99966475394022e-06, + "loss": 0.5692, + "step": 743 + }, + { + "epoch": 0.03367277664630007, + "grad_norm": 1.0790565795223694, + "learning_rate": 9.999656213079356e-06, + "loss": 0.4542, + "step": 744 + }, + { + "epoch": 0.03371803575469563, + "grad_norm": 0.8621474937716063, + "learning_rate": 9.999647564791234e-06, + "loss": 0.4916, + "step": 745 + }, + { + "epoch": 0.033763294863091195, + "grad_norm": 0.8492549723137861, + "learning_rate": 9.999638809076043e-06, + "loss": 0.4985, + "step": 746 + }, + { + "epoch": 0.033808553971486764, + "grad_norm": 1.0321269679631884, + "learning_rate": 9.999629945933967e-06, + "loss": 0.5065, + "step": 747 + }, + { + "epoch": 0.033853813079882326, + "grad_norm": 1.0591850566806524, + "learning_rate": 9.9996209753652e-06, + "loss": 0.5104, + "step": 748 + }, + { + "epoch": 0.03389907218827789, + "grad_norm": 0.9046110188284266, + "learning_rate": 9.999611897369933e-06, + "loss": 0.5013, + "step": 749 + }, + { + "epoch": 0.03394433129667346, + "grad_norm": 0.8114624134750734, + "learning_rate": 9.999602711948362e-06, + "loss": 0.4607, + "step": 750 + }, + { + "epoch": 0.03398959040506902, + "grad_norm": 0.4443264274419598, + "learning_rate": 9.999593419100683e-06, + "loss": 0.5433, + "step": 751 + }, + { + "epoch": 0.03403484951346458, + "grad_norm": 0.43502422992640793, + "learning_rate": 9.999584018827097e-06, + "loss": 0.5734, + "step": 752 + }, + { + "epoch": 0.03408010862186015, + "grad_norm": 1.2444799823008759, + "learning_rate": 9.999574511127806e-06, + "loss": 0.5093, + "step": 753 + }, + { + "epoch": 0.034125367730255714, + "grad_norm": 1.0738710755456755, + "learning_rate": 9.999564896003013e-06, + "loss": 0.5308, + "step": 754 + }, + { + "epoch": 0.034170626838651276, + "grad_norm": 0.8692108687564998, + "learning_rate": 9.999555173452925e-06, + "loss": 0.5181, + "step": 755 + }, + { + "epoch": 0.034215885947046845, + "grad_norm": 0.9737398693974376, + "learning_rate": 9.999545343477752e-06, + "loss": 0.4986, + "step": 756 + }, + { + "epoch": 0.03426114505544241, + "grad_norm": 0.9555525860038144, + "learning_rate": 9.999535406077706e-06, + "loss": 0.5009, + "step": 757 + }, + { + "epoch": 0.03430640416383797, + "grad_norm": 0.8457483843791993, + "learning_rate": 9.999525361252996e-06, + "loss": 0.4735, + "step": 758 + }, + { + "epoch": 0.03435166327223354, + "grad_norm": 0.9139228689176659, + "learning_rate": 9.999515209003842e-06, + "loss": 0.459, + "step": 759 + }, + { + "epoch": 0.0343969223806291, + "grad_norm": 1.0397618730118945, + "learning_rate": 9.99950494933046e-06, + "loss": 0.477, + "step": 760 + }, + { + "epoch": 0.03444218148902466, + "grad_norm": 0.6038324799341142, + "learning_rate": 9.999494582233074e-06, + "loss": 0.5614, + "step": 761 + }, + { + "epoch": 0.03448744059742023, + "grad_norm": 0.8974012894329598, + "learning_rate": 9.999484107711904e-06, + "loss": 0.4982, + "step": 762 + }, + { + "epoch": 0.034532699705815795, + "grad_norm": 1.0877263021222965, + "learning_rate": 9.999473525767173e-06, + "loss": 0.4917, + "step": 763 + }, + { + "epoch": 0.03457795881421136, + "grad_norm": 0.7823596745969484, + "learning_rate": 9.999462836399112e-06, + "loss": 0.4959, + "step": 764 + }, + { + "epoch": 0.034623217922606926, + "grad_norm": 0.8617310112728173, + "learning_rate": 9.999452039607948e-06, + "loss": 0.4362, + "step": 765 + }, + { + "epoch": 0.03466847703100249, + "grad_norm": 0.8658866389167234, + "learning_rate": 9.999441135393917e-06, + "loss": 0.462, + "step": 766 + }, + { + "epoch": 0.03471373613939805, + "grad_norm": 0.41883019477992395, + "learning_rate": 9.99943012375725e-06, + "loss": 0.5354, + "step": 767 + }, + { + "epoch": 0.03475899524779362, + "grad_norm": 0.813158518464342, + "learning_rate": 9.999419004698182e-06, + "loss": 0.4495, + "step": 768 + }, + { + "epoch": 0.03480425435618918, + "grad_norm": 0.8880549031334604, + "learning_rate": 9.999407778216957e-06, + "loss": 0.4945, + "step": 769 + }, + { + "epoch": 0.034849513464584744, + "grad_norm": 0.9003853192572606, + "learning_rate": 9.999396444313811e-06, + "loss": 0.5068, + "step": 770 + }, + { + "epoch": 0.034894772572980313, + "grad_norm": 0.8752619001869574, + "learning_rate": 9.99938500298899e-06, + "loss": 0.4964, + "step": 771 + }, + { + "epoch": 0.034940031681375876, + "grad_norm": 0.8271100830472271, + "learning_rate": 9.99937345424274e-06, + "loss": 0.4564, + "step": 772 + }, + { + "epoch": 0.034985290789771445, + "grad_norm": 0.7203352211017896, + "learning_rate": 9.99936179807531e-06, + "loss": 0.4577, + "step": 773 + }, + { + "epoch": 0.03503054989816701, + "grad_norm": 0.8500740344609287, + "learning_rate": 9.999350034486948e-06, + "loss": 0.4508, + "step": 774 + }, + { + "epoch": 0.03507580900656257, + "grad_norm": 0.39547626177355233, + "learning_rate": 9.99933816347791e-06, + "loss": 0.5338, + "step": 775 + }, + { + "epoch": 0.03512106811495814, + "grad_norm": 0.9311817658265301, + "learning_rate": 9.999326185048447e-06, + "loss": 0.5512, + "step": 776 + }, + { + "epoch": 0.0351663272233537, + "grad_norm": 0.8800233195349324, + "learning_rate": 9.99931409919882e-06, + "loss": 0.5399, + "step": 777 + }, + { + "epoch": 0.03521158633174926, + "grad_norm": 0.3313894794355621, + "learning_rate": 9.999301905929286e-06, + "loss": 0.5335, + "step": 778 + }, + { + "epoch": 0.03525684544014483, + "grad_norm": 0.8317066378641015, + "learning_rate": 9.999289605240109e-06, + "loss": 0.4468, + "step": 779 + }, + { + "epoch": 0.035302104548540394, + "grad_norm": 0.7974651494986721, + "learning_rate": 9.999277197131551e-06, + "loss": 0.4734, + "step": 780 + }, + { + "epoch": 0.03534736365693596, + "grad_norm": 0.7944228025196886, + "learning_rate": 9.999264681603881e-06, + "loss": 0.5195, + "step": 781 + }, + { + "epoch": 0.035392622765331526, + "grad_norm": 0.8433968983805096, + "learning_rate": 9.99925205865737e-06, + "loss": 0.4342, + "step": 782 + }, + { + "epoch": 0.03543788187372709, + "grad_norm": 0.8021298240348729, + "learning_rate": 9.999239328292283e-06, + "loss": 0.5107, + "step": 783 + }, + { + "epoch": 0.03548314098212265, + "grad_norm": 0.9387637066650226, + "learning_rate": 9.999226490508897e-06, + "loss": 0.4739, + "step": 784 + }, + { + "epoch": 0.03552840009051822, + "grad_norm": 0.4872050722136279, + "learning_rate": 9.999213545307488e-06, + "loss": 0.554, + "step": 785 + }, + { + "epoch": 0.03557365919891378, + "grad_norm": 0.9012196342130966, + "learning_rate": 9.999200492688334e-06, + "loss": 0.5362, + "step": 786 + }, + { + "epoch": 0.035618918307309344, + "grad_norm": 0.9506473252668751, + "learning_rate": 9.999187332651716e-06, + "loss": 0.4624, + "step": 787 + }, + { + "epoch": 0.03566417741570491, + "grad_norm": 0.8901086070418615, + "learning_rate": 9.999174065197916e-06, + "loss": 0.4634, + "step": 788 + }, + { + "epoch": 0.035709436524100475, + "grad_norm": 0.36537172901012394, + "learning_rate": 9.999160690327218e-06, + "loss": 0.5775, + "step": 789 + }, + { + "epoch": 0.03575469563249604, + "grad_norm": 1.3411043185866383, + "learning_rate": 9.999147208039912e-06, + "loss": 0.4727, + "step": 790 + }, + { + "epoch": 0.03579995474089161, + "grad_norm": 0.9409963730058601, + "learning_rate": 9.999133618336285e-06, + "loss": 0.523, + "step": 791 + }, + { + "epoch": 0.03584521384928717, + "grad_norm": 0.377734894810001, + "learning_rate": 9.99911992121663e-06, + "loss": 0.5649, + "step": 792 + }, + { + "epoch": 0.03589047295768273, + "grad_norm": 1.0227197057908568, + "learning_rate": 9.999106116681243e-06, + "loss": 0.5063, + "step": 793 + }, + { + "epoch": 0.0359357320660783, + "grad_norm": 0.3577445076404578, + "learning_rate": 9.999092204730418e-06, + "loss": 0.5136, + "step": 794 + }, + { + "epoch": 0.03598099117447386, + "grad_norm": 0.8851794694546333, + "learning_rate": 9.999078185364455e-06, + "loss": 0.4668, + "step": 795 + }, + { + "epoch": 0.036026250282869425, + "grad_norm": 0.822351255754194, + "learning_rate": 9.999064058583657e-06, + "loss": 0.5141, + "step": 796 + }, + { + "epoch": 0.036071509391264994, + "grad_norm": 0.8727820448841807, + "learning_rate": 9.999049824388324e-06, + "loss": 0.5556, + "step": 797 + }, + { + "epoch": 0.036116768499660556, + "grad_norm": 0.9039671640308168, + "learning_rate": 9.999035482778764e-06, + "loss": 0.5346, + "step": 798 + }, + { + "epoch": 0.03616202760805612, + "grad_norm": 0.8457605924012196, + "learning_rate": 9.999021033755286e-06, + "loss": 0.5, + "step": 799 + }, + { + "epoch": 0.03620728671645169, + "grad_norm": 0.9032493102294806, + "learning_rate": 9.999006477318197e-06, + "loss": 0.4503, + "step": 800 + }, + { + "epoch": 0.03625254582484725, + "grad_norm": 0.7868413613589126, + "learning_rate": 9.998991813467814e-06, + "loss": 0.4662, + "step": 801 + }, + { + "epoch": 0.03629780493324281, + "grad_norm": 0.93526853556894, + "learning_rate": 9.998977042204449e-06, + "loss": 0.4836, + "step": 802 + }, + { + "epoch": 0.03634306404163838, + "grad_norm": 0.8455875935885705, + "learning_rate": 9.998962163528421e-06, + "loss": 0.4806, + "step": 803 + }, + { + "epoch": 0.036388323150033944, + "grad_norm": 0.8344451586941963, + "learning_rate": 9.998947177440048e-06, + "loss": 0.4291, + "step": 804 + }, + { + "epoch": 0.036433582258429506, + "grad_norm": 0.8522693202810683, + "learning_rate": 9.998932083939657e-06, + "loss": 0.5004, + "step": 805 + }, + { + "epoch": 0.036478841366825075, + "grad_norm": 0.905163368072537, + "learning_rate": 9.998916883027565e-06, + "loss": 0.4948, + "step": 806 + }, + { + "epoch": 0.03652410047522064, + "grad_norm": 0.73147243210702, + "learning_rate": 9.998901574704102e-06, + "loss": 0.4602, + "step": 807 + }, + { + "epoch": 0.0365693595836162, + "grad_norm": 0.4513364849291583, + "learning_rate": 9.9988861589696e-06, + "loss": 0.5818, + "step": 808 + }, + { + "epoch": 0.03661461869201177, + "grad_norm": 0.40403113807498325, + "learning_rate": 9.998870635824385e-06, + "loss": 0.5678, + "step": 809 + }, + { + "epoch": 0.03665987780040733, + "grad_norm": 1.07302313691336, + "learning_rate": 9.998855005268794e-06, + "loss": 0.495, + "step": 810 + }, + { + "epoch": 0.03670513690880289, + "grad_norm": 0.3781018473275081, + "learning_rate": 9.998839267303163e-06, + "loss": 0.5311, + "step": 811 + }, + { + "epoch": 0.03675039601719846, + "grad_norm": 0.8388566810721623, + "learning_rate": 9.998823421927826e-06, + "loss": 0.4647, + "step": 812 + }, + { + "epoch": 0.036795655125594025, + "grad_norm": 0.83966169332251, + "learning_rate": 9.998807469143129e-06, + "loss": 0.4687, + "step": 813 + }, + { + "epoch": 0.03684091423398959, + "grad_norm": 0.860897967280018, + "learning_rate": 9.998791408949408e-06, + "loss": 0.4837, + "step": 814 + }, + { + "epoch": 0.036886173342385156, + "grad_norm": 0.8062004578592479, + "learning_rate": 9.998775241347017e-06, + "loss": 0.4928, + "step": 815 + }, + { + "epoch": 0.03693143245078072, + "grad_norm": 0.7764292399554144, + "learning_rate": 9.998758966336296e-06, + "loss": 0.4549, + "step": 816 + }, + { + "epoch": 0.03697669155917629, + "grad_norm": 0.7996994905115854, + "learning_rate": 9.998742583917598e-06, + "loss": 0.4537, + "step": 817 + }, + { + "epoch": 0.03702195066757185, + "grad_norm": 0.8265343193477142, + "learning_rate": 9.998726094091275e-06, + "loss": 0.4563, + "step": 818 + }, + { + "epoch": 0.03706720977596741, + "grad_norm": 0.82518409601697, + "learning_rate": 9.99870949685768e-06, + "loss": 0.5115, + "step": 819 + }, + { + "epoch": 0.03711246888436298, + "grad_norm": 0.8645585275224266, + "learning_rate": 9.99869279221717e-06, + "loss": 0.4773, + "step": 820 + }, + { + "epoch": 0.037157727992758544, + "grad_norm": 0.8706170286664132, + "learning_rate": 9.998675980170106e-06, + "loss": 0.4519, + "step": 821 + }, + { + "epoch": 0.037202987101154106, + "grad_norm": 0.7940021232898752, + "learning_rate": 9.998659060716844e-06, + "loss": 0.4296, + "step": 822 + }, + { + "epoch": 0.037248246209549675, + "grad_norm": 0.8167594306281625, + "learning_rate": 9.998642033857753e-06, + "loss": 0.4646, + "step": 823 + }, + { + "epoch": 0.03729350531794524, + "grad_norm": 1.7764689686555373, + "learning_rate": 9.998624899593197e-06, + "loss": 0.475, + "step": 824 + }, + { + "epoch": 0.0373387644263408, + "grad_norm": 0.8476519593212136, + "learning_rate": 9.998607657923545e-06, + "loss": 0.4667, + "step": 825 + }, + { + "epoch": 0.03738402353473637, + "grad_norm": 0.6990683330559072, + "learning_rate": 9.998590308849164e-06, + "loss": 0.57, + "step": 826 + }, + { + "epoch": 0.03742928264313193, + "grad_norm": 0.7878434383789473, + "learning_rate": 9.998572852370432e-06, + "loss": 0.4403, + "step": 827 + }, + { + "epoch": 0.03747454175152749, + "grad_norm": 0.8452597507527643, + "learning_rate": 9.998555288487719e-06, + "loss": 0.4564, + "step": 828 + }, + { + "epoch": 0.03751980085992306, + "grad_norm": 0.8291505593866499, + "learning_rate": 9.998537617201405e-06, + "loss": 0.4431, + "step": 829 + }, + { + "epoch": 0.037565059968318625, + "grad_norm": 0.8739561479854944, + "learning_rate": 9.998519838511872e-06, + "loss": 0.5116, + "step": 830 + }, + { + "epoch": 0.03761031907671419, + "grad_norm": 0.8685439340711074, + "learning_rate": 9.998501952419496e-06, + "loss": 0.4549, + "step": 831 + }, + { + "epoch": 0.037655578185109756, + "grad_norm": 0.8176380913106567, + "learning_rate": 9.998483958924666e-06, + "loss": 0.4933, + "step": 832 + }, + { + "epoch": 0.03770083729350532, + "grad_norm": 0.5896711368819945, + "learning_rate": 9.998465858027769e-06, + "loss": 0.5262, + "step": 833 + }, + { + "epoch": 0.03774609640190088, + "grad_norm": 0.9204416208370488, + "learning_rate": 9.99844764972919e-06, + "loss": 0.4775, + "step": 834 + }, + { + "epoch": 0.03779135551029645, + "grad_norm": 0.8974279098852376, + "learning_rate": 9.998429334029323e-06, + "loss": 0.438, + "step": 835 + }, + { + "epoch": 0.03783661461869201, + "grad_norm": 0.4566914236668775, + "learning_rate": 9.998410910928562e-06, + "loss": 0.5537, + "step": 836 + }, + { + "epoch": 0.037881873727087574, + "grad_norm": 0.9354308373334939, + "learning_rate": 9.998392380427302e-06, + "loss": 0.473, + "step": 837 + }, + { + "epoch": 0.03792713283548314, + "grad_norm": 0.9415680530950489, + "learning_rate": 9.998373742525941e-06, + "loss": 0.4528, + "step": 838 + }, + { + "epoch": 0.037972391943878706, + "grad_norm": 0.8566551875156379, + "learning_rate": 9.998354997224879e-06, + "loss": 0.4451, + "step": 839 + }, + { + "epoch": 0.03801765105227427, + "grad_norm": 0.8394095318186099, + "learning_rate": 9.998336144524521e-06, + "loss": 0.488, + "step": 840 + }, + { + "epoch": 0.03806291016066984, + "grad_norm": 0.8683511332108413, + "learning_rate": 9.998317184425268e-06, + "loss": 0.5012, + "step": 841 + }, + { + "epoch": 0.0381081692690654, + "grad_norm": 0.8028785010939439, + "learning_rate": 9.998298116927532e-06, + "loss": 0.5156, + "step": 842 + }, + { + "epoch": 0.03815342837746096, + "grad_norm": 0.9152126508013158, + "learning_rate": 9.99827894203172e-06, + "loss": 0.5676, + "step": 843 + }, + { + "epoch": 0.03819868748585653, + "grad_norm": 0.9331450310430767, + "learning_rate": 9.998259659738243e-06, + "loss": 0.5213, + "step": 844 + }, + { + "epoch": 0.03824394659425209, + "grad_norm": 0.8624765340420999, + "learning_rate": 9.998240270047519e-06, + "loss": 0.47, + "step": 845 + }, + { + "epoch": 0.038289205702647655, + "grad_norm": 0.9051223838196641, + "learning_rate": 9.998220772959962e-06, + "loss": 0.5183, + "step": 846 + }, + { + "epoch": 0.038334464811043224, + "grad_norm": 0.38497024559095944, + "learning_rate": 9.998201168475991e-06, + "loss": 0.563, + "step": 847 + }, + { + "epoch": 0.038379723919438787, + "grad_norm": 1.0886459904950612, + "learning_rate": 9.998181456596027e-06, + "loss": 0.4387, + "step": 848 + }, + { + "epoch": 0.03842498302783435, + "grad_norm": 0.906151568063953, + "learning_rate": 9.998161637320495e-06, + "loss": 0.5013, + "step": 849 + }, + { + "epoch": 0.03847024213622992, + "grad_norm": 0.8703851030575666, + "learning_rate": 9.998141710649822e-06, + "loss": 0.4966, + "step": 850 + }, + { + "epoch": 0.03851550124462548, + "grad_norm": 0.8775907858377049, + "learning_rate": 9.998121676584432e-06, + "loss": 0.4399, + "step": 851 + }, + { + "epoch": 0.03856076035302104, + "grad_norm": 0.5793465331166096, + "learning_rate": 9.998101535124758e-06, + "loss": 0.5776, + "step": 852 + }, + { + "epoch": 0.03860601946141661, + "grad_norm": 0.8748602831935688, + "learning_rate": 9.998081286271234e-06, + "loss": 0.5187, + "step": 853 + }, + { + "epoch": 0.038651278569812174, + "grad_norm": 0.7891627206628827, + "learning_rate": 9.99806093002429e-06, + "loss": 0.5106, + "step": 854 + }, + { + "epoch": 0.038696537678207736, + "grad_norm": 0.804830597119037, + "learning_rate": 9.99804046638437e-06, + "loss": 0.4622, + "step": 855 + }, + { + "epoch": 0.038741796786603305, + "grad_norm": 0.8470820634732646, + "learning_rate": 9.99801989535191e-06, + "loss": 0.4602, + "step": 856 + }, + { + "epoch": 0.03878705589499887, + "grad_norm": 0.7899585020497784, + "learning_rate": 9.997999216927352e-06, + "loss": 0.4819, + "step": 857 + }, + { + "epoch": 0.03883231500339443, + "grad_norm": 0.7852368236713659, + "learning_rate": 9.997978431111142e-06, + "loss": 0.4689, + "step": 858 + }, + { + "epoch": 0.03887757411179, + "grad_norm": 0.7943676416848869, + "learning_rate": 9.997957537903727e-06, + "loss": 0.4799, + "step": 859 + }, + { + "epoch": 0.03892283322018556, + "grad_norm": 0.892529635684416, + "learning_rate": 9.997936537305551e-06, + "loss": 0.4807, + "step": 860 + }, + { + "epoch": 0.03896809232858113, + "grad_norm": 0.45745492269747656, + "learning_rate": 9.997915429317071e-06, + "loss": 0.5707, + "step": 861 + }, + { + "epoch": 0.03901335143697669, + "grad_norm": 0.8100899395043848, + "learning_rate": 9.997894213938738e-06, + "loss": 0.4792, + "step": 862 + }, + { + "epoch": 0.039058610545372255, + "grad_norm": 0.8251870779862273, + "learning_rate": 9.997872891171009e-06, + "loss": 0.5256, + "step": 863 + }, + { + "epoch": 0.039103869653767824, + "grad_norm": 0.9052449244530871, + "learning_rate": 9.99785146101434e-06, + "loss": 0.46, + "step": 864 + }, + { + "epoch": 0.039149128762163386, + "grad_norm": 0.8233054083332147, + "learning_rate": 9.997829923469194e-06, + "loss": 0.5034, + "step": 865 + }, + { + "epoch": 0.03919438787055895, + "grad_norm": 0.7663398755861442, + "learning_rate": 9.997808278536032e-06, + "loss": 0.4548, + "step": 866 + }, + { + "epoch": 0.03923964697895452, + "grad_norm": 0.8238704523328892, + "learning_rate": 9.99778652621532e-06, + "loss": 0.51, + "step": 867 + }, + { + "epoch": 0.03928490608735008, + "grad_norm": 0.4089041222135545, + "learning_rate": 9.997764666507523e-06, + "loss": 0.5214, + "step": 868 + }, + { + "epoch": 0.03933016519574564, + "grad_norm": 0.8337966943875073, + "learning_rate": 9.997742699413115e-06, + "loss": 0.448, + "step": 869 + }, + { + "epoch": 0.03937542430414121, + "grad_norm": 0.3718911507575922, + "learning_rate": 9.997720624932566e-06, + "loss": 0.5238, + "step": 870 + }, + { + "epoch": 0.039420683412536774, + "grad_norm": 0.8330502280175536, + "learning_rate": 9.99769844306635e-06, + "loss": 0.492, + "step": 871 + }, + { + "epoch": 0.039465942520932336, + "grad_norm": 0.8869907730560409, + "learning_rate": 9.997676153814944e-06, + "loss": 0.523, + "step": 872 + }, + { + "epoch": 0.039511201629327905, + "grad_norm": 0.8564115335442091, + "learning_rate": 9.997653757178824e-06, + "loss": 0.5044, + "step": 873 + }, + { + "epoch": 0.03955646073772347, + "grad_norm": 0.3409783012433021, + "learning_rate": 9.997631253158477e-06, + "loss": 0.5594, + "step": 874 + }, + { + "epoch": 0.03960171984611903, + "grad_norm": 0.8857825685787568, + "learning_rate": 9.997608641754381e-06, + "loss": 0.477, + "step": 875 + }, + { + "epoch": 0.0396469789545146, + "grad_norm": 0.40045123455066095, + "learning_rate": 9.997585922967026e-06, + "loss": 0.5545, + "step": 876 + }, + { + "epoch": 0.03969223806291016, + "grad_norm": 0.3827260476019697, + "learning_rate": 9.997563096796899e-06, + "loss": 0.5172, + "step": 877 + }, + { + "epoch": 0.03973749717130572, + "grad_norm": 0.34473254672516823, + "learning_rate": 9.997540163244487e-06, + "loss": 0.5662, + "step": 878 + }, + { + "epoch": 0.03978275627970129, + "grad_norm": 0.3722334175738685, + "learning_rate": 9.997517122310287e-06, + "loss": 0.553, + "step": 879 + }, + { + "epoch": 0.039828015388096855, + "grad_norm": 1.1596370622984065, + "learning_rate": 9.997493973994793e-06, + "loss": 0.4872, + "step": 880 + }, + { + "epoch": 0.03987327449649242, + "grad_norm": 0.9422109365205645, + "learning_rate": 9.997470718298503e-06, + "loss": 0.5366, + "step": 881 + }, + { + "epoch": 0.039918533604887986, + "grad_norm": 0.9376681347956652, + "learning_rate": 9.997447355221915e-06, + "loss": 0.5092, + "step": 882 + }, + { + "epoch": 0.03996379271328355, + "grad_norm": 0.9194276554143219, + "learning_rate": 9.997423884765532e-06, + "loss": 0.5064, + "step": 883 + }, + { + "epoch": 0.04000905182167911, + "grad_norm": 0.5705723711046609, + "learning_rate": 9.99740030692986e-06, + "loss": 0.5557, + "step": 884 + }, + { + "epoch": 0.04005431093007468, + "grad_norm": 1.1046214239155359, + "learning_rate": 9.9973766217154e-06, + "loss": 0.4448, + "step": 885 + }, + { + "epoch": 0.04009957003847024, + "grad_norm": 0.8280146005762715, + "learning_rate": 9.997352829122667e-06, + "loss": 0.4865, + "step": 886 + }, + { + "epoch": 0.040144829146865804, + "grad_norm": 0.8686670011300119, + "learning_rate": 9.99732892915217e-06, + "loss": 0.4488, + "step": 887 + }, + { + "epoch": 0.04019008825526137, + "grad_norm": 0.43280572475101964, + "learning_rate": 9.99730492180442e-06, + "loss": 0.5701, + "step": 888 + }, + { + "epoch": 0.040235347363656936, + "grad_norm": 0.8997349614022049, + "learning_rate": 9.997280807079938e-06, + "loss": 0.4958, + "step": 889 + }, + { + "epoch": 0.0402806064720525, + "grad_norm": 0.8726634607887849, + "learning_rate": 9.997256584979239e-06, + "loss": 0.4799, + "step": 890 + }, + { + "epoch": 0.04032586558044807, + "grad_norm": 0.4248470639109258, + "learning_rate": 9.997232255502842e-06, + "loss": 0.5752, + "step": 891 + }, + { + "epoch": 0.04037112468884363, + "grad_norm": 0.8720384694863267, + "learning_rate": 9.997207818651273e-06, + "loss": 0.5101, + "step": 892 + }, + { + "epoch": 0.04041638379723919, + "grad_norm": 0.9066900693499089, + "learning_rate": 9.997183274425058e-06, + "loss": 0.4938, + "step": 893 + }, + { + "epoch": 0.04046164290563476, + "grad_norm": 0.9292081807321683, + "learning_rate": 9.997158622824719e-06, + "loss": 0.4438, + "step": 894 + }, + { + "epoch": 0.04050690201403032, + "grad_norm": 1.057943792521059, + "learning_rate": 9.99713386385079e-06, + "loss": 0.4396, + "step": 895 + }, + { + "epoch": 0.040552161122425885, + "grad_norm": 0.44618197731567005, + "learning_rate": 9.9971089975038e-06, + "loss": 0.5388, + "step": 896 + }, + { + "epoch": 0.040597420230821454, + "grad_norm": 1.0347025476143397, + "learning_rate": 9.997084023784286e-06, + "loss": 0.4858, + "step": 897 + }, + { + "epoch": 0.04064267933921702, + "grad_norm": 0.9476775847258264, + "learning_rate": 9.997058942692786e-06, + "loss": 0.4632, + "step": 898 + }, + { + "epoch": 0.04068793844761258, + "grad_norm": 0.7912189795373192, + "learning_rate": 9.997033754229835e-06, + "loss": 0.4905, + "step": 899 + }, + { + "epoch": 0.04073319755600815, + "grad_norm": 0.9864123150456614, + "learning_rate": 9.997008458395975e-06, + "loss": 0.4605, + "step": 900 + }, + { + "epoch": 0.04077845666440371, + "grad_norm": 0.40215490381501273, + "learning_rate": 9.996983055191752e-06, + "loss": 0.551, + "step": 901 + }, + { + "epoch": 0.04082371577279927, + "grad_norm": 0.3822696497597169, + "learning_rate": 9.99695754461771e-06, + "loss": 0.5483, + "step": 902 + }, + { + "epoch": 0.04086897488119484, + "grad_norm": 1.0110205632233296, + "learning_rate": 9.996931926674396e-06, + "loss": 0.4921, + "step": 903 + }, + { + "epoch": 0.040914233989590404, + "grad_norm": 0.9296152330438265, + "learning_rate": 9.996906201362361e-06, + "loss": 0.5194, + "step": 904 + }, + { + "epoch": 0.04095949309798597, + "grad_norm": 0.8565404745687755, + "learning_rate": 9.99688036868216e-06, + "loss": 0.4809, + "step": 905 + }, + { + "epoch": 0.041004752206381535, + "grad_norm": 0.8590242523016869, + "learning_rate": 9.996854428634348e-06, + "loss": 0.4932, + "step": 906 + }, + { + "epoch": 0.0410500113147771, + "grad_norm": 0.8493136418082664, + "learning_rate": 9.996828381219479e-06, + "loss": 0.4946, + "step": 907 + }, + { + "epoch": 0.04109527042317267, + "grad_norm": 0.6124772508865317, + "learning_rate": 9.996802226438117e-06, + "loss": 0.5735, + "step": 908 + }, + { + "epoch": 0.04114052953156823, + "grad_norm": 0.8954342427228704, + "learning_rate": 9.996775964290819e-06, + "loss": 0.4609, + "step": 909 + }, + { + "epoch": 0.04118578863996379, + "grad_norm": 0.8279174198415556, + "learning_rate": 9.996749594778153e-06, + "loss": 0.4952, + "step": 910 + }, + { + "epoch": 0.04123104774835936, + "grad_norm": 0.3864797621662077, + "learning_rate": 9.996723117900684e-06, + "loss": 0.5361, + "step": 911 + }, + { + "epoch": 0.04127630685675492, + "grad_norm": 0.900106757577596, + "learning_rate": 9.996696533658981e-06, + "loss": 0.4824, + "step": 912 + }, + { + "epoch": 0.041321565965150485, + "grad_norm": 0.8550971074550466, + "learning_rate": 9.996669842053617e-06, + "loss": 0.4586, + "step": 913 + }, + { + "epoch": 0.041366825073546054, + "grad_norm": 0.9222917536345409, + "learning_rate": 9.996643043085164e-06, + "loss": 0.4704, + "step": 914 + }, + { + "epoch": 0.041412084181941616, + "grad_norm": 0.8242120274667823, + "learning_rate": 9.996616136754198e-06, + "loss": 0.4926, + "step": 915 + }, + { + "epoch": 0.04145734329033718, + "grad_norm": 0.7706995384843042, + "learning_rate": 9.996589123061297e-06, + "loss": 0.4383, + "step": 916 + }, + { + "epoch": 0.04150260239873275, + "grad_norm": 0.7484393968818605, + "learning_rate": 9.996562002007042e-06, + "loss": 0.4811, + "step": 917 + }, + { + "epoch": 0.04154786150712831, + "grad_norm": 0.5945477305149327, + "learning_rate": 9.996534773592016e-06, + "loss": 0.561, + "step": 918 + }, + { + "epoch": 0.04159312061552387, + "grad_norm": 0.88348900798707, + "learning_rate": 9.9965074378168e-06, + "loss": 0.4753, + "step": 919 + }, + { + "epoch": 0.04163837972391944, + "grad_norm": 0.843212052146494, + "learning_rate": 9.996479994681989e-06, + "loss": 0.4554, + "step": 920 + }, + { + "epoch": 0.041683638832315004, + "grad_norm": 0.8267504174103804, + "learning_rate": 9.996452444188166e-06, + "loss": 0.446, + "step": 921 + }, + { + "epoch": 0.041728897940710566, + "grad_norm": 0.39741826226838095, + "learning_rate": 9.996424786335925e-06, + "loss": 0.5399, + "step": 922 + }, + { + "epoch": 0.041774157049106135, + "grad_norm": 0.3831494947571296, + "learning_rate": 9.996397021125862e-06, + "loss": 0.5416, + "step": 923 + }, + { + "epoch": 0.0418194161575017, + "grad_norm": 0.8422075131373115, + "learning_rate": 9.996369148558573e-06, + "loss": 0.449, + "step": 924 + }, + { + "epoch": 0.04186467526589726, + "grad_norm": 0.8486980902404835, + "learning_rate": 9.996341168634653e-06, + "loss": 0.4113, + "step": 925 + }, + { + "epoch": 0.04190993437429283, + "grad_norm": 0.889356449999675, + "learning_rate": 9.99631308135471e-06, + "loss": 0.4738, + "step": 926 + }, + { + "epoch": 0.04195519348268839, + "grad_norm": 0.8520583531149387, + "learning_rate": 9.996284886719342e-06, + "loss": 0.508, + "step": 927 + }, + { + "epoch": 0.04200045259108395, + "grad_norm": 0.8429515410229458, + "learning_rate": 9.996256584729157e-06, + "loss": 0.4892, + "step": 928 + }, + { + "epoch": 0.04204571169947952, + "grad_norm": 0.8478899080075085, + "learning_rate": 9.996228175384764e-06, + "loss": 0.5311, + "step": 929 + }, + { + "epoch": 0.042090970807875085, + "grad_norm": 0.6460493207491624, + "learning_rate": 9.996199658686769e-06, + "loss": 0.561, + "step": 930 + }, + { + "epoch": 0.04213622991627065, + "grad_norm": 0.7654622141397179, + "learning_rate": 9.99617103463579e-06, + "loss": 0.4714, + "step": 931 + }, + { + "epoch": 0.042181489024666216, + "grad_norm": 1.0725569589615345, + "learning_rate": 9.99614230323244e-06, + "loss": 0.4912, + "step": 932 + }, + { + "epoch": 0.04222674813306178, + "grad_norm": 0.7822955176560418, + "learning_rate": 9.996113464477337e-06, + "loss": 0.4458, + "step": 933 + }, + { + "epoch": 0.04227200724145734, + "grad_norm": 0.803557154231793, + "learning_rate": 9.996084518371101e-06, + "loss": 0.5008, + "step": 934 + }, + { + "epoch": 0.04231726634985291, + "grad_norm": 0.8629657209874251, + "learning_rate": 9.996055464914351e-06, + "loss": 0.4699, + "step": 935 + }, + { + "epoch": 0.04236252545824847, + "grad_norm": 0.9756308146322324, + "learning_rate": 9.996026304107713e-06, + "loss": 0.4988, + "step": 936 + }, + { + "epoch": 0.042407784566644034, + "grad_norm": 0.8561456799235027, + "learning_rate": 9.995997035951816e-06, + "loss": 0.4693, + "step": 937 + }, + { + "epoch": 0.0424530436750396, + "grad_norm": 0.8924522605663066, + "learning_rate": 9.995967660447285e-06, + "loss": 0.4863, + "step": 938 + }, + { + "epoch": 0.042498302783435166, + "grad_norm": 0.8418644621286018, + "learning_rate": 9.995938177594753e-06, + "loss": 0.4681, + "step": 939 + }, + { + "epoch": 0.04254356189183073, + "grad_norm": 1.2365889696528232, + "learning_rate": 9.995908587394854e-06, + "loss": 0.469, + "step": 940 + }, + { + "epoch": 0.0425888210002263, + "grad_norm": 0.6024320407477483, + "learning_rate": 9.995878889848223e-06, + "loss": 0.5168, + "step": 941 + }, + { + "epoch": 0.04263408010862186, + "grad_norm": 0.9491137737305888, + "learning_rate": 9.995849084955498e-06, + "loss": 0.4466, + "step": 942 + }, + { + "epoch": 0.04267933921701742, + "grad_norm": 0.851853575868263, + "learning_rate": 9.99581917271732e-06, + "loss": 0.4709, + "step": 943 + }, + { + "epoch": 0.04272459832541299, + "grad_norm": 0.8028920300451445, + "learning_rate": 9.995789153134333e-06, + "loss": 0.4656, + "step": 944 + }, + { + "epoch": 0.04276985743380855, + "grad_norm": 0.4079955958058016, + "learning_rate": 9.995759026207179e-06, + "loss": 0.5244, + "step": 945 + }, + { + "epoch": 0.042815116542204115, + "grad_norm": 0.4484634107614797, + "learning_rate": 9.995728791936505e-06, + "loss": 0.5344, + "step": 946 + }, + { + "epoch": 0.042860375650599684, + "grad_norm": 0.906131738275771, + "learning_rate": 9.995698450322965e-06, + "loss": 0.4561, + "step": 947 + }, + { + "epoch": 0.04290563475899525, + "grad_norm": 1.0408129265137542, + "learning_rate": 9.995668001367208e-06, + "loss": 0.4295, + "step": 948 + }, + { + "epoch": 0.042950893867390816, + "grad_norm": 0.8137121642370845, + "learning_rate": 9.995637445069889e-06, + "loss": 0.4805, + "step": 949 + }, + { + "epoch": 0.04299615297578638, + "grad_norm": 0.8978674731733924, + "learning_rate": 9.995606781431664e-06, + "loss": 0.5397, + "step": 950 + }, + { + "epoch": 0.04304141208418194, + "grad_norm": 0.8323695041137925, + "learning_rate": 9.99557601045319e-06, + "loss": 0.4466, + "step": 951 + }, + { + "epoch": 0.04308667119257751, + "grad_norm": 0.8388981810246476, + "learning_rate": 9.995545132135133e-06, + "loss": 0.4916, + "step": 952 + }, + { + "epoch": 0.04313193030097307, + "grad_norm": 0.5734877288235656, + "learning_rate": 9.995514146478152e-06, + "loss": 0.5394, + "step": 953 + }, + { + "epoch": 0.043177189409368634, + "grad_norm": 0.9670137618853054, + "learning_rate": 9.995483053482917e-06, + "loss": 0.4966, + "step": 954 + }, + { + "epoch": 0.0432224485177642, + "grad_norm": 0.8820830424535521, + "learning_rate": 9.995451853150091e-06, + "loss": 0.4859, + "step": 955 + }, + { + "epoch": 0.043267707626159765, + "grad_norm": 0.7973854009399443, + "learning_rate": 9.995420545480349e-06, + "loss": 0.512, + "step": 956 + }, + { + "epoch": 0.04331296673455533, + "grad_norm": 0.8844927712303589, + "learning_rate": 9.99538913047436e-06, + "loss": 0.4833, + "step": 957 + }, + { + "epoch": 0.0433582258429509, + "grad_norm": 0.8569865302662765, + "learning_rate": 9.9953576081328e-06, + "loss": 0.4711, + "step": 958 + }, + { + "epoch": 0.04340348495134646, + "grad_norm": 0.8153974165603668, + "learning_rate": 9.995325978456349e-06, + "loss": 0.439, + "step": 959 + }, + { + "epoch": 0.04344874405974202, + "grad_norm": 0.8034190547420145, + "learning_rate": 9.995294241445685e-06, + "loss": 0.445, + "step": 960 + }, + { + "epoch": 0.04349400316813759, + "grad_norm": 0.8022765956843442, + "learning_rate": 9.995262397101489e-06, + "loss": 0.4781, + "step": 961 + }, + { + "epoch": 0.04353926227653315, + "grad_norm": 0.5816020147571523, + "learning_rate": 9.995230445424446e-06, + "loss": 0.544, + "step": 962 + }, + { + "epoch": 0.043584521384928715, + "grad_norm": 0.9986172227674865, + "learning_rate": 9.995198386415241e-06, + "loss": 0.4766, + "step": 963 + }, + { + "epoch": 0.043629780493324284, + "grad_norm": 0.791646512962987, + "learning_rate": 9.995166220074566e-06, + "loss": 0.4591, + "step": 964 + }, + { + "epoch": 0.043675039601719846, + "grad_norm": 0.8885119876985477, + "learning_rate": 9.995133946403111e-06, + "loss": 0.4547, + "step": 965 + }, + { + "epoch": 0.04372029871011541, + "grad_norm": 0.4175285658178469, + "learning_rate": 9.995101565401566e-06, + "loss": 0.5103, + "step": 966 + }, + { + "epoch": 0.04376555781851098, + "grad_norm": 0.44806347188990125, + "learning_rate": 9.995069077070632e-06, + "loss": 0.5563, + "step": 967 + }, + { + "epoch": 0.04381081692690654, + "grad_norm": 1.2349779804925443, + "learning_rate": 9.995036481411005e-06, + "loss": 0.492, + "step": 968 + }, + { + "epoch": 0.0438560760353021, + "grad_norm": 0.9400022460617656, + "learning_rate": 9.995003778423383e-06, + "loss": 0.4622, + "step": 969 + }, + { + "epoch": 0.04390133514369767, + "grad_norm": 0.42943118298688115, + "learning_rate": 9.994970968108473e-06, + "loss": 0.536, + "step": 970 + }, + { + "epoch": 0.043946594252093234, + "grad_norm": 1.161378142208096, + "learning_rate": 9.994938050466976e-06, + "loss": 0.4773, + "step": 971 + }, + { + "epoch": 0.043991853360488796, + "grad_norm": 1.1044222313320406, + "learning_rate": 9.994905025499602e-06, + "loss": 0.5134, + "step": 972 + }, + { + "epoch": 0.044037112468884365, + "grad_norm": 0.8840976288998802, + "learning_rate": 9.994871893207058e-06, + "loss": 0.4961, + "step": 973 + }, + { + "epoch": 0.04408237157727993, + "grad_norm": 0.9243901215383543, + "learning_rate": 9.99483865359006e-06, + "loss": 0.5218, + "step": 974 + }, + { + "epoch": 0.04412763068567549, + "grad_norm": 0.924911837548586, + "learning_rate": 9.99480530664932e-06, + "loss": 0.464, + "step": 975 + }, + { + "epoch": 0.04417288979407106, + "grad_norm": 0.875085936045096, + "learning_rate": 9.994771852385552e-06, + "loss": 0.4863, + "step": 976 + }, + { + "epoch": 0.04421814890246662, + "grad_norm": 0.8127041858417101, + "learning_rate": 9.994738290799479e-06, + "loss": 0.4658, + "step": 977 + }, + { + "epoch": 0.04426340801086218, + "grad_norm": 0.5493520559000986, + "learning_rate": 9.99470462189182e-06, + "loss": 0.5463, + "step": 978 + }, + { + "epoch": 0.04430866711925775, + "grad_norm": 1.02408537343092, + "learning_rate": 9.994670845663297e-06, + "loss": 0.463, + "step": 979 + }, + { + "epoch": 0.044353926227653315, + "grad_norm": 0.916174894899694, + "learning_rate": 9.99463696211464e-06, + "loss": 0.4892, + "step": 980 + }, + { + "epoch": 0.04439918533604888, + "grad_norm": 0.8152376323392234, + "learning_rate": 9.994602971246573e-06, + "loss": 0.4441, + "step": 981 + }, + { + "epoch": 0.044444444444444446, + "grad_norm": 0.8328042792025057, + "learning_rate": 9.994568873059829e-06, + "loss": 0.4403, + "step": 982 + }, + { + "epoch": 0.04448970355284001, + "grad_norm": 2.0243382061600372, + "learning_rate": 9.994534667555138e-06, + "loss": 0.5063, + "step": 983 + }, + { + "epoch": 0.04453496266123557, + "grad_norm": 0.8762795255672268, + "learning_rate": 9.994500354733238e-06, + "loss": 0.4542, + "step": 984 + }, + { + "epoch": 0.04458022176963114, + "grad_norm": 0.4677123917860248, + "learning_rate": 9.994465934594863e-06, + "loss": 0.5622, + "step": 985 + }, + { + "epoch": 0.0446254808780267, + "grad_norm": 1.0928191852342053, + "learning_rate": 9.994431407140757e-06, + "loss": 0.4326, + "step": 986 + }, + { + "epoch": 0.044670739986422264, + "grad_norm": 0.8365114456102918, + "learning_rate": 9.994396772371658e-06, + "loss": 0.4831, + "step": 987 + }, + { + "epoch": 0.044715999094817833, + "grad_norm": 0.40524873724140714, + "learning_rate": 9.994362030288312e-06, + "loss": 0.5172, + "step": 988 + }, + { + "epoch": 0.044761258203213396, + "grad_norm": 0.7934455189770376, + "learning_rate": 9.994327180891462e-06, + "loss": 0.4512, + "step": 989 + }, + { + "epoch": 0.04480651731160896, + "grad_norm": 0.43443711211319097, + "learning_rate": 9.994292224181864e-06, + "loss": 0.5455, + "step": 990 + }, + { + "epoch": 0.04485177642000453, + "grad_norm": 1.078608052597576, + "learning_rate": 9.994257160160263e-06, + "loss": 0.4939, + "step": 991 + }, + { + "epoch": 0.04489703552840009, + "grad_norm": 0.8074285900663641, + "learning_rate": 9.994221988827415e-06, + "loss": 0.4862, + "step": 992 + }, + { + "epoch": 0.04494229463679566, + "grad_norm": 0.790319743842942, + "learning_rate": 9.994186710184073e-06, + "loss": 0.43, + "step": 993 + }, + { + "epoch": 0.04498755374519122, + "grad_norm": 0.8873418220441408, + "learning_rate": 9.994151324231e-06, + "loss": 0.4658, + "step": 994 + }, + { + "epoch": 0.04503281285358678, + "grad_norm": 0.8225925871404844, + "learning_rate": 9.994115830968951e-06, + "loss": 0.417, + "step": 995 + }, + { + "epoch": 0.04507807196198235, + "grad_norm": 0.8076197995626312, + "learning_rate": 9.994080230398693e-06, + "loss": 0.474, + "step": 996 + }, + { + "epoch": 0.045123331070377914, + "grad_norm": 0.8566188848933955, + "learning_rate": 9.994044522520988e-06, + "loss": 0.5055, + "step": 997 + }, + { + "epoch": 0.04516859017877348, + "grad_norm": 0.7046398217059011, + "learning_rate": 9.994008707336604e-06, + "loss": 0.5656, + "step": 998 + }, + { + "epoch": 0.045213849287169046, + "grad_norm": 1.267284206330529, + "learning_rate": 9.99397278484631e-06, + "loss": 0.5063, + "step": 999 + }, + { + "epoch": 0.04525910839556461, + "grad_norm": 0.8217192392842306, + "learning_rate": 9.993936755050881e-06, + "loss": 0.5099, + "step": 1000 + }, + { + "epoch": 0.04530436750396017, + "grad_norm": 0.866571381344001, + "learning_rate": 9.993900617951087e-06, + "loss": 0.4602, + "step": 1001 + }, + { + "epoch": 0.04534962661235574, + "grad_norm": 0.8053116757932492, + "learning_rate": 9.993864373547707e-06, + "loss": 0.4631, + "step": 1002 + }, + { + "epoch": 0.0453948857207513, + "grad_norm": 0.8906153508438541, + "learning_rate": 9.993828021841518e-06, + "loss": 0.4536, + "step": 1003 + }, + { + "epoch": 0.045440144829146864, + "grad_norm": 0.7761743131090554, + "learning_rate": 9.993791562833303e-06, + "loss": 0.4591, + "step": 1004 + }, + { + "epoch": 0.04548540393754243, + "grad_norm": 0.8438694699439588, + "learning_rate": 9.993754996523846e-06, + "loss": 0.5054, + "step": 1005 + }, + { + "epoch": 0.045530663045937995, + "grad_norm": 0.9633033027437268, + "learning_rate": 9.99371832291393e-06, + "loss": 0.4456, + "step": 1006 + }, + { + "epoch": 0.04557592215433356, + "grad_norm": 0.7624238288048312, + "learning_rate": 9.993681542004343e-06, + "loss": 0.5288, + "step": 1007 + }, + { + "epoch": 0.04562118126272913, + "grad_norm": 1.0441430641968361, + "learning_rate": 9.99364465379588e-06, + "loss": 0.4678, + "step": 1008 + }, + { + "epoch": 0.04566644037112469, + "grad_norm": 0.8104564043918575, + "learning_rate": 9.993607658289325e-06, + "loss": 0.428, + "step": 1009 + }, + { + "epoch": 0.04571169947952025, + "grad_norm": 0.39650819228795653, + "learning_rate": 9.993570555485484e-06, + "loss": 0.5174, + "step": 1010 + }, + { + "epoch": 0.04575695858791582, + "grad_norm": 1.0830099479547428, + "learning_rate": 9.993533345385145e-06, + "loss": 0.5068, + "step": 1011 + }, + { + "epoch": 0.04580221769631138, + "grad_norm": 0.5169965065083604, + "learning_rate": 9.993496027989112e-06, + "loss": 0.5052, + "step": 1012 + }, + { + "epoch": 0.045847476804706945, + "grad_norm": 0.4772035033679194, + "learning_rate": 9.993458603298184e-06, + "loss": 0.5631, + "step": 1013 + }, + { + "epoch": 0.045892735913102514, + "grad_norm": 1.0771229663771391, + "learning_rate": 9.993421071313168e-06, + "loss": 0.4949, + "step": 1014 + }, + { + "epoch": 0.045937995021498076, + "grad_norm": 0.9200033400407627, + "learning_rate": 9.993383432034869e-06, + "loss": 0.4857, + "step": 1015 + }, + { + "epoch": 0.04598325412989364, + "grad_norm": 0.8675512644866923, + "learning_rate": 9.993345685464097e-06, + "loss": 0.4762, + "step": 1016 + }, + { + "epoch": 0.04602851323828921, + "grad_norm": 0.8495883898640996, + "learning_rate": 9.993307831601661e-06, + "loss": 0.4652, + "step": 1017 + }, + { + "epoch": 0.04607377234668477, + "grad_norm": 0.8723604577429355, + "learning_rate": 9.993269870448375e-06, + "loss": 0.4401, + "step": 1018 + }, + { + "epoch": 0.04611903145508033, + "grad_norm": 0.695706230625535, + "learning_rate": 9.993231802005056e-06, + "loss": 0.4295, + "step": 1019 + }, + { + "epoch": 0.0461642905634759, + "grad_norm": 0.8270086618965578, + "learning_rate": 9.99319362627252e-06, + "loss": 0.4856, + "step": 1020 + }, + { + "epoch": 0.046209549671871464, + "grad_norm": 0.8445513154871833, + "learning_rate": 9.993155343251592e-06, + "loss": 0.476, + "step": 1021 + }, + { + "epoch": 0.046254808780267026, + "grad_norm": 0.8926816239824479, + "learning_rate": 9.993116952943087e-06, + "loss": 0.5158, + "step": 1022 + }, + { + "epoch": 0.046300067888662595, + "grad_norm": 1.0848942171687697, + "learning_rate": 9.993078455347835e-06, + "loss": 0.5605, + "step": 1023 + }, + { + "epoch": 0.04634532699705816, + "grad_norm": 0.7114123741793746, + "learning_rate": 9.993039850466664e-06, + "loss": 0.523, + "step": 1024 + }, + { + "epoch": 0.04639058610545372, + "grad_norm": 0.41718334147884434, + "learning_rate": 9.9930011383004e-06, + "loss": 0.5228, + "step": 1025 + }, + { + "epoch": 0.04643584521384929, + "grad_norm": 1.0948263552696134, + "learning_rate": 9.992962318849876e-06, + "loss": 0.4418, + "step": 1026 + }, + { + "epoch": 0.04648110432224485, + "grad_norm": 1.3184136429115096, + "learning_rate": 9.992923392115927e-06, + "loss": 0.5414, + "step": 1027 + }, + { + "epoch": 0.04652636343064041, + "grad_norm": 0.988322664954066, + "learning_rate": 9.992884358099389e-06, + "loss": 0.4914, + "step": 1028 + }, + { + "epoch": 0.04657162253903598, + "grad_norm": 1.03237806355917, + "learning_rate": 9.9928452168011e-06, + "loss": 0.5398, + "step": 1029 + }, + { + "epoch": 0.046616881647431545, + "grad_norm": 1.0051916183489378, + "learning_rate": 9.992805968221902e-06, + "loss": 0.4913, + "step": 1030 + }, + { + "epoch": 0.04666214075582711, + "grad_norm": 0.823719675186997, + "learning_rate": 9.99276661236264e-06, + "loss": 0.5054, + "step": 1031 + }, + { + "epoch": 0.046707399864222676, + "grad_norm": 0.8540554077712061, + "learning_rate": 9.992727149224155e-06, + "loss": 0.4993, + "step": 1032 + }, + { + "epoch": 0.04675265897261824, + "grad_norm": 0.9522023086516167, + "learning_rate": 9.992687578807296e-06, + "loss": 0.477, + "step": 1033 + }, + { + "epoch": 0.0467979180810138, + "grad_norm": 1.1578859999622484, + "learning_rate": 9.992647901112918e-06, + "loss": 0.525, + "step": 1034 + }, + { + "epoch": 0.04684317718940937, + "grad_norm": 0.7745434808777305, + "learning_rate": 9.992608116141868e-06, + "loss": 0.4579, + "step": 1035 + }, + { + "epoch": 0.04688843629780493, + "grad_norm": 0.7848365984109472, + "learning_rate": 9.992568223895007e-06, + "loss": 0.4992, + "step": 1036 + }, + { + "epoch": 0.0469336954062005, + "grad_norm": 1.4260545518131067, + "learning_rate": 9.992528224373184e-06, + "loss": 0.4865, + "step": 1037 + }, + { + "epoch": 0.046978954514596064, + "grad_norm": 0.973652136914254, + "learning_rate": 9.992488117577265e-06, + "loss": 0.4634, + "step": 1038 + }, + { + "epoch": 0.047024213622991626, + "grad_norm": 0.7644367544615681, + "learning_rate": 9.99244790350811e-06, + "loss": 0.4649, + "step": 1039 + }, + { + "epoch": 0.047069472731387195, + "grad_norm": 0.8781215453997191, + "learning_rate": 9.992407582166582e-06, + "loss": 0.474, + "step": 1040 + }, + { + "epoch": 0.04711473183978276, + "grad_norm": 0.8450792350739496, + "learning_rate": 9.992367153553549e-06, + "loss": 0.446, + "step": 1041 + }, + { + "epoch": 0.04715999094817832, + "grad_norm": 0.9136083369013077, + "learning_rate": 9.992326617669876e-06, + "loss": 0.455, + "step": 1042 + }, + { + "epoch": 0.04720525005657389, + "grad_norm": 2.3247242416841916, + "learning_rate": 9.99228597451644e-06, + "loss": 0.5615, + "step": 1043 + }, + { + "epoch": 0.04725050916496945, + "grad_norm": 0.980824771184377, + "learning_rate": 9.99224522409411e-06, + "loss": 0.4526, + "step": 1044 + }, + { + "epoch": 0.04729576827336501, + "grad_norm": 0.7777888346815359, + "learning_rate": 9.992204366403761e-06, + "loss": 0.5381, + "step": 1045 + }, + { + "epoch": 0.04734102738176058, + "grad_norm": 0.568469961832735, + "learning_rate": 9.992163401446274e-06, + "loss": 0.5143, + "step": 1046 + }, + { + "epoch": 0.047386286490156145, + "grad_norm": 1.0356275444400265, + "learning_rate": 9.992122329222527e-06, + "loss": 0.514, + "step": 1047 + }, + { + "epoch": 0.04743154559855171, + "grad_norm": 0.9332493931025058, + "learning_rate": 9.992081149733404e-06, + "loss": 0.4338, + "step": 1048 + }, + { + "epoch": 0.047476804706947276, + "grad_norm": 1.0333833578504823, + "learning_rate": 9.99203986297979e-06, + "loss": 0.5054, + "step": 1049 + }, + { + "epoch": 0.04752206381534284, + "grad_norm": 0.8779286388663978, + "learning_rate": 9.99199846896257e-06, + "loss": 0.4828, + "step": 1050 + }, + { + "epoch": 0.0475673229237384, + "grad_norm": 0.7946450784816697, + "learning_rate": 9.991956967682635e-06, + "loss": 0.488, + "step": 1051 + }, + { + "epoch": 0.04761258203213397, + "grad_norm": 2.8390151308444898, + "learning_rate": 9.991915359140876e-06, + "loss": 0.5759, + "step": 1052 + }, + { + "epoch": 0.04765784114052953, + "grad_norm": 0.795466215922787, + "learning_rate": 9.991873643338187e-06, + "loss": 0.4438, + "step": 1053 + }, + { + "epoch": 0.047703100248925094, + "grad_norm": 0.89522116285953, + "learning_rate": 9.991831820275466e-06, + "loss": 0.509, + "step": 1054 + }, + { + "epoch": 0.04774835935732066, + "grad_norm": 0.8460939918115581, + "learning_rate": 9.99178988995361e-06, + "loss": 0.5155, + "step": 1055 + }, + { + "epoch": 0.047793618465716226, + "grad_norm": 0.9491912394916352, + "learning_rate": 9.991747852373522e-06, + "loss": 0.4917, + "step": 1056 + }, + { + "epoch": 0.04783887757411179, + "grad_norm": 1.1064849521692737, + "learning_rate": 9.9917057075361e-06, + "loss": 0.4638, + "step": 1057 + }, + { + "epoch": 0.04788413668250736, + "grad_norm": 0.8443091766555462, + "learning_rate": 9.991663455442255e-06, + "loss": 0.498, + "step": 1058 + }, + { + "epoch": 0.04792939579090292, + "grad_norm": 1.4157963360282313, + "learning_rate": 9.991621096092895e-06, + "loss": 0.5387, + "step": 1059 + }, + { + "epoch": 0.04797465489929848, + "grad_norm": 0.8466878016153724, + "learning_rate": 9.991578629488926e-06, + "loss": 0.4364, + "step": 1060 + }, + { + "epoch": 0.04801991400769405, + "grad_norm": 0.907026124451798, + "learning_rate": 9.991536055631263e-06, + "loss": 0.4207, + "step": 1061 + }, + { + "epoch": 0.04806517311608961, + "grad_norm": 1.3059124793214225, + "learning_rate": 9.99149337452082e-06, + "loss": 0.5143, + "step": 1062 + }, + { + "epoch": 0.048110432224485175, + "grad_norm": 1.0645852262999596, + "learning_rate": 9.991450586158515e-06, + "loss": 0.4878, + "step": 1063 + }, + { + "epoch": 0.048155691332880744, + "grad_norm": 0.8253282261334028, + "learning_rate": 9.991407690545267e-06, + "loss": 0.493, + "step": 1064 + }, + { + "epoch": 0.048200950441276307, + "grad_norm": 0.841953712734116, + "learning_rate": 9.991364687681998e-06, + "loss": 0.4481, + "step": 1065 + }, + { + "epoch": 0.04824620954967187, + "grad_norm": 1.8495652063684374, + "learning_rate": 9.991321577569632e-06, + "loss": 0.4993, + "step": 1066 + }, + { + "epoch": 0.04829146865806744, + "grad_norm": 0.7895317716602368, + "learning_rate": 9.991278360209094e-06, + "loss": 0.4786, + "step": 1067 + }, + { + "epoch": 0.048336727766463, + "grad_norm": 0.7317553707364586, + "learning_rate": 9.991235035601314e-06, + "loss": 0.4064, + "step": 1068 + }, + { + "epoch": 0.04838198687485856, + "grad_norm": 4.832414373312705, + "learning_rate": 9.991191603747223e-06, + "loss": 0.4873, + "step": 1069 + }, + { + "epoch": 0.04842724598325413, + "grad_norm": 0.8183438730813508, + "learning_rate": 9.991148064647753e-06, + "loss": 0.4324, + "step": 1070 + }, + { + "epoch": 0.048472505091649694, + "grad_norm": 0.8433086571476347, + "learning_rate": 9.99110441830384e-06, + "loss": 0.4406, + "step": 1071 + }, + { + "epoch": 0.048517764200045256, + "grad_norm": 0.8047137664912013, + "learning_rate": 9.991060664716423e-06, + "loss": 0.4656, + "step": 1072 + }, + { + "epoch": 0.048563023308440825, + "grad_norm": 0.8109246254977853, + "learning_rate": 9.991016803886441e-06, + "loss": 0.4845, + "step": 1073 + }, + { + "epoch": 0.04860828241683639, + "grad_norm": 0.88284711254973, + "learning_rate": 9.990972835814836e-06, + "loss": 0.4508, + "step": 1074 + }, + { + "epoch": 0.04865354152523195, + "grad_norm": 0.9783194550591081, + "learning_rate": 9.990928760502554e-06, + "loss": 0.4699, + "step": 1075 + }, + { + "epoch": 0.04869880063362752, + "grad_norm": 0.8693059736340535, + "learning_rate": 9.990884577950542e-06, + "loss": 0.5487, + "step": 1076 + }, + { + "epoch": 0.04874405974202308, + "grad_norm": 0.8105918598711073, + "learning_rate": 9.990840288159747e-06, + "loss": 0.4735, + "step": 1077 + }, + { + "epoch": 0.04878931885041864, + "grad_norm": 0.4456518130885961, + "learning_rate": 9.990795891131125e-06, + "loss": 0.564, + "step": 1078 + }, + { + "epoch": 0.04883457795881421, + "grad_norm": 0.7670428008042319, + "learning_rate": 9.990751386865624e-06, + "loss": 0.4433, + "step": 1079 + }, + { + "epoch": 0.048879837067209775, + "grad_norm": 0.917325505413355, + "learning_rate": 9.990706775364204e-06, + "loss": 0.4988, + "step": 1080 + }, + { + "epoch": 0.048925096175605344, + "grad_norm": 0.8182023141846203, + "learning_rate": 9.990662056627825e-06, + "loss": 0.4192, + "step": 1081 + }, + { + "epoch": 0.048970355284000906, + "grad_norm": 0.913312253395318, + "learning_rate": 9.990617230657446e-06, + "loss": 0.5304, + "step": 1082 + }, + { + "epoch": 0.04901561439239647, + "grad_norm": 0.8520793636486157, + "learning_rate": 9.990572297454031e-06, + "loss": 0.5529, + "step": 1083 + }, + { + "epoch": 0.04906087350079204, + "grad_norm": 0.7872118274791575, + "learning_rate": 9.990527257018544e-06, + "loss": 0.4579, + "step": 1084 + }, + { + "epoch": 0.0491061326091876, + "grad_norm": 0.7971591462553888, + "learning_rate": 9.990482109351951e-06, + "loss": 0.4798, + "step": 1085 + }, + { + "epoch": 0.04915139171758316, + "grad_norm": 0.5425751844160779, + "learning_rate": 9.990436854455228e-06, + "loss": 0.5453, + "step": 1086 + }, + { + "epoch": 0.04919665082597873, + "grad_norm": 0.885036195458033, + "learning_rate": 9.990391492329341e-06, + "loss": 0.4667, + "step": 1087 + }, + { + "epoch": 0.049241909934374294, + "grad_norm": 0.8812596963416286, + "learning_rate": 9.99034602297527e-06, + "loss": 0.4687, + "step": 1088 + }, + { + "epoch": 0.049287169042769856, + "grad_norm": 0.7548044713322737, + "learning_rate": 9.990300446393988e-06, + "loss": 0.4574, + "step": 1089 + }, + { + "epoch": 0.049332428151165425, + "grad_norm": 1.3872353232776937, + "learning_rate": 9.990254762586477e-06, + "loss": 0.4417, + "step": 1090 + }, + { + "epoch": 0.04937768725956099, + "grad_norm": 0.6820528986305062, + "learning_rate": 9.990208971553716e-06, + "loss": 0.5387, + "step": 1091 + }, + { + "epoch": 0.04942294636795655, + "grad_norm": 0.8846682032693766, + "learning_rate": 9.990163073296692e-06, + "loss": 0.4802, + "step": 1092 + }, + { + "epoch": 0.04946820547635212, + "grad_norm": 0.4990089890256931, + "learning_rate": 9.99011706781639e-06, + "loss": 0.5377, + "step": 1093 + }, + { + "epoch": 0.04951346458474768, + "grad_norm": 0.7752264950430418, + "learning_rate": 9.990070955113798e-06, + "loss": 0.4238, + "step": 1094 + }, + { + "epoch": 0.04955872369314324, + "grad_norm": 0.8435283800432144, + "learning_rate": 9.990024735189907e-06, + "loss": 0.4477, + "step": 1095 + }, + { + "epoch": 0.04960398280153881, + "grad_norm": 0.7803389134460659, + "learning_rate": 9.989978408045709e-06, + "loss": 0.4369, + "step": 1096 + }, + { + "epoch": 0.049649241909934375, + "grad_norm": 0.7334389651169826, + "learning_rate": 9.989931973682202e-06, + "loss": 0.5141, + "step": 1097 + }, + { + "epoch": 0.04969450101832994, + "grad_norm": 0.820447034179538, + "learning_rate": 9.989885432100381e-06, + "loss": 0.5037, + "step": 1098 + }, + { + "epoch": 0.049739760126725506, + "grad_norm": 0.6248412960611177, + "learning_rate": 9.989838783301248e-06, + "loss": 0.5438, + "step": 1099 + }, + { + "epoch": 0.04978501923512107, + "grad_norm": 0.8232431368916311, + "learning_rate": 9.989792027285805e-06, + "loss": 0.4732, + "step": 1100 + }, + { + "epoch": 0.04983027834351663, + "grad_norm": 0.8808472161055362, + "learning_rate": 9.989745164055056e-06, + "loss": 0.4218, + "step": 1101 + }, + { + "epoch": 0.0498755374519122, + "grad_norm": 0.8609138479748117, + "learning_rate": 9.989698193610007e-06, + "loss": 0.4684, + "step": 1102 + }, + { + "epoch": 0.04992079656030776, + "grad_norm": 0.7324041656570668, + "learning_rate": 9.98965111595167e-06, + "loss": 0.4427, + "step": 1103 + }, + { + "epoch": 0.049966055668703324, + "grad_norm": 0.7668886253685008, + "learning_rate": 9.989603931081055e-06, + "loss": 0.46, + "step": 1104 + }, + { + "epoch": 0.05001131477709889, + "grad_norm": 0.8435203086437428, + "learning_rate": 9.989556638999175e-06, + "loss": 0.5022, + "step": 1105 + }, + { + "epoch": 0.050056573885494456, + "grad_norm": 0.4261132058486116, + "learning_rate": 9.989509239707047e-06, + "loss": 0.5337, + "step": 1106 + }, + { + "epoch": 0.05010183299389002, + "grad_norm": 0.8578128070222978, + "learning_rate": 9.989461733205692e-06, + "loss": 0.4439, + "step": 1107 + }, + { + "epoch": 0.05014709210228559, + "grad_norm": 0.836209356153558, + "learning_rate": 9.989414119496126e-06, + "loss": 0.4679, + "step": 1108 + }, + { + "epoch": 0.05019235121068115, + "grad_norm": 0.7941865579121077, + "learning_rate": 9.989366398579375e-06, + "loss": 0.4532, + "step": 1109 + }, + { + "epoch": 0.05023761031907671, + "grad_norm": 0.35779933739851233, + "learning_rate": 9.989318570456463e-06, + "loss": 0.5278, + "step": 1110 + }, + { + "epoch": 0.05028286942747228, + "grad_norm": 0.8641871319948795, + "learning_rate": 9.989270635128418e-06, + "loss": 0.4225, + "step": 1111 + }, + { + "epoch": 0.05032812853586784, + "grad_norm": 0.5511830790669976, + "learning_rate": 9.989222592596272e-06, + "loss": 0.5678, + "step": 1112 + }, + { + "epoch": 0.050373387644263405, + "grad_norm": 0.8417673671234892, + "learning_rate": 9.989174442861056e-06, + "loss": 0.4781, + "step": 1113 + }, + { + "epoch": 0.050418646752658974, + "grad_norm": 0.8276051715311175, + "learning_rate": 9.989126185923803e-06, + "loss": 0.4283, + "step": 1114 + }, + { + "epoch": 0.05046390586105454, + "grad_norm": 0.8593958832271233, + "learning_rate": 9.989077821785552e-06, + "loss": 0.4295, + "step": 1115 + }, + { + "epoch": 0.0505091649694501, + "grad_norm": 0.7195755355913743, + "learning_rate": 9.98902935044734e-06, + "loss": 0.4601, + "step": 1116 + }, + { + "epoch": 0.05055442407784567, + "grad_norm": 0.4017362161163235, + "learning_rate": 9.988980771910213e-06, + "loss": 0.5335, + "step": 1117 + }, + { + "epoch": 0.05059968318624123, + "grad_norm": 0.8845074196050715, + "learning_rate": 9.988932086175209e-06, + "loss": 0.471, + "step": 1118 + }, + { + "epoch": 0.05064494229463679, + "grad_norm": 0.761327368968983, + "learning_rate": 9.988883293243378e-06, + "loss": 0.4394, + "step": 1119 + }, + { + "epoch": 0.05069020140303236, + "grad_norm": 0.7783332130399511, + "learning_rate": 9.988834393115768e-06, + "loss": 0.4287, + "step": 1120 + }, + { + "epoch": 0.050735460511427924, + "grad_norm": 0.7309564495384006, + "learning_rate": 9.988785385793427e-06, + "loss": 0.4478, + "step": 1121 + }, + { + "epoch": 0.050780719619823486, + "grad_norm": 0.8208787390443257, + "learning_rate": 9.98873627127741e-06, + "loss": 0.4197, + "step": 1122 + }, + { + "epoch": 0.050825978728219055, + "grad_norm": 0.4006151609810976, + "learning_rate": 9.988687049568772e-06, + "loss": 0.5226, + "step": 1123 + }, + { + "epoch": 0.05087123783661462, + "grad_norm": 0.36589360608826726, + "learning_rate": 9.988637720668573e-06, + "loss": 0.5386, + "step": 1124 + }, + { + "epoch": 0.05091649694501019, + "grad_norm": 0.7734634813347808, + "learning_rate": 9.98858828457787e-06, + "loss": 0.4795, + "step": 1125 + }, + { + "epoch": 0.05096175605340575, + "grad_norm": 0.8394436286581666, + "learning_rate": 9.988538741297724e-06, + "loss": 0.4619, + "step": 1126 + }, + { + "epoch": 0.05100701516180131, + "grad_norm": 0.8843306463699511, + "learning_rate": 9.988489090829204e-06, + "loss": 0.4081, + "step": 1127 + }, + { + "epoch": 0.05105227427019688, + "grad_norm": 0.7403667217778743, + "learning_rate": 9.988439333173373e-06, + "loss": 0.4804, + "step": 1128 + }, + { + "epoch": 0.05109753337859244, + "grad_norm": 1.013403764971607, + "learning_rate": 9.988389468331304e-06, + "loss": 0.4803, + "step": 1129 + }, + { + "epoch": 0.051142792486988005, + "grad_norm": 0.757680606016578, + "learning_rate": 9.988339496304062e-06, + "loss": 0.4575, + "step": 1130 + }, + { + "epoch": 0.051188051595383574, + "grad_norm": 0.7396867410136777, + "learning_rate": 9.988289417092729e-06, + "loss": 0.4178, + "step": 1131 + }, + { + "epoch": 0.051233310703779136, + "grad_norm": 0.8297804902848872, + "learning_rate": 9.988239230698373e-06, + "loss": 0.5086, + "step": 1132 + }, + { + "epoch": 0.0512785698121747, + "grad_norm": 0.7912505856561427, + "learning_rate": 9.988188937122078e-06, + "loss": 0.4618, + "step": 1133 + }, + { + "epoch": 0.05132382892057027, + "grad_norm": 0.9367111562458711, + "learning_rate": 9.988138536364922e-06, + "loss": 0.4813, + "step": 1134 + }, + { + "epoch": 0.05136908802896583, + "grad_norm": 0.6628874626437402, + "learning_rate": 9.988088028427992e-06, + "loss": 0.5245, + "step": 1135 + }, + { + "epoch": 0.05141434713736139, + "grad_norm": 1.7104204042871356, + "learning_rate": 9.988037413312365e-06, + "loss": 0.4558, + "step": 1136 + }, + { + "epoch": 0.05145960624575696, + "grad_norm": 0.8066081014907509, + "learning_rate": 9.987986691019136e-06, + "loss": 0.4825, + "step": 1137 + }, + { + "epoch": 0.051504865354152524, + "grad_norm": 0.806073020603354, + "learning_rate": 9.987935861549393e-06, + "loss": 0.4536, + "step": 1138 + }, + { + "epoch": 0.051550124462548086, + "grad_norm": 0.7535675067601677, + "learning_rate": 9.987884924904228e-06, + "loss": 0.4765, + "step": 1139 + }, + { + "epoch": 0.051595383570943655, + "grad_norm": 0.8168490660049519, + "learning_rate": 9.987833881084734e-06, + "loss": 0.4676, + "step": 1140 + }, + { + "epoch": 0.05164064267933922, + "grad_norm": 0.849879574073609, + "learning_rate": 9.987782730092009e-06, + "loss": 0.485, + "step": 1141 + }, + { + "epoch": 0.05168590178773478, + "grad_norm": 0.8437989117176353, + "learning_rate": 9.987731471927152e-06, + "loss": 0.4804, + "step": 1142 + }, + { + "epoch": 0.05173116089613035, + "grad_norm": 1.1720073013495276, + "learning_rate": 9.987680106591264e-06, + "loss": 0.4653, + "step": 1143 + }, + { + "epoch": 0.05177642000452591, + "grad_norm": 0.6529217354151006, + "learning_rate": 9.98762863408545e-06, + "loss": 0.5479, + "step": 1144 + }, + { + "epoch": 0.05182167911292147, + "grad_norm": 0.8299860927045014, + "learning_rate": 9.987577054410813e-06, + "loss": 0.4758, + "step": 1145 + }, + { + "epoch": 0.05186693822131704, + "grad_norm": 0.7964057437485828, + "learning_rate": 9.987525367568464e-06, + "loss": 0.4813, + "step": 1146 + }, + { + "epoch": 0.051912197329712605, + "grad_norm": 0.7868668624743436, + "learning_rate": 9.987473573559514e-06, + "loss": 0.4751, + "step": 1147 + }, + { + "epoch": 0.05195745643810817, + "grad_norm": 0.36553766406529065, + "learning_rate": 9.987421672385073e-06, + "loss": 0.5253, + "step": 1148 + }, + { + "epoch": 0.052002715546503736, + "grad_norm": 0.8803059079274371, + "learning_rate": 9.98736966404626e-06, + "loss": 0.474, + "step": 1149 + }, + { + "epoch": 0.0520479746548993, + "grad_norm": 0.7268443082420789, + "learning_rate": 9.98731754854419e-06, + "loss": 0.4543, + "step": 1150 + }, + { + "epoch": 0.05209323376329486, + "grad_norm": 0.38910048527339913, + "learning_rate": 9.987265325879983e-06, + "loss": 0.5388, + "step": 1151 + }, + { + "epoch": 0.05213849287169043, + "grad_norm": 0.37753851519302517, + "learning_rate": 9.98721299605476e-06, + "loss": 0.5392, + "step": 1152 + }, + { + "epoch": 0.05218375198008599, + "grad_norm": 1.2202861369230587, + "learning_rate": 9.987160559069649e-06, + "loss": 0.4305, + "step": 1153 + }, + { + "epoch": 0.052229011088481554, + "grad_norm": 0.8826481532913922, + "learning_rate": 9.987108014925772e-06, + "loss": 0.5202, + "step": 1154 + }, + { + "epoch": 0.05227427019687712, + "grad_norm": 0.8066686419753973, + "learning_rate": 9.987055363624263e-06, + "loss": 0.4912, + "step": 1155 + }, + { + "epoch": 0.052319529305272686, + "grad_norm": 0.9693526840801042, + "learning_rate": 9.98700260516625e-06, + "loss": 0.4606, + "step": 1156 + }, + { + "epoch": 0.05236478841366825, + "grad_norm": 0.7720688308741186, + "learning_rate": 9.986949739552867e-06, + "loss": 0.4528, + "step": 1157 + }, + { + "epoch": 0.05241004752206382, + "grad_norm": 0.6195630876054363, + "learning_rate": 9.98689676678525e-06, + "loss": 0.5449, + "step": 1158 + }, + { + "epoch": 0.05245530663045938, + "grad_norm": 1.0037880322159685, + "learning_rate": 9.986843686864538e-06, + "loss": 0.4996, + "step": 1159 + }, + { + "epoch": 0.05250056573885494, + "grad_norm": 0.8977655363937432, + "learning_rate": 9.986790499791872e-06, + "loss": 0.4897, + "step": 1160 + }, + { + "epoch": 0.05254582484725051, + "grad_norm": 0.42159467768136355, + "learning_rate": 9.986737205568393e-06, + "loss": 0.5529, + "step": 1161 + }, + { + "epoch": 0.05259108395564607, + "grad_norm": 0.7786886256636473, + "learning_rate": 9.986683804195248e-06, + "loss": 0.4662, + "step": 1162 + }, + { + "epoch": 0.052636343064041635, + "grad_norm": 0.9184508577564199, + "learning_rate": 9.98663029567358e-06, + "loss": 0.5036, + "step": 1163 + }, + { + "epoch": 0.052681602172437204, + "grad_norm": 0.445883140071055, + "learning_rate": 9.986576680004546e-06, + "loss": 0.5074, + "step": 1164 + }, + { + "epoch": 0.05272686128083277, + "grad_norm": 0.7543499978541274, + "learning_rate": 9.986522957189293e-06, + "loss": 0.4596, + "step": 1165 + }, + { + "epoch": 0.05277212038922833, + "grad_norm": 0.3332988296091536, + "learning_rate": 9.986469127228977e-06, + "loss": 0.5426, + "step": 1166 + }, + { + "epoch": 0.0528173794976239, + "grad_norm": 0.29582601007727477, + "learning_rate": 9.986415190124754e-06, + "loss": 0.5026, + "step": 1167 + }, + { + "epoch": 0.05286263860601946, + "grad_norm": 0.9204990477506604, + "learning_rate": 9.986361145877783e-06, + "loss": 0.442, + "step": 1168 + }, + { + "epoch": 0.05290789771441503, + "grad_norm": 0.4160967570525296, + "learning_rate": 9.986306994489226e-06, + "loss": 0.5665, + "step": 1169 + }, + { + "epoch": 0.05295315682281059, + "grad_norm": 0.8304239963080823, + "learning_rate": 9.986252735960245e-06, + "loss": 0.4393, + "step": 1170 + }, + { + "epoch": 0.052998415931206154, + "grad_norm": 0.3926909854775528, + "learning_rate": 9.986198370292007e-06, + "loss": 0.5234, + "step": 1171 + }, + { + "epoch": 0.05304367503960172, + "grad_norm": 0.8733750774138885, + "learning_rate": 9.98614389748568e-06, + "loss": 0.5123, + "step": 1172 + }, + { + "epoch": 0.053088934147997285, + "grad_norm": 0.8887474864844891, + "learning_rate": 9.986089317542434e-06, + "loss": 0.5097, + "step": 1173 + }, + { + "epoch": 0.05313419325639285, + "grad_norm": 0.7862722297713525, + "learning_rate": 9.986034630463443e-06, + "loss": 0.4245, + "step": 1174 + }, + { + "epoch": 0.05317945236478842, + "grad_norm": 0.7915565801064068, + "learning_rate": 9.985979836249882e-06, + "loss": 0.4794, + "step": 1175 + }, + { + "epoch": 0.05322471147318398, + "grad_norm": 0.8468539133747583, + "learning_rate": 9.985924934902927e-06, + "loss": 0.4669, + "step": 1176 + }, + { + "epoch": 0.05326997058157954, + "grad_norm": 0.6026584322786569, + "learning_rate": 9.985869926423757e-06, + "loss": 0.5246, + "step": 1177 + }, + { + "epoch": 0.05331522968997511, + "grad_norm": 0.49181922663684496, + "learning_rate": 9.985814810813556e-06, + "loss": 0.5579, + "step": 1178 + }, + { + "epoch": 0.05336048879837067, + "grad_norm": 0.865929858522939, + "learning_rate": 9.985759588073508e-06, + "loss": 0.4746, + "step": 1179 + }, + { + "epoch": 0.053405747906766235, + "grad_norm": 0.4006188616165955, + "learning_rate": 9.985704258204798e-06, + "loss": 0.5232, + "step": 1180 + }, + { + "epoch": 0.053451007015161804, + "grad_norm": 0.44661749061090317, + "learning_rate": 9.985648821208616e-06, + "loss": 0.5318, + "step": 1181 + }, + { + "epoch": 0.053496266123557366, + "grad_norm": 0.8150202842917483, + "learning_rate": 9.985593277086155e-06, + "loss": 0.472, + "step": 1182 + }, + { + "epoch": 0.05354152523195293, + "grad_norm": 0.9321471164895254, + "learning_rate": 9.985537625838603e-06, + "loss": 0.4737, + "step": 1183 + }, + { + "epoch": 0.0535867843403485, + "grad_norm": 0.4787161789655883, + "learning_rate": 9.985481867467162e-06, + "loss": 0.5248, + "step": 1184 + }, + { + "epoch": 0.05363204344874406, + "grad_norm": 0.781597037845619, + "learning_rate": 9.985426001973026e-06, + "loss": 0.447, + "step": 1185 + }, + { + "epoch": 0.05367730255713962, + "grad_norm": 0.7739348385383168, + "learning_rate": 9.985370029357399e-06, + "loss": 0.4877, + "step": 1186 + }, + { + "epoch": 0.05372256166553519, + "grad_norm": 0.8249985271286937, + "learning_rate": 9.98531394962148e-06, + "loss": 0.4825, + "step": 1187 + }, + { + "epoch": 0.053767820773930754, + "grad_norm": 0.7769368520306797, + "learning_rate": 9.985257762766476e-06, + "loss": 0.4585, + "step": 1188 + }, + { + "epoch": 0.053813079882326316, + "grad_norm": 0.49925545381022285, + "learning_rate": 9.985201468793593e-06, + "loss": 0.5321, + "step": 1189 + }, + { + "epoch": 0.053858338990721885, + "grad_norm": 0.3838928358822514, + "learning_rate": 9.985145067704042e-06, + "loss": 0.5298, + "step": 1190 + }, + { + "epoch": 0.05390359809911745, + "grad_norm": 0.9635548790408048, + "learning_rate": 9.985088559499032e-06, + "loss": 0.4555, + "step": 1191 + }, + { + "epoch": 0.05394885720751301, + "grad_norm": 0.8111640620370049, + "learning_rate": 9.985031944179781e-06, + "loss": 0.476, + "step": 1192 + }, + { + "epoch": 0.05399411631590858, + "grad_norm": 0.8288410981511287, + "learning_rate": 9.984975221747505e-06, + "loss": 0.4952, + "step": 1193 + }, + { + "epoch": 0.05403937542430414, + "grad_norm": 0.8598494946424741, + "learning_rate": 9.984918392203421e-06, + "loss": 0.4564, + "step": 1194 + }, + { + "epoch": 0.0540846345326997, + "grad_norm": 0.7828458336222965, + "learning_rate": 9.98486145554875e-06, + "loss": 0.4415, + "step": 1195 + }, + { + "epoch": 0.05412989364109527, + "grad_norm": 0.7851268025912339, + "learning_rate": 9.984804411784717e-06, + "loss": 0.4606, + "step": 1196 + }, + { + "epoch": 0.054175152749490835, + "grad_norm": 0.7997777564795878, + "learning_rate": 9.984747260912546e-06, + "loss": 0.4906, + "step": 1197 + }, + { + "epoch": 0.0542204118578864, + "grad_norm": 0.7557512579413929, + "learning_rate": 9.984690002933465e-06, + "loss": 0.4964, + "step": 1198 + }, + { + "epoch": 0.054265670966281966, + "grad_norm": 1.010347993778274, + "learning_rate": 9.984632637848708e-06, + "loss": 0.5612, + "step": 1199 + }, + { + "epoch": 0.05431093007467753, + "grad_norm": 0.5411492624911337, + "learning_rate": 9.984575165659503e-06, + "loss": 0.5502, + "step": 1200 + }, + { + "epoch": 0.05435618918307309, + "grad_norm": 1.0723421207044346, + "learning_rate": 9.984517586367088e-06, + "loss": 0.4881, + "step": 1201 + }, + { + "epoch": 0.05440144829146866, + "grad_norm": 1.024700727373943, + "learning_rate": 9.984459899972696e-06, + "loss": 0.4582, + "step": 1202 + }, + { + "epoch": 0.05444670739986422, + "grad_norm": 0.8541022994896453, + "learning_rate": 9.984402106477572e-06, + "loss": 0.4682, + "step": 1203 + }, + { + "epoch": 0.054491966508259784, + "grad_norm": 0.8690265217212084, + "learning_rate": 9.984344205882954e-06, + "loss": 0.4897, + "step": 1204 + }, + { + "epoch": 0.05453722561665535, + "grad_norm": 1.574950294258745, + "learning_rate": 9.984286198190087e-06, + "loss": 0.5448, + "step": 1205 + }, + { + "epoch": 0.054582484725050916, + "grad_norm": 0.9788112855639516, + "learning_rate": 9.984228083400218e-06, + "loss": 0.48, + "step": 1206 + }, + { + "epoch": 0.05462774383344648, + "grad_norm": 0.8732371443748891, + "learning_rate": 9.984169861514597e-06, + "loss": 0.4679, + "step": 1207 + }, + { + "epoch": 0.05467300294184205, + "grad_norm": 0.6303672878123504, + "learning_rate": 9.98411153253447e-06, + "loss": 0.5437, + "step": 1208 + }, + { + "epoch": 0.05471826205023761, + "grad_norm": 0.9558686425270596, + "learning_rate": 9.984053096461098e-06, + "loss": 0.4852, + "step": 1209 + }, + { + "epoch": 0.05476352115863317, + "grad_norm": 1.087269887409945, + "learning_rate": 9.983994553295728e-06, + "loss": 0.4598, + "step": 1210 + }, + { + "epoch": 0.05480878026702874, + "grad_norm": 0.8303647766368976, + "learning_rate": 9.983935903039625e-06, + "loss": 0.4845, + "step": 1211 + }, + { + "epoch": 0.0548540393754243, + "grad_norm": 0.8183369574706792, + "learning_rate": 9.983877145694046e-06, + "loss": 0.5078, + "step": 1212 + }, + { + "epoch": 0.05489929848381987, + "grad_norm": 0.8522053359132349, + "learning_rate": 9.983818281260253e-06, + "loss": 0.4419, + "step": 1213 + }, + { + "epoch": 0.054944557592215434, + "grad_norm": 0.9621572091092583, + "learning_rate": 9.983759309739512e-06, + "loss": 0.4218, + "step": 1214 + }, + { + "epoch": 0.054989816700611, + "grad_norm": 0.9053413715699286, + "learning_rate": 9.98370023113309e-06, + "loss": 0.5094, + "step": 1215 + }, + { + "epoch": 0.055035075809006566, + "grad_norm": 0.7835898344779059, + "learning_rate": 9.983641045442256e-06, + "loss": 0.4825, + "step": 1216 + }, + { + "epoch": 0.05508033491740213, + "grad_norm": 0.9485400715813633, + "learning_rate": 9.983581752668283e-06, + "loss": 0.5178, + "step": 1217 + }, + { + "epoch": 0.05512559402579769, + "grad_norm": 0.7566415295491471, + "learning_rate": 9.983522352812443e-06, + "loss": 0.4506, + "step": 1218 + }, + { + "epoch": 0.05517085313419326, + "grad_norm": 0.7828487329955025, + "learning_rate": 9.983462845876015e-06, + "loss": 0.4485, + "step": 1219 + }, + { + "epoch": 0.05521611224258882, + "grad_norm": 0.8264869845338306, + "learning_rate": 9.983403231860273e-06, + "loss": 0.4515, + "step": 1220 + }, + { + "epoch": 0.055261371350984384, + "grad_norm": 1.5117493089257694, + "learning_rate": 9.983343510766504e-06, + "loss": 0.5786, + "step": 1221 + }, + { + "epoch": 0.05530663045937995, + "grad_norm": 0.9014398579583309, + "learning_rate": 9.983283682595986e-06, + "loss": 0.4313, + "step": 1222 + }, + { + "epoch": 0.055351889567775515, + "grad_norm": 0.614288103847374, + "learning_rate": 9.983223747350008e-06, + "loss": 0.5528, + "step": 1223 + }, + { + "epoch": 0.05539714867617108, + "grad_norm": 0.9713214207122499, + "learning_rate": 9.983163705029857e-06, + "loss": 0.4304, + "step": 1224 + }, + { + "epoch": 0.05544240778456665, + "grad_norm": 0.9031593512931938, + "learning_rate": 9.983103555636821e-06, + "loss": 0.4615, + "step": 1225 + }, + { + "epoch": 0.05548766689296221, + "grad_norm": 0.9316822418803643, + "learning_rate": 9.983043299172195e-06, + "loss": 0.4663, + "step": 1226 + }, + { + "epoch": 0.05553292600135777, + "grad_norm": 1.5925339118559703, + "learning_rate": 9.982982935637272e-06, + "loss": 0.5504, + "step": 1227 + }, + { + "epoch": 0.05557818510975334, + "grad_norm": 0.8044607515862033, + "learning_rate": 9.98292246503335e-06, + "loss": 0.4547, + "step": 1228 + }, + { + "epoch": 0.0556234442181489, + "grad_norm": 0.7912690572472879, + "learning_rate": 9.982861887361728e-06, + "loss": 0.4676, + "step": 1229 + }, + { + "epoch": 0.055668703326544465, + "grad_norm": 0.9431332129547105, + "learning_rate": 9.982801202623708e-06, + "loss": 0.4764, + "step": 1230 + }, + { + "epoch": 0.055713962434940034, + "grad_norm": 0.8081337093685754, + "learning_rate": 9.982740410820595e-06, + "loss": 0.4274, + "step": 1231 + }, + { + "epoch": 0.055759221543335596, + "grad_norm": 0.763449935761009, + "learning_rate": 9.98267951195369e-06, + "loss": 0.406, + "step": 1232 + }, + { + "epoch": 0.05580448065173116, + "grad_norm": 0.8422970501795058, + "learning_rate": 9.982618506024309e-06, + "loss": 0.451, + "step": 1233 + }, + { + "epoch": 0.05584973976012673, + "grad_norm": 0.757141131265391, + "learning_rate": 9.982557393033758e-06, + "loss": 0.4609, + "step": 1234 + }, + { + "epoch": 0.05589499886852229, + "grad_norm": 0.7797311152704947, + "learning_rate": 9.98249617298335e-06, + "loss": 0.4758, + "step": 1235 + }, + { + "epoch": 0.05594025797691785, + "grad_norm": 0.7571561418356503, + "learning_rate": 9.982434845874405e-06, + "loss": 0.4443, + "step": 1236 + }, + { + "epoch": 0.05598551708531342, + "grad_norm": 0.940424347691789, + "learning_rate": 9.982373411708237e-06, + "loss": 0.4379, + "step": 1237 + }, + { + "epoch": 0.056030776193708984, + "grad_norm": 0.8378688150058623, + "learning_rate": 9.982311870486166e-06, + "loss": 0.5175, + "step": 1238 + }, + { + "epoch": 0.056076035302104546, + "grad_norm": 0.7725216832344305, + "learning_rate": 9.982250222209513e-06, + "loss": 0.5485, + "step": 1239 + }, + { + "epoch": 0.056121294410500115, + "grad_norm": 0.5805096150729553, + "learning_rate": 9.982188466879607e-06, + "loss": 0.5171, + "step": 1240 + }, + { + "epoch": 0.05616655351889568, + "grad_norm": 0.857878552541859, + "learning_rate": 9.98212660449777e-06, + "loss": 0.432, + "step": 1241 + }, + { + "epoch": 0.05621181262729124, + "grad_norm": 0.4158987046032542, + "learning_rate": 9.982064635065336e-06, + "loss": 0.5439, + "step": 1242 + }, + { + "epoch": 0.05625707173568681, + "grad_norm": 0.4930079344799815, + "learning_rate": 9.982002558583633e-06, + "loss": 0.5426, + "step": 1243 + }, + { + "epoch": 0.05630233084408237, + "grad_norm": 0.9266098650508937, + "learning_rate": 9.981940375053996e-06, + "loss": 0.4712, + "step": 1244 + }, + { + "epoch": 0.05634758995247793, + "grad_norm": 0.8588623400330749, + "learning_rate": 9.981878084477764e-06, + "loss": 0.4478, + "step": 1245 + }, + { + "epoch": 0.0563928490608735, + "grad_norm": 0.7961382205379587, + "learning_rate": 9.981815686856268e-06, + "loss": 0.4534, + "step": 1246 + }, + { + "epoch": 0.056438108169269065, + "grad_norm": 0.8930930962002508, + "learning_rate": 9.981753182190856e-06, + "loss": 0.4689, + "step": 1247 + }, + { + "epoch": 0.05648336727766463, + "grad_norm": 0.907959707580663, + "learning_rate": 9.981690570482869e-06, + "loss": 0.4321, + "step": 1248 + }, + { + "epoch": 0.056528626386060196, + "grad_norm": 0.7678510229566248, + "learning_rate": 9.981627851733651e-06, + "loss": 0.5635, + "step": 1249 + }, + { + "epoch": 0.05657388549445576, + "grad_norm": 0.8515278105378038, + "learning_rate": 9.98156502594455e-06, + "loss": 0.4135, + "step": 1250 + }, + { + "epoch": 0.05661914460285132, + "grad_norm": 0.7744151104367206, + "learning_rate": 9.981502093116917e-06, + "loss": 0.4919, + "step": 1251 + }, + { + "epoch": 0.05666440371124689, + "grad_norm": 0.7920093313730772, + "learning_rate": 9.981439053252102e-06, + "loss": 0.4288, + "step": 1252 + }, + { + "epoch": 0.05670966281964245, + "grad_norm": 0.8486807298027813, + "learning_rate": 9.981375906351463e-06, + "loss": 0.4679, + "step": 1253 + }, + { + "epoch": 0.056754921928038014, + "grad_norm": 1.0169113659412827, + "learning_rate": 9.981312652416353e-06, + "loss": 0.5354, + "step": 1254 + }, + { + "epoch": 0.056800181036433584, + "grad_norm": 0.5822200648528999, + "learning_rate": 9.981249291448134e-06, + "loss": 0.5397, + "step": 1255 + }, + { + "epoch": 0.056845440144829146, + "grad_norm": 0.8313948880129977, + "learning_rate": 9.981185823448166e-06, + "loss": 0.4753, + "step": 1256 + }, + { + "epoch": 0.056890699253224715, + "grad_norm": 0.43699857233451633, + "learning_rate": 9.981122248417815e-06, + "loss": 0.5286, + "step": 1257 + }, + { + "epoch": 0.05693595836162028, + "grad_norm": 0.3821736230004524, + "learning_rate": 9.981058566358443e-06, + "loss": 0.5527, + "step": 1258 + }, + { + "epoch": 0.05698121747001584, + "grad_norm": 0.7856472855765193, + "learning_rate": 9.98099477727142e-06, + "loss": 0.4282, + "step": 1259 + }, + { + "epoch": 0.05702647657841141, + "grad_norm": 0.4076765456528284, + "learning_rate": 9.98093088115812e-06, + "loss": 0.5309, + "step": 1260 + }, + { + "epoch": 0.05707173568680697, + "grad_norm": 0.8071172893195849, + "learning_rate": 9.980866878019911e-06, + "loss": 0.4614, + "step": 1261 + }, + { + "epoch": 0.05711699479520253, + "grad_norm": 0.8475654460872483, + "learning_rate": 9.98080276785817e-06, + "loss": 0.4701, + "step": 1262 + }, + { + "epoch": 0.0571622539035981, + "grad_norm": 0.47535055779038554, + "learning_rate": 9.980738550674277e-06, + "loss": 0.5347, + "step": 1263 + }, + { + "epoch": 0.057207513011993665, + "grad_norm": 0.7909484322536614, + "learning_rate": 9.980674226469608e-06, + "loss": 0.485, + "step": 1264 + }, + { + "epoch": 0.05725277212038923, + "grad_norm": 0.8709597748130423, + "learning_rate": 9.980609795245548e-06, + "loss": 0.4867, + "step": 1265 + }, + { + "epoch": 0.057298031228784796, + "grad_norm": 0.8005551517657652, + "learning_rate": 9.980545257003481e-06, + "loss": 0.4603, + "step": 1266 + }, + { + "epoch": 0.05734329033718036, + "grad_norm": 0.7964388047236337, + "learning_rate": 9.980480611744791e-06, + "loss": 0.4336, + "step": 1267 + }, + { + "epoch": 0.05738854944557592, + "grad_norm": 0.7983523114274277, + "learning_rate": 9.980415859470872e-06, + "loss": 0.4721, + "step": 1268 + }, + { + "epoch": 0.05743380855397149, + "grad_norm": 0.829019588478564, + "learning_rate": 9.980351000183108e-06, + "loss": 0.4537, + "step": 1269 + }, + { + "epoch": 0.05747906766236705, + "grad_norm": 0.8097302752292416, + "learning_rate": 9.9802860338829e-06, + "loss": 0.4967, + "step": 1270 + }, + { + "epoch": 0.057524326770762614, + "grad_norm": 0.9222055660580988, + "learning_rate": 9.98022096057164e-06, + "loss": 0.4537, + "step": 1271 + }, + { + "epoch": 0.05756958587915818, + "grad_norm": 0.8110170634260588, + "learning_rate": 9.980155780250728e-06, + "loss": 0.4337, + "step": 1272 + }, + { + "epoch": 0.057614844987553746, + "grad_norm": 0.7978741699808711, + "learning_rate": 9.980090492921563e-06, + "loss": 0.4801, + "step": 1273 + }, + { + "epoch": 0.05766010409594931, + "grad_norm": 0.8338140737252998, + "learning_rate": 9.98002509858555e-06, + "loss": 0.5115, + "step": 1274 + }, + { + "epoch": 0.05770536320434488, + "grad_norm": 0.7310445282087151, + "learning_rate": 9.979959597244089e-06, + "loss": 0.4572, + "step": 1275 + }, + { + "epoch": 0.05775062231274044, + "grad_norm": 0.7531542525602791, + "learning_rate": 9.979893988898592e-06, + "loss": 0.4782, + "step": 1276 + }, + { + "epoch": 0.057795881421136, + "grad_norm": 0.7523160239405309, + "learning_rate": 9.97982827355047e-06, + "loss": 0.4425, + "step": 1277 + }, + { + "epoch": 0.05784114052953157, + "grad_norm": 0.5112046197859239, + "learning_rate": 9.979762451201132e-06, + "loss": 0.5545, + "step": 1278 + }, + { + "epoch": 0.05788639963792713, + "grad_norm": 0.8668024028019767, + "learning_rate": 9.979696521851992e-06, + "loss": 0.4949, + "step": 1279 + }, + { + "epoch": 0.057931658746322695, + "grad_norm": 0.779857898757288, + "learning_rate": 9.979630485504468e-06, + "loss": 0.4369, + "step": 1280 + }, + { + "epoch": 0.057976917854718264, + "grad_norm": 0.751721968005427, + "learning_rate": 9.97956434215998e-06, + "loss": 0.4641, + "step": 1281 + }, + { + "epoch": 0.058022176963113826, + "grad_norm": 0.7025221316380706, + "learning_rate": 9.979498091819946e-06, + "loss": 0.429, + "step": 1282 + }, + { + "epoch": 0.05806743607150939, + "grad_norm": 0.7194116567943578, + "learning_rate": 9.979431734485794e-06, + "loss": 0.4303, + "step": 1283 + }, + { + "epoch": 0.05811269517990496, + "grad_norm": 0.7585130981008433, + "learning_rate": 9.979365270158945e-06, + "loss": 0.4604, + "step": 1284 + }, + { + "epoch": 0.05815795428830052, + "grad_norm": 0.7978623772369193, + "learning_rate": 9.979298698840829e-06, + "loss": 0.4644, + "step": 1285 + }, + { + "epoch": 0.05820321339669608, + "grad_norm": 0.7284784348084953, + "learning_rate": 9.979232020532877e-06, + "loss": 0.4468, + "step": 1286 + }, + { + "epoch": 0.05824847250509165, + "grad_norm": 0.7619367485973766, + "learning_rate": 9.979165235236523e-06, + "loss": 0.4488, + "step": 1287 + }, + { + "epoch": 0.058293731613487214, + "grad_norm": 0.6968847961889845, + "learning_rate": 9.979098342953198e-06, + "loss": 0.4614, + "step": 1288 + }, + { + "epoch": 0.058338990721882776, + "grad_norm": 1.0272658772877736, + "learning_rate": 9.979031343684344e-06, + "loss": 0.4614, + "step": 1289 + }, + { + "epoch": 0.058384249830278345, + "grad_norm": 0.5435257113968114, + "learning_rate": 9.978964237431396e-06, + "loss": 0.5455, + "step": 1290 + }, + { + "epoch": 0.05842950893867391, + "grad_norm": 0.8353831810505, + "learning_rate": 9.978897024195801e-06, + "loss": 0.4309, + "step": 1291 + }, + { + "epoch": 0.05847476804706947, + "grad_norm": 0.7290873281764264, + "learning_rate": 9.978829703978999e-06, + "loss": 0.4579, + "step": 1292 + }, + { + "epoch": 0.05852002715546504, + "grad_norm": 0.3411679847978857, + "learning_rate": 9.978762276782438e-06, + "loss": 0.5133, + "step": 1293 + }, + { + "epoch": 0.0585652862638606, + "grad_norm": 0.3718275823828351, + "learning_rate": 9.978694742607566e-06, + "loss": 0.5583, + "step": 1294 + }, + { + "epoch": 0.05861054537225616, + "grad_norm": 0.9738128977028798, + "learning_rate": 9.978627101455836e-06, + "loss": 0.4861, + "step": 1295 + }, + { + "epoch": 0.05865580448065173, + "grad_norm": 0.9236918922061743, + "learning_rate": 9.9785593533287e-06, + "loss": 0.5127, + "step": 1296 + }, + { + "epoch": 0.058701063589047295, + "grad_norm": 0.6878931249632689, + "learning_rate": 9.978491498227615e-06, + "loss": 0.4449, + "step": 1297 + }, + { + "epoch": 0.05874632269744286, + "grad_norm": 0.7557270130278592, + "learning_rate": 9.978423536154036e-06, + "loss": 0.4625, + "step": 1298 + }, + { + "epoch": 0.058791581805838426, + "grad_norm": 1.0642306750672659, + "learning_rate": 9.978355467109427e-06, + "loss": 0.4336, + "step": 1299 + }, + { + "epoch": 0.05883684091423399, + "grad_norm": 0.5117178183518919, + "learning_rate": 9.978287291095248e-06, + "loss": 0.5213, + "step": 1300 + }, + { + "epoch": 0.05888210002262955, + "grad_norm": 0.794491939155005, + "learning_rate": 9.978219008112965e-06, + "loss": 0.4541, + "step": 1301 + }, + { + "epoch": 0.05892735913102512, + "grad_norm": 0.8099921246940341, + "learning_rate": 9.978150618164044e-06, + "loss": 0.4702, + "step": 1302 + }, + { + "epoch": 0.05897261823942068, + "grad_norm": 0.7695831619969989, + "learning_rate": 9.978082121249957e-06, + "loss": 0.4413, + "step": 1303 + }, + { + "epoch": 0.05901787734781625, + "grad_norm": 0.8738306669138687, + "learning_rate": 9.978013517372173e-06, + "loss": 0.4808, + "step": 1304 + }, + { + "epoch": 0.059063136456211814, + "grad_norm": 0.8250768119015344, + "learning_rate": 9.977944806532169e-06, + "loss": 0.5147, + "step": 1305 + }, + { + "epoch": 0.059108395564607376, + "grad_norm": 0.49320793021794157, + "learning_rate": 9.977875988731418e-06, + "loss": 0.5311, + "step": 1306 + }, + { + "epoch": 0.059153654673002945, + "grad_norm": 0.6911527640318451, + "learning_rate": 9.977807063971401e-06, + "loss": 0.411, + "step": 1307 + }, + { + "epoch": 0.05919891378139851, + "grad_norm": 0.7413756021870055, + "learning_rate": 9.977738032253598e-06, + "loss": 0.4715, + "step": 1308 + }, + { + "epoch": 0.05924417288979407, + "grad_norm": 0.8133220610132956, + "learning_rate": 9.977668893579493e-06, + "loss": 0.4609, + "step": 1309 + }, + { + "epoch": 0.05928943199818964, + "grad_norm": 0.8196571698370698, + "learning_rate": 9.977599647950572e-06, + "loss": 0.4714, + "step": 1310 + }, + { + "epoch": 0.0593346911065852, + "grad_norm": 0.8644105862836049, + "learning_rate": 9.977530295368321e-06, + "loss": 0.4819, + "step": 1311 + }, + { + "epoch": 0.05937995021498076, + "grad_norm": 1.0647079301335312, + "learning_rate": 9.977460835834231e-06, + "loss": 0.4529, + "step": 1312 + }, + { + "epoch": 0.05942520932337633, + "grad_norm": 0.8206674886176298, + "learning_rate": 9.977391269349795e-06, + "loss": 0.458, + "step": 1313 + }, + { + "epoch": 0.059470468431771895, + "grad_norm": 0.5318759671503385, + "learning_rate": 9.977321595916507e-06, + "loss": 0.5106, + "step": 1314 + }, + { + "epoch": 0.05951572754016746, + "grad_norm": 0.8727685781150271, + "learning_rate": 9.977251815535867e-06, + "loss": 0.4952, + "step": 1315 + }, + { + "epoch": 0.059560986648563026, + "grad_norm": 0.744440840375188, + "learning_rate": 9.97718192820937e-06, + "loss": 0.4188, + "step": 1316 + }, + { + "epoch": 0.05960624575695859, + "grad_norm": 1.0437273788657893, + "learning_rate": 9.977111933938519e-06, + "loss": 0.4429, + "step": 1317 + }, + { + "epoch": 0.05965150486535415, + "grad_norm": 0.46107440696628565, + "learning_rate": 9.97704183272482e-06, + "loss": 0.525, + "step": 1318 + }, + { + "epoch": 0.05969676397374972, + "grad_norm": 0.9797169729349386, + "learning_rate": 9.976971624569776e-06, + "loss": 0.4579, + "step": 1319 + }, + { + "epoch": 0.05974202308214528, + "grad_norm": 0.8694097288243677, + "learning_rate": 9.9769013094749e-06, + "loss": 0.4914, + "step": 1320 + }, + { + "epoch": 0.059787282190540844, + "grad_norm": 0.8524501643852321, + "learning_rate": 9.976830887441699e-06, + "loss": 0.4999, + "step": 1321 + }, + { + "epoch": 0.05983254129893641, + "grad_norm": 0.8575124599427206, + "learning_rate": 9.976760358471687e-06, + "loss": 0.4622, + "step": 1322 + }, + { + "epoch": 0.059877800407331976, + "grad_norm": 0.786340660897482, + "learning_rate": 9.976689722566379e-06, + "loss": 0.4204, + "step": 1323 + }, + { + "epoch": 0.05992305951572754, + "grad_norm": 0.4433058625534793, + "learning_rate": 9.976618979727295e-06, + "loss": 0.5612, + "step": 1324 + }, + { + "epoch": 0.05996831862412311, + "grad_norm": 0.8653521916140352, + "learning_rate": 9.976548129955953e-06, + "loss": 0.4716, + "step": 1325 + }, + { + "epoch": 0.06001357773251867, + "grad_norm": 0.796213598131806, + "learning_rate": 9.976477173253878e-06, + "loss": 0.4307, + "step": 1326 + }, + { + "epoch": 0.06005883684091423, + "grad_norm": 0.8431570163012182, + "learning_rate": 9.97640610962259e-06, + "loss": 0.4347, + "step": 1327 + }, + { + "epoch": 0.0601040959493098, + "grad_norm": 0.7192342906246789, + "learning_rate": 9.97633493906362e-06, + "loss": 0.4251, + "step": 1328 + }, + { + "epoch": 0.06014935505770536, + "grad_norm": 0.3889310353578457, + "learning_rate": 9.976263661578495e-06, + "loss": 0.5414, + "step": 1329 + }, + { + "epoch": 0.060194614166100925, + "grad_norm": 0.8408450911647659, + "learning_rate": 9.976192277168748e-06, + "loss": 0.455, + "step": 1330 + }, + { + "epoch": 0.060239873274496494, + "grad_norm": 0.7791459262815543, + "learning_rate": 9.976120785835912e-06, + "loss": 0.4486, + "step": 1331 + }, + { + "epoch": 0.06028513238289206, + "grad_norm": 1.5368903877653917, + "learning_rate": 9.976049187581523e-06, + "loss": 0.4438, + "step": 1332 + }, + { + "epoch": 0.06033039149128762, + "grad_norm": 0.8423946896063987, + "learning_rate": 9.97597748240712e-06, + "loss": 0.4697, + "step": 1333 + }, + { + "epoch": 0.06037565059968319, + "grad_norm": 0.7581531607654294, + "learning_rate": 9.975905670314243e-06, + "loss": 0.4845, + "step": 1334 + }, + { + "epoch": 0.06042090970807875, + "grad_norm": 0.8046542695421153, + "learning_rate": 9.975833751304435e-06, + "loss": 0.44, + "step": 1335 + }, + { + "epoch": 0.06046616881647431, + "grad_norm": 0.4528822879915298, + "learning_rate": 9.975761725379243e-06, + "loss": 0.5409, + "step": 1336 + }, + { + "epoch": 0.06051142792486988, + "grad_norm": 0.850037337299667, + "learning_rate": 9.975689592540214e-06, + "loss": 0.452, + "step": 1337 + }, + { + "epoch": 0.060556687033265444, + "grad_norm": 0.99034556951951, + "learning_rate": 9.975617352788897e-06, + "loss": 0.4389, + "step": 1338 + }, + { + "epoch": 0.060601946141661006, + "grad_norm": 1.0271152953869538, + "learning_rate": 9.975545006126843e-06, + "loss": 0.5071, + "step": 1339 + }, + { + "epoch": 0.060647205250056575, + "grad_norm": 1.1420043625492036, + "learning_rate": 9.975472552555609e-06, + "loss": 0.4876, + "step": 1340 + }, + { + "epoch": 0.06069246435845214, + "grad_norm": 0.6971624850612915, + "learning_rate": 9.975399992076752e-06, + "loss": 0.4107, + "step": 1341 + }, + { + "epoch": 0.0607377234668477, + "grad_norm": 0.9569774396437504, + "learning_rate": 9.975327324691828e-06, + "loss": 0.4331, + "step": 1342 + }, + { + "epoch": 0.06078298257524327, + "grad_norm": 0.7914603074563847, + "learning_rate": 9.9752545504024e-06, + "loss": 0.5065, + "step": 1343 + }, + { + "epoch": 0.06082824168363883, + "grad_norm": 0.9088018964302117, + "learning_rate": 9.975181669210034e-06, + "loss": 0.4993, + "step": 1344 + }, + { + "epoch": 0.06087350079203439, + "grad_norm": 1.1608321903492493, + "learning_rate": 9.975108681116293e-06, + "loss": 0.5226, + "step": 1345 + }, + { + "epoch": 0.06091875990042996, + "grad_norm": 0.7616726835673043, + "learning_rate": 9.975035586122746e-06, + "loss": 0.4445, + "step": 1346 + }, + { + "epoch": 0.060964019008825525, + "grad_norm": 0.4804652401570814, + "learning_rate": 9.974962384230965e-06, + "loss": 0.5431, + "step": 1347 + }, + { + "epoch": 0.061009278117221094, + "grad_norm": 0.4173257563644276, + "learning_rate": 9.97488907544252e-06, + "loss": 0.5413, + "step": 1348 + }, + { + "epoch": 0.061054537225616656, + "grad_norm": 0.962069270717757, + "learning_rate": 9.97481565975899e-06, + "loss": 0.4772, + "step": 1349 + }, + { + "epoch": 0.06109979633401222, + "grad_norm": 0.36001383419618155, + "learning_rate": 9.97474213718195e-06, + "loss": 0.5404, + "step": 1350 + }, + { + "epoch": 0.06114505544240779, + "grad_norm": 0.7685197910763001, + "learning_rate": 9.974668507712979e-06, + "loss": 0.4357, + "step": 1351 + }, + { + "epoch": 0.06119031455080335, + "grad_norm": 0.7682770127489988, + "learning_rate": 9.974594771353662e-06, + "loss": 0.4166, + "step": 1352 + }, + { + "epoch": 0.06123557365919891, + "grad_norm": 0.8399578495600116, + "learning_rate": 9.97452092810558e-06, + "loss": 0.4639, + "step": 1353 + }, + { + "epoch": 0.06128083276759448, + "grad_norm": 0.7164554717054931, + "learning_rate": 9.974446977970322e-06, + "loss": 0.489, + "step": 1354 + }, + { + "epoch": 0.061326091875990044, + "grad_norm": 0.8019348898901124, + "learning_rate": 9.974372920949478e-06, + "loss": 0.4225, + "step": 1355 + }, + { + "epoch": 0.061371350984385606, + "grad_norm": 0.7457433655397221, + "learning_rate": 9.974298757044636e-06, + "loss": 0.4416, + "step": 1356 + }, + { + "epoch": 0.061416610092781175, + "grad_norm": 0.8093746951320654, + "learning_rate": 9.97422448625739e-06, + "loss": 0.4496, + "step": 1357 + }, + { + "epoch": 0.06146186920117674, + "grad_norm": 0.7702467986450054, + "learning_rate": 9.974150108589338e-06, + "loss": 0.4122, + "step": 1358 + }, + { + "epoch": 0.0615071283095723, + "grad_norm": 0.74345550836823, + "learning_rate": 9.974075624042076e-06, + "loss": 0.5027, + "step": 1359 + }, + { + "epoch": 0.06155238741796787, + "grad_norm": 0.7484354102674841, + "learning_rate": 9.974001032617208e-06, + "loss": 0.4435, + "step": 1360 + }, + { + "epoch": 0.06159764652636343, + "grad_norm": 0.749996191394422, + "learning_rate": 9.973926334316332e-06, + "loss": 0.5201, + "step": 1361 + }, + { + "epoch": 0.06164290563475899, + "grad_norm": 0.8440489977478883, + "learning_rate": 9.973851529141056e-06, + "loss": 0.4307, + "step": 1362 + }, + { + "epoch": 0.06168816474315456, + "grad_norm": 0.8720090740465758, + "learning_rate": 9.973776617092988e-06, + "loss": 0.4501, + "step": 1363 + }, + { + "epoch": 0.061733423851550125, + "grad_norm": 0.7660819683254514, + "learning_rate": 9.973701598173736e-06, + "loss": 0.5035, + "step": 1364 + }, + { + "epoch": 0.06177868295994569, + "grad_norm": 0.8494876594535249, + "learning_rate": 9.973626472384911e-06, + "loss": 0.4673, + "step": 1365 + }, + { + "epoch": 0.061823942068341256, + "grad_norm": 0.48390760598129084, + "learning_rate": 9.973551239728129e-06, + "loss": 0.5281, + "step": 1366 + }, + { + "epoch": 0.06186920117673682, + "grad_norm": 0.4369140872787393, + "learning_rate": 9.973475900205005e-06, + "loss": 0.5271, + "step": 1367 + }, + { + "epoch": 0.06191446028513238, + "grad_norm": 0.7571630206601964, + "learning_rate": 9.97340045381716e-06, + "loss": 0.457, + "step": 1368 + }, + { + "epoch": 0.06195971939352795, + "grad_norm": 0.7646121483191878, + "learning_rate": 9.973324900566214e-06, + "loss": 0.4085, + "step": 1369 + }, + { + "epoch": 0.06200497850192351, + "grad_norm": 1.3464464658815254, + "learning_rate": 9.973249240453789e-06, + "loss": 0.4159, + "step": 1370 + }, + { + "epoch": 0.062050237610319074, + "grad_norm": 0.903357108813622, + "learning_rate": 9.973173473481513e-06, + "loss": 0.4295, + "step": 1371 + }, + { + "epoch": 0.06209549671871464, + "grad_norm": 0.7900153899952508, + "learning_rate": 9.973097599651013e-06, + "loss": 0.4428, + "step": 1372 + }, + { + "epoch": 0.062140755827110206, + "grad_norm": 0.9107180209952301, + "learning_rate": 9.973021618963919e-06, + "loss": 0.4394, + "step": 1373 + }, + { + "epoch": 0.06218601493550577, + "grad_norm": 0.7416484939825573, + "learning_rate": 9.972945531421863e-06, + "loss": 0.4603, + "step": 1374 + }, + { + "epoch": 0.06223127404390134, + "grad_norm": 1.465734093495972, + "learning_rate": 9.972869337026482e-06, + "loss": 0.4159, + "step": 1375 + }, + { + "epoch": 0.0622765331522969, + "grad_norm": 0.780362708639645, + "learning_rate": 9.972793035779412e-06, + "loss": 0.4091, + "step": 1376 + }, + { + "epoch": 0.06232179226069246, + "grad_norm": 0.808375725476795, + "learning_rate": 9.972716627682292e-06, + "loss": 0.494, + "step": 1377 + }, + { + "epoch": 0.06236705136908803, + "grad_norm": 0.8677968454058532, + "learning_rate": 9.972640112736764e-06, + "loss": 0.5243, + "step": 1378 + }, + { + "epoch": 0.06241231047748359, + "grad_norm": 0.7190882477891677, + "learning_rate": 9.972563490944474e-06, + "loss": 0.4548, + "step": 1379 + }, + { + "epoch": 0.062457569585879155, + "grad_norm": 0.8145344529184204, + "learning_rate": 9.972486762307064e-06, + "loss": 0.4349, + "step": 1380 + }, + { + "epoch": 0.06250282869427472, + "grad_norm": 0.82848896121603, + "learning_rate": 9.972409926826188e-06, + "loss": 0.419, + "step": 1381 + }, + { + "epoch": 0.0625480878026703, + "grad_norm": 0.8915242371627252, + "learning_rate": 9.972332984503493e-06, + "loss": 0.5263, + "step": 1382 + }, + { + "epoch": 0.06259334691106586, + "grad_norm": 0.8686588179710028, + "learning_rate": 9.972255935340631e-06, + "loss": 0.4617, + "step": 1383 + }, + { + "epoch": 0.06263860601946142, + "grad_norm": 0.7461021561719915, + "learning_rate": 9.972178779339264e-06, + "loss": 0.4577, + "step": 1384 + }, + { + "epoch": 0.06268386512785698, + "grad_norm": 0.767860526526951, + "learning_rate": 9.972101516501043e-06, + "loss": 0.468, + "step": 1385 + }, + { + "epoch": 0.06272912423625254, + "grad_norm": 0.7755025230348713, + "learning_rate": 9.972024146827633e-06, + "loss": 0.4581, + "step": 1386 + }, + { + "epoch": 0.0627743833446481, + "grad_norm": 0.7136222251907728, + "learning_rate": 9.971946670320693e-06, + "loss": 0.4258, + "step": 1387 + }, + { + "epoch": 0.06281964245304368, + "grad_norm": 0.7962994471669949, + "learning_rate": 9.971869086981892e-06, + "loss": 0.418, + "step": 1388 + }, + { + "epoch": 0.06286490156143924, + "grad_norm": 0.9369097066828213, + "learning_rate": 9.971791396812891e-06, + "loss": 0.5509, + "step": 1389 + }, + { + "epoch": 0.0629101606698348, + "grad_norm": 0.85928628848663, + "learning_rate": 9.971713599815364e-06, + "loss": 0.4512, + "step": 1390 + }, + { + "epoch": 0.06295541977823037, + "grad_norm": 0.4760552251345332, + "learning_rate": 9.971635695990981e-06, + "loss": 0.5308, + "step": 1391 + }, + { + "epoch": 0.06300067888662593, + "grad_norm": 0.8016075510443832, + "learning_rate": 9.971557685341415e-06, + "loss": 0.5004, + "step": 1392 + }, + { + "epoch": 0.06304593799502149, + "grad_norm": 0.8390371784654099, + "learning_rate": 9.971479567868345e-06, + "loss": 0.447, + "step": 1393 + }, + { + "epoch": 0.06309119710341707, + "grad_norm": 0.8424654439840261, + "learning_rate": 9.971401343573448e-06, + "loss": 0.464, + "step": 1394 + }, + { + "epoch": 0.06313645621181263, + "grad_norm": 0.7607383258531611, + "learning_rate": 9.971323012458403e-06, + "loss": 0.4349, + "step": 1395 + }, + { + "epoch": 0.06318171532020819, + "grad_norm": 0.8127223782573194, + "learning_rate": 9.971244574524897e-06, + "loss": 0.4361, + "step": 1396 + }, + { + "epoch": 0.06322697442860375, + "grad_norm": 1.0790888839572594, + "learning_rate": 9.97116602977461e-06, + "loss": 0.5568, + "step": 1397 + }, + { + "epoch": 0.06327223353699932, + "grad_norm": 1.2668559111678692, + "learning_rate": 9.971087378209235e-06, + "loss": 0.4803, + "step": 1398 + }, + { + "epoch": 0.06331749264539488, + "grad_norm": 0.755449835673747, + "learning_rate": 9.97100861983046e-06, + "loss": 0.4364, + "step": 1399 + }, + { + "epoch": 0.06336275175379046, + "grad_norm": 0.8453741315660476, + "learning_rate": 9.970929754639976e-06, + "loss": 0.4613, + "step": 1400 + }, + { + "epoch": 0.06340801086218602, + "grad_norm": 0.8970543727300306, + "learning_rate": 9.970850782639478e-06, + "loss": 0.4654, + "step": 1401 + }, + { + "epoch": 0.06345326997058158, + "grad_norm": 0.9167840194035326, + "learning_rate": 9.970771703830666e-06, + "loss": 0.3953, + "step": 1402 + }, + { + "epoch": 0.06349852907897714, + "grad_norm": 0.5676117809549915, + "learning_rate": 9.970692518215236e-06, + "loss": 0.5232, + "step": 1403 + }, + { + "epoch": 0.0635437881873727, + "grad_norm": 0.8284466275722184, + "learning_rate": 9.970613225794887e-06, + "loss": 0.4419, + "step": 1404 + }, + { + "epoch": 0.06358904729576827, + "grad_norm": 0.916990534856046, + "learning_rate": 9.970533826571329e-06, + "loss": 0.4603, + "step": 1405 + }, + { + "epoch": 0.06363430640416384, + "grad_norm": 0.5007907202491658, + "learning_rate": 9.970454320546264e-06, + "loss": 0.512, + "step": 1406 + }, + { + "epoch": 0.0636795655125594, + "grad_norm": 0.8791752963483216, + "learning_rate": 9.9703747077214e-06, + "loss": 0.4818, + "step": 1407 + }, + { + "epoch": 0.06372482462095497, + "grad_norm": 0.8003276708620475, + "learning_rate": 9.970294988098452e-06, + "loss": 0.4426, + "step": 1408 + }, + { + "epoch": 0.06377008372935053, + "grad_norm": 0.45111031550043196, + "learning_rate": 9.970215161679126e-06, + "loss": 0.5424, + "step": 1409 + }, + { + "epoch": 0.06381534283774609, + "grad_norm": 0.7672189227879728, + "learning_rate": 9.970135228465144e-06, + "loss": 0.4591, + "step": 1410 + }, + { + "epoch": 0.06386060194614165, + "grad_norm": 0.9335458891124578, + "learning_rate": 9.970055188458219e-06, + "loss": 0.4507, + "step": 1411 + }, + { + "epoch": 0.06390586105453723, + "grad_norm": 0.7114136093026613, + "learning_rate": 9.969975041660073e-06, + "loss": 0.4377, + "step": 1412 + }, + { + "epoch": 0.06395112016293279, + "grad_norm": 0.8747199646442678, + "learning_rate": 9.969894788072427e-06, + "loss": 0.3875, + "step": 1413 + }, + { + "epoch": 0.06399637927132835, + "grad_norm": 0.829182408703576, + "learning_rate": 9.969814427697007e-06, + "loss": 0.4383, + "step": 1414 + }, + { + "epoch": 0.06404163837972392, + "grad_norm": 0.7319960516188317, + "learning_rate": 9.969733960535537e-06, + "loss": 0.4635, + "step": 1415 + }, + { + "epoch": 0.06408689748811948, + "grad_norm": 0.9267499056159455, + "learning_rate": 9.969653386589749e-06, + "loss": 0.5478, + "step": 1416 + }, + { + "epoch": 0.06413215659651506, + "grad_norm": 0.4920810844031071, + "learning_rate": 9.969572705861371e-06, + "loss": 0.5244, + "step": 1417 + }, + { + "epoch": 0.06417741570491062, + "grad_norm": 0.8843609262456265, + "learning_rate": 9.96949191835214e-06, + "loss": 0.4943, + "step": 1418 + }, + { + "epoch": 0.06422267481330618, + "grad_norm": 0.7939118677093964, + "learning_rate": 9.96941102406379e-06, + "loss": 0.4671, + "step": 1419 + }, + { + "epoch": 0.06426793392170174, + "grad_norm": 0.8074312906259381, + "learning_rate": 9.969330022998057e-06, + "loss": 0.4537, + "step": 1420 + }, + { + "epoch": 0.0643131930300973, + "grad_norm": 0.7941660570380797, + "learning_rate": 9.969248915156689e-06, + "loss": 0.4675, + "step": 1421 + }, + { + "epoch": 0.06435845213849287, + "grad_norm": 0.739298603734896, + "learning_rate": 9.96916770054142e-06, + "loss": 0.4102, + "step": 1422 + }, + { + "epoch": 0.06440371124688844, + "grad_norm": 0.7447203329144491, + "learning_rate": 9.969086379154e-06, + "loss": 0.4473, + "step": 1423 + }, + { + "epoch": 0.064448970355284, + "grad_norm": 1.1690352979094978, + "learning_rate": 9.969004950996175e-06, + "loss": 0.5181, + "step": 1424 + }, + { + "epoch": 0.06449422946367957, + "grad_norm": 0.6911976096981919, + "learning_rate": 9.968923416069694e-06, + "loss": 0.4239, + "step": 1425 + }, + { + "epoch": 0.06453948857207513, + "grad_norm": 0.5637242347474052, + "learning_rate": 9.96884177437631e-06, + "loss": 0.514, + "step": 1426 + }, + { + "epoch": 0.06458474768047069, + "grad_norm": 0.7441386514999966, + "learning_rate": 9.968760025917777e-06, + "loss": 0.467, + "step": 1427 + }, + { + "epoch": 0.06463000678886625, + "grad_norm": 0.784708982521003, + "learning_rate": 9.968678170695851e-06, + "loss": 0.4294, + "step": 1428 + }, + { + "epoch": 0.06467526589726183, + "grad_norm": 0.6990928541859651, + "learning_rate": 9.968596208712293e-06, + "loss": 0.4495, + "step": 1429 + }, + { + "epoch": 0.06472052500565739, + "grad_norm": 1.0658537709299807, + "learning_rate": 9.968514139968862e-06, + "loss": 0.4917, + "step": 1430 + }, + { + "epoch": 0.06476578411405295, + "grad_norm": 0.7858687905006959, + "learning_rate": 9.96843196446732e-06, + "loss": 0.4447, + "step": 1431 + }, + { + "epoch": 0.06481104322244852, + "grad_norm": 0.8138918788039744, + "learning_rate": 9.968349682209434e-06, + "loss": 0.4743, + "step": 1432 + }, + { + "epoch": 0.06485630233084408, + "grad_norm": 0.7115302776794071, + "learning_rate": 9.968267293196976e-06, + "loss": 0.4507, + "step": 1433 + }, + { + "epoch": 0.06490156143923964, + "grad_norm": 1.566811431079173, + "learning_rate": 9.96818479743171e-06, + "loss": 0.5437, + "step": 1434 + }, + { + "epoch": 0.06494682054763522, + "grad_norm": 0.7712599008875365, + "learning_rate": 9.968102194915411e-06, + "loss": 0.4627, + "step": 1435 + }, + { + "epoch": 0.06499207965603078, + "grad_norm": 0.8098764007806255, + "learning_rate": 9.968019485649856e-06, + "loss": 0.4519, + "step": 1436 + }, + { + "epoch": 0.06503733876442634, + "grad_norm": 0.730530054382985, + "learning_rate": 9.967936669636818e-06, + "loss": 0.4679, + "step": 1437 + }, + { + "epoch": 0.0650825978728219, + "grad_norm": 0.7298010549478803, + "learning_rate": 9.96785374687808e-06, + "loss": 0.3934, + "step": 1438 + }, + { + "epoch": 0.06512785698121747, + "grad_norm": 0.7719987609607117, + "learning_rate": 9.967770717375423e-06, + "loss": 0.4488, + "step": 1439 + }, + { + "epoch": 0.06517311608961303, + "grad_norm": 0.7576083931078323, + "learning_rate": 9.967687581130632e-06, + "loss": 0.4493, + "step": 1440 + }, + { + "epoch": 0.0652183751980086, + "grad_norm": 0.7456916900585282, + "learning_rate": 9.967604338145488e-06, + "loss": 0.434, + "step": 1441 + }, + { + "epoch": 0.06526363430640417, + "grad_norm": 0.7609509382040559, + "learning_rate": 9.967520988421788e-06, + "loss": 0.4175, + "step": 1442 + }, + { + "epoch": 0.06530889341479973, + "grad_norm": 0.8318454322580573, + "learning_rate": 9.967437531961316e-06, + "loss": 0.465, + "step": 1443 + }, + { + "epoch": 0.06535415252319529, + "grad_norm": 0.7591737815983305, + "learning_rate": 9.967353968765868e-06, + "loss": 0.4394, + "step": 1444 + }, + { + "epoch": 0.06539941163159085, + "grad_norm": 0.7765098393214346, + "learning_rate": 9.967270298837239e-06, + "loss": 0.47, + "step": 1445 + }, + { + "epoch": 0.06544467073998642, + "grad_norm": 0.7409110390658145, + "learning_rate": 9.967186522177228e-06, + "loss": 0.3877, + "step": 1446 + }, + { + "epoch": 0.06548992984838199, + "grad_norm": 0.9312488082329916, + "learning_rate": 9.967102638787634e-06, + "loss": 0.4224, + "step": 1447 + }, + { + "epoch": 0.06553518895677755, + "grad_norm": 0.749522516130679, + "learning_rate": 9.96701864867026e-06, + "loss": 0.4626, + "step": 1448 + }, + { + "epoch": 0.06558044806517312, + "grad_norm": 0.8037830099599014, + "learning_rate": 9.96693455182691e-06, + "loss": 0.5151, + "step": 1449 + }, + { + "epoch": 0.06562570717356868, + "grad_norm": 0.8896635354781706, + "learning_rate": 9.96685034825939e-06, + "loss": 0.5014, + "step": 1450 + }, + { + "epoch": 0.06567096628196424, + "grad_norm": 0.9075654261092597, + "learning_rate": 9.966766037969512e-06, + "loss": 0.48, + "step": 1451 + }, + { + "epoch": 0.0657162253903598, + "grad_norm": 0.779067559983051, + "learning_rate": 9.966681620959085e-06, + "loss": 0.4136, + "step": 1452 + }, + { + "epoch": 0.06576148449875538, + "grad_norm": 0.8202536297853454, + "learning_rate": 9.966597097229925e-06, + "loss": 0.4761, + "step": 1453 + }, + { + "epoch": 0.06580674360715094, + "grad_norm": 1.5381184164373207, + "learning_rate": 9.966512466783846e-06, + "loss": 0.468, + "step": 1454 + }, + { + "epoch": 0.0658520027155465, + "grad_norm": 0.7781534864045977, + "learning_rate": 9.966427729622668e-06, + "loss": 0.4855, + "step": 1455 + }, + { + "epoch": 0.06589726182394207, + "grad_norm": 0.7798522596941327, + "learning_rate": 9.966342885748212e-06, + "loss": 0.4623, + "step": 1456 + }, + { + "epoch": 0.06594252093233763, + "grad_norm": 0.7384237234710249, + "learning_rate": 9.9662579351623e-06, + "loss": 0.483, + "step": 1457 + }, + { + "epoch": 0.0659877800407332, + "grad_norm": 0.7826289098351965, + "learning_rate": 9.966172877866757e-06, + "loss": 0.4736, + "step": 1458 + }, + { + "epoch": 0.06603303914912877, + "grad_norm": 1.0297726700330934, + "learning_rate": 9.966087713863412e-06, + "loss": 0.4721, + "step": 1459 + }, + { + "epoch": 0.06607829825752433, + "grad_norm": 0.8561847199763449, + "learning_rate": 9.966002443154095e-06, + "loss": 0.4884, + "step": 1460 + }, + { + "epoch": 0.06612355736591989, + "grad_norm": 2.3350403237905915, + "learning_rate": 9.965917065740636e-06, + "loss": 0.5438, + "step": 1461 + }, + { + "epoch": 0.06616881647431545, + "grad_norm": 0.8562932774188359, + "learning_rate": 9.965831581624872e-06, + "loss": 0.473, + "step": 1462 + }, + { + "epoch": 0.06621407558271102, + "grad_norm": 0.6826153820493718, + "learning_rate": 9.965745990808638e-06, + "loss": 0.547, + "step": 1463 + }, + { + "epoch": 0.06625933469110659, + "grad_norm": 0.7976139884418457, + "learning_rate": 9.965660293293773e-06, + "loss": 0.4536, + "step": 1464 + }, + { + "epoch": 0.06630459379950215, + "grad_norm": 0.9966975784442647, + "learning_rate": 9.96557448908212e-06, + "loss": 0.5063, + "step": 1465 + }, + { + "epoch": 0.06634985290789772, + "grad_norm": 0.8040762001174595, + "learning_rate": 9.965488578175522e-06, + "loss": 0.4333, + "step": 1466 + }, + { + "epoch": 0.06639511201629328, + "grad_norm": 0.8261180768400028, + "learning_rate": 9.965402560575825e-06, + "loss": 0.4308, + "step": 1467 + }, + { + "epoch": 0.06644037112468884, + "grad_norm": 0.9011136586096863, + "learning_rate": 9.965316436284877e-06, + "loss": 0.4597, + "step": 1468 + }, + { + "epoch": 0.0664856302330844, + "grad_norm": 1.111941679470362, + "learning_rate": 9.965230205304528e-06, + "loss": 0.4384, + "step": 1469 + }, + { + "epoch": 0.06653088934147998, + "grad_norm": 0.7204158964166693, + "learning_rate": 9.96514386763663e-06, + "loss": 0.4624, + "step": 1470 + }, + { + "epoch": 0.06657614844987554, + "grad_norm": 0.9149661678745, + "learning_rate": 9.965057423283043e-06, + "loss": 0.4667, + "step": 1471 + }, + { + "epoch": 0.0666214075582711, + "grad_norm": 0.7932039742728114, + "learning_rate": 9.964970872245618e-06, + "loss": 0.4311, + "step": 1472 + }, + { + "epoch": 0.06666666666666667, + "grad_norm": 1.89014538464223, + "learning_rate": 9.96488421452622e-06, + "loss": 0.5952, + "step": 1473 + }, + { + "epoch": 0.06671192577506223, + "grad_norm": 0.8032671819143476, + "learning_rate": 9.964797450126708e-06, + "loss": 0.443, + "step": 1474 + }, + { + "epoch": 0.06675718488345779, + "grad_norm": 0.7481664383546026, + "learning_rate": 9.964710579048947e-06, + "loss": 0.4828, + "step": 1475 + }, + { + "epoch": 0.06680244399185337, + "grad_norm": 0.7386169349436833, + "learning_rate": 9.964623601294802e-06, + "loss": 0.4415, + "step": 1476 + }, + { + "epoch": 0.06684770310024893, + "grad_norm": 0.779597142522534, + "learning_rate": 9.964536516866146e-06, + "loss": 0.4436, + "step": 1477 + }, + { + "epoch": 0.06689296220864449, + "grad_norm": 0.6576504922840656, + "learning_rate": 9.964449325764846e-06, + "loss": 0.5342, + "step": 1478 + }, + { + "epoch": 0.06693822131704005, + "grad_norm": 0.8003162853179788, + "learning_rate": 9.964362027992777e-06, + "loss": 0.4446, + "step": 1479 + }, + { + "epoch": 0.06698348042543562, + "grad_norm": 0.7921636322644379, + "learning_rate": 9.964274623551814e-06, + "loss": 0.4222, + "step": 1480 + }, + { + "epoch": 0.06702873953383118, + "grad_norm": 0.8123469531045502, + "learning_rate": 9.964187112443839e-06, + "loss": 0.4191, + "step": 1481 + }, + { + "epoch": 0.06707399864222675, + "grad_norm": 0.820058490946606, + "learning_rate": 9.964099494670727e-06, + "loss": 0.459, + "step": 1482 + }, + { + "epoch": 0.06711925775062232, + "grad_norm": 0.7893140272997659, + "learning_rate": 9.964011770234364e-06, + "loss": 0.4884, + "step": 1483 + }, + { + "epoch": 0.06716451685901788, + "grad_norm": 1.0204350131333586, + "learning_rate": 9.963923939136632e-06, + "loss": 0.5305, + "step": 1484 + }, + { + "epoch": 0.06720977596741344, + "grad_norm": 1.1363273251767987, + "learning_rate": 9.963836001379423e-06, + "loss": 0.47, + "step": 1485 + }, + { + "epoch": 0.067255035075809, + "grad_norm": 0.8064235598914097, + "learning_rate": 9.963747956964623e-06, + "loss": 0.4702, + "step": 1486 + }, + { + "epoch": 0.06730029418420456, + "grad_norm": 0.728234755597837, + "learning_rate": 9.963659805894123e-06, + "loss": 0.4592, + "step": 1487 + }, + { + "epoch": 0.06734555329260014, + "grad_norm": 0.8039241658387191, + "learning_rate": 9.96357154816982e-06, + "loss": 0.4776, + "step": 1488 + }, + { + "epoch": 0.0673908124009957, + "grad_norm": 0.7661464342835728, + "learning_rate": 9.963483183793606e-06, + "loss": 0.4504, + "step": 1489 + }, + { + "epoch": 0.06743607150939127, + "grad_norm": 0.737037447449985, + "learning_rate": 9.963394712767385e-06, + "loss": 0.4531, + "step": 1490 + }, + { + "epoch": 0.06748133061778683, + "grad_norm": 0.5540904667559855, + "learning_rate": 9.963306135093054e-06, + "loss": 0.5121, + "step": 1491 + }, + { + "epoch": 0.06752658972618239, + "grad_norm": 0.8491171776657304, + "learning_rate": 9.96321745077252e-06, + "loss": 0.4768, + "step": 1492 + }, + { + "epoch": 0.06757184883457795, + "grad_norm": 0.43270575186531257, + "learning_rate": 9.963128659807684e-06, + "loss": 0.5299, + "step": 1493 + }, + { + "epoch": 0.06761710794297353, + "grad_norm": 0.8745420939942341, + "learning_rate": 9.963039762200457e-06, + "loss": 0.4687, + "step": 1494 + }, + { + "epoch": 0.06766236705136909, + "grad_norm": 0.8373452601614464, + "learning_rate": 9.96295075795275e-06, + "loss": 0.459, + "step": 1495 + }, + { + "epoch": 0.06770762615976465, + "grad_norm": 0.7698332952658042, + "learning_rate": 9.962861647066472e-06, + "loss": 0.5082, + "step": 1496 + }, + { + "epoch": 0.06775288526816022, + "grad_norm": 0.8100270250771073, + "learning_rate": 9.962772429543539e-06, + "loss": 0.456, + "step": 1497 + }, + { + "epoch": 0.06779814437655578, + "grad_norm": 0.7843116494944076, + "learning_rate": 9.96268310538587e-06, + "loss": 0.4587, + "step": 1498 + }, + { + "epoch": 0.06784340348495134, + "grad_norm": 0.7879384228895121, + "learning_rate": 9.962593674595382e-06, + "loss": 0.438, + "step": 1499 + }, + { + "epoch": 0.06788866259334692, + "grad_norm": 0.7403443187198006, + "learning_rate": 9.962504137173997e-06, + "loss": 0.4559, + "step": 1500 + }, + { + "epoch": 0.06793392170174248, + "grad_norm": 0.7619760500772016, + "learning_rate": 9.96241449312364e-06, + "loss": 0.4454, + "step": 1501 + }, + { + "epoch": 0.06797918081013804, + "grad_norm": 0.8274554589173836, + "learning_rate": 9.962324742446237e-06, + "loss": 0.4684, + "step": 1502 + }, + { + "epoch": 0.0680244399185336, + "grad_norm": 0.8122743257002134, + "learning_rate": 9.962234885143715e-06, + "loss": 0.4558, + "step": 1503 + }, + { + "epoch": 0.06806969902692916, + "grad_norm": 0.8125418203275906, + "learning_rate": 9.962144921218005e-06, + "loss": 0.5069, + "step": 1504 + }, + { + "epoch": 0.06811495813532474, + "grad_norm": 1.197255811058083, + "learning_rate": 9.962054850671042e-06, + "loss": 0.5401, + "step": 1505 + }, + { + "epoch": 0.0681602172437203, + "grad_norm": 0.7745585414790322, + "learning_rate": 9.961964673504759e-06, + "loss": 0.3907, + "step": 1506 + }, + { + "epoch": 0.06820547635211587, + "grad_norm": 0.493849832077196, + "learning_rate": 9.961874389721095e-06, + "loss": 0.5421, + "step": 1507 + }, + { + "epoch": 0.06825073546051143, + "grad_norm": 0.7940022723097947, + "learning_rate": 9.96178399932199e-06, + "loss": 0.4773, + "step": 1508 + }, + { + "epoch": 0.06829599456890699, + "grad_norm": 0.8717583485304328, + "learning_rate": 9.961693502309385e-06, + "loss": 0.4817, + "step": 1509 + }, + { + "epoch": 0.06834125367730255, + "grad_norm": 0.8235441414406076, + "learning_rate": 9.961602898685225e-06, + "loss": 0.4492, + "step": 1510 + }, + { + "epoch": 0.06838651278569813, + "grad_norm": 1.0502485038616896, + "learning_rate": 9.961512188451458e-06, + "loss": 0.5486, + "step": 1511 + }, + { + "epoch": 0.06843177189409369, + "grad_norm": 0.7696161022692278, + "learning_rate": 9.961421371610034e-06, + "loss": 0.4708, + "step": 1512 + }, + { + "epoch": 0.06847703100248925, + "grad_norm": 0.7810384269518527, + "learning_rate": 9.9613304481629e-06, + "loss": 0.5473, + "step": 1513 + }, + { + "epoch": 0.06852229011088481, + "grad_norm": 0.5406957384016672, + "learning_rate": 9.961239418112013e-06, + "loss": 0.5205, + "step": 1514 + }, + { + "epoch": 0.06856754921928038, + "grad_norm": 0.9623025256050911, + "learning_rate": 9.961148281459328e-06, + "loss": 0.4817, + "step": 1515 + }, + { + "epoch": 0.06861280832767594, + "grad_norm": 0.8031406777854115, + "learning_rate": 9.961057038206804e-06, + "loss": 0.4505, + "step": 1516 + }, + { + "epoch": 0.06865806743607152, + "grad_norm": 0.8468517938114531, + "learning_rate": 9.960965688356401e-06, + "loss": 0.4735, + "step": 1517 + }, + { + "epoch": 0.06870332654446708, + "grad_norm": 0.7971237582102288, + "learning_rate": 9.960874231910081e-06, + "loss": 0.4216, + "step": 1518 + }, + { + "epoch": 0.06874858565286264, + "grad_norm": 1.1521788910843045, + "learning_rate": 9.960782668869811e-06, + "loss": 0.5635, + "step": 1519 + }, + { + "epoch": 0.0687938447612582, + "grad_norm": 0.7942431019486376, + "learning_rate": 9.960690999237555e-06, + "loss": 0.489, + "step": 1520 + }, + { + "epoch": 0.06883910386965376, + "grad_norm": 0.7702082381966745, + "learning_rate": 9.960599223015287e-06, + "loss": 0.43, + "step": 1521 + }, + { + "epoch": 0.06888436297804933, + "grad_norm": 0.6724548920276606, + "learning_rate": 9.960507340204977e-06, + "loss": 0.4509, + "step": 1522 + }, + { + "epoch": 0.0689296220864449, + "grad_norm": 0.8431250938808247, + "learning_rate": 9.960415350808598e-06, + "loss": 0.4686, + "step": 1523 + }, + { + "epoch": 0.06897488119484046, + "grad_norm": 0.7809711877829256, + "learning_rate": 9.960323254828129e-06, + "loss": 0.4505, + "step": 1524 + }, + { + "epoch": 0.06902014030323603, + "grad_norm": 0.6213389147642807, + "learning_rate": 9.960231052265548e-06, + "loss": 0.5229, + "step": 1525 + }, + { + "epoch": 0.06906539941163159, + "grad_norm": 0.759452138984007, + "learning_rate": 9.960138743122835e-06, + "loss": 0.4357, + "step": 1526 + }, + { + "epoch": 0.06911065852002715, + "grad_norm": 0.7715047876751977, + "learning_rate": 9.960046327401975e-06, + "loss": 0.4177, + "step": 1527 + }, + { + "epoch": 0.06915591762842271, + "grad_norm": 0.7671577146844714, + "learning_rate": 9.959953805104953e-06, + "loss": 0.4023, + "step": 1528 + }, + { + "epoch": 0.06920117673681829, + "grad_norm": 0.7603786601404778, + "learning_rate": 9.959861176233756e-06, + "loss": 0.4729, + "step": 1529 + }, + { + "epoch": 0.06924643584521385, + "grad_norm": 1.2263879084410303, + "learning_rate": 9.959768440790377e-06, + "loss": 0.4791, + "step": 1530 + }, + { + "epoch": 0.06929169495360941, + "grad_norm": 0.8042621884641189, + "learning_rate": 9.959675598776805e-06, + "loss": 0.4855, + "step": 1531 + }, + { + "epoch": 0.06933695406200498, + "grad_norm": 0.7607417462032463, + "learning_rate": 9.95958265019504e-06, + "loss": 0.4193, + "step": 1532 + }, + { + "epoch": 0.06938221317040054, + "grad_norm": 0.7396140867796288, + "learning_rate": 9.959489595047074e-06, + "loss": 0.4299, + "step": 1533 + }, + { + "epoch": 0.0694274722787961, + "grad_norm": 0.8098941502403151, + "learning_rate": 9.959396433334907e-06, + "loss": 0.5049, + "step": 1534 + }, + { + "epoch": 0.06947273138719168, + "grad_norm": 0.6644968450292126, + "learning_rate": 9.959303165060546e-06, + "loss": 0.5169, + "step": 1535 + }, + { + "epoch": 0.06951799049558724, + "grad_norm": 0.8017479067575274, + "learning_rate": 9.959209790225987e-06, + "loss": 0.4529, + "step": 1536 + }, + { + "epoch": 0.0695632496039828, + "grad_norm": 1.6091086506950214, + "learning_rate": 9.959116308833244e-06, + "loss": 0.4178, + "step": 1537 + }, + { + "epoch": 0.06960850871237836, + "grad_norm": 0.7569888031961698, + "learning_rate": 9.959022720884321e-06, + "loss": 0.4357, + "step": 1538 + }, + { + "epoch": 0.06965376782077393, + "grad_norm": 0.34109004209053556, + "learning_rate": 9.95892902638123e-06, + "loss": 0.5254, + "step": 1539 + }, + { + "epoch": 0.06969902692916949, + "grad_norm": 0.829481890585957, + "learning_rate": 9.958835225325984e-06, + "loss": 0.4158, + "step": 1540 + }, + { + "epoch": 0.06974428603756506, + "grad_norm": 0.7756169671760623, + "learning_rate": 9.9587413177206e-06, + "loss": 0.3823, + "step": 1541 + }, + { + "epoch": 0.06978954514596063, + "grad_norm": 0.44633982647223575, + "learning_rate": 9.958647303567094e-06, + "loss": 0.5427, + "step": 1542 + }, + { + "epoch": 0.06983480425435619, + "grad_norm": 0.883531560550747, + "learning_rate": 9.958553182867488e-06, + "loss": 0.4481, + "step": 1543 + }, + { + "epoch": 0.06988006336275175, + "grad_norm": 0.4300749273865749, + "learning_rate": 9.958458955623802e-06, + "loss": 0.5135, + "step": 1544 + }, + { + "epoch": 0.06992532247114731, + "grad_norm": 0.7996971504896987, + "learning_rate": 9.958364621838062e-06, + "loss": 0.4874, + "step": 1545 + }, + { + "epoch": 0.06997058157954289, + "grad_norm": 0.7474858485034108, + "learning_rate": 9.958270181512295e-06, + "loss": 0.4484, + "step": 1546 + }, + { + "epoch": 0.07001584068793845, + "grad_norm": 0.8790061160587301, + "learning_rate": 9.95817563464853e-06, + "loss": 0.4148, + "step": 1547 + }, + { + "epoch": 0.07006109979633401, + "grad_norm": 0.49396382028010033, + "learning_rate": 9.958080981248798e-06, + "loss": 0.5323, + "step": 1548 + }, + { + "epoch": 0.07010635890472958, + "grad_norm": 0.7998562590968145, + "learning_rate": 9.957986221315134e-06, + "loss": 0.4221, + "step": 1549 + }, + { + "epoch": 0.07015161801312514, + "grad_norm": 0.8086282984928738, + "learning_rate": 9.957891354849573e-06, + "loss": 0.4593, + "step": 1550 + }, + { + "epoch": 0.0701968771215207, + "grad_norm": 0.8162934805047718, + "learning_rate": 9.957796381854152e-06, + "loss": 0.4597, + "step": 1551 + }, + { + "epoch": 0.07024213622991628, + "grad_norm": 0.7663579788681755, + "learning_rate": 9.957701302330915e-06, + "loss": 0.4515, + "step": 1552 + }, + { + "epoch": 0.07028739533831184, + "grad_norm": 0.7919053756584956, + "learning_rate": 9.957606116281905e-06, + "loss": 0.4443, + "step": 1553 + }, + { + "epoch": 0.0703326544467074, + "grad_norm": 0.8133292991518659, + "learning_rate": 9.957510823709165e-06, + "loss": 0.4557, + "step": 1554 + }, + { + "epoch": 0.07037791355510296, + "grad_norm": 0.6882397202006979, + "learning_rate": 9.957415424614742e-06, + "loss": 0.5269, + "step": 1555 + }, + { + "epoch": 0.07042317266349853, + "grad_norm": 0.8998616260792923, + "learning_rate": 9.957319919000687e-06, + "loss": 0.435, + "step": 1556 + }, + { + "epoch": 0.07046843177189409, + "grad_norm": 0.7549419519105975, + "learning_rate": 9.957224306869053e-06, + "loss": 0.497, + "step": 1557 + }, + { + "epoch": 0.07051369088028966, + "grad_norm": 0.7234189199464256, + "learning_rate": 9.957128588221895e-06, + "loss": 0.4492, + "step": 1558 + }, + { + "epoch": 0.07055894998868523, + "grad_norm": 0.7526551273534683, + "learning_rate": 9.957032763061264e-06, + "loss": 0.3977, + "step": 1559 + }, + { + "epoch": 0.07060420909708079, + "grad_norm": 0.7661108290816389, + "learning_rate": 9.956936831389228e-06, + "loss": 0.479, + "step": 1560 + }, + { + "epoch": 0.07064946820547635, + "grad_norm": 0.7268298686703955, + "learning_rate": 9.956840793207841e-06, + "loss": 0.5161, + "step": 1561 + }, + { + "epoch": 0.07069472731387191, + "grad_norm": 0.8351450729731379, + "learning_rate": 9.95674464851917e-06, + "loss": 0.4463, + "step": 1562 + }, + { + "epoch": 0.07073998642226748, + "grad_norm": 0.7466578559219901, + "learning_rate": 9.95664839732528e-06, + "loss": 0.4756, + "step": 1563 + }, + { + "epoch": 0.07078524553066305, + "grad_norm": 0.7005965800541605, + "learning_rate": 9.956552039628237e-06, + "loss": 0.4096, + "step": 1564 + }, + { + "epoch": 0.07083050463905861, + "grad_norm": 0.7945484153877097, + "learning_rate": 9.956455575430115e-06, + "loss": 0.4333, + "step": 1565 + }, + { + "epoch": 0.07087576374745418, + "grad_norm": 0.7964664261929095, + "learning_rate": 9.956359004732986e-06, + "loss": 0.4668, + "step": 1566 + }, + { + "epoch": 0.07092102285584974, + "grad_norm": 0.4334662715457474, + "learning_rate": 9.956262327538924e-06, + "loss": 0.5346, + "step": 1567 + }, + { + "epoch": 0.0709662819642453, + "grad_norm": 0.8466745017880247, + "learning_rate": 9.956165543850007e-06, + "loss": 0.4911, + "step": 1568 + }, + { + "epoch": 0.07101154107264086, + "grad_norm": 0.33019104388466125, + "learning_rate": 9.956068653668314e-06, + "loss": 0.5066, + "step": 1569 + }, + { + "epoch": 0.07105680018103644, + "grad_norm": 0.3623870468116331, + "learning_rate": 9.955971656995927e-06, + "loss": 0.5325, + "step": 1570 + }, + { + "epoch": 0.071102059289432, + "grad_norm": 0.3741449779498846, + "learning_rate": 9.955874553834928e-06, + "loss": 0.5381, + "step": 1571 + }, + { + "epoch": 0.07114731839782756, + "grad_norm": 0.3768306568499116, + "learning_rate": 9.955777344187407e-06, + "loss": 0.5692, + "step": 1572 + }, + { + "epoch": 0.07119257750622313, + "grad_norm": 1.085101554006371, + "learning_rate": 9.955680028055453e-06, + "loss": 0.4282, + "step": 1573 + }, + { + "epoch": 0.07123783661461869, + "grad_norm": 0.783608464689036, + "learning_rate": 9.955582605441154e-06, + "loss": 0.4335, + "step": 1574 + }, + { + "epoch": 0.07128309572301425, + "grad_norm": 0.9016573804866266, + "learning_rate": 9.955485076346605e-06, + "loss": 0.4817, + "step": 1575 + }, + { + "epoch": 0.07132835483140983, + "grad_norm": 0.8464255427197236, + "learning_rate": 9.955387440773902e-06, + "loss": 0.4541, + "step": 1576 + }, + { + "epoch": 0.07137361393980539, + "grad_norm": 0.8352333785219662, + "learning_rate": 9.955289698725141e-06, + "loss": 0.5068, + "step": 1577 + }, + { + "epoch": 0.07141887304820095, + "grad_norm": 0.7678664991367014, + "learning_rate": 9.955191850202424e-06, + "loss": 0.4476, + "step": 1578 + }, + { + "epoch": 0.07146413215659651, + "grad_norm": 0.7432997228864974, + "learning_rate": 9.955093895207853e-06, + "loss": 0.4361, + "step": 1579 + }, + { + "epoch": 0.07150939126499208, + "grad_norm": 1.043347006843631, + "learning_rate": 9.954995833743532e-06, + "loss": 0.466, + "step": 1580 + }, + { + "epoch": 0.07155465037338764, + "grad_norm": 0.7511877513268755, + "learning_rate": 9.95489766581157e-06, + "loss": 0.4462, + "step": 1581 + }, + { + "epoch": 0.07159990948178321, + "grad_norm": 0.7138314733618735, + "learning_rate": 9.954799391414073e-06, + "loss": 0.5603, + "step": 1582 + }, + { + "epoch": 0.07164516859017878, + "grad_norm": 1.0080615313646797, + "learning_rate": 9.954701010553156e-06, + "loss": 0.45, + "step": 1583 + }, + { + "epoch": 0.07169042769857434, + "grad_norm": 0.44949036623651845, + "learning_rate": 9.95460252323093e-06, + "loss": 0.5231, + "step": 1584 + }, + { + "epoch": 0.0717356868069699, + "grad_norm": 0.814469135656858, + "learning_rate": 9.954503929449513e-06, + "loss": 0.4641, + "step": 1585 + }, + { + "epoch": 0.07178094591536546, + "grad_norm": 0.780742667966943, + "learning_rate": 9.954405229211025e-06, + "loss": 0.4783, + "step": 1586 + }, + { + "epoch": 0.07182620502376103, + "grad_norm": 0.9876274201894842, + "learning_rate": 9.954306422517583e-06, + "loss": 0.4102, + "step": 1587 + }, + { + "epoch": 0.0718714641321566, + "grad_norm": 0.8347915917303684, + "learning_rate": 9.954207509371313e-06, + "loss": 0.5033, + "step": 1588 + }, + { + "epoch": 0.07191672324055216, + "grad_norm": 0.7436863290744097, + "learning_rate": 9.954108489774339e-06, + "loss": 0.4545, + "step": 1589 + }, + { + "epoch": 0.07196198234894773, + "grad_norm": 0.7512176662693054, + "learning_rate": 9.95400936372879e-06, + "loss": 0.483, + "step": 1590 + }, + { + "epoch": 0.07200724145734329, + "grad_norm": 0.6968344530808637, + "learning_rate": 9.953910131236793e-06, + "loss": 0.5386, + "step": 1591 + }, + { + "epoch": 0.07205250056573885, + "grad_norm": 0.9115392018944554, + "learning_rate": 9.953810792300482e-06, + "loss": 0.4793, + "step": 1592 + }, + { + "epoch": 0.07209775967413443, + "grad_norm": 0.7593058631745393, + "learning_rate": 9.953711346921994e-06, + "loss": 0.4179, + "step": 1593 + }, + { + "epoch": 0.07214301878252999, + "grad_norm": 0.8363184414562198, + "learning_rate": 9.953611795103462e-06, + "loss": 0.4427, + "step": 1594 + }, + { + "epoch": 0.07218827789092555, + "grad_norm": 0.7961544165941885, + "learning_rate": 9.953512136847026e-06, + "loss": 0.4654, + "step": 1595 + }, + { + "epoch": 0.07223353699932111, + "grad_norm": 0.8475644647503084, + "learning_rate": 9.953412372154826e-06, + "loss": 0.4491, + "step": 1596 + }, + { + "epoch": 0.07227879610771668, + "grad_norm": 0.7591172312405083, + "learning_rate": 9.95331250102901e-06, + "loss": 0.4721, + "step": 1597 + }, + { + "epoch": 0.07232405521611224, + "grad_norm": 1.2064906579866868, + "learning_rate": 9.95321252347172e-06, + "loss": 0.4261, + "step": 1598 + }, + { + "epoch": 0.07236931432450781, + "grad_norm": 0.8037664893843064, + "learning_rate": 9.953112439485107e-06, + "loss": 0.4595, + "step": 1599 + }, + { + "epoch": 0.07241457343290338, + "grad_norm": 0.6037274223376878, + "learning_rate": 9.95301224907132e-06, + "loss": 0.5278, + "step": 1600 + }, + { + "epoch": 0.07245983254129894, + "grad_norm": 0.8423451104175681, + "learning_rate": 9.95291195223251e-06, + "loss": 0.468, + "step": 1601 + }, + { + "epoch": 0.0725050916496945, + "grad_norm": 0.734562033088208, + "learning_rate": 9.952811548970834e-06, + "loss": 0.4573, + "step": 1602 + }, + { + "epoch": 0.07255035075809006, + "grad_norm": 0.7980025152030702, + "learning_rate": 9.952711039288451e-06, + "loss": 0.4247, + "step": 1603 + }, + { + "epoch": 0.07259560986648562, + "grad_norm": 0.8268544917981663, + "learning_rate": 9.952610423187516e-06, + "loss": 0.4378, + "step": 1604 + }, + { + "epoch": 0.0726408689748812, + "grad_norm": 0.7288491743965827, + "learning_rate": 9.952509700670197e-06, + "loss": 0.4295, + "step": 1605 + }, + { + "epoch": 0.07268612808327676, + "grad_norm": 0.574267631988978, + "learning_rate": 9.952408871738652e-06, + "loss": 0.5117, + "step": 1606 + }, + { + "epoch": 0.07273138719167233, + "grad_norm": 0.810368491171752, + "learning_rate": 9.952307936395054e-06, + "loss": 0.4359, + "step": 1607 + }, + { + "epoch": 0.07277664630006789, + "grad_norm": 0.7467048796188573, + "learning_rate": 9.952206894641565e-06, + "loss": 0.4529, + "step": 1608 + }, + { + "epoch": 0.07282190540846345, + "grad_norm": 0.7670762293890041, + "learning_rate": 9.952105746480361e-06, + "loss": 0.4459, + "step": 1609 + }, + { + "epoch": 0.07286716451685901, + "grad_norm": 0.8677749241903157, + "learning_rate": 9.952004491913613e-06, + "loss": 0.5131, + "step": 1610 + }, + { + "epoch": 0.07291242362525459, + "grad_norm": 0.43473157187032946, + "learning_rate": 9.9519031309435e-06, + "loss": 0.5079, + "step": 1611 + }, + { + "epoch": 0.07295768273365015, + "grad_norm": 0.7709482615496647, + "learning_rate": 9.951801663572194e-06, + "loss": 0.4676, + "step": 1612 + }, + { + "epoch": 0.07300294184204571, + "grad_norm": 0.7734986634642428, + "learning_rate": 9.951700089801879e-06, + "loss": 0.4816, + "step": 1613 + }, + { + "epoch": 0.07304820095044127, + "grad_norm": 0.712129054682761, + "learning_rate": 9.951598409634738e-06, + "loss": 0.4398, + "step": 1614 + }, + { + "epoch": 0.07309346005883684, + "grad_norm": 0.6855724174862319, + "learning_rate": 9.951496623072955e-06, + "loss": 0.4412, + "step": 1615 + }, + { + "epoch": 0.0731387191672324, + "grad_norm": 0.7685554643700105, + "learning_rate": 9.951394730118717e-06, + "loss": 0.4403, + "step": 1616 + }, + { + "epoch": 0.07318397827562798, + "grad_norm": 0.4755960850993418, + "learning_rate": 9.951292730774213e-06, + "loss": 0.5291, + "step": 1617 + }, + { + "epoch": 0.07322923738402354, + "grad_norm": 0.4204314232545453, + "learning_rate": 9.951190625041634e-06, + "loss": 0.5251, + "step": 1618 + }, + { + "epoch": 0.0732744964924191, + "grad_norm": 0.7848766554034355, + "learning_rate": 9.951088412923175e-06, + "loss": 0.4715, + "step": 1619 + }, + { + "epoch": 0.07331975560081466, + "grad_norm": 0.7101940377324135, + "learning_rate": 9.950986094421033e-06, + "loss": 0.471, + "step": 1620 + }, + { + "epoch": 0.07336501470921022, + "grad_norm": 0.9173872355738407, + "learning_rate": 9.950883669537405e-06, + "loss": 0.4493, + "step": 1621 + }, + { + "epoch": 0.07341027381760579, + "grad_norm": 0.8585729042659949, + "learning_rate": 9.950781138274494e-06, + "loss": 0.4719, + "step": 1622 + }, + { + "epoch": 0.07345553292600136, + "grad_norm": 0.7234837833909473, + "learning_rate": 9.950678500634501e-06, + "loss": 0.5371, + "step": 1623 + }, + { + "epoch": 0.07350079203439693, + "grad_norm": 0.7175744371928864, + "learning_rate": 9.95057575661963e-06, + "loss": 0.4383, + "step": 1624 + }, + { + "epoch": 0.07354605114279249, + "grad_norm": 0.7903698818392364, + "learning_rate": 9.950472906232091e-06, + "loss": 0.4679, + "step": 1625 + }, + { + "epoch": 0.07359131025118805, + "grad_norm": 0.3575880075219209, + "learning_rate": 9.950369949474095e-06, + "loss": 0.5099, + "step": 1626 + }, + { + "epoch": 0.07363656935958361, + "grad_norm": 0.785889252127557, + "learning_rate": 9.950266886347852e-06, + "loss": 0.4453, + "step": 1627 + }, + { + "epoch": 0.07368182846797917, + "grad_norm": 0.35653778542954306, + "learning_rate": 9.950163716855578e-06, + "loss": 0.5208, + "step": 1628 + }, + { + "epoch": 0.07372708757637475, + "grad_norm": 1.5450850497688764, + "learning_rate": 9.950060440999486e-06, + "loss": 0.4549, + "step": 1629 + }, + { + "epoch": 0.07377234668477031, + "grad_norm": 0.7693214263174912, + "learning_rate": 9.949957058781802e-06, + "loss": 0.4813, + "step": 1630 + }, + { + "epoch": 0.07381760579316587, + "grad_norm": 0.8123776819315764, + "learning_rate": 9.949853570204742e-06, + "loss": 0.4426, + "step": 1631 + }, + { + "epoch": 0.07386286490156144, + "grad_norm": 0.745047866822542, + "learning_rate": 9.94974997527053e-06, + "loss": 0.4226, + "step": 1632 + }, + { + "epoch": 0.073908124009957, + "grad_norm": 1.1293663462337062, + "learning_rate": 9.949646273981394e-06, + "loss": 0.473, + "step": 1633 + }, + { + "epoch": 0.07395338311835258, + "grad_norm": 0.725690333793923, + "learning_rate": 9.949542466339561e-06, + "loss": 0.4421, + "step": 1634 + }, + { + "epoch": 0.07399864222674814, + "grad_norm": 0.7562534174587207, + "learning_rate": 9.949438552347262e-06, + "loss": 0.4429, + "step": 1635 + }, + { + "epoch": 0.0740439013351437, + "grad_norm": 0.9048310824567884, + "learning_rate": 9.94933453200673e-06, + "loss": 0.4806, + "step": 1636 + }, + { + "epoch": 0.07408916044353926, + "grad_norm": 0.7639953887241905, + "learning_rate": 9.949230405320198e-06, + "loss": 0.4829, + "step": 1637 + }, + { + "epoch": 0.07413441955193482, + "grad_norm": 0.7243486351207513, + "learning_rate": 9.949126172289905e-06, + "loss": 0.4355, + "step": 1638 + }, + { + "epoch": 0.07417967866033039, + "grad_norm": 0.6082662853357984, + "learning_rate": 9.949021832918092e-06, + "loss": 0.5237, + "step": 1639 + }, + { + "epoch": 0.07422493776872596, + "grad_norm": 0.8794722206912542, + "learning_rate": 9.948917387206999e-06, + "loss": 0.4821, + "step": 1640 + }, + { + "epoch": 0.07427019687712152, + "grad_norm": 0.8696598828377334, + "learning_rate": 9.948812835158872e-06, + "loss": 0.4723, + "step": 1641 + }, + { + "epoch": 0.07431545598551709, + "grad_norm": 0.7322349900873856, + "learning_rate": 9.948708176775954e-06, + "loss": 0.4216, + "step": 1642 + }, + { + "epoch": 0.07436071509391265, + "grad_norm": 0.8653735420761837, + "learning_rate": 9.948603412060498e-06, + "loss": 0.4641, + "step": 1643 + }, + { + "epoch": 0.07440597420230821, + "grad_norm": 0.7424619254038732, + "learning_rate": 9.948498541014752e-06, + "loss": 0.4344, + "step": 1644 + }, + { + "epoch": 0.07445123331070377, + "grad_norm": 0.7369603541667813, + "learning_rate": 9.94839356364097e-06, + "loss": 0.4083, + "step": 1645 + }, + { + "epoch": 0.07449649241909935, + "grad_norm": 0.7140327682794468, + "learning_rate": 9.94828847994141e-06, + "loss": 0.4047, + "step": 1646 + }, + { + "epoch": 0.07454175152749491, + "grad_norm": 0.7216918324659736, + "learning_rate": 9.948183289918327e-06, + "loss": 0.4515, + "step": 1647 + }, + { + "epoch": 0.07458701063589047, + "grad_norm": 0.6294579395283374, + "learning_rate": 9.948077993573983e-06, + "loss": 0.5288, + "step": 1648 + }, + { + "epoch": 0.07463226974428604, + "grad_norm": 0.7958923321672456, + "learning_rate": 9.947972590910639e-06, + "loss": 0.5042, + "step": 1649 + }, + { + "epoch": 0.0746775288526816, + "grad_norm": 0.8417226582025827, + "learning_rate": 9.94786708193056e-06, + "loss": 0.4503, + "step": 1650 + }, + { + "epoch": 0.07472278796107716, + "grad_norm": 0.3480153243530504, + "learning_rate": 9.947761466636014e-06, + "loss": 0.5113, + "step": 1651 + }, + { + "epoch": 0.07476804706947274, + "grad_norm": 0.7706541931247691, + "learning_rate": 9.94765574502927e-06, + "loss": 0.4482, + "step": 1652 + }, + { + "epoch": 0.0748133061778683, + "grad_norm": 0.7555172102442301, + "learning_rate": 9.947549917112601e-06, + "loss": 0.4164, + "step": 1653 + }, + { + "epoch": 0.07485856528626386, + "grad_norm": 0.4662745249690845, + "learning_rate": 9.947443982888279e-06, + "loss": 0.5317, + "step": 1654 + }, + { + "epoch": 0.07490382439465942, + "grad_norm": 0.838637572397322, + "learning_rate": 9.947337942358579e-06, + "loss": 0.4558, + "step": 1655 + }, + { + "epoch": 0.07494908350305499, + "grad_norm": 0.816122648381445, + "learning_rate": 9.947231795525782e-06, + "loss": 0.4353, + "step": 1656 + }, + { + "epoch": 0.07499434261145055, + "grad_norm": 0.7673786090994253, + "learning_rate": 9.94712554239217e-06, + "loss": 0.5044, + "step": 1657 + }, + { + "epoch": 0.07503960171984612, + "grad_norm": 0.704259071940599, + "learning_rate": 9.947019182960023e-06, + "loss": 0.449, + "step": 1658 + }, + { + "epoch": 0.07508486082824169, + "grad_norm": 0.7164377948782436, + "learning_rate": 9.946912717231625e-06, + "loss": 0.4738, + "step": 1659 + }, + { + "epoch": 0.07513011993663725, + "grad_norm": 0.6995814898789535, + "learning_rate": 9.94680614520927e-06, + "loss": 0.4134, + "step": 1660 + }, + { + "epoch": 0.07517537904503281, + "grad_norm": 0.857002781818847, + "learning_rate": 9.94669946689524e-06, + "loss": 0.4305, + "step": 1661 + }, + { + "epoch": 0.07522063815342837, + "grad_norm": 0.7424988428382787, + "learning_rate": 9.946592682291834e-06, + "loss": 0.4683, + "step": 1662 + }, + { + "epoch": 0.07526589726182394, + "grad_norm": 0.7630417153053298, + "learning_rate": 9.94648579140134e-06, + "loss": 0.4157, + "step": 1663 + }, + { + "epoch": 0.07531115637021951, + "grad_norm": 0.7893442002173218, + "learning_rate": 9.946378794226062e-06, + "loss": 0.4425, + "step": 1664 + }, + { + "epoch": 0.07535641547861507, + "grad_norm": 0.6564682733195059, + "learning_rate": 9.946271690768295e-06, + "loss": 0.4324, + "step": 1665 + }, + { + "epoch": 0.07540167458701064, + "grad_norm": 0.8107525179394984, + "learning_rate": 9.946164481030339e-06, + "loss": 0.4887, + "step": 1666 + }, + { + "epoch": 0.0754469336954062, + "grad_norm": 0.7832898959544311, + "learning_rate": 9.9460571650145e-06, + "loss": 0.4952, + "step": 1667 + }, + { + "epoch": 0.07549219280380176, + "grad_norm": 0.7275954330682075, + "learning_rate": 9.945949742723083e-06, + "loss": 0.4314, + "step": 1668 + }, + { + "epoch": 0.07553745191219732, + "grad_norm": 0.8272072509791674, + "learning_rate": 9.945842214158397e-06, + "loss": 0.4737, + "step": 1669 + }, + { + "epoch": 0.0755827110205929, + "grad_norm": 0.7302759518481456, + "learning_rate": 9.94573457932275e-06, + "loss": 0.446, + "step": 1670 + }, + { + "epoch": 0.07562797012898846, + "grad_norm": 0.7285007459322589, + "learning_rate": 9.945626838218458e-06, + "loss": 0.439, + "step": 1671 + }, + { + "epoch": 0.07567322923738402, + "grad_norm": 0.600806103249455, + "learning_rate": 9.945518990847835e-06, + "loss": 0.5277, + "step": 1672 + }, + { + "epoch": 0.07571848834577959, + "grad_norm": 0.7644840288674761, + "learning_rate": 9.945411037213198e-06, + "loss": 0.4852, + "step": 1673 + }, + { + "epoch": 0.07576374745417515, + "grad_norm": 0.7606077392588975, + "learning_rate": 9.945302977316864e-06, + "loss": 0.4512, + "step": 1674 + }, + { + "epoch": 0.07580900656257071, + "grad_norm": 0.717075980182358, + "learning_rate": 9.94519481116116e-06, + "loss": 0.4448, + "step": 1675 + }, + { + "epoch": 0.07585426567096629, + "grad_norm": 0.8581548432084788, + "learning_rate": 9.945086538748407e-06, + "loss": 0.4802, + "step": 1676 + }, + { + "epoch": 0.07589952477936185, + "grad_norm": 0.7421139594180161, + "learning_rate": 9.944978160080932e-06, + "loss": 0.4677, + "step": 1677 + }, + { + "epoch": 0.07594478388775741, + "grad_norm": 0.7329069408677304, + "learning_rate": 9.944869675161062e-06, + "loss": 0.4558, + "step": 1678 + }, + { + "epoch": 0.07599004299615297, + "grad_norm": 0.6783854819129181, + "learning_rate": 9.944761083991131e-06, + "loss": 0.4025, + "step": 1679 + }, + { + "epoch": 0.07603530210454854, + "grad_norm": 0.7926734375394854, + "learning_rate": 9.944652386573472e-06, + "loss": 0.4869, + "step": 1680 + }, + { + "epoch": 0.07608056121294411, + "grad_norm": 0.7747418613948901, + "learning_rate": 9.944543582910417e-06, + "loss": 0.4493, + "step": 1681 + }, + { + "epoch": 0.07612582032133967, + "grad_norm": 0.6748968711002219, + "learning_rate": 9.944434673004308e-06, + "loss": 0.5339, + "step": 1682 + }, + { + "epoch": 0.07617107942973524, + "grad_norm": 0.8246562125565327, + "learning_rate": 9.944325656857485e-06, + "loss": 0.4748, + "step": 1683 + }, + { + "epoch": 0.0762163385381308, + "grad_norm": 0.7619113155947301, + "learning_rate": 9.944216534472287e-06, + "loss": 0.4123, + "step": 1684 + }, + { + "epoch": 0.07626159764652636, + "grad_norm": 0.8584651446158137, + "learning_rate": 9.94410730585106e-06, + "loss": 0.4604, + "step": 1685 + }, + { + "epoch": 0.07630685675492192, + "grad_norm": 0.8277957948886835, + "learning_rate": 9.943997970996153e-06, + "loss": 0.4572, + "step": 1686 + }, + { + "epoch": 0.0763521158633175, + "grad_norm": 0.7801979933818924, + "learning_rate": 9.943888529909916e-06, + "loss": 0.492, + "step": 1687 + }, + { + "epoch": 0.07639737497171306, + "grad_norm": 0.8391519761417111, + "learning_rate": 9.943778982594695e-06, + "loss": 0.449, + "step": 1688 + }, + { + "epoch": 0.07644263408010862, + "grad_norm": 0.5006693997641047, + "learning_rate": 9.943669329052848e-06, + "loss": 0.5261, + "step": 1689 + }, + { + "epoch": 0.07648789318850419, + "grad_norm": 0.8170731618555016, + "learning_rate": 9.943559569286731e-06, + "loss": 0.4434, + "step": 1690 + }, + { + "epoch": 0.07653315229689975, + "grad_norm": 0.3748653606553218, + "learning_rate": 9.943449703298703e-06, + "loss": 0.5147, + "step": 1691 + }, + { + "epoch": 0.07657841140529531, + "grad_norm": 0.7888227413039046, + "learning_rate": 9.943339731091122e-06, + "loss": 0.437, + "step": 1692 + }, + { + "epoch": 0.07662367051369089, + "grad_norm": 0.38129147182439493, + "learning_rate": 9.943229652666353e-06, + "loss": 0.5146, + "step": 1693 + }, + { + "epoch": 0.07666892962208645, + "grad_norm": 0.7875704407185139, + "learning_rate": 9.94311946802676e-06, + "loss": 0.4428, + "step": 1694 + }, + { + "epoch": 0.07671418873048201, + "grad_norm": 1.7171840921170323, + "learning_rate": 9.943009177174712e-06, + "loss": 0.4613, + "step": 1695 + }, + { + "epoch": 0.07675944783887757, + "grad_norm": 0.5894610471131189, + "learning_rate": 9.942898780112578e-06, + "loss": 0.54, + "step": 1696 + }, + { + "epoch": 0.07680470694727314, + "grad_norm": 0.9081380234402502, + "learning_rate": 9.94278827684273e-06, + "loss": 0.4184, + "step": 1697 + }, + { + "epoch": 0.0768499660556687, + "grad_norm": 0.7823344173089999, + "learning_rate": 9.942677667367541e-06, + "loss": 0.4672, + "step": 1698 + }, + { + "epoch": 0.07689522516406427, + "grad_norm": 0.704900119854084, + "learning_rate": 9.942566951689391e-06, + "loss": 0.4232, + "step": 1699 + }, + { + "epoch": 0.07694048427245984, + "grad_norm": 0.9097669547846146, + "learning_rate": 9.942456129810658e-06, + "loss": 0.4317, + "step": 1700 + }, + { + "epoch": 0.0769857433808554, + "grad_norm": 0.7346865602242609, + "learning_rate": 9.942345201733722e-06, + "loss": 0.4731, + "step": 1701 + }, + { + "epoch": 0.07703100248925096, + "grad_norm": 0.7587183898655953, + "learning_rate": 9.942234167460966e-06, + "loss": 0.4739, + "step": 1702 + }, + { + "epoch": 0.07707626159764652, + "grad_norm": 0.8201545355377298, + "learning_rate": 9.942123026994776e-06, + "loss": 0.4157, + "step": 1703 + }, + { + "epoch": 0.07712152070604208, + "grad_norm": 0.7966173846718374, + "learning_rate": 9.942011780337542e-06, + "loss": 0.4148, + "step": 1704 + }, + { + "epoch": 0.07716677981443766, + "grad_norm": 0.7314677884036063, + "learning_rate": 9.941900427491652e-06, + "loss": 0.4087, + "step": 1705 + }, + { + "epoch": 0.07721203892283322, + "grad_norm": 0.5109860171092886, + "learning_rate": 9.941788968459502e-06, + "loss": 0.5203, + "step": 1706 + }, + { + "epoch": 0.07725729803122879, + "grad_norm": 0.8133081034298886, + "learning_rate": 9.941677403243482e-06, + "loss": 0.4036, + "step": 1707 + }, + { + "epoch": 0.07730255713962435, + "grad_norm": 0.7755811147059083, + "learning_rate": 9.941565731845993e-06, + "loss": 0.4689, + "step": 1708 + }, + { + "epoch": 0.07734781624801991, + "grad_norm": 0.7232739678213123, + "learning_rate": 9.941453954269434e-06, + "loss": 0.4253, + "step": 1709 + }, + { + "epoch": 0.07739307535641547, + "grad_norm": 0.8649248462101308, + "learning_rate": 9.941342070516205e-06, + "loss": 0.4546, + "step": 1710 + }, + { + "epoch": 0.07743833446481105, + "grad_norm": 0.8732318494015278, + "learning_rate": 9.941230080588711e-06, + "loss": 0.4374, + "step": 1711 + }, + { + "epoch": 0.07748359357320661, + "grad_norm": 0.7775579716115757, + "learning_rate": 9.941117984489358e-06, + "loss": 0.44, + "step": 1712 + }, + { + "epoch": 0.07752885268160217, + "grad_norm": 0.8352613535775968, + "learning_rate": 9.941005782220557e-06, + "loss": 0.4301, + "step": 1713 + }, + { + "epoch": 0.07757411178999774, + "grad_norm": 1.600225175835997, + "learning_rate": 9.940893473784714e-06, + "loss": 0.4748, + "step": 1714 + }, + { + "epoch": 0.0776193708983933, + "grad_norm": 0.7210361373387383, + "learning_rate": 9.940781059184246e-06, + "loss": 0.468, + "step": 1715 + }, + { + "epoch": 0.07766463000678886, + "grad_norm": 0.7215114442344711, + "learning_rate": 9.940668538421569e-06, + "loss": 0.4502, + "step": 1716 + }, + { + "epoch": 0.07770988911518444, + "grad_norm": 0.7494374543013032, + "learning_rate": 9.940555911499098e-06, + "loss": 0.4619, + "step": 1717 + }, + { + "epoch": 0.07775514822358, + "grad_norm": 0.8060329308220311, + "learning_rate": 9.940443178419255e-06, + "loss": 0.4807, + "step": 1718 + }, + { + "epoch": 0.07780040733197556, + "grad_norm": 0.7348165098234182, + "learning_rate": 9.940330339184461e-06, + "loss": 0.4063, + "step": 1719 + }, + { + "epoch": 0.07784566644037112, + "grad_norm": 0.7759411143569876, + "learning_rate": 9.94021739379714e-06, + "loss": 0.4637, + "step": 1720 + }, + { + "epoch": 0.07789092554876668, + "grad_norm": 0.6940545281776684, + "learning_rate": 9.940104342259721e-06, + "loss": 0.4375, + "step": 1721 + }, + { + "epoch": 0.07793618465716226, + "grad_norm": 0.6427432439226576, + "learning_rate": 9.939991184574632e-06, + "loss": 0.5262, + "step": 1722 + }, + { + "epoch": 0.07798144376555782, + "grad_norm": 0.7415240348599722, + "learning_rate": 9.939877920744305e-06, + "loss": 0.4516, + "step": 1723 + }, + { + "epoch": 0.07802670287395339, + "grad_norm": 0.7042025867455868, + "learning_rate": 9.939764550771172e-06, + "loss": 0.4324, + "step": 1724 + }, + { + "epoch": 0.07807196198234895, + "grad_norm": 0.7534567023710437, + "learning_rate": 9.939651074657672e-06, + "loss": 0.4533, + "step": 1725 + }, + { + "epoch": 0.07811722109074451, + "grad_norm": 0.5824720249805632, + "learning_rate": 9.939537492406239e-06, + "loss": 0.5252, + "step": 1726 + }, + { + "epoch": 0.07816248019914007, + "grad_norm": 0.7635615190018861, + "learning_rate": 9.939423804019316e-06, + "loss": 0.4304, + "step": 1727 + }, + { + "epoch": 0.07820773930753565, + "grad_norm": 0.806469733151042, + "learning_rate": 9.939310009499348e-06, + "loss": 0.4557, + "step": 1728 + }, + { + "epoch": 0.07825299841593121, + "grad_norm": 0.46661235464145895, + "learning_rate": 9.939196108848777e-06, + "loss": 0.5468, + "step": 1729 + }, + { + "epoch": 0.07829825752432677, + "grad_norm": 0.7065872520486806, + "learning_rate": 9.93908210207005e-06, + "loss": 0.3917, + "step": 1730 + }, + { + "epoch": 0.07834351663272233, + "grad_norm": 0.7716556106668118, + "learning_rate": 9.93896798916562e-06, + "loss": 0.4622, + "step": 1731 + }, + { + "epoch": 0.0783887757411179, + "grad_norm": 0.7320673064635882, + "learning_rate": 9.938853770137935e-06, + "loss": 0.4564, + "step": 1732 + }, + { + "epoch": 0.07843403484951346, + "grad_norm": 0.7696490103344769, + "learning_rate": 9.938739444989452e-06, + "loss": 0.453, + "step": 1733 + }, + { + "epoch": 0.07847929395790904, + "grad_norm": 0.7618050527451413, + "learning_rate": 9.938625013722625e-06, + "loss": 0.4298, + "step": 1734 + }, + { + "epoch": 0.0785245530663046, + "grad_norm": 0.7287327328684455, + "learning_rate": 9.938510476339915e-06, + "loss": 0.4492, + "step": 1735 + }, + { + "epoch": 0.07856981217470016, + "grad_norm": 0.7355669694141859, + "learning_rate": 9.938395832843784e-06, + "loss": 0.4397, + "step": 1736 + }, + { + "epoch": 0.07861507128309572, + "grad_norm": 0.5986083608600049, + "learning_rate": 9.938281083236692e-06, + "loss": 0.5109, + "step": 1737 + }, + { + "epoch": 0.07866033039149128, + "grad_norm": 0.8267103372429406, + "learning_rate": 9.938166227521106e-06, + "loss": 0.464, + "step": 1738 + }, + { + "epoch": 0.07870558949988685, + "grad_norm": 0.8498829525864762, + "learning_rate": 9.938051265699495e-06, + "loss": 0.4495, + "step": 1739 + }, + { + "epoch": 0.07875084860828242, + "grad_norm": 0.7371846718545788, + "learning_rate": 9.937936197774328e-06, + "loss": 0.493, + "step": 1740 + }, + { + "epoch": 0.07879610771667798, + "grad_norm": 0.7612591001481491, + "learning_rate": 9.937821023748077e-06, + "loss": 0.506, + "step": 1741 + }, + { + "epoch": 0.07884136682507355, + "grad_norm": 0.7921542623351243, + "learning_rate": 9.93770574362322e-06, + "loss": 0.4469, + "step": 1742 + }, + { + "epoch": 0.07888662593346911, + "grad_norm": 0.7471390352933903, + "learning_rate": 9.937590357402229e-06, + "loss": 0.4762, + "step": 1743 + }, + { + "epoch": 0.07893188504186467, + "grad_norm": 0.6834612586390713, + "learning_rate": 9.937474865087588e-06, + "loss": 0.5168, + "step": 1744 + }, + { + "epoch": 0.07897714415026023, + "grad_norm": 0.4588021154978975, + "learning_rate": 9.937359266681774e-06, + "loss": 0.5015, + "step": 1745 + }, + { + "epoch": 0.07902240325865581, + "grad_norm": 0.8696667108506297, + "learning_rate": 9.937243562187276e-06, + "loss": 0.3916, + "step": 1746 + }, + { + "epoch": 0.07906766236705137, + "grad_norm": 0.9082653531391771, + "learning_rate": 9.937127751606577e-06, + "loss": 0.4699, + "step": 1747 + }, + { + "epoch": 0.07911292147544693, + "grad_norm": 0.5583039423714269, + "learning_rate": 9.937011834942165e-06, + "loss": 0.5257, + "step": 1748 + }, + { + "epoch": 0.0791581805838425, + "grad_norm": 0.7838632052997255, + "learning_rate": 9.936895812196531e-06, + "loss": 0.4654, + "step": 1749 + }, + { + "epoch": 0.07920343969223806, + "grad_norm": 0.8033674155474345, + "learning_rate": 9.936779683372169e-06, + "loss": 0.4551, + "step": 1750 + }, + { + "epoch": 0.07924869880063362, + "grad_norm": 0.7960851566293581, + "learning_rate": 9.936663448471573e-06, + "loss": 0.4933, + "step": 1751 + }, + { + "epoch": 0.0792939579090292, + "grad_norm": 0.75375389407784, + "learning_rate": 9.936547107497243e-06, + "loss": 0.4504, + "step": 1752 + }, + { + "epoch": 0.07933921701742476, + "grad_norm": 0.8160771745066261, + "learning_rate": 9.936430660451676e-06, + "loss": 0.4346, + "step": 1753 + }, + { + "epoch": 0.07938447612582032, + "grad_norm": 0.7558067831789662, + "learning_rate": 9.936314107337375e-06, + "loss": 0.4562, + "step": 1754 + }, + { + "epoch": 0.07942973523421588, + "grad_norm": 0.7699610133117941, + "learning_rate": 9.936197448156845e-06, + "loss": 0.4346, + "step": 1755 + }, + { + "epoch": 0.07947499434261145, + "grad_norm": 0.4979692975191816, + "learning_rate": 9.936080682912594e-06, + "loss": 0.54, + "step": 1756 + }, + { + "epoch": 0.07952025345100701, + "grad_norm": 0.8028363930031575, + "learning_rate": 9.935963811607127e-06, + "loss": 0.4594, + "step": 1757 + }, + { + "epoch": 0.07956551255940258, + "grad_norm": 0.7932475224690867, + "learning_rate": 9.935846834242956e-06, + "loss": 0.4588, + "step": 1758 + }, + { + "epoch": 0.07961077166779815, + "grad_norm": 0.7325005955848916, + "learning_rate": 9.935729750822598e-06, + "loss": 0.4699, + "step": 1759 + }, + { + "epoch": 0.07965603077619371, + "grad_norm": 0.3710574617410941, + "learning_rate": 9.935612561348566e-06, + "loss": 0.5169, + "step": 1760 + }, + { + "epoch": 0.07970128988458927, + "grad_norm": 0.7945140363859222, + "learning_rate": 9.935495265823379e-06, + "loss": 0.4356, + "step": 1761 + }, + { + "epoch": 0.07974654899298483, + "grad_norm": 1.1506262279272965, + "learning_rate": 9.935377864249558e-06, + "loss": 0.4829, + "step": 1762 + }, + { + "epoch": 0.0797918081013804, + "grad_norm": 0.7278323845638668, + "learning_rate": 9.935260356629623e-06, + "loss": 0.4144, + "step": 1763 + }, + { + "epoch": 0.07983706720977597, + "grad_norm": 0.7497010631773515, + "learning_rate": 9.935142742966099e-06, + "loss": 0.3977, + "step": 1764 + }, + { + "epoch": 0.07988232631817153, + "grad_norm": 0.7404458281691546, + "learning_rate": 9.935025023261518e-06, + "loss": 0.4732, + "step": 1765 + }, + { + "epoch": 0.0799275854265671, + "grad_norm": 0.46932200375510835, + "learning_rate": 9.934907197518405e-06, + "loss": 0.5196, + "step": 1766 + }, + { + "epoch": 0.07997284453496266, + "grad_norm": 0.7566804198935556, + "learning_rate": 9.934789265739291e-06, + "loss": 0.4248, + "step": 1767 + }, + { + "epoch": 0.08001810364335822, + "grad_norm": 0.36487050083608025, + "learning_rate": 9.934671227926714e-06, + "loss": 0.5084, + "step": 1768 + }, + { + "epoch": 0.0800633627517538, + "grad_norm": 0.7996572718306318, + "learning_rate": 9.934553084083205e-06, + "loss": 0.4203, + "step": 1769 + }, + { + "epoch": 0.08010862186014936, + "grad_norm": 0.7649702206192669, + "learning_rate": 9.934434834211309e-06, + "loss": 0.4812, + "step": 1770 + }, + { + "epoch": 0.08015388096854492, + "grad_norm": 0.7872672685702111, + "learning_rate": 9.93431647831356e-06, + "loss": 0.4276, + "step": 1771 + }, + { + "epoch": 0.08019914007694048, + "grad_norm": 0.7374095690002961, + "learning_rate": 9.934198016392507e-06, + "loss": 0.4045, + "step": 1772 + }, + { + "epoch": 0.08024439918533605, + "grad_norm": 0.46296745243555587, + "learning_rate": 9.934079448450692e-06, + "loss": 0.5258, + "step": 1773 + }, + { + "epoch": 0.08028965829373161, + "grad_norm": 0.9402761167337232, + "learning_rate": 9.933960774490663e-06, + "loss": 0.4316, + "step": 1774 + }, + { + "epoch": 0.08033491740212718, + "grad_norm": 0.770679153259081, + "learning_rate": 9.933841994514972e-06, + "loss": 0.4937, + "step": 1775 + }, + { + "epoch": 0.08038017651052275, + "grad_norm": 0.7540461540716024, + "learning_rate": 9.933723108526168e-06, + "loss": 0.456, + "step": 1776 + }, + { + "epoch": 0.08042543561891831, + "grad_norm": 0.4096177953587281, + "learning_rate": 9.933604116526807e-06, + "loss": 0.5169, + "step": 1777 + }, + { + "epoch": 0.08047069472731387, + "grad_norm": 0.846509551981744, + "learning_rate": 9.933485018519448e-06, + "loss": 0.4286, + "step": 1778 + }, + { + "epoch": 0.08051595383570943, + "grad_norm": 0.7339440286381098, + "learning_rate": 9.933365814506646e-06, + "loss": 0.4304, + "step": 1779 + }, + { + "epoch": 0.080561212944105, + "grad_norm": 0.7571291353344685, + "learning_rate": 9.933246504490966e-06, + "loss": 0.4615, + "step": 1780 + }, + { + "epoch": 0.08060647205250057, + "grad_norm": 0.785352191096861, + "learning_rate": 9.933127088474968e-06, + "loss": 0.4042, + "step": 1781 + }, + { + "epoch": 0.08065173116089613, + "grad_norm": 0.7196603766142141, + "learning_rate": 9.93300756646122e-06, + "loss": 0.4385, + "step": 1782 + }, + { + "epoch": 0.0806969902692917, + "grad_norm": 0.790373245354847, + "learning_rate": 9.932887938452292e-06, + "loss": 0.4111, + "step": 1783 + }, + { + "epoch": 0.08074224937768726, + "grad_norm": 0.7157238169714207, + "learning_rate": 9.932768204450751e-06, + "loss": 0.4257, + "step": 1784 + }, + { + "epoch": 0.08078750848608282, + "grad_norm": 0.8043970204059999, + "learning_rate": 9.932648364459172e-06, + "loss": 0.4602, + "step": 1785 + }, + { + "epoch": 0.08083276759447838, + "grad_norm": 0.8122044645865316, + "learning_rate": 9.93252841848013e-06, + "loss": 0.4547, + "step": 1786 + }, + { + "epoch": 0.08087802670287396, + "grad_norm": 0.7857914577282751, + "learning_rate": 9.932408366516198e-06, + "loss": 0.4828, + "step": 1787 + }, + { + "epoch": 0.08092328581126952, + "grad_norm": 0.7751491400233553, + "learning_rate": 9.932288208569961e-06, + "loss": 0.3924, + "step": 1788 + }, + { + "epoch": 0.08096854491966508, + "grad_norm": 0.7582334131958042, + "learning_rate": 9.932167944643998e-06, + "loss": 0.4319, + "step": 1789 + }, + { + "epoch": 0.08101380402806065, + "grad_norm": 0.7161260925348351, + "learning_rate": 9.932047574740895e-06, + "loss": 0.4755, + "step": 1790 + }, + { + "epoch": 0.08105906313645621, + "grad_norm": 0.8180310284049336, + "learning_rate": 9.931927098863237e-06, + "loss": 0.4175, + "step": 1791 + }, + { + "epoch": 0.08110432224485177, + "grad_norm": 0.6835845823674249, + "learning_rate": 9.931806517013612e-06, + "loss": 0.4034, + "step": 1792 + }, + { + "epoch": 0.08114958135324735, + "grad_norm": 0.577581697777701, + "learning_rate": 9.931685829194612e-06, + "loss": 0.4926, + "step": 1793 + }, + { + "epoch": 0.08119484046164291, + "grad_norm": 0.788125945271417, + "learning_rate": 9.931565035408833e-06, + "loss": 0.4507, + "step": 1794 + }, + { + "epoch": 0.08124009957003847, + "grad_norm": 0.7967217507636467, + "learning_rate": 9.931444135658864e-06, + "loss": 0.4145, + "step": 1795 + }, + { + "epoch": 0.08128535867843403, + "grad_norm": 0.7126756325676225, + "learning_rate": 9.931323129947306e-06, + "loss": 0.4061, + "step": 1796 + }, + { + "epoch": 0.0813306177868296, + "grad_norm": 0.7425565928836916, + "learning_rate": 9.931202018276761e-06, + "loss": 0.4138, + "step": 1797 + }, + { + "epoch": 0.08137587689522516, + "grad_norm": 0.7435477086974337, + "learning_rate": 9.93108080064983e-06, + "loss": 0.4607, + "step": 1798 + }, + { + "epoch": 0.08142113600362073, + "grad_norm": 0.772240860997725, + "learning_rate": 9.930959477069117e-06, + "loss": 0.4587, + "step": 1799 + }, + { + "epoch": 0.0814663951120163, + "grad_norm": 0.7435660703263862, + "learning_rate": 9.930838047537228e-06, + "loss": 0.4311, + "step": 1800 + }, + { + "epoch": 0.08151165422041186, + "grad_norm": 0.6831940907334916, + "learning_rate": 9.930716512056775e-06, + "loss": 0.4285, + "step": 1801 + }, + { + "epoch": 0.08155691332880742, + "grad_norm": 0.8106025492808707, + "learning_rate": 9.930594870630365e-06, + "loss": 0.4521, + "step": 1802 + }, + { + "epoch": 0.08160217243720298, + "grad_norm": 0.752269782845294, + "learning_rate": 9.930473123260618e-06, + "loss": 0.5216, + "step": 1803 + }, + { + "epoch": 0.08164743154559854, + "grad_norm": 0.7751702345109988, + "learning_rate": 9.930351269950144e-06, + "loss": 0.4725, + "step": 1804 + }, + { + "epoch": 0.08169269065399412, + "grad_norm": 0.7742768817473613, + "learning_rate": 9.930229310701563e-06, + "loss": 0.424, + "step": 1805 + }, + { + "epoch": 0.08173794976238968, + "grad_norm": 0.46471716913368016, + "learning_rate": 9.930107245517498e-06, + "loss": 0.55, + "step": 1806 + }, + { + "epoch": 0.08178320887078525, + "grad_norm": 0.3865056561970093, + "learning_rate": 9.929985074400569e-06, + "loss": 0.5319, + "step": 1807 + }, + { + "epoch": 0.08182846797918081, + "grad_norm": 0.9958802419213363, + "learning_rate": 9.929862797353402e-06, + "loss": 0.4585, + "step": 1808 + }, + { + "epoch": 0.08187372708757637, + "grad_norm": 0.8995192761447293, + "learning_rate": 9.929740414378625e-06, + "loss": 0.4438, + "step": 1809 + }, + { + "epoch": 0.08191898619597195, + "grad_norm": 0.5321022825681664, + "learning_rate": 9.929617925478868e-06, + "loss": 0.5199, + "step": 1810 + }, + { + "epoch": 0.08196424530436751, + "grad_norm": 0.9978659299427117, + "learning_rate": 9.92949533065676e-06, + "loss": 0.4453, + "step": 1811 + }, + { + "epoch": 0.08200950441276307, + "grad_norm": 0.9841211011383583, + "learning_rate": 9.929372629914937e-06, + "loss": 0.4665, + "step": 1812 + }, + { + "epoch": 0.08205476352115863, + "grad_norm": 0.47226880302660784, + "learning_rate": 9.929249823256037e-06, + "loss": 0.4928, + "step": 1813 + }, + { + "epoch": 0.0821000226295542, + "grad_norm": 0.44030382896499054, + "learning_rate": 9.929126910682697e-06, + "loss": 0.5312, + "step": 1814 + }, + { + "epoch": 0.08214528173794976, + "grad_norm": 1.0380986223119235, + "learning_rate": 9.929003892197558e-06, + "loss": 0.437, + "step": 1815 + }, + { + "epoch": 0.08219054084634533, + "grad_norm": 0.8010625789004381, + "learning_rate": 9.928880767803264e-06, + "loss": 0.4426, + "step": 1816 + }, + { + "epoch": 0.0822357999547409, + "grad_norm": 0.8061411711841202, + "learning_rate": 9.928757537502458e-06, + "loss": 0.444, + "step": 1817 + }, + { + "epoch": 0.08228105906313646, + "grad_norm": 0.9451107724245381, + "learning_rate": 9.928634201297793e-06, + "loss": 0.4575, + "step": 1818 + }, + { + "epoch": 0.08232631817153202, + "grad_norm": 0.7274563128661418, + "learning_rate": 9.928510759191914e-06, + "loss": 0.4367, + "step": 1819 + }, + { + "epoch": 0.08237157727992758, + "grad_norm": 0.8482490358684864, + "learning_rate": 9.928387211187478e-06, + "loss": 0.43, + "step": 1820 + }, + { + "epoch": 0.08241683638832314, + "grad_norm": 0.9110378559090746, + "learning_rate": 9.928263557287135e-06, + "loss": 0.4179, + "step": 1821 + }, + { + "epoch": 0.08246209549671872, + "grad_norm": 0.9439672447412728, + "learning_rate": 9.928139797493545e-06, + "loss": 0.4434, + "step": 1822 + }, + { + "epoch": 0.08250735460511428, + "grad_norm": 0.6352867452932015, + "learning_rate": 9.928015931809368e-06, + "loss": 0.5682, + "step": 1823 + }, + { + "epoch": 0.08255261371350985, + "grad_norm": 0.8239888115018645, + "learning_rate": 9.927891960237261e-06, + "loss": 0.4506, + "step": 1824 + }, + { + "epoch": 0.08259787282190541, + "grad_norm": 0.42021768765074907, + "learning_rate": 9.927767882779892e-06, + "loss": 0.498, + "step": 1825 + }, + { + "epoch": 0.08264313193030097, + "grad_norm": 1.2031228767214417, + "learning_rate": 9.927643699439925e-06, + "loss": 0.4705, + "step": 1826 + }, + { + "epoch": 0.08268839103869653, + "grad_norm": 0.7864408720289386, + "learning_rate": 9.92751941022003e-06, + "loss": 0.4587, + "step": 1827 + }, + { + "epoch": 0.08273365014709211, + "grad_norm": 0.8146428310929719, + "learning_rate": 9.927395015122876e-06, + "loss": 0.4978, + "step": 1828 + }, + { + "epoch": 0.08277890925548767, + "grad_norm": 0.9403173929391236, + "learning_rate": 9.927270514151137e-06, + "loss": 0.4838, + "step": 1829 + }, + { + "epoch": 0.08282416836388323, + "grad_norm": 0.7782652098882176, + "learning_rate": 9.927145907307486e-06, + "loss": 0.4569, + "step": 1830 + }, + { + "epoch": 0.0828694274722788, + "grad_norm": 1.0164338183003059, + "learning_rate": 9.927021194594604e-06, + "loss": 0.5406, + "step": 1831 + }, + { + "epoch": 0.08291468658067436, + "grad_norm": 0.7846927910640304, + "learning_rate": 9.926896376015168e-06, + "loss": 0.4387, + "step": 1832 + }, + { + "epoch": 0.08295994568906992, + "grad_norm": 0.8219142743159832, + "learning_rate": 9.926771451571862e-06, + "loss": 0.5215, + "step": 1833 + }, + { + "epoch": 0.0830052047974655, + "grad_norm": 0.6988546359400207, + "learning_rate": 9.926646421267366e-06, + "loss": 0.4201, + "step": 1834 + }, + { + "epoch": 0.08305046390586106, + "grad_norm": 0.7129991767098324, + "learning_rate": 9.926521285104371e-06, + "loss": 0.4536, + "step": 1835 + }, + { + "epoch": 0.08309572301425662, + "grad_norm": 0.7806065574483061, + "learning_rate": 9.926396043085564e-06, + "loss": 0.4813, + "step": 1836 + }, + { + "epoch": 0.08314098212265218, + "grad_norm": 0.7805909948187303, + "learning_rate": 9.926270695213638e-06, + "loss": 0.4476, + "step": 1837 + }, + { + "epoch": 0.08318624123104774, + "grad_norm": 0.5948129410350549, + "learning_rate": 9.926145241491283e-06, + "loss": 0.4907, + "step": 1838 + }, + { + "epoch": 0.0832315003394433, + "grad_norm": 0.8754548731717574, + "learning_rate": 9.926019681921196e-06, + "loss": 0.4772, + "step": 1839 + }, + { + "epoch": 0.08327675944783888, + "grad_norm": 0.9208126996590229, + "learning_rate": 9.925894016506076e-06, + "loss": 0.4527, + "step": 1840 + }, + { + "epoch": 0.08332201855623445, + "grad_norm": 1.202719505119469, + "learning_rate": 9.925768245248622e-06, + "loss": 0.4071, + "step": 1841 + }, + { + "epoch": 0.08336727766463001, + "grad_norm": 0.8513587887535525, + "learning_rate": 9.925642368151536e-06, + "loss": 0.4702, + "step": 1842 + }, + { + "epoch": 0.08341253677302557, + "grad_norm": 0.8056976169739187, + "learning_rate": 9.925516385217524e-06, + "loss": 0.438, + "step": 1843 + }, + { + "epoch": 0.08345779588142113, + "grad_norm": 0.9725598362627249, + "learning_rate": 9.925390296449293e-06, + "loss": 0.4457, + "step": 1844 + }, + { + "epoch": 0.0835030549898167, + "grad_norm": 0.8121083302424683, + "learning_rate": 9.925264101849552e-06, + "loss": 0.429, + "step": 1845 + }, + { + "epoch": 0.08354831409821227, + "grad_norm": 0.7744796561216578, + "learning_rate": 9.925137801421011e-06, + "loss": 0.5364, + "step": 1846 + }, + { + "epoch": 0.08359357320660783, + "grad_norm": 0.800946010472054, + "learning_rate": 9.925011395166387e-06, + "loss": 0.3983, + "step": 1847 + }, + { + "epoch": 0.0836388323150034, + "grad_norm": 0.7276221221900879, + "learning_rate": 9.924884883088392e-06, + "loss": 0.51, + "step": 1848 + }, + { + "epoch": 0.08368409142339896, + "grad_norm": 0.4285744008499849, + "learning_rate": 9.924758265189746e-06, + "loss": 0.5154, + "step": 1849 + }, + { + "epoch": 0.08372935053179452, + "grad_norm": 0.4265868429049617, + "learning_rate": 9.924631541473174e-06, + "loss": 0.5345, + "step": 1850 + }, + { + "epoch": 0.08377460964019008, + "grad_norm": 0.7860118715571093, + "learning_rate": 9.924504711941391e-06, + "loss": 0.4562, + "step": 1851 + }, + { + "epoch": 0.08381986874858566, + "grad_norm": 0.41053072199017704, + "learning_rate": 9.924377776597128e-06, + "loss": 0.5181, + "step": 1852 + }, + { + "epoch": 0.08386512785698122, + "grad_norm": 0.41695682875868034, + "learning_rate": 9.92425073544311e-06, + "loss": 0.5228, + "step": 1853 + }, + { + "epoch": 0.08391038696537678, + "grad_norm": 0.8054863739728471, + "learning_rate": 9.924123588482068e-06, + "loss": 0.4461, + "step": 1854 + }, + { + "epoch": 0.08395564607377234, + "grad_norm": 0.7739457395814601, + "learning_rate": 9.923996335716732e-06, + "loss": 0.4478, + "step": 1855 + }, + { + "epoch": 0.0840009051821679, + "grad_norm": 0.7623799852036963, + "learning_rate": 9.92386897714984e-06, + "loss": 0.4644, + "step": 1856 + }, + { + "epoch": 0.08404616429056348, + "grad_norm": 0.7923643933942867, + "learning_rate": 9.923741512784124e-06, + "loss": 0.4439, + "step": 1857 + }, + { + "epoch": 0.08409142339895904, + "grad_norm": 0.7400779069636888, + "learning_rate": 9.923613942622326e-06, + "loss": 0.4717, + "step": 1858 + }, + { + "epoch": 0.08413668250735461, + "grad_norm": 0.7773476459849654, + "learning_rate": 9.923486266667186e-06, + "loss": 0.4649, + "step": 1859 + }, + { + "epoch": 0.08418194161575017, + "grad_norm": 0.7908800432339458, + "learning_rate": 9.923358484921447e-06, + "loss": 0.4666, + "step": 1860 + }, + { + "epoch": 0.08422720072414573, + "grad_norm": 0.7190776961384768, + "learning_rate": 9.923230597387856e-06, + "loss": 0.4084, + "step": 1861 + }, + { + "epoch": 0.0842724598325413, + "grad_norm": 0.7009628564366115, + "learning_rate": 9.92310260406916e-06, + "loss": 0.4366, + "step": 1862 + }, + { + "epoch": 0.08431771894093687, + "grad_norm": 0.7328454796007702, + "learning_rate": 9.922974504968107e-06, + "loss": 0.4222, + "step": 1863 + }, + { + "epoch": 0.08436297804933243, + "grad_norm": 0.8678391023098952, + "learning_rate": 9.922846300087454e-06, + "loss": 0.4821, + "step": 1864 + }, + { + "epoch": 0.084408237157728, + "grad_norm": 0.8433282876733529, + "learning_rate": 9.922717989429954e-06, + "loss": 0.4217, + "step": 1865 + }, + { + "epoch": 0.08445349626612356, + "grad_norm": 0.7218042482897301, + "learning_rate": 9.922589572998362e-06, + "loss": 0.5084, + "step": 1866 + }, + { + "epoch": 0.08449875537451912, + "grad_norm": 0.8602340789259252, + "learning_rate": 9.922461050795438e-06, + "loss": 0.4069, + "step": 1867 + }, + { + "epoch": 0.08454401448291468, + "grad_norm": 0.8309640447672152, + "learning_rate": 9.922332422823945e-06, + "loss": 0.5391, + "step": 1868 + }, + { + "epoch": 0.08458927359131026, + "grad_norm": 0.8035478084960879, + "learning_rate": 9.922203689086647e-06, + "loss": 0.4768, + "step": 1869 + }, + { + "epoch": 0.08463453269970582, + "grad_norm": 0.4148660540710434, + "learning_rate": 9.922074849586308e-06, + "loss": 0.5419, + "step": 1870 + }, + { + "epoch": 0.08467979180810138, + "grad_norm": 0.8219306255439327, + "learning_rate": 9.921945904325697e-06, + "loss": 0.43, + "step": 1871 + }, + { + "epoch": 0.08472505091649694, + "grad_norm": 0.5312965844343916, + "learning_rate": 9.921816853307587e-06, + "loss": 0.5292, + "step": 1872 + }, + { + "epoch": 0.0847703100248925, + "grad_norm": 0.7367383375649544, + "learning_rate": 9.921687696534747e-06, + "loss": 0.4661, + "step": 1873 + }, + { + "epoch": 0.08481556913328807, + "grad_norm": 0.7950765321616564, + "learning_rate": 9.921558434009955e-06, + "loss": 0.4288, + "step": 1874 + }, + { + "epoch": 0.08486082824168364, + "grad_norm": 0.7644887707411738, + "learning_rate": 9.921429065735988e-06, + "loss": 0.4307, + "step": 1875 + }, + { + "epoch": 0.0849060873500792, + "grad_norm": 0.7419573945037351, + "learning_rate": 9.921299591715624e-06, + "loss": 0.4117, + "step": 1876 + }, + { + "epoch": 0.08495134645847477, + "grad_norm": 0.7484051985152155, + "learning_rate": 9.921170011951647e-06, + "loss": 0.4789, + "step": 1877 + }, + { + "epoch": 0.08499660556687033, + "grad_norm": 0.7464732006200073, + "learning_rate": 9.921040326446843e-06, + "loss": 0.4159, + "step": 1878 + }, + { + "epoch": 0.0850418646752659, + "grad_norm": 0.7313580668980304, + "learning_rate": 9.920910535203994e-06, + "loss": 0.4521, + "step": 1879 + }, + { + "epoch": 0.08508712378366146, + "grad_norm": 0.8321482272182403, + "learning_rate": 9.92078063822589e-06, + "loss": 0.4779, + "step": 1880 + }, + { + "epoch": 0.08513238289205703, + "grad_norm": 0.6967085482083333, + "learning_rate": 9.920650635515325e-06, + "loss": 0.3927, + "step": 1881 + }, + { + "epoch": 0.0851776420004526, + "grad_norm": 0.9794195771812104, + "learning_rate": 9.92052052707509e-06, + "loss": 0.5055, + "step": 1882 + }, + { + "epoch": 0.08522290110884816, + "grad_norm": 0.6899236636306616, + "learning_rate": 9.92039031290798e-06, + "loss": 0.4186, + "step": 1883 + }, + { + "epoch": 0.08526816021724372, + "grad_norm": 0.725913617196428, + "learning_rate": 9.920259993016797e-06, + "loss": 0.4871, + "step": 1884 + }, + { + "epoch": 0.08531341932563928, + "grad_norm": 0.8332609166219846, + "learning_rate": 9.920129567404335e-06, + "loss": 0.4206, + "step": 1885 + }, + { + "epoch": 0.08535867843403484, + "grad_norm": 0.7069291434494992, + "learning_rate": 9.9199990360734e-06, + "loss": 0.4109, + "step": 1886 + }, + { + "epoch": 0.08540393754243042, + "grad_norm": 0.7991002911044793, + "learning_rate": 9.919868399026797e-06, + "loss": 0.3979, + "step": 1887 + }, + { + "epoch": 0.08544919665082598, + "grad_norm": 0.8346063775026414, + "learning_rate": 9.919737656267335e-06, + "loss": 0.4882, + "step": 1888 + }, + { + "epoch": 0.08549445575922154, + "grad_norm": 0.6406919207588929, + "learning_rate": 9.919606807797817e-06, + "loss": 0.522, + "step": 1889 + }, + { + "epoch": 0.0855397148676171, + "grad_norm": 0.7523586431196658, + "learning_rate": 9.919475853621058e-06, + "loss": 0.4645, + "step": 1890 + }, + { + "epoch": 0.08558497397601267, + "grad_norm": 0.6798200716411895, + "learning_rate": 9.919344793739874e-06, + "loss": 0.4176, + "step": 1891 + }, + { + "epoch": 0.08563023308440823, + "grad_norm": 0.736139778725739, + "learning_rate": 9.919213628157078e-06, + "loss": 0.441, + "step": 1892 + }, + { + "epoch": 0.0856754921928038, + "grad_norm": 0.6998787747239904, + "learning_rate": 9.91908235687549e-06, + "loss": 0.4291, + "step": 1893 + }, + { + "epoch": 0.08572075130119937, + "grad_norm": 0.5684390273842443, + "learning_rate": 9.918950979897928e-06, + "loss": 0.5303, + "step": 1894 + }, + { + "epoch": 0.08576601040959493, + "grad_norm": 0.7239694026683012, + "learning_rate": 9.91881949722722e-06, + "loss": 0.4609, + "step": 1895 + }, + { + "epoch": 0.0858112695179905, + "grad_norm": 0.7182694255332468, + "learning_rate": 9.918687908866185e-06, + "loss": 0.4247, + "step": 1896 + }, + { + "epoch": 0.08585652862638606, + "grad_norm": 0.7599687280674057, + "learning_rate": 9.918556214817655e-06, + "loss": 0.4404, + "step": 1897 + }, + { + "epoch": 0.08590178773478163, + "grad_norm": 0.7937333688188135, + "learning_rate": 9.918424415084458e-06, + "loss": 0.4639, + "step": 1898 + }, + { + "epoch": 0.0859470468431772, + "grad_norm": 0.8085633869903383, + "learning_rate": 9.918292509669426e-06, + "loss": 0.4284, + "step": 1899 + }, + { + "epoch": 0.08599230595157276, + "grad_norm": 0.776740327788825, + "learning_rate": 9.918160498575394e-06, + "loss": 0.4654, + "step": 1900 + }, + { + "epoch": 0.08603756505996832, + "grad_norm": 0.7632130970803997, + "learning_rate": 9.918028381805196e-06, + "loss": 0.422, + "step": 1901 + }, + { + "epoch": 0.08608282416836388, + "grad_norm": 0.7443778623445391, + "learning_rate": 9.917896159361674e-06, + "loss": 0.4558, + "step": 1902 + }, + { + "epoch": 0.08612808327675944, + "grad_norm": 0.7202079960261414, + "learning_rate": 9.917763831247667e-06, + "loss": 0.4543, + "step": 1903 + }, + { + "epoch": 0.08617334238515502, + "grad_norm": 0.7044512276975228, + "learning_rate": 9.91763139746602e-06, + "loss": 0.5185, + "step": 1904 + }, + { + "epoch": 0.08621860149355058, + "grad_norm": 0.5471247904879195, + "learning_rate": 9.917498858019577e-06, + "loss": 0.5281, + "step": 1905 + }, + { + "epoch": 0.08626386060194614, + "grad_norm": 0.7310463517891044, + "learning_rate": 9.917366212911187e-06, + "loss": 0.3936, + "step": 1906 + }, + { + "epoch": 0.0863091197103417, + "grad_norm": 0.7507045212274692, + "learning_rate": 9.917233462143698e-06, + "loss": 0.4251, + "step": 1907 + }, + { + "epoch": 0.08635437881873727, + "grad_norm": 0.7295758228462808, + "learning_rate": 9.917100605719968e-06, + "loss": 0.4258, + "step": 1908 + }, + { + "epoch": 0.08639963792713283, + "grad_norm": 0.7133428447258995, + "learning_rate": 9.916967643642844e-06, + "loss": 0.4553, + "step": 1909 + }, + { + "epoch": 0.0864448970355284, + "grad_norm": 0.7734628826971062, + "learning_rate": 9.916834575915186e-06, + "loss": 0.417, + "step": 1910 + }, + { + "epoch": 0.08649015614392397, + "grad_norm": 0.6985026626342046, + "learning_rate": 9.916701402539857e-06, + "loss": 0.4119, + "step": 1911 + }, + { + "epoch": 0.08653541525231953, + "grad_norm": 1.1633875585458455, + "learning_rate": 9.916568123519713e-06, + "loss": 0.5522, + "step": 1912 + }, + { + "epoch": 0.08658067436071509, + "grad_norm": 0.8129193812189842, + "learning_rate": 9.916434738857621e-06, + "loss": 0.4336, + "step": 1913 + }, + { + "epoch": 0.08662593346911066, + "grad_norm": 0.7459738133804278, + "learning_rate": 9.916301248556446e-06, + "loss": 0.4838, + "step": 1914 + }, + { + "epoch": 0.08667119257750622, + "grad_norm": 0.7379164873726534, + "learning_rate": 9.916167652619058e-06, + "loss": 0.4234, + "step": 1915 + }, + { + "epoch": 0.0867164516859018, + "grad_norm": 0.8413281330999343, + "learning_rate": 9.916033951048322e-06, + "loss": 0.4448, + "step": 1916 + }, + { + "epoch": 0.08676171079429736, + "grad_norm": 0.7615897239798799, + "learning_rate": 9.915900143847119e-06, + "loss": 0.3915, + "step": 1917 + }, + { + "epoch": 0.08680696990269292, + "grad_norm": 0.7395346710437531, + "learning_rate": 9.915766231018317e-06, + "loss": 0.466, + "step": 1918 + }, + { + "epoch": 0.08685222901108848, + "grad_norm": 0.7461409358070803, + "learning_rate": 9.915632212564798e-06, + "loss": 0.4975, + "step": 1919 + }, + { + "epoch": 0.08689748811948404, + "grad_norm": 0.8443578605281495, + "learning_rate": 9.91549808848944e-06, + "loss": 0.5407, + "step": 1920 + }, + { + "epoch": 0.0869427472278796, + "grad_norm": 0.8118802915223485, + "learning_rate": 9.915363858795125e-06, + "loss": 0.4272, + "step": 1921 + }, + { + "epoch": 0.08698800633627518, + "grad_norm": 0.7345275937375815, + "learning_rate": 9.915229523484736e-06, + "loss": 0.4269, + "step": 1922 + }, + { + "epoch": 0.08703326544467074, + "grad_norm": 0.7122964942038054, + "learning_rate": 9.915095082561161e-06, + "loss": 0.4409, + "step": 1923 + }, + { + "epoch": 0.0870785245530663, + "grad_norm": 0.7396962298691463, + "learning_rate": 9.914960536027289e-06, + "loss": 0.4316, + "step": 1924 + }, + { + "epoch": 0.08712378366146187, + "grad_norm": 0.7307867562497987, + "learning_rate": 9.91482588388601e-06, + "loss": 0.4782, + "step": 1925 + }, + { + "epoch": 0.08716904276985743, + "grad_norm": 0.7484534005439776, + "learning_rate": 9.914691126140216e-06, + "loss": 0.4382, + "step": 1926 + }, + { + "epoch": 0.08721430187825299, + "grad_norm": 0.825731600659895, + "learning_rate": 9.914556262792805e-06, + "loss": 0.5151, + "step": 1927 + }, + { + "epoch": 0.08725956098664857, + "grad_norm": 0.7123185730894329, + "learning_rate": 9.914421293846675e-06, + "loss": 0.4662, + "step": 1928 + }, + { + "epoch": 0.08730482009504413, + "grad_norm": 0.7781423627660821, + "learning_rate": 9.914286219304724e-06, + "loss": 0.4698, + "step": 1929 + }, + { + "epoch": 0.08735007920343969, + "grad_norm": 0.6957365833355711, + "learning_rate": 9.914151039169855e-06, + "loss": 0.3905, + "step": 1930 + }, + { + "epoch": 0.08739533831183526, + "grad_norm": 0.6685636570567208, + "learning_rate": 9.914015753444973e-06, + "loss": 0.4447, + "step": 1931 + }, + { + "epoch": 0.08744059742023082, + "grad_norm": 1.0053401661334387, + "learning_rate": 9.913880362132984e-06, + "loss": 0.5551, + "step": 1932 + }, + { + "epoch": 0.08748585652862638, + "grad_norm": 0.6118565640438647, + "learning_rate": 9.913744865236798e-06, + "loss": 0.5343, + "step": 1933 + }, + { + "epoch": 0.08753111563702196, + "grad_norm": 1.0514009365642782, + "learning_rate": 9.913609262759326e-06, + "loss": 0.4909, + "step": 1934 + }, + { + "epoch": 0.08757637474541752, + "grad_norm": 0.855627611367071, + "learning_rate": 9.913473554703483e-06, + "loss": 0.4809, + "step": 1935 + }, + { + "epoch": 0.08762163385381308, + "grad_norm": 0.7468569420197058, + "learning_rate": 9.913337741072183e-06, + "loss": 0.4447, + "step": 1936 + }, + { + "epoch": 0.08766689296220864, + "grad_norm": 0.7679281923297259, + "learning_rate": 9.913201821868345e-06, + "loss": 0.4235, + "step": 1937 + }, + { + "epoch": 0.0877121520706042, + "grad_norm": 0.695156803389326, + "learning_rate": 9.913065797094893e-06, + "loss": 0.3993, + "step": 1938 + }, + { + "epoch": 0.08775741117899977, + "grad_norm": 0.7702685215584218, + "learning_rate": 9.912929666754741e-06, + "loss": 0.422, + "step": 1939 + }, + { + "epoch": 0.08780267028739534, + "grad_norm": 0.7598651277864084, + "learning_rate": 9.912793430850822e-06, + "loss": 0.4281, + "step": 1940 + }, + { + "epoch": 0.0878479293957909, + "grad_norm": 0.6621538356949686, + "learning_rate": 9.912657089386062e-06, + "loss": 0.4467, + "step": 1941 + }, + { + "epoch": 0.08789318850418647, + "grad_norm": 0.738213356518308, + "learning_rate": 9.912520642363387e-06, + "loss": 0.4615, + "step": 1942 + }, + { + "epoch": 0.08793844761258203, + "grad_norm": 2.1635848608348716, + "learning_rate": 9.912384089785731e-06, + "loss": 0.6037, + "step": 1943 + }, + { + "epoch": 0.08798370672097759, + "grad_norm": 1.0352199439365746, + "learning_rate": 9.91224743165603e-06, + "loss": 0.4801, + "step": 1944 + }, + { + "epoch": 0.08802896582937317, + "grad_norm": 0.8709803651125759, + "learning_rate": 9.912110667977218e-06, + "loss": 0.5165, + "step": 1945 + }, + { + "epoch": 0.08807422493776873, + "grad_norm": 0.8736736235706593, + "learning_rate": 9.911973798752232e-06, + "loss": 0.4637, + "step": 1946 + }, + { + "epoch": 0.08811948404616429, + "grad_norm": 0.8532192502651776, + "learning_rate": 9.911836823984018e-06, + "loss": 0.4777, + "step": 1947 + }, + { + "epoch": 0.08816474315455985, + "grad_norm": 0.7883996558296882, + "learning_rate": 9.911699743675513e-06, + "loss": 0.4497, + "step": 1948 + }, + { + "epoch": 0.08821000226295542, + "grad_norm": 0.768457003127408, + "learning_rate": 9.911562557829668e-06, + "loss": 0.4458, + "step": 1949 + }, + { + "epoch": 0.08825526137135098, + "grad_norm": 1.2044000998025475, + "learning_rate": 9.911425266449428e-06, + "loss": 0.521, + "step": 1950 + }, + { + "epoch": 0.08830052047974656, + "grad_norm": 0.8025949214413347, + "learning_rate": 9.911287869537744e-06, + "loss": 0.4367, + "step": 1951 + }, + { + "epoch": 0.08834577958814212, + "grad_norm": 0.8465178290443464, + "learning_rate": 9.911150367097566e-06, + "loss": 0.4638, + "step": 1952 + }, + { + "epoch": 0.08839103869653768, + "grad_norm": 0.8250438554174135, + "learning_rate": 9.911012759131852e-06, + "loss": 0.438, + "step": 1953 + }, + { + "epoch": 0.08843629780493324, + "grad_norm": 0.7497252987029995, + "learning_rate": 9.910875045643555e-06, + "loss": 0.4175, + "step": 1954 + }, + { + "epoch": 0.0884815569133288, + "grad_norm": 0.7660876420547476, + "learning_rate": 9.910737226635636e-06, + "loss": 0.4404, + "step": 1955 + }, + { + "epoch": 0.08852681602172437, + "grad_norm": 0.7384189585829709, + "learning_rate": 9.910599302111057e-06, + "loss": 0.4182, + "step": 1956 + }, + { + "epoch": 0.08857207513011994, + "grad_norm": 0.7495072122782613, + "learning_rate": 9.91046127207278e-06, + "loss": 0.4084, + "step": 1957 + }, + { + "epoch": 0.0886173342385155, + "grad_norm": 0.7627524376716129, + "learning_rate": 9.910323136523773e-06, + "loss": 0.4389, + "step": 1958 + }, + { + "epoch": 0.08866259334691107, + "grad_norm": 0.7313100522135206, + "learning_rate": 9.910184895467001e-06, + "loss": 0.4454, + "step": 1959 + }, + { + "epoch": 0.08870785245530663, + "grad_norm": 0.712201543133738, + "learning_rate": 9.910046548905437e-06, + "loss": 0.4231, + "step": 1960 + }, + { + "epoch": 0.08875311156370219, + "grad_norm": 0.7388104424423865, + "learning_rate": 9.909908096842053e-06, + "loss": 0.4943, + "step": 1961 + }, + { + "epoch": 0.08879837067209775, + "grad_norm": 0.7075371931646569, + "learning_rate": 9.909769539279823e-06, + "loss": 0.473, + "step": 1962 + }, + { + "epoch": 0.08884362978049333, + "grad_norm": 0.6600754349658474, + "learning_rate": 9.909630876221726e-06, + "loss": 0.5189, + "step": 1963 + }, + { + "epoch": 0.08888888888888889, + "grad_norm": 0.7682343599469311, + "learning_rate": 9.909492107670737e-06, + "loss": 0.4538, + "step": 1964 + }, + { + "epoch": 0.08893414799728445, + "grad_norm": 0.7616573920258807, + "learning_rate": 9.909353233629844e-06, + "loss": 0.4521, + "step": 1965 + }, + { + "epoch": 0.08897940710568002, + "grad_norm": 0.7655653986533902, + "learning_rate": 9.909214254102027e-06, + "loss": 0.5001, + "step": 1966 + }, + { + "epoch": 0.08902466621407558, + "grad_norm": 0.7915882076561631, + "learning_rate": 9.909075169090275e-06, + "loss": 0.4719, + "step": 1967 + }, + { + "epoch": 0.08906992532247114, + "grad_norm": 0.8313921499635212, + "learning_rate": 9.90893597859757e-06, + "loss": 0.4376, + "step": 1968 + }, + { + "epoch": 0.08911518443086672, + "grad_norm": 0.7865427117270996, + "learning_rate": 9.908796682626911e-06, + "loss": 0.4728, + "step": 1969 + }, + { + "epoch": 0.08916044353926228, + "grad_norm": 0.7169640304743307, + "learning_rate": 9.908657281181289e-06, + "loss": 0.4036, + "step": 1970 + }, + { + "epoch": 0.08920570264765784, + "grad_norm": 0.7259107241970512, + "learning_rate": 9.908517774263694e-06, + "loss": 0.4504, + "step": 1971 + }, + { + "epoch": 0.0892509617560534, + "grad_norm": 0.8039701538287216, + "learning_rate": 9.90837816187713e-06, + "loss": 0.4208, + "step": 1972 + }, + { + "epoch": 0.08929622086444897, + "grad_norm": 0.8988560217541354, + "learning_rate": 9.908238444024593e-06, + "loss": 0.4544, + "step": 1973 + }, + { + "epoch": 0.08934147997284453, + "grad_norm": 0.7390784236507443, + "learning_rate": 9.908098620709088e-06, + "loss": 0.459, + "step": 1974 + }, + { + "epoch": 0.0893867390812401, + "grad_norm": 0.71640823902626, + "learning_rate": 9.907958691933616e-06, + "loss": 0.4309, + "step": 1975 + }, + { + "epoch": 0.08943199818963567, + "grad_norm": 0.7413322149073592, + "learning_rate": 9.907818657701185e-06, + "loss": 0.4342, + "step": 1976 + }, + { + "epoch": 0.08947725729803123, + "grad_norm": 0.7103835938842725, + "learning_rate": 9.907678518014805e-06, + "loss": 0.5497, + "step": 1977 + }, + { + "epoch": 0.08952251640642679, + "grad_norm": 0.8569735720494202, + "learning_rate": 9.907538272877487e-06, + "loss": 0.4103, + "step": 1978 + }, + { + "epoch": 0.08956777551482235, + "grad_norm": 0.4764015148465107, + "learning_rate": 9.907397922292244e-06, + "loss": 0.5308, + "step": 1979 + }, + { + "epoch": 0.08961303462321792, + "grad_norm": 0.7423437895217264, + "learning_rate": 9.90725746626209e-06, + "loss": 0.4092, + "step": 1980 + }, + { + "epoch": 0.08965829373161349, + "grad_norm": 0.7474565450120157, + "learning_rate": 9.907116904790046e-06, + "loss": 0.4369, + "step": 1981 + }, + { + "epoch": 0.08970355284000905, + "grad_norm": 0.7462368731786069, + "learning_rate": 9.90697623787913e-06, + "loss": 0.4647, + "step": 1982 + }, + { + "epoch": 0.08974881194840462, + "grad_norm": 0.7712449048788309, + "learning_rate": 9.906835465532364e-06, + "loss": 0.4692, + "step": 1983 + }, + { + "epoch": 0.08979407105680018, + "grad_norm": 0.711234265516189, + "learning_rate": 9.906694587752777e-06, + "loss": 0.3805, + "step": 1984 + }, + { + "epoch": 0.08983933016519574, + "grad_norm": 0.8239657790937962, + "learning_rate": 9.906553604543392e-06, + "loss": 0.3879, + "step": 1985 + }, + { + "epoch": 0.08988458927359132, + "grad_norm": 0.6990732298441272, + "learning_rate": 9.90641251590724e-06, + "loss": 0.4371, + "step": 1986 + }, + { + "epoch": 0.08992984838198688, + "grad_norm": 0.7636960030695994, + "learning_rate": 9.906271321847349e-06, + "loss": 0.4598, + "step": 1987 + }, + { + "epoch": 0.08997510749038244, + "grad_norm": 0.7243944420146078, + "learning_rate": 9.906130022366757e-06, + "loss": 0.442, + "step": 1988 + }, + { + "epoch": 0.090020366598778, + "grad_norm": 0.7077679135943856, + "learning_rate": 9.905988617468501e-06, + "loss": 0.4358, + "step": 1989 + }, + { + "epoch": 0.09006562570717357, + "grad_norm": 0.69551264922042, + "learning_rate": 9.905847107155615e-06, + "loss": 0.3987, + "step": 1990 + }, + { + "epoch": 0.09011088481556913, + "grad_norm": 0.7493605737496202, + "learning_rate": 9.905705491431143e-06, + "loss": 0.3991, + "step": 1991 + }, + { + "epoch": 0.0901561439239647, + "grad_norm": 0.7035523261648925, + "learning_rate": 9.905563770298126e-06, + "loss": 0.4341, + "step": 1992 + }, + { + "epoch": 0.09020140303236027, + "grad_norm": 0.6974086929552428, + "learning_rate": 9.905421943759611e-06, + "loss": 0.4504, + "step": 1993 + }, + { + "epoch": 0.09024666214075583, + "grad_norm": 0.724066794803269, + "learning_rate": 9.905280011818642e-06, + "loss": 0.4629, + "step": 1994 + }, + { + "epoch": 0.09029192124915139, + "grad_norm": 0.856557517052248, + "learning_rate": 9.905137974478274e-06, + "loss": 0.4473, + "step": 1995 + }, + { + "epoch": 0.09033718035754695, + "grad_norm": 1.5435171830573482, + "learning_rate": 9.904995831741553e-06, + "loss": 0.5605, + "step": 1996 + }, + { + "epoch": 0.09038243946594252, + "grad_norm": 0.8950493639427329, + "learning_rate": 9.904853583611537e-06, + "loss": 0.5319, + "step": 1997 + }, + { + "epoch": 0.09042769857433809, + "grad_norm": 0.885813931552701, + "learning_rate": 9.904711230091284e-06, + "loss": 0.4585, + "step": 1998 + }, + { + "epoch": 0.09047295768273365, + "grad_norm": 0.8122692652550383, + "learning_rate": 9.904568771183848e-06, + "loss": 0.405, + "step": 1999 + }, + { + "epoch": 0.09051821679112922, + "grad_norm": 0.7032039282943026, + "learning_rate": 9.904426206892292e-06, + "loss": 0.4255, + "step": 2000 + }, + { + "epoch": 0.09056347589952478, + "grad_norm": 0.730949878970527, + "learning_rate": 9.90428353721968e-06, + "loss": 0.4146, + "step": 2001 + }, + { + "epoch": 0.09060873500792034, + "grad_norm": 1.8957984523607985, + "learning_rate": 9.904140762169079e-06, + "loss": 0.5715, + "step": 2002 + }, + { + "epoch": 0.0906539941163159, + "grad_norm": 1.6179371382040924, + "learning_rate": 9.903997881743552e-06, + "loss": 0.5678, + "step": 2003 + }, + { + "epoch": 0.09069925322471148, + "grad_norm": 0.8557369834319728, + "learning_rate": 9.903854895946174e-06, + "loss": 0.4557, + "step": 2004 + }, + { + "epoch": 0.09074451233310704, + "grad_norm": 0.9088274857229913, + "learning_rate": 9.903711804780015e-06, + "loss": 0.4157, + "step": 2005 + }, + { + "epoch": 0.0907897714415026, + "grad_norm": 0.8200194902087647, + "learning_rate": 9.90356860824815e-06, + "loss": 0.4635, + "step": 2006 + }, + { + "epoch": 0.09083503054989817, + "grad_norm": 0.8415326148921095, + "learning_rate": 9.903425306353656e-06, + "loss": 0.48, + "step": 2007 + }, + { + "epoch": 0.09088028965829373, + "grad_norm": 0.8056663597851978, + "learning_rate": 9.90328189909961e-06, + "loss": 0.4895, + "step": 2008 + }, + { + "epoch": 0.09092554876668929, + "grad_norm": 0.8676749506416057, + "learning_rate": 9.903138386489097e-06, + "loss": 0.4835, + "step": 2009 + }, + { + "epoch": 0.09097080787508487, + "grad_norm": 0.7626279262973016, + "learning_rate": 9.902994768525199e-06, + "loss": 0.4351, + "step": 2010 + }, + { + "epoch": 0.09101606698348043, + "grad_norm": 0.9920892948015188, + "learning_rate": 9.902851045211e-06, + "loss": 0.4682, + "step": 2011 + }, + { + "epoch": 0.09106132609187599, + "grad_norm": 0.8113461558852759, + "learning_rate": 9.902707216549592e-06, + "loss": 0.4427, + "step": 2012 + }, + { + "epoch": 0.09110658520027155, + "grad_norm": 0.7395981957725416, + "learning_rate": 9.902563282544061e-06, + "loss": 0.4191, + "step": 2013 + }, + { + "epoch": 0.09115184430866712, + "grad_norm": 0.7629633005452424, + "learning_rate": 9.902419243197505e-06, + "loss": 0.4024, + "step": 2014 + }, + { + "epoch": 0.09119710341706268, + "grad_norm": 0.7563524409039875, + "learning_rate": 9.902275098513015e-06, + "loss": 0.4209, + "step": 2015 + }, + { + "epoch": 0.09124236252545825, + "grad_norm": 0.7238738044857984, + "learning_rate": 9.90213084849369e-06, + "loss": 0.436, + "step": 2016 + }, + { + "epoch": 0.09128762163385382, + "grad_norm": 1.764351025615634, + "learning_rate": 9.901986493142629e-06, + "loss": 0.5675, + "step": 2017 + }, + { + "epoch": 0.09133288074224938, + "grad_norm": 1.4324836252716153, + "learning_rate": 9.901842032462931e-06, + "loss": 0.5895, + "step": 2018 + }, + { + "epoch": 0.09137813985064494, + "grad_norm": 0.8962622205166951, + "learning_rate": 9.901697466457706e-06, + "loss": 0.4472, + "step": 2019 + }, + { + "epoch": 0.0914233989590405, + "grad_norm": 0.8641869503878485, + "learning_rate": 9.901552795130054e-06, + "loss": 0.3986, + "step": 2020 + }, + { + "epoch": 0.09146865806743606, + "grad_norm": 0.9679911613055479, + "learning_rate": 9.901408018483087e-06, + "loss": 0.4555, + "step": 2021 + }, + { + "epoch": 0.09151391717583164, + "grad_norm": 1.7195222230010936, + "learning_rate": 9.901263136519917e-06, + "loss": 0.5488, + "step": 2022 + }, + { + "epoch": 0.0915591762842272, + "grad_norm": 0.8581028478777686, + "learning_rate": 9.901118149243653e-06, + "loss": 0.4668, + "step": 2023 + }, + { + "epoch": 0.09160443539262277, + "grad_norm": 0.7784153654002759, + "learning_rate": 9.900973056657414e-06, + "loss": 0.4235, + "step": 2024 + }, + { + "epoch": 0.09164969450101833, + "grad_norm": 0.7236386410380111, + "learning_rate": 9.900827858764315e-06, + "loss": 0.4437, + "step": 2025 + }, + { + "epoch": 0.09169495360941389, + "grad_norm": 0.8316898164519387, + "learning_rate": 9.900682555567478e-06, + "loss": 0.5253, + "step": 2026 + }, + { + "epoch": 0.09174021271780945, + "grad_norm": 0.792182988032171, + "learning_rate": 9.900537147070025e-06, + "loss": 0.3917, + "step": 2027 + }, + { + "epoch": 0.09178547182620503, + "grad_norm": 0.7853677656386151, + "learning_rate": 9.900391633275079e-06, + "loss": 0.4041, + "step": 2028 + }, + { + "epoch": 0.09183073093460059, + "grad_norm": 0.9331508273972187, + "learning_rate": 9.900246014185765e-06, + "loss": 0.4462, + "step": 2029 + }, + { + "epoch": 0.09187599004299615, + "grad_norm": 1.40698347576658, + "learning_rate": 9.900100289805217e-06, + "loss": 0.5497, + "step": 2030 + }, + { + "epoch": 0.09192124915139172, + "grad_norm": 0.8484668466241169, + "learning_rate": 9.899954460136563e-06, + "loss": 0.3621, + "step": 2031 + }, + { + "epoch": 0.09196650825978728, + "grad_norm": 0.7649499435717951, + "learning_rate": 9.899808525182935e-06, + "loss": 0.4725, + "step": 2032 + }, + { + "epoch": 0.09201176736818285, + "grad_norm": 0.8057014650689612, + "learning_rate": 9.899662484947473e-06, + "loss": 0.5284, + "step": 2033 + }, + { + "epoch": 0.09205702647657842, + "grad_norm": 1.1381958528632035, + "learning_rate": 9.899516339433308e-06, + "loss": 0.4134, + "step": 2034 + }, + { + "epoch": 0.09210228558497398, + "grad_norm": 0.7743025263168356, + "learning_rate": 9.899370088643589e-06, + "loss": 0.455, + "step": 2035 + }, + { + "epoch": 0.09214754469336954, + "grad_norm": 0.8291030740640418, + "learning_rate": 9.899223732581452e-06, + "loss": 0.4497, + "step": 2036 + }, + { + "epoch": 0.0921928038017651, + "grad_norm": 0.6110725868672015, + "learning_rate": 9.899077271250043e-06, + "loss": 0.5065, + "step": 2037 + }, + { + "epoch": 0.09223806291016066, + "grad_norm": 0.9969532555112366, + "learning_rate": 9.898930704652512e-06, + "loss": 0.4626, + "step": 2038 + }, + { + "epoch": 0.09228332201855624, + "grad_norm": 0.7716306199234213, + "learning_rate": 9.898784032792005e-06, + "loss": 0.4295, + "step": 2039 + }, + { + "epoch": 0.0923285811269518, + "grad_norm": 0.7635557014521751, + "learning_rate": 9.898637255671674e-06, + "loss": 0.4323, + "step": 2040 + }, + { + "epoch": 0.09237384023534737, + "grad_norm": 0.832082874007693, + "learning_rate": 9.898490373294673e-06, + "loss": 0.4321, + "step": 2041 + }, + { + "epoch": 0.09241909934374293, + "grad_norm": 0.9848162238601245, + "learning_rate": 9.898343385664161e-06, + "loss": 0.4305, + "step": 2042 + }, + { + "epoch": 0.09246435845213849, + "grad_norm": 0.745330890800276, + "learning_rate": 9.898196292783291e-06, + "loss": 0.4903, + "step": 2043 + }, + { + "epoch": 0.09250961756053405, + "grad_norm": 0.9633770165986147, + "learning_rate": 9.898049094655229e-06, + "loss": 0.4719, + "step": 2044 + }, + { + "epoch": 0.09255487666892963, + "grad_norm": 0.8418196344795355, + "learning_rate": 9.897901791283133e-06, + "loss": 0.4433, + "step": 2045 + }, + { + "epoch": 0.09260013577732519, + "grad_norm": 1.1470649580813783, + "learning_rate": 9.897754382670171e-06, + "loss": 0.55, + "step": 2046 + }, + { + "epoch": 0.09264539488572075, + "grad_norm": 0.8762812853825404, + "learning_rate": 9.897606868819508e-06, + "loss": 0.4489, + "step": 2047 + }, + { + "epoch": 0.09269065399411631, + "grad_norm": 0.8423179020239366, + "learning_rate": 9.897459249734318e-06, + "loss": 0.4265, + "step": 2048 + }, + { + "epoch": 0.09273591310251188, + "grad_norm": 0.7325380667861581, + "learning_rate": 9.89731152541777e-06, + "loss": 0.4449, + "step": 2049 + }, + { + "epoch": 0.09278117221090744, + "grad_norm": 0.7717625136781339, + "learning_rate": 9.897163695873036e-06, + "loss": 0.4298, + "step": 2050 + }, + { + "epoch": 0.09282643131930302, + "grad_norm": 0.7557315397060594, + "learning_rate": 9.897015761103298e-06, + "loss": 0.4139, + "step": 2051 + }, + { + "epoch": 0.09287169042769858, + "grad_norm": 0.7589363242557137, + "learning_rate": 9.896867721111726e-06, + "loss": 0.4668, + "step": 2052 + }, + { + "epoch": 0.09291694953609414, + "grad_norm": 0.8135784534771674, + "learning_rate": 9.89671957590151e-06, + "loss": 0.4654, + "step": 2053 + }, + { + "epoch": 0.0929622086444897, + "grad_norm": 0.770208676126074, + "learning_rate": 9.89657132547583e-06, + "loss": 0.4593, + "step": 2054 + }, + { + "epoch": 0.09300746775288526, + "grad_norm": 0.7130610024807864, + "learning_rate": 9.89642296983787e-06, + "loss": 0.4724, + "step": 2055 + }, + { + "epoch": 0.09305272686128083, + "grad_norm": 0.8139952994416343, + "learning_rate": 9.896274508990818e-06, + "loss": 0.4605, + "step": 2056 + }, + { + "epoch": 0.0930979859696764, + "grad_norm": 0.7597358274853931, + "learning_rate": 9.896125942937865e-06, + "loss": 0.4247, + "step": 2057 + }, + { + "epoch": 0.09314324507807197, + "grad_norm": 0.8903289322010103, + "learning_rate": 9.895977271682203e-06, + "loss": 0.4706, + "step": 2058 + }, + { + "epoch": 0.09318850418646753, + "grad_norm": 0.8455330761971988, + "learning_rate": 9.895828495227026e-06, + "loss": 0.4398, + "step": 2059 + }, + { + "epoch": 0.09323376329486309, + "grad_norm": 0.7911632261161675, + "learning_rate": 9.89567961357553e-06, + "loss": 0.5244, + "step": 2060 + }, + { + "epoch": 0.09327902240325865, + "grad_norm": 0.7714784849728722, + "learning_rate": 9.895530626730917e-06, + "loss": 0.4424, + "step": 2061 + }, + { + "epoch": 0.09332428151165421, + "grad_norm": 0.7595825000992268, + "learning_rate": 9.895381534696385e-06, + "loss": 0.4432, + "step": 2062 + }, + { + "epoch": 0.09336954062004979, + "grad_norm": 0.7933673014458721, + "learning_rate": 9.89523233747514e-06, + "loss": 0.4948, + "step": 2063 + }, + { + "epoch": 0.09341479972844535, + "grad_norm": 0.6566601566759311, + "learning_rate": 9.895083035070386e-06, + "loss": 0.5417, + "step": 2064 + }, + { + "epoch": 0.09346005883684091, + "grad_norm": 0.7426330048072817, + "learning_rate": 9.894933627485332e-06, + "loss": 0.4582, + "step": 2065 + }, + { + "epoch": 0.09350531794523648, + "grad_norm": 0.5430416280961405, + "learning_rate": 9.894784114723186e-06, + "loss": 0.5136, + "step": 2066 + }, + { + "epoch": 0.09355057705363204, + "grad_norm": 0.8886218681236913, + "learning_rate": 9.894634496787166e-06, + "loss": 0.4623, + "step": 2067 + }, + { + "epoch": 0.0935958361620276, + "grad_norm": 0.346019844103863, + "learning_rate": 9.89448477368048e-06, + "loss": 0.4935, + "step": 2068 + }, + { + "epoch": 0.09364109527042318, + "grad_norm": 0.7603346261585872, + "learning_rate": 9.89433494540635e-06, + "loss": 0.4279, + "step": 2069 + }, + { + "epoch": 0.09368635437881874, + "grad_norm": 0.44835072827611927, + "learning_rate": 9.894185011967994e-06, + "loss": 0.5457, + "step": 2070 + }, + { + "epoch": 0.0937316134872143, + "grad_norm": 0.8254020666481041, + "learning_rate": 9.894034973368633e-06, + "loss": 0.4298, + "step": 2071 + }, + { + "epoch": 0.09377687259560986, + "grad_norm": 0.7212908946016292, + "learning_rate": 9.89388482961149e-06, + "loss": 0.4348, + "step": 2072 + }, + { + "epoch": 0.09382213170400543, + "grad_norm": 0.8305662270330699, + "learning_rate": 9.893734580699796e-06, + "loss": 0.4529, + "step": 2073 + }, + { + "epoch": 0.093867390812401, + "grad_norm": 0.7498327315800464, + "learning_rate": 9.893584226636773e-06, + "loss": 0.3987, + "step": 2074 + }, + { + "epoch": 0.09391264992079656, + "grad_norm": 0.7576967573230682, + "learning_rate": 9.893433767425655e-06, + "loss": 0.4056, + "step": 2075 + }, + { + "epoch": 0.09395790902919213, + "grad_norm": 0.8421167565131027, + "learning_rate": 9.893283203069675e-06, + "loss": 0.4209, + "step": 2076 + }, + { + "epoch": 0.09400316813758769, + "grad_norm": 0.9312987055219514, + "learning_rate": 9.893132533572067e-06, + "loss": 0.4547, + "step": 2077 + }, + { + "epoch": 0.09404842724598325, + "grad_norm": 0.7287349796188367, + "learning_rate": 9.892981758936069e-06, + "loss": 0.419, + "step": 2078 + }, + { + "epoch": 0.09409368635437881, + "grad_norm": 0.7886625124732145, + "learning_rate": 9.89283087916492e-06, + "loss": 0.446, + "step": 2079 + }, + { + "epoch": 0.09413894546277439, + "grad_norm": 0.7342267624673294, + "learning_rate": 9.892679894261865e-06, + "loss": 0.4047, + "step": 2080 + }, + { + "epoch": 0.09418420457116995, + "grad_norm": 0.7551290893052203, + "learning_rate": 9.892528804230144e-06, + "loss": 0.4536, + "step": 2081 + }, + { + "epoch": 0.09422946367956551, + "grad_norm": 0.6795891080594817, + "learning_rate": 9.892377609073006e-06, + "loss": 0.4232, + "step": 2082 + }, + { + "epoch": 0.09427472278796108, + "grad_norm": 1.0309268697491385, + "learning_rate": 9.892226308793697e-06, + "loss": 0.51, + "step": 2083 + }, + { + "epoch": 0.09431998189635664, + "grad_norm": 0.999083959427165, + "learning_rate": 9.892074903395472e-06, + "loss": 0.4124, + "step": 2084 + }, + { + "epoch": 0.0943652410047522, + "grad_norm": 0.6626173222054419, + "learning_rate": 9.891923392881581e-06, + "loss": 0.4113, + "step": 2085 + }, + { + "epoch": 0.09441050011314778, + "grad_norm": 0.8008472966595057, + "learning_rate": 9.89177177725528e-06, + "loss": 0.4096, + "step": 2086 + }, + { + "epoch": 0.09445575922154334, + "grad_norm": 0.7070000181672658, + "learning_rate": 9.89162005651983e-06, + "loss": 0.4698, + "step": 2087 + }, + { + "epoch": 0.0945010183299389, + "grad_norm": 0.7102922590146776, + "learning_rate": 9.891468230678487e-06, + "loss": 0.3912, + "step": 2088 + }, + { + "epoch": 0.09454627743833446, + "grad_norm": 0.7965351685251075, + "learning_rate": 9.891316299734514e-06, + "loss": 0.4661, + "step": 2089 + }, + { + "epoch": 0.09459153654673003, + "grad_norm": 0.7133001147134137, + "learning_rate": 9.891164263691178e-06, + "loss": 0.4163, + "step": 2090 + }, + { + "epoch": 0.09463679565512559, + "grad_norm": 0.6919466012287274, + "learning_rate": 9.891012122551742e-06, + "loss": 0.4251, + "step": 2091 + }, + { + "epoch": 0.09468205476352116, + "grad_norm": 0.8172173083042604, + "learning_rate": 9.890859876319479e-06, + "loss": 0.4708, + "step": 2092 + }, + { + "epoch": 0.09472731387191673, + "grad_norm": 0.7157802079281965, + "learning_rate": 9.890707524997657e-06, + "loss": 0.4183, + "step": 2093 + }, + { + "epoch": 0.09477257298031229, + "grad_norm": 0.8012655492564951, + "learning_rate": 9.890555068589552e-06, + "loss": 0.4581, + "step": 2094 + }, + { + "epoch": 0.09481783208870785, + "grad_norm": 0.6616316877473514, + "learning_rate": 9.890402507098437e-06, + "loss": 0.4321, + "step": 2095 + }, + { + "epoch": 0.09486309119710341, + "grad_norm": 0.6796374624881355, + "learning_rate": 9.890249840527593e-06, + "loss": 0.4048, + "step": 2096 + }, + { + "epoch": 0.09490835030549898, + "grad_norm": 0.6595102338230455, + "learning_rate": 9.8900970688803e-06, + "loss": 0.5378, + "step": 2097 + }, + { + "epoch": 0.09495360941389455, + "grad_norm": 0.7408710873526465, + "learning_rate": 9.88994419215984e-06, + "loss": 0.4311, + "step": 2098 + }, + { + "epoch": 0.09499886852229011, + "grad_norm": 0.5541647525789047, + "learning_rate": 9.889791210369496e-06, + "loss": 0.5408, + "step": 2099 + }, + { + "epoch": 0.09504412763068568, + "grad_norm": 0.7911667363009298, + "learning_rate": 9.889638123512557e-06, + "loss": 0.4689, + "step": 2100 + }, + { + "epoch": 0.09508938673908124, + "grad_norm": 0.42855301210537444, + "learning_rate": 9.889484931592313e-06, + "loss": 0.5257, + "step": 2101 + }, + { + "epoch": 0.0951346458474768, + "grad_norm": 0.9473251263121182, + "learning_rate": 9.889331634612053e-06, + "loss": 0.4347, + "step": 2102 + }, + { + "epoch": 0.09517990495587236, + "grad_norm": 0.4418629563484785, + "learning_rate": 9.889178232575074e-06, + "loss": 0.4935, + "step": 2103 + }, + { + "epoch": 0.09522516406426794, + "grad_norm": 0.8028317808705686, + "learning_rate": 9.889024725484672e-06, + "loss": 0.4619, + "step": 2104 + }, + { + "epoch": 0.0952704231726635, + "grad_norm": 0.7433539611390096, + "learning_rate": 9.888871113344144e-06, + "loss": 0.4343, + "step": 2105 + }, + { + "epoch": 0.09531568228105906, + "grad_norm": 0.7133886009247582, + "learning_rate": 9.888717396156788e-06, + "loss": 0.4586, + "step": 2106 + }, + { + "epoch": 0.09536094138945463, + "grad_norm": 0.6706564434461998, + "learning_rate": 9.88856357392591e-06, + "loss": 0.4374, + "step": 2107 + }, + { + "epoch": 0.09540620049785019, + "grad_norm": 0.8111131097505304, + "learning_rate": 9.888409646654818e-06, + "loss": 0.4282, + "step": 2108 + }, + { + "epoch": 0.09545145960624575, + "grad_norm": 0.7686171681775432, + "learning_rate": 9.888255614346813e-06, + "loss": 0.457, + "step": 2109 + }, + { + "epoch": 0.09549671871464133, + "grad_norm": 0.7054175753477833, + "learning_rate": 9.88810147700521e-06, + "loss": 0.4647, + "step": 2110 + }, + { + "epoch": 0.09554197782303689, + "grad_norm": 1.0515788174529657, + "learning_rate": 9.887947234633318e-06, + "loss": 0.4704, + "step": 2111 + }, + { + "epoch": 0.09558723693143245, + "grad_norm": 0.7175508450358566, + "learning_rate": 9.887792887234453e-06, + "loss": 0.4552, + "step": 2112 + }, + { + "epoch": 0.09563249603982801, + "grad_norm": 0.7729794430588015, + "learning_rate": 9.88763843481193e-06, + "loss": 0.4396, + "step": 2113 + }, + { + "epoch": 0.09567775514822358, + "grad_norm": 0.706629623489225, + "learning_rate": 9.887483877369068e-06, + "loss": 0.4271, + "step": 2114 + }, + { + "epoch": 0.09572301425661914, + "grad_norm": 0.7475391012848127, + "learning_rate": 9.88732921490919e-06, + "loss": 0.4409, + "step": 2115 + }, + { + "epoch": 0.09576827336501471, + "grad_norm": 0.7293429645265976, + "learning_rate": 9.887174447435615e-06, + "loss": 0.4801, + "step": 2116 + }, + { + "epoch": 0.09581353247341028, + "grad_norm": 0.6990341610953973, + "learning_rate": 9.88701957495167e-06, + "loss": 0.4306, + "step": 2117 + }, + { + "epoch": 0.09585879158180584, + "grad_norm": 0.7544727476330307, + "learning_rate": 9.886864597460686e-06, + "loss": 0.4297, + "step": 2118 + }, + { + "epoch": 0.0959040506902014, + "grad_norm": 0.7856611371489157, + "learning_rate": 9.88670951496599e-06, + "loss": 0.4923, + "step": 2119 + }, + { + "epoch": 0.09594930979859696, + "grad_norm": 0.7044813533443429, + "learning_rate": 9.886554327470917e-06, + "loss": 0.4295, + "step": 2120 + }, + { + "epoch": 0.09599456890699254, + "grad_norm": 0.872281271578073, + "learning_rate": 9.886399034978798e-06, + "loss": 0.452, + "step": 2121 + }, + { + "epoch": 0.0960398280153881, + "grad_norm": 0.6719432262640053, + "learning_rate": 9.886243637492969e-06, + "loss": 0.4103, + "step": 2122 + }, + { + "epoch": 0.09608508712378366, + "grad_norm": 0.7155813838531678, + "learning_rate": 9.886088135016773e-06, + "loss": 0.4032, + "step": 2123 + }, + { + "epoch": 0.09613034623217923, + "grad_norm": 0.7312506486965615, + "learning_rate": 9.88593252755355e-06, + "loss": 0.4494, + "step": 2124 + }, + { + "epoch": 0.09617560534057479, + "grad_norm": 1.817955979217866, + "learning_rate": 9.885776815106643e-06, + "loss": 0.5495, + "step": 2125 + }, + { + "epoch": 0.09622086444897035, + "grad_norm": 0.7271498190042965, + "learning_rate": 9.885620997679397e-06, + "loss": 0.4755, + "step": 2126 + }, + { + "epoch": 0.09626612355736593, + "grad_norm": 0.9057747660426797, + "learning_rate": 9.88546507527516e-06, + "loss": 0.4454, + "step": 2127 + }, + { + "epoch": 0.09631138266576149, + "grad_norm": 0.7188105353417086, + "learning_rate": 9.885309047897285e-06, + "loss": 0.405, + "step": 2128 + }, + { + "epoch": 0.09635664177415705, + "grad_norm": 0.824647330223287, + "learning_rate": 9.88515291554912e-06, + "loss": 0.4987, + "step": 2129 + }, + { + "epoch": 0.09640190088255261, + "grad_norm": 0.8047057457137237, + "learning_rate": 9.884996678234024e-06, + "loss": 0.4606, + "step": 2130 + }, + { + "epoch": 0.09644715999094818, + "grad_norm": 0.7541646312384799, + "learning_rate": 9.884840335955354e-06, + "loss": 0.4225, + "step": 2131 + }, + { + "epoch": 0.09649241909934374, + "grad_norm": 0.7657084288201877, + "learning_rate": 9.884683888716466e-06, + "loss": 0.4381, + "step": 2132 + }, + { + "epoch": 0.09653767820773931, + "grad_norm": 0.733840982809254, + "learning_rate": 9.884527336520724e-06, + "loss": 0.4376, + "step": 2133 + }, + { + "epoch": 0.09658293731613488, + "grad_norm": 0.7450007075565378, + "learning_rate": 9.88437067937149e-06, + "loss": 0.4756, + "step": 2134 + }, + { + "epoch": 0.09662819642453044, + "grad_norm": 0.8394335018999397, + "learning_rate": 9.884213917272133e-06, + "loss": 0.5089, + "step": 2135 + }, + { + "epoch": 0.096673455532926, + "grad_norm": 0.7165101390264119, + "learning_rate": 9.88405705022602e-06, + "loss": 0.4506, + "step": 2136 + }, + { + "epoch": 0.09671871464132156, + "grad_norm": 0.7136018139841266, + "learning_rate": 9.883900078236519e-06, + "loss": 0.3896, + "step": 2137 + }, + { + "epoch": 0.09676397374971712, + "grad_norm": 1.0973366170490864, + "learning_rate": 9.883743001307007e-06, + "loss": 0.3826, + "step": 2138 + }, + { + "epoch": 0.0968092328581127, + "grad_norm": 0.7498597906623329, + "learning_rate": 9.883585819440854e-06, + "loss": 0.4749, + "step": 2139 + }, + { + "epoch": 0.09685449196650826, + "grad_norm": 0.7252975386637314, + "learning_rate": 9.883428532641445e-06, + "loss": 0.4289, + "step": 2140 + }, + { + "epoch": 0.09689975107490383, + "grad_norm": 0.7609438996643995, + "learning_rate": 9.883271140912153e-06, + "loss": 0.45, + "step": 2141 + }, + { + "epoch": 0.09694501018329939, + "grad_norm": 0.6732376326562703, + "learning_rate": 9.88311364425636e-06, + "loss": 0.4543, + "step": 2142 + }, + { + "epoch": 0.09699026929169495, + "grad_norm": 0.6372499949868802, + "learning_rate": 9.882956042677457e-06, + "loss": 0.391, + "step": 2143 + }, + { + "epoch": 0.09703552840009051, + "grad_norm": 0.7951653118971782, + "learning_rate": 9.882798336178821e-06, + "loss": 0.453, + "step": 2144 + }, + { + "epoch": 0.09708078750848609, + "grad_norm": 0.7507677694816026, + "learning_rate": 9.882640524763847e-06, + "loss": 0.4693, + "step": 2145 + }, + { + "epoch": 0.09712604661688165, + "grad_norm": 0.6870245167157061, + "learning_rate": 9.882482608435924e-06, + "loss": 0.3988, + "step": 2146 + }, + { + "epoch": 0.09717130572527721, + "grad_norm": 0.7071117925139719, + "learning_rate": 9.882324587198446e-06, + "loss": 0.4471, + "step": 2147 + }, + { + "epoch": 0.09721656483367278, + "grad_norm": 0.6773979773976042, + "learning_rate": 9.882166461054806e-06, + "loss": 0.4139, + "step": 2148 + }, + { + "epoch": 0.09726182394206834, + "grad_norm": 0.6139954213727978, + "learning_rate": 9.882008230008403e-06, + "loss": 0.5461, + "step": 2149 + }, + { + "epoch": 0.0973070830504639, + "grad_norm": 0.5598559951748219, + "learning_rate": 9.881849894062639e-06, + "loss": 0.5198, + "step": 2150 + }, + { + "epoch": 0.09735234215885948, + "grad_norm": 0.7831826157949604, + "learning_rate": 9.881691453220912e-06, + "loss": 0.4562, + "step": 2151 + }, + { + "epoch": 0.09739760126725504, + "grad_norm": 0.7644327354430474, + "learning_rate": 9.88153290748663e-06, + "loss": 0.4838, + "step": 2152 + }, + { + "epoch": 0.0974428603756506, + "grad_norm": 0.824965801673457, + "learning_rate": 9.8813742568632e-06, + "loss": 0.4401, + "step": 2153 + }, + { + "epoch": 0.09748811948404616, + "grad_norm": 0.7638742540229725, + "learning_rate": 9.881215501354025e-06, + "loss": 0.4399, + "step": 2154 + }, + { + "epoch": 0.09753337859244172, + "grad_norm": 0.6250697994456945, + "learning_rate": 9.881056640962524e-06, + "loss": 0.5198, + "step": 2155 + }, + { + "epoch": 0.09757863770083729, + "grad_norm": 0.7161150856416505, + "learning_rate": 9.880897675692105e-06, + "loss": 0.4157, + "step": 2156 + }, + { + "epoch": 0.09762389680923286, + "grad_norm": 0.7066523082148944, + "learning_rate": 9.880738605546186e-06, + "loss": 0.3921, + "step": 2157 + }, + { + "epoch": 0.09766915591762843, + "grad_norm": 0.6960762618579934, + "learning_rate": 9.880579430528183e-06, + "loss": 0.398, + "step": 2158 + }, + { + "epoch": 0.09771441502602399, + "grad_norm": 0.6712418881697726, + "learning_rate": 9.880420150641519e-06, + "loss": 0.3952, + "step": 2159 + }, + { + "epoch": 0.09775967413441955, + "grad_norm": 0.8044310928745596, + "learning_rate": 9.880260765889615e-06, + "loss": 0.4779, + "step": 2160 + }, + { + "epoch": 0.09780493324281511, + "grad_norm": 0.7582313840845955, + "learning_rate": 9.880101276275896e-06, + "loss": 0.4259, + "step": 2161 + }, + { + "epoch": 0.09785019235121069, + "grad_norm": 0.411187499329373, + "learning_rate": 9.87994168180379e-06, + "loss": 0.5103, + "step": 2162 + }, + { + "epoch": 0.09789545145960625, + "grad_norm": 0.8352279353409846, + "learning_rate": 9.879781982476722e-06, + "loss": 0.5148, + "step": 2163 + }, + { + "epoch": 0.09794071056800181, + "grad_norm": 0.6975707964456537, + "learning_rate": 9.879622178298128e-06, + "loss": 0.431, + "step": 2164 + }, + { + "epoch": 0.09798596967639737, + "grad_norm": 0.7346753431411169, + "learning_rate": 9.879462269271439e-06, + "loss": 0.4202, + "step": 2165 + }, + { + "epoch": 0.09803122878479294, + "grad_norm": 0.347524141797627, + "learning_rate": 9.879302255400092e-06, + "loss": 0.5198, + "step": 2166 + }, + { + "epoch": 0.0980764878931885, + "grad_norm": 0.3586006925617358, + "learning_rate": 9.879142136687524e-06, + "loss": 0.5421, + "step": 2167 + }, + { + "epoch": 0.09812174700158408, + "grad_norm": 0.9478318513259163, + "learning_rate": 9.878981913137178e-06, + "loss": 0.4372, + "step": 2168 + }, + { + "epoch": 0.09816700610997964, + "grad_norm": 0.7835830213363405, + "learning_rate": 9.878821584752495e-06, + "loss": 0.3997, + "step": 2169 + }, + { + "epoch": 0.0982122652183752, + "grad_norm": 0.7127149457361394, + "learning_rate": 9.878661151536923e-06, + "loss": 0.4473, + "step": 2170 + }, + { + "epoch": 0.09825752432677076, + "grad_norm": 0.9555889260859909, + "learning_rate": 9.878500613493904e-06, + "loss": 0.4679, + "step": 2171 + }, + { + "epoch": 0.09830278343516632, + "grad_norm": 0.741480683152549, + "learning_rate": 9.87833997062689e-06, + "loss": 0.4414, + "step": 2172 + }, + { + "epoch": 0.09834804254356189, + "grad_norm": 0.7204742635936184, + "learning_rate": 9.878179222939333e-06, + "loss": 0.4415, + "step": 2173 + }, + { + "epoch": 0.09839330165195746, + "grad_norm": 0.898233981729686, + "learning_rate": 9.878018370434686e-06, + "loss": 0.4517, + "step": 2174 + }, + { + "epoch": 0.09843856076035302, + "grad_norm": 0.5640581299672587, + "learning_rate": 9.877857413116408e-06, + "loss": 0.5291, + "step": 2175 + }, + { + "epoch": 0.09848381986874859, + "grad_norm": 0.8242186289537619, + "learning_rate": 9.877696350987954e-06, + "loss": 0.4235, + "step": 2176 + }, + { + "epoch": 0.09852907897714415, + "grad_norm": 0.7710921868502916, + "learning_rate": 9.877535184052786e-06, + "loss": 0.4414, + "step": 2177 + }, + { + "epoch": 0.09857433808553971, + "grad_norm": 0.6927559731602532, + "learning_rate": 9.877373912314367e-06, + "loss": 0.4113, + "step": 2178 + }, + { + "epoch": 0.09861959719393527, + "grad_norm": 0.3404435555777899, + "learning_rate": 9.877212535776161e-06, + "loss": 0.5251, + "step": 2179 + }, + { + "epoch": 0.09866485630233085, + "grad_norm": 0.8667350818901997, + "learning_rate": 9.87705105444164e-06, + "loss": 0.4138, + "step": 2180 + }, + { + "epoch": 0.09871011541072641, + "grad_norm": 0.9045198118800067, + "learning_rate": 9.876889468314268e-06, + "loss": 0.4152, + "step": 2181 + }, + { + "epoch": 0.09875537451912197, + "grad_norm": 0.6987822562694133, + "learning_rate": 9.876727777397522e-06, + "loss": 0.4166, + "step": 2182 + }, + { + "epoch": 0.09880063362751754, + "grad_norm": 0.7422912359370816, + "learning_rate": 9.876565981694871e-06, + "loss": 0.4554, + "step": 2183 + }, + { + "epoch": 0.0988458927359131, + "grad_norm": 0.8370568389817287, + "learning_rate": 9.876404081209796e-06, + "loss": 0.4713, + "step": 2184 + }, + { + "epoch": 0.09889115184430866, + "grad_norm": 0.7619494348801985, + "learning_rate": 9.876242075945774e-06, + "loss": 0.4493, + "step": 2185 + }, + { + "epoch": 0.09893641095270424, + "grad_norm": 0.7099278623393065, + "learning_rate": 9.876079965906284e-06, + "loss": 0.4599, + "step": 2186 + }, + { + "epoch": 0.0989816700610998, + "grad_norm": 0.6536632383972745, + "learning_rate": 9.875917751094814e-06, + "loss": 0.3861, + "step": 2187 + }, + { + "epoch": 0.09902692916949536, + "grad_norm": 0.7126200240781412, + "learning_rate": 9.875755431514846e-06, + "loss": 0.4468, + "step": 2188 + }, + { + "epoch": 0.09907218827789092, + "grad_norm": 0.723300049733845, + "learning_rate": 9.875593007169868e-06, + "loss": 0.4283, + "step": 2189 + }, + { + "epoch": 0.09911744738628649, + "grad_norm": 1.9414330444935057, + "learning_rate": 9.87543047806337e-06, + "loss": 0.5392, + "step": 2190 + }, + { + "epoch": 0.09916270649468205, + "grad_norm": 0.7713129418149638, + "learning_rate": 9.875267844198846e-06, + "loss": 0.4594, + "step": 2191 + }, + { + "epoch": 0.09920796560307762, + "grad_norm": 0.36286998565742357, + "learning_rate": 9.875105105579789e-06, + "loss": 0.5206, + "step": 2192 + }, + { + "epoch": 0.09925322471147319, + "grad_norm": 0.7090515722591921, + "learning_rate": 9.874942262209695e-06, + "loss": 0.4469, + "step": 2193 + }, + { + "epoch": 0.09929848381986875, + "grad_norm": 0.35163704632743675, + "learning_rate": 9.874779314092065e-06, + "loss": 0.513, + "step": 2194 + }, + { + "epoch": 0.09934374292826431, + "grad_norm": 0.7991550466891403, + "learning_rate": 9.874616261230398e-06, + "loss": 0.4538, + "step": 2195 + }, + { + "epoch": 0.09938900203665987, + "grad_norm": 0.7567345337215104, + "learning_rate": 9.874453103628201e-06, + "loss": 0.4492, + "step": 2196 + }, + { + "epoch": 0.09943426114505544, + "grad_norm": 0.6729555115890912, + "learning_rate": 9.874289841288976e-06, + "loss": 0.419, + "step": 2197 + }, + { + "epoch": 0.09947952025345101, + "grad_norm": 0.7729185223247828, + "learning_rate": 9.874126474216234e-06, + "loss": 0.4344, + "step": 2198 + }, + { + "epoch": 0.09952477936184657, + "grad_norm": 0.7355415324216681, + "learning_rate": 9.873963002413483e-06, + "loss": 0.4201, + "step": 2199 + }, + { + "epoch": 0.09957003847024214, + "grad_norm": 0.6982893543482479, + "learning_rate": 9.873799425884235e-06, + "loss": 0.4277, + "step": 2200 + }, + { + "epoch": 0.0996152975786377, + "grad_norm": 0.773880454489427, + "learning_rate": 9.873635744632008e-06, + "loss": 0.4539, + "step": 2201 + }, + { + "epoch": 0.09966055668703326, + "grad_norm": 0.8494593639519243, + "learning_rate": 9.873471958660316e-06, + "loss": 0.4192, + "step": 2202 + }, + { + "epoch": 0.09970581579542882, + "grad_norm": 0.8580004122744093, + "learning_rate": 9.873308067972679e-06, + "loss": 0.5519, + "step": 2203 + }, + { + "epoch": 0.0997510749038244, + "grad_norm": 0.7290001443780998, + "learning_rate": 9.87314407257262e-06, + "loss": 0.4467, + "step": 2204 + }, + { + "epoch": 0.09979633401221996, + "grad_norm": 0.7988956837738996, + "learning_rate": 9.87297997246366e-06, + "loss": 0.4354, + "step": 2205 + }, + { + "epoch": 0.09984159312061552, + "grad_norm": 0.7358440002456154, + "learning_rate": 9.872815767649329e-06, + "loss": 0.4403, + "step": 2206 + }, + { + "epoch": 0.09988685222901109, + "grad_norm": 0.7204005245588492, + "learning_rate": 9.87265145813315e-06, + "loss": 0.4011, + "step": 2207 + }, + { + "epoch": 0.09993211133740665, + "grad_norm": 0.9151800895514478, + "learning_rate": 9.872487043918659e-06, + "loss": 0.4647, + "step": 2208 + }, + { + "epoch": 0.09997737044580222, + "grad_norm": 0.6684901673506206, + "learning_rate": 9.872322525009383e-06, + "loss": 0.415, + "step": 2209 + }, + { + "epoch": 0.10002262955419779, + "grad_norm": 0.810784992270613, + "learning_rate": 9.872157901408863e-06, + "loss": 0.4549, + "step": 2210 + }, + { + "epoch": 0.10006788866259335, + "grad_norm": 0.7561176597277325, + "learning_rate": 9.871993173120633e-06, + "loss": 0.4739, + "step": 2211 + }, + { + "epoch": 0.10011314777098891, + "grad_norm": 0.7224618216199857, + "learning_rate": 9.871828340148232e-06, + "loss": 0.3496, + "step": 2212 + }, + { + "epoch": 0.10015840687938447, + "grad_norm": 0.48863707696024233, + "learning_rate": 9.871663402495202e-06, + "loss": 0.537, + "step": 2213 + }, + { + "epoch": 0.10020366598778004, + "grad_norm": 0.7221464520254421, + "learning_rate": 9.87149836016509e-06, + "loss": 0.43, + "step": 2214 + }, + { + "epoch": 0.10024892509617561, + "grad_norm": 0.7771878759212629, + "learning_rate": 9.871333213161438e-06, + "loss": 0.436, + "step": 2215 + }, + { + "epoch": 0.10029418420457117, + "grad_norm": 0.7560332076204587, + "learning_rate": 9.871167961487798e-06, + "loss": 0.4362, + "step": 2216 + }, + { + "epoch": 0.10033944331296674, + "grad_norm": 0.8018401387575208, + "learning_rate": 9.871002605147717e-06, + "loss": 0.4112, + "step": 2217 + }, + { + "epoch": 0.1003847024213623, + "grad_norm": 0.6839784259457237, + "learning_rate": 9.870837144144752e-06, + "loss": 0.4268, + "step": 2218 + }, + { + "epoch": 0.10042996152975786, + "grad_norm": 0.6933617275224555, + "learning_rate": 9.870671578482457e-06, + "loss": 0.4214, + "step": 2219 + }, + { + "epoch": 0.10047522063815342, + "grad_norm": 0.7206012184599776, + "learning_rate": 9.870505908164386e-06, + "loss": 0.4716, + "step": 2220 + }, + { + "epoch": 0.100520479746549, + "grad_norm": 0.8075038548608849, + "learning_rate": 9.870340133194103e-06, + "loss": 0.4395, + "step": 2221 + }, + { + "epoch": 0.10056573885494456, + "grad_norm": 0.7635458649375578, + "learning_rate": 9.870174253575169e-06, + "loss": 0.4831, + "step": 2222 + }, + { + "epoch": 0.10061099796334012, + "grad_norm": 0.8634190586113031, + "learning_rate": 9.870008269311148e-06, + "loss": 0.437, + "step": 2223 + }, + { + "epoch": 0.10065625707173569, + "grad_norm": 0.6343631893568048, + "learning_rate": 9.869842180405607e-06, + "loss": 0.4045, + "step": 2224 + }, + { + "epoch": 0.10070151618013125, + "grad_norm": 0.7399088703990067, + "learning_rate": 9.869675986862113e-06, + "loss": 0.4848, + "step": 2225 + }, + { + "epoch": 0.10074677528852681, + "grad_norm": 0.7663305947086005, + "learning_rate": 9.869509688684238e-06, + "loss": 0.4651, + "step": 2226 + }, + { + "epoch": 0.10079203439692239, + "grad_norm": 0.7544720440825532, + "learning_rate": 9.869343285875556e-06, + "loss": 0.4519, + "step": 2227 + }, + { + "epoch": 0.10083729350531795, + "grad_norm": 0.7707733603624612, + "learning_rate": 9.869176778439641e-06, + "loss": 0.4276, + "step": 2228 + }, + { + "epoch": 0.10088255261371351, + "grad_norm": 0.8454366073314977, + "learning_rate": 9.869010166380074e-06, + "loss": 0.4214, + "step": 2229 + }, + { + "epoch": 0.10092781172210907, + "grad_norm": 0.5461864636688677, + "learning_rate": 9.868843449700429e-06, + "loss": 0.5228, + "step": 2230 + }, + { + "epoch": 0.10097307083050464, + "grad_norm": 0.7294155882889573, + "learning_rate": 9.868676628404294e-06, + "loss": 0.4261, + "step": 2231 + }, + { + "epoch": 0.1010183299389002, + "grad_norm": 0.3168440817594752, + "learning_rate": 9.86850970249525e-06, + "loss": 0.5219, + "step": 2232 + }, + { + "epoch": 0.10106358904729577, + "grad_norm": 0.7579059657604005, + "learning_rate": 9.868342671976887e-06, + "loss": 0.3979, + "step": 2233 + }, + { + "epoch": 0.10110884815569134, + "grad_norm": 0.7292905542346751, + "learning_rate": 9.86817553685279e-06, + "loss": 0.4608, + "step": 2234 + }, + { + "epoch": 0.1011541072640869, + "grad_norm": 0.8458997911415539, + "learning_rate": 9.868008297126552e-06, + "loss": 0.4371, + "step": 2235 + }, + { + "epoch": 0.10119936637248246, + "grad_norm": 0.824759911636903, + "learning_rate": 9.867840952801768e-06, + "loss": 0.3859, + "step": 2236 + }, + { + "epoch": 0.10124462548087802, + "grad_norm": 0.7304273464477413, + "learning_rate": 9.867673503882031e-06, + "loss": 0.44, + "step": 2237 + }, + { + "epoch": 0.10128988458927358, + "grad_norm": 0.7799304327722856, + "learning_rate": 9.867505950370942e-06, + "loss": 0.4505, + "step": 2238 + }, + { + "epoch": 0.10133514369766916, + "grad_norm": 0.7306543877797002, + "learning_rate": 9.8673382922721e-06, + "loss": 0.4801, + "step": 2239 + }, + { + "epoch": 0.10138040280606472, + "grad_norm": 0.7435279790784834, + "learning_rate": 9.867170529589106e-06, + "loss": 0.5353, + "step": 2240 + }, + { + "epoch": 0.10142566191446029, + "grad_norm": 0.6603844615030072, + "learning_rate": 9.867002662325564e-06, + "loss": 0.4005, + "step": 2241 + }, + { + "epoch": 0.10147092102285585, + "grad_norm": 0.8630624848995856, + "learning_rate": 9.866834690485083e-06, + "loss": 0.4415, + "step": 2242 + }, + { + "epoch": 0.10151618013125141, + "grad_norm": 0.9896646400129324, + "learning_rate": 9.866666614071274e-06, + "loss": 0.4243, + "step": 2243 + }, + { + "epoch": 0.10156143923964697, + "grad_norm": 0.41220129274846495, + "learning_rate": 9.866498433087745e-06, + "loss": 0.5234, + "step": 2244 + }, + { + "epoch": 0.10160669834804255, + "grad_norm": 0.8362621005931979, + "learning_rate": 9.86633014753811e-06, + "loss": 0.4474, + "step": 2245 + }, + { + "epoch": 0.10165195745643811, + "grad_norm": 0.7503981593504316, + "learning_rate": 9.866161757425988e-06, + "loss": 0.4534, + "step": 2246 + }, + { + "epoch": 0.10169721656483367, + "grad_norm": 0.41195840898795105, + "learning_rate": 9.865993262754993e-06, + "loss": 0.5063, + "step": 2247 + }, + { + "epoch": 0.10174247567322924, + "grad_norm": 0.7265194341332623, + "learning_rate": 9.86582466352875e-06, + "loss": 0.433, + "step": 2248 + }, + { + "epoch": 0.1017877347816248, + "grad_norm": 0.8409081526645884, + "learning_rate": 9.865655959750877e-06, + "loss": 0.4292, + "step": 2249 + }, + { + "epoch": 0.10183299389002037, + "grad_norm": 3.0630792854097093, + "learning_rate": 9.865487151425003e-06, + "loss": 0.4367, + "step": 2250 + }, + { + "epoch": 0.10187825299841594, + "grad_norm": 0.8861233445538413, + "learning_rate": 9.865318238554754e-06, + "loss": 0.4542, + "step": 2251 + }, + { + "epoch": 0.1019235121068115, + "grad_norm": 0.7822807066601043, + "learning_rate": 9.865149221143755e-06, + "loss": 0.418, + "step": 2252 + }, + { + "epoch": 0.10196877121520706, + "grad_norm": 0.7559943477806504, + "learning_rate": 9.864980099195644e-06, + "loss": 0.4496, + "step": 2253 + }, + { + "epoch": 0.10201403032360262, + "grad_norm": 0.768806042496435, + "learning_rate": 9.864810872714053e-06, + "loss": 0.4134, + "step": 2254 + }, + { + "epoch": 0.10205928943199818, + "grad_norm": 0.7047306300749351, + "learning_rate": 9.864641541702616e-06, + "loss": 0.4246, + "step": 2255 + }, + { + "epoch": 0.10210454854039376, + "grad_norm": 0.7028731886223434, + "learning_rate": 9.864472106164974e-06, + "loss": 0.4247, + "step": 2256 + }, + { + "epoch": 0.10214980764878932, + "grad_norm": 0.7153130576512241, + "learning_rate": 9.864302566104764e-06, + "loss": 0.4625, + "step": 2257 + }, + { + "epoch": 0.10219506675718489, + "grad_norm": 1.048917465822911, + "learning_rate": 9.864132921525633e-06, + "loss": 0.4191, + "step": 2258 + }, + { + "epoch": 0.10224032586558045, + "grad_norm": 0.7899022592166589, + "learning_rate": 9.863963172431225e-06, + "loss": 0.4477, + "step": 2259 + }, + { + "epoch": 0.10228558497397601, + "grad_norm": 0.47204269426321754, + "learning_rate": 9.863793318825186e-06, + "loss": 0.5383, + "step": 2260 + }, + { + "epoch": 0.10233084408237157, + "grad_norm": 0.6892225594080795, + "learning_rate": 9.863623360711167e-06, + "loss": 0.4722, + "step": 2261 + }, + { + "epoch": 0.10237610319076715, + "grad_norm": 0.748046058634716, + "learning_rate": 9.86345329809282e-06, + "loss": 0.4466, + "step": 2262 + }, + { + "epoch": 0.10242136229916271, + "grad_norm": 0.7130768776206144, + "learning_rate": 9.863283130973799e-06, + "loss": 0.4145, + "step": 2263 + }, + { + "epoch": 0.10246662140755827, + "grad_norm": 0.8219802036337167, + "learning_rate": 9.86311285935776e-06, + "loss": 0.4318, + "step": 2264 + }, + { + "epoch": 0.10251188051595383, + "grad_norm": 0.3198446179300901, + "learning_rate": 9.86294248324836e-06, + "loss": 0.5065, + "step": 2265 + }, + { + "epoch": 0.1025571396243494, + "grad_norm": 0.756389577087066, + "learning_rate": 9.862772002649261e-06, + "loss": 0.4529, + "step": 2266 + }, + { + "epoch": 0.10260239873274496, + "grad_norm": 0.7648477142665258, + "learning_rate": 9.862601417564128e-06, + "loss": 0.4489, + "step": 2267 + }, + { + "epoch": 0.10264765784114054, + "grad_norm": 0.7699363576476402, + "learning_rate": 9.862430727996627e-06, + "loss": 0.4219, + "step": 2268 + }, + { + "epoch": 0.1026929169495361, + "grad_norm": 0.7729899239840131, + "learning_rate": 9.86225993395042e-06, + "loss": 0.407, + "step": 2269 + }, + { + "epoch": 0.10273817605793166, + "grad_norm": 0.7204098134653517, + "learning_rate": 9.86208903542918e-06, + "loss": 0.3968, + "step": 2270 + }, + { + "epoch": 0.10278343516632722, + "grad_norm": 0.35254404610092344, + "learning_rate": 9.861918032436582e-06, + "loss": 0.5204, + "step": 2271 + }, + { + "epoch": 0.10282869427472278, + "grad_norm": 0.7655693019851239, + "learning_rate": 9.861746924976297e-06, + "loss": 0.4407, + "step": 2272 + }, + { + "epoch": 0.10287395338311835, + "grad_norm": 0.7358913387904376, + "learning_rate": 9.861575713052e-06, + "loss": 0.444, + "step": 2273 + }, + { + "epoch": 0.10291921249151392, + "grad_norm": 0.7648849776920894, + "learning_rate": 9.861404396667375e-06, + "loss": 0.4331, + "step": 2274 + }, + { + "epoch": 0.10296447159990949, + "grad_norm": 0.7137409472692013, + "learning_rate": 9.861232975826098e-06, + "loss": 0.4632, + "step": 2275 + }, + { + "epoch": 0.10300973070830505, + "grad_norm": 0.8041533524954448, + "learning_rate": 9.861061450531857e-06, + "loss": 0.4217, + "step": 2276 + }, + { + "epoch": 0.10305498981670061, + "grad_norm": 0.6937125693526789, + "learning_rate": 9.860889820788333e-06, + "loss": 0.3682, + "step": 2277 + }, + { + "epoch": 0.10310024892509617, + "grad_norm": 0.3890112300684754, + "learning_rate": 9.860718086599217e-06, + "loss": 0.5088, + "step": 2278 + }, + { + "epoch": 0.10314550803349173, + "grad_norm": 0.755158671146552, + "learning_rate": 9.860546247968196e-06, + "loss": 0.4567, + "step": 2279 + }, + { + "epoch": 0.10319076714188731, + "grad_norm": 0.6964351594584257, + "learning_rate": 9.860374304898966e-06, + "loss": 0.3703, + "step": 2280 + }, + { + "epoch": 0.10323602625028287, + "grad_norm": 0.6942034226642203, + "learning_rate": 9.86020225739522e-06, + "loss": 0.4203, + "step": 2281 + }, + { + "epoch": 0.10328128535867843, + "grad_norm": 0.7493731249129036, + "learning_rate": 9.860030105460655e-06, + "loss": 0.4105, + "step": 2282 + }, + { + "epoch": 0.103326544467074, + "grad_norm": 0.7875242177860882, + "learning_rate": 9.859857849098967e-06, + "loss": 0.3853, + "step": 2283 + }, + { + "epoch": 0.10337180357546956, + "grad_norm": 0.6723055543092835, + "learning_rate": 9.859685488313861e-06, + "loss": 0.4332, + "step": 2284 + }, + { + "epoch": 0.10341706268386512, + "grad_norm": 0.6560360537570137, + "learning_rate": 9.859513023109037e-06, + "loss": 0.4391, + "step": 2285 + }, + { + "epoch": 0.1034623217922607, + "grad_norm": 0.6849114895958732, + "learning_rate": 9.859340453488206e-06, + "loss": 0.4383, + "step": 2286 + }, + { + "epoch": 0.10350758090065626, + "grad_norm": 0.7783040393773684, + "learning_rate": 9.859167779455072e-06, + "loss": 0.4432, + "step": 2287 + }, + { + "epoch": 0.10355284000905182, + "grad_norm": 0.724288770259416, + "learning_rate": 9.858995001013347e-06, + "loss": 0.4384, + "step": 2288 + }, + { + "epoch": 0.10359809911744738, + "grad_norm": 0.7111758444461136, + "learning_rate": 9.858822118166742e-06, + "loss": 0.4456, + "step": 2289 + }, + { + "epoch": 0.10364335822584295, + "grad_norm": 0.6932478030538325, + "learning_rate": 9.85864913091897e-06, + "loss": 0.4351, + "step": 2290 + }, + { + "epoch": 0.10368861733423851, + "grad_norm": 0.7579749515859955, + "learning_rate": 9.858476039273755e-06, + "loss": 0.4304, + "step": 2291 + }, + { + "epoch": 0.10373387644263408, + "grad_norm": 0.7060927633532289, + "learning_rate": 9.85830284323481e-06, + "loss": 0.4056, + "step": 2292 + }, + { + "epoch": 0.10377913555102965, + "grad_norm": 0.7485879479542586, + "learning_rate": 9.858129542805857e-06, + "loss": 0.4651, + "step": 2293 + }, + { + "epoch": 0.10382439465942521, + "grad_norm": 0.48112576797143786, + "learning_rate": 9.857956137990621e-06, + "loss": 0.5417, + "step": 2294 + }, + { + "epoch": 0.10386965376782077, + "grad_norm": 0.7667656764960077, + "learning_rate": 9.857782628792826e-06, + "loss": 0.4429, + "step": 2295 + }, + { + "epoch": 0.10391491287621633, + "grad_norm": 0.6954661751657719, + "learning_rate": 9.857609015216205e-06, + "loss": 0.436, + "step": 2296 + }, + { + "epoch": 0.10396017198461191, + "grad_norm": 0.6967576041244012, + "learning_rate": 9.857435297264484e-06, + "loss": 0.4751, + "step": 2297 + }, + { + "epoch": 0.10400543109300747, + "grad_norm": 0.834588518369712, + "learning_rate": 9.857261474941397e-06, + "loss": 0.4228, + "step": 2298 + }, + { + "epoch": 0.10405069020140303, + "grad_norm": 0.6721588650367362, + "learning_rate": 9.85708754825068e-06, + "loss": 0.3802, + "step": 2299 + }, + { + "epoch": 0.1040959493097986, + "grad_norm": 0.41026951413440077, + "learning_rate": 9.856913517196065e-06, + "loss": 0.5238, + "step": 2300 + }, + { + "epoch": 0.10414120841819416, + "grad_norm": 0.7035267949243749, + "learning_rate": 9.8567393817813e-06, + "loss": 0.4336, + "step": 2301 + }, + { + "epoch": 0.10418646752658972, + "grad_norm": 0.7585486917088173, + "learning_rate": 9.85656514201012e-06, + "loss": 0.4439, + "step": 2302 + }, + { + "epoch": 0.1042317266349853, + "grad_norm": 0.6649764632725609, + "learning_rate": 9.85639079788627e-06, + "loss": 0.403, + "step": 2303 + }, + { + "epoch": 0.10427698574338086, + "grad_norm": 0.7385706952114729, + "learning_rate": 9.856216349413499e-06, + "loss": 0.4479, + "step": 2304 + }, + { + "epoch": 0.10432224485177642, + "grad_norm": 0.6905045514972701, + "learning_rate": 9.856041796595553e-06, + "loss": 0.3824, + "step": 2305 + }, + { + "epoch": 0.10436750396017198, + "grad_norm": 0.6327526228856807, + "learning_rate": 9.855867139436182e-06, + "loss": 0.395, + "step": 2306 + }, + { + "epoch": 0.10441276306856755, + "grad_norm": 0.7546079408741386, + "learning_rate": 9.85569237793914e-06, + "loss": 0.48, + "step": 2307 + }, + { + "epoch": 0.10445802217696311, + "grad_norm": 0.737242030395461, + "learning_rate": 9.855517512108182e-06, + "loss": 0.4505, + "step": 2308 + }, + { + "epoch": 0.10450328128535868, + "grad_norm": 0.6670828689286896, + "learning_rate": 9.855342541947065e-06, + "loss": 0.4319, + "step": 2309 + }, + { + "epoch": 0.10454854039375425, + "grad_norm": 0.7111244403772158, + "learning_rate": 9.855167467459548e-06, + "loss": 0.4621, + "step": 2310 + }, + { + "epoch": 0.10459379950214981, + "grad_norm": 0.7428775133067482, + "learning_rate": 9.854992288649397e-06, + "loss": 0.4183, + "step": 2311 + }, + { + "epoch": 0.10463905861054537, + "grad_norm": 0.7683796554367252, + "learning_rate": 9.85481700552037e-06, + "loss": 0.4512, + "step": 2312 + }, + { + "epoch": 0.10468431771894093, + "grad_norm": 0.6426490380469407, + "learning_rate": 9.854641618076236e-06, + "loss": 0.4567, + "step": 2313 + }, + { + "epoch": 0.1047295768273365, + "grad_norm": 0.6711259744328443, + "learning_rate": 9.854466126320763e-06, + "loss": 0.401, + "step": 2314 + }, + { + "epoch": 0.10477483593573207, + "grad_norm": 0.8455675550799173, + "learning_rate": 9.854290530257723e-06, + "loss": 0.4774, + "step": 2315 + }, + { + "epoch": 0.10482009504412763, + "grad_norm": 0.7467569882797702, + "learning_rate": 9.85411482989089e-06, + "loss": 0.4238, + "step": 2316 + }, + { + "epoch": 0.1048653541525232, + "grad_norm": 0.734320352470716, + "learning_rate": 9.853939025224037e-06, + "loss": 0.4381, + "step": 2317 + }, + { + "epoch": 0.10491061326091876, + "grad_norm": 0.6341079183035991, + "learning_rate": 9.853763116260941e-06, + "loss": 0.4068, + "step": 2318 + }, + { + "epoch": 0.10495587236931432, + "grad_norm": 0.7800650722788848, + "learning_rate": 9.853587103005382e-06, + "loss": 0.4489, + "step": 2319 + }, + { + "epoch": 0.10500113147770988, + "grad_norm": 0.451047452424555, + "learning_rate": 9.853410985461145e-06, + "loss": 0.5282, + "step": 2320 + }, + { + "epoch": 0.10504639058610546, + "grad_norm": 0.38852085304725875, + "learning_rate": 9.85323476363201e-06, + "loss": 0.5363, + "step": 2321 + }, + { + "epoch": 0.10509164969450102, + "grad_norm": 0.31390542967558155, + "learning_rate": 9.853058437521768e-06, + "loss": 0.5195, + "step": 2322 + }, + { + "epoch": 0.10513690880289658, + "grad_norm": 0.7989730108740146, + "learning_rate": 9.852882007134202e-06, + "loss": 0.434, + "step": 2323 + }, + { + "epoch": 0.10518216791129215, + "grad_norm": 0.749871253521505, + "learning_rate": 9.852705472473108e-06, + "loss": 0.4473, + "step": 2324 + }, + { + "epoch": 0.10522742701968771, + "grad_norm": 0.7727191908261867, + "learning_rate": 9.852528833542278e-06, + "loss": 0.4168, + "step": 2325 + }, + { + "epoch": 0.10527268612808327, + "grad_norm": 0.7798115976467546, + "learning_rate": 9.852352090345504e-06, + "loss": 0.4311, + "step": 2326 + }, + { + "epoch": 0.10531794523647885, + "grad_norm": 0.7268627450505267, + "learning_rate": 9.85217524288659e-06, + "loss": 0.4638, + "step": 2327 + }, + { + "epoch": 0.10536320434487441, + "grad_norm": 0.8698990762933937, + "learning_rate": 9.851998291169332e-06, + "loss": 0.4049, + "step": 2328 + }, + { + "epoch": 0.10540846345326997, + "grad_norm": 0.6206157334058049, + "learning_rate": 9.85182123519753e-06, + "loss": 0.5142, + "step": 2329 + }, + { + "epoch": 0.10545372256166553, + "grad_norm": 0.7495598486543176, + "learning_rate": 9.851644074974992e-06, + "loss": 0.4425, + "step": 2330 + }, + { + "epoch": 0.1054989816700611, + "grad_norm": 0.6881993019531567, + "learning_rate": 9.851466810505523e-06, + "loss": 0.4056, + "step": 2331 + }, + { + "epoch": 0.10554424077845666, + "grad_norm": 0.7274463239961425, + "learning_rate": 9.851289441792934e-06, + "loss": 0.4254, + "step": 2332 + }, + { + "epoch": 0.10558949988685223, + "grad_norm": 0.7768959308161464, + "learning_rate": 9.851111968841033e-06, + "loss": 0.4515, + "step": 2333 + }, + { + "epoch": 0.1056347589952478, + "grad_norm": 0.7460630867250454, + "learning_rate": 9.850934391653636e-06, + "loss": 0.3977, + "step": 2334 + }, + { + "epoch": 0.10568001810364336, + "grad_norm": 0.7049755514403179, + "learning_rate": 9.850756710234557e-06, + "loss": 0.3988, + "step": 2335 + }, + { + "epoch": 0.10572527721203892, + "grad_norm": 0.8415832391023809, + "learning_rate": 9.850578924587614e-06, + "loss": 0.4394, + "step": 2336 + }, + { + "epoch": 0.10577053632043448, + "grad_norm": 0.9337415198625878, + "learning_rate": 9.850401034716629e-06, + "loss": 0.3985, + "step": 2337 + }, + { + "epoch": 0.10581579542883006, + "grad_norm": 0.7403823554645752, + "learning_rate": 9.85022304062542e-06, + "loss": 0.4113, + "step": 2338 + }, + { + "epoch": 0.10586105453722562, + "grad_norm": 0.7597975447386219, + "learning_rate": 9.850044942317814e-06, + "loss": 0.4565, + "step": 2339 + }, + { + "epoch": 0.10590631364562118, + "grad_norm": 0.7379521994471612, + "learning_rate": 9.84986673979764e-06, + "loss": 0.4617, + "step": 2340 + }, + { + "epoch": 0.10595157275401675, + "grad_norm": 0.5879028793940111, + "learning_rate": 9.849688433068724e-06, + "loss": 0.4947, + "step": 2341 + }, + { + "epoch": 0.10599683186241231, + "grad_norm": 0.8458052413227429, + "learning_rate": 9.849510022134899e-06, + "loss": 0.4749, + "step": 2342 + }, + { + "epoch": 0.10604209097080787, + "grad_norm": 0.7548523560498813, + "learning_rate": 9.849331506999996e-06, + "loss": 0.4197, + "step": 2343 + }, + { + "epoch": 0.10608735007920345, + "grad_norm": 0.7488304007811081, + "learning_rate": 9.849152887667855e-06, + "loss": 0.438, + "step": 2344 + }, + { + "epoch": 0.10613260918759901, + "grad_norm": 0.7414720569112878, + "learning_rate": 9.848974164142309e-06, + "loss": 0.4229, + "step": 2345 + }, + { + "epoch": 0.10617786829599457, + "grad_norm": 0.7423339816887292, + "learning_rate": 9.848795336427202e-06, + "loss": 0.4464, + "step": 2346 + }, + { + "epoch": 0.10622312740439013, + "grad_norm": 0.78079522511492, + "learning_rate": 9.848616404526374e-06, + "loss": 0.4756, + "step": 2347 + }, + { + "epoch": 0.1062683865127857, + "grad_norm": 0.8166039341128437, + "learning_rate": 9.848437368443672e-06, + "loss": 0.4547, + "step": 2348 + }, + { + "epoch": 0.10631364562118126, + "grad_norm": 0.6081730989414673, + "learning_rate": 9.848258228182943e-06, + "loss": 0.5249, + "step": 2349 + }, + { + "epoch": 0.10635890472957683, + "grad_norm": 0.7141425259010529, + "learning_rate": 9.848078983748032e-06, + "loss": 0.3982, + "step": 2350 + }, + { + "epoch": 0.1064041638379724, + "grad_norm": 0.7046271198271215, + "learning_rate": 9.847899635142797e-06, + "loss": 0.4029, + "step": 2351 + }, + { + "epoch": 0.10644942294636796, + "grad_norm": 0.72872473152322, + "learning_rate": 9.847720182371086e-06, + "loss": 0.4417, + "step": 2352 + }, + { + "epoch": 0.10649468205476352, + "grad_norm": 0.7796132442433459, + "learning_rate": 9.847540625436756e-06, + "loss": 0.4342, + "step": 2353 + }, + { + "epoch": 0.10653994116315908, + "grad_norm": 0.7273745717727493, + "learning_rate": 9.847360964343667e-06, + "loss": 0.4482, + "step": 2354 + }, + { + "epoch": 0.10658520027155464, + "grad_norm": 0.4829803438786883, + "learning_rate": 9.84718119909568e-06, + "loss": 0.4964, + "step": 2355 + }, + { + "epoch": 0.10663045937995022, + "grad_norm": 0.436561756230908, + "learning_rate": 9.847001329696653e-06, + "loss": 0.5238, + "step": 2356 + }, + { + "epoch": 0.10667571848834578, + "grad_norm": 0.8452537247177253, + "learning_rate": 9.846821356150455e-06, + "loss": 0.3898, + "step": 2357 + }, + { + "epoch": 0.10672097759674135, + "grad_norm": 0.3834464300732562, + "learning_rate": 9.846641278460952e-06, + "loss": 0.5405, + "step": 2358 + }, + { + "epoch": 0.10676623670513691, + "grad_norm": 0.7367450263313308, + "learning_rate": 9.846461096632014e-06, + "loss": 0.4122, + "step": 2359 + }, + { + "epoch": 0.10681149581353247, + "grad_norm": 0.40049087997983424, + "learning_rate": 9.846280810667512e-06, + "loss": 0.5298, + "step": 2360 + }, + { + "epoch": 0.10685675492192803, + "grad_norm": 0.35495512975334464, + "learning_rate": 9.846100420571319e-06, + "loss": 0.4991, + "step": 2361 + }, + { + "epoch": 0.10690201403032361, + "grad_norm": 0.8965978069317818, + "learning_rate": 9.84591992634731e-06, + "loss": 0.4215, + "step": 2362 + }, + { + "epoch": 0.10694727313871917, + "grad_norm": 0.36426252834219985, + "learning_rate": 9.845739327999366e-06, + "loss": 0.5186, + "step": 2363 + }, + { + "epoch": 0.10699253224711473, + "grad_norm": 0.7992452990273541, + "learning_rate": 9.845558625531368e-06, + "loss": 0.4627, + "step": 2364 + }, + { + "epoch": 0.1070377913555103, + "grad_norm": 0.8018786875684687, + "learning_rate": 9.845377818947194e-06, + "loss": 0.4202, + "step": 2365 + }, + { + "epoch": 0.10708305046390586, + "grad_norm": 0.747643050655193, + "learning_rate": 9.845196908250737e-06, + "loss": 0.4563, + "step": 2366 + }, + { + "epoch": 0.10712830957230142, + "grad_norm": 0.43328647639779194, + "learning_rate": 9.845015893445874e-06, + "loss": 0.5062, + "step": 2367 + }, + { + "epoch": 0.107173568680697, + "grad_norm": 0.40488746552796007, + "learning_rate": 9.844834774536503e-06, + "loss": 0.4825, + "step": 2368 + }, + { + "epoch": 0.10721882778909256, + "grad_norm": 0.9608323793728127, + "learning_rate": 9.84465355152651e-06, + "loss": 0.4464, + "step": 2369 + }, + { + "epoch": 0.10726408689748812, + "grad_norm": 0.4088980125981813, + "learning_rate": 9.844472224419794e-06, + "loss": 0.522, + "step": 2370 + }, + { + "epoch": 0.10730934600588368, + "grad_norm": 0.7098724808096835, + "learning_rate": 9.844290793220249e-06, + "loss": 0.4272, + "step": 2371 + }, + { + "epoch": 0.10735460511427924, + "grad_norm": 0.810840485749749, + "learning_rate": 9.84410925793177e-06, + "loss": 0.4372, + "step": 2372 + }, + { + "epoch": 0.1073998642226748, + "grad_norm": 0.4924884016840848, + "learning_rate": 9.843927618558262e-06, + "loss": 0.5439, + "step": 2373 + }, + { + "epoch": 0.10744512333107038, + "grad_norm": 0.8003323683533289, + "learning_rate": 9.843745875103628e-06, + "loss": 0.471, + "step": 2374 + }, + { + "epoch": 0.10749038243946595, + "grad_norm": 0.7457532055280077, + "learning_rate": 9.84356402757177e-06, + "loss": 0.4476, + "step": 2375 + }, + { + "epoch": 0.10753564154786151, + "grad_norm": 0.7647485411939247, + "learning_rate": 9.843382075966596e-06, + "loss": 0.4134, + "step": 2376 + }, + { + "epoch": 0.10758090065625707, + "grad_norm": 0.8488959006361999, + "learning_rate": 9.843200020292017e-06, + "loss": 0.3952, + "step": 2377 + }, + { + "epoch": 0.10762615976465263, + "grad_norm": 0.7277350914827775, + "learning_rate": 9.843017860551946e-06, + "loss": 0.4477, + "step": 2378 + }, + { + "epoch": 0.1076714188730482, + "grad_norm": 0.7694876133740054, + "learning_rate": 9.842835596750292e-06, + "loss": 0.3839, + "step": 2379 + }, + { + "epoch": 0.10771667798144377, + "grad_norm": 0.9111084221791587, + "learning_rate": 9.842653228890979e-06, + "loss": 0.4337, + "step": 2380 + }, + { + "epoch": 0.10776193708983933, + "grad_norm": 0.6855619180024155, + "learning_rate": 9.84247075697792e-06, + "loss": 0.3778, + "step": 2381 + }, + { + "epoch": 0.1078071961982349, + "grad_norm": 0.6863604389228337, + "learning_rate": 9.842288181015035e-06, + "loss": 0.3973, + "step": 2382 + }, + { + "epoch": 0.10785245530663046, + "grad_norm": 0.6123620312305323, + "learning_rate": 9.84210550100625e-06, + "loss": 0.5356, + "step": 2383 + }, + { + "epoch": 0.10789771441502602, + "grad_norm": 0.8038667464891291, + "learning_rate": 9.841922716955488e-06, + "loss": 0.4588, + "step": 2384 + }, + { + "epoch": 0.1079429735234216, + "grad_norm": 0.8232987417202955, + "learning_rate": 9.84173982886668e-06, + "loss": 0.4849, + "step": 2385 + }, + { + "epoch": 0.10798823263181716, + "grad_norm": 0.7063766129624343, + "learning_rate": 9.841556836743752e-06, + "loss": 0.4006, + "step": 2386 + }, + { + "epoch": 0.10803349174021272, + "grad_norm": 0.8885217926078794, + "learning_rate": 9.841373740590638e-06, + "loss": 0.4433, + "step": 2387 + }, + { + "epoch": 0.10807875084860828, + "grad_norm": 0.7139772742218194, + "learning_rate": 9.84119054041127e-06, + "loss": 0.4048, + "step": 2388 + }, + { + "epoch": 0.10812400995700384, + "grad_norm": 0.7428126319567718, + "learning_rate": 9.841007236209588e-06, + "loss": 0.4283, + "step": 2389 + }, + { + "epoch": 0.1081692690653994, + "grad_norm": 0.7118335211303995, + "learning_rate": 9.840823827989526e-06, + "loss": 0.438, + "step": 2390 + }, + { + "epoch": 0.10821452817379498, + "grad_norm": 0.71790940397756, + "learning_rate": 9.84064031575503e-06, + "loss": 0.4164, + "step": 2391 + }, + { + "epoch": 0.10825978728219054, + "grad_norm": 0.7169223945081842, + "learning_rate": 9.840456699510038e-06, + "loss": 0.4193, + "step": 2392 + }, + { + "epoch": 0.10830504639058611, + "grad_norm": 0.5818471073518655, + "learning_rate": 9.840272979258498e-06, + "loss": 0.5264, + "step": 2393 + }, + { + "epoch": 0.10835030549898167, + "grad_norm": 0.44257271729553715, + "learning_rate": 9.84008915500436e-06, + "loss": 0.5038, + "step": 2394 + }, + { + "epoch": 0.10839556460737723, + "grad_norm": 0.9142415732457422, + "learning_rate": 9.83990522675157e-06, + "loss": 0.409, + "step": 2395 + }, + { + "epoch": 0.1084408237157728, + "grad_norm": 0.857612046602221, + "learning_rate": 9.83972119450408e-06, + "loss": 0.4275, + "step": 2396 + }, + { + "epoch": 0.10848608282416837, + "grad_norm": 0.8211196319920059, + "learning_rate": 9.839537058265847e-06, + "loss": 0.4086, + "step": 2397 + }, + { + "epoch": 0.10853134193256393, + "grad_norm": 0.6625393546539192, + "learning_rate": 9.839352818040825e-06, + "loss": 0.5295, + "step": 2398 + }, + { + "epoch": 0.1085766010409595, + "grad_norm": 0.8175415684719074, + "learning_rate": 9.839168473832975e-06, + "loss": 0.4349, + "step": 2399 + }, + { + "epoch": 0.10862186014935506, + "grad_norm": 0.6971690315490437, + "learning_rate": 9.838984025646257e-06, + "loss": 0.398, + "step": 2400 + }, + { + "epoch": 0.10866711925775062, + "grad_norm": 0.7333102031011268, + "learning_rate": 9.838799473484633e-06, + "loss": 0.4326, + "step": 2401 + }, + { + "epoch": 0.10871237836614618, + "grad_norm": 0.7465017771426746, + "learning_rate": 9.83861481735207e-06, + "loss": 0.4563, + "step": 2402 + }, + { + "epoch": 0.10875763747454176, + "grad_norm": 0.7089569072849802, + "learning_rate": 9.838430057252537e-06, + "loss": 0.4603, + "step": 2403 + }, + { + "epoch": 0.10880289658293732, + "grad_norm": 0.9367161129821115, + "learning_rate": 9.838245193189999e-06, + "loss": 0.4472, + "step": 2404 + }, + { + "epoch": 0.10884815569133288, + "grad_norm": 0.44700555725761754, + "learning_rate": 9.838060225168432e-06, + "loss": 0.5022, + "step": 2405 + }, + { + "epoch": 0.10889341479972844, + "grad_norm": 0.7906653966767495, + "learning_rate": 9.837875153191812e-06, + "loss": 0.4389, + "step": 2406 + }, + { + "epoch": 0.108938673908124, + "grad_norm": 0.7286294782716575, + "learning_rate": 9.837689977264111e-06, + "loss": 0.4223, + "step": 2407 + }, + { + "epoch": 0.10898393301651957, + "grad_norm": 2.253138918510198, + "learning_rate": 9.837504697389311e-06, + "loss": 0.4281, + "step": 2408 + }, + { + "epoch": 0.10902919212491514, + "grad_norm": 0.7661119819405651, + "learning_rate": 9.837319313571394e-06, + "loss": 0.4392, + "step": 2409 + }, + { + "epoch": 0.1090744512333107, + "grad_norm": 0.7407852422162088, + "learning_rate": 9.83713382581434e-06, + "loss": 0.4698, + "step": 2410 + }, + { + "epoch": 0.10911971034170627, + "grad_norm": 0.8474642676205407, + "learning_rate": 9.836948234122136e-06, + "loss": 0.4589, + "step": 2411 + }, + { + "epoch": 0.10916496945010183, + "grad_norm": 0.7070961319248004, + "learning_rate": 9.83676253849877e-06, + "loss": 0.4159, + "step": 2412 + }, + { + "epoch": 0.1092102285584974, + "grad_norm": 0.8516499038377042, + "learning_rate": 9.836576738948234e-06, + "loss": 0.5101, + "step": 2413 + }, + { + "epoch": 0.10925548766689296, + "grad_norm": 0.7205091158697706, + "learning_rate": 9.836390835474516e-06, + "loss": 0.4545, + "step": 2414 + }, + { + "epoch": 0.10930074677528853, + "grad_norm": 0.41820220247791706, + "learning_rate": 9.836204828081612e-06, + "loss": 0.489, + "step": 2415 + }, + { + "epoch": 0.1093460058836841, + "grad_norm": 0.7746337605139181, + "learning_rate": 9.836018716773522e-06, + "loss": 0.4397, + "step": 2416 + }, + { + "epoch": 0.10939126499207966, + "grad_norm": 0.7362030193141964, + "learning_rate": 9.835832501554242e-06, + "loss": 0.4459, + "step": 2417 + }, + { + "epoch": 0.10943652410047522, + "grad_norm": 0.7433426511252804, + "learning_rate": 9.835646182427773e-06, + "loss": 0.4436, + "step": 2418 + }, + { + "epoch": 0.10948178320887078, + "grad_norm": 0.8830194859078878, + "learning_rate": 9.835459759398118e-06, + "loss": 0.4438, + "step": 2419 + }, + { + "epoch": 0.10952704231726634, + "grad_norm": 0.7277600436583608, + "learning_rate": 9.835273232469285e-06, + "loss": 0.4386, + "step": 2420 + }, + { + "epoch": 0.10957230142566192, + "grad_norm": 0.3795240972724703, + "learning_rate": 9.83508660164528e-06, + "loss": 0.5036, + "step": 2421 + }, + { + "epoch": 0.10961756053405748, + "grad_norm": 0.7872464533012712, + "learning_rate": 9.834899866930116e-06, + "loss": 0.4306, + "step": 2422 + }, + { + "epoch": 0.10966281964245304, + "grad_norm": 0.7764837247171241, + "learning_rate": 9.834713028327802e-06, + "loss": 0.4909, + "step": 2423 + }, + { + "epoch": 0.1097080787508486, + "grad_norm": 0.6853382068771207, + "learning_rate": 9.834526085842352e-06, + "loss": 0.3955, + "step": 2424 + }, + { + "epoch": 0.10975333785924417, + "grad_norm": 0.7321908758615492, + "learning_rate": 9.834339039477787e-06, + "loss": 0.4379, + "step": 2425 + }, + { + "epoch": 0.10979859696763974, + "grad_norm": 0.7753512763451639, + "learning_rate": 9.834151889238121e-06, + "loss": 0.4054, + "step": 2426 + }, + { + "epoch": 0.1098438560760353, + "grad_norm": 0.3675674524715627, + "learning_rate": 9.83396463512738e-06, + "loss": 0.5238, + "step": 2427 + }, + { + "epoch": 0.10988911518443087, + "grad_norm": 0.3506600433838427, + "learning_rate": 9.833777277149585e-06, + "loss": 0.504, + "step": 2428 + }, + { + "epoch": 0.10993437429282643, + "grad_norm": 0.91527237285889, + "learning_rate": 9.833589815308761e-06, + "loss": 0.5326, + "step": 2429 + }, + { + "epoch": 0.109979633401222, + "grad_norm": 0.8309215990408595, + "learning_rate": 9.833402249608938e-06, + "loss": 0.4735, + "step": 2430 + }, + { + "epoch": 0.11002489250961756, + "grad_norm": 0.6832150109748468, + "learning_rate": 9.833214580054145e-06, + "loss": 0.4463, + "step": 2431 + }, + { + "epoch": 0.11007015161801313, + "grad_norm": 0.8721708352761983, + "learning_rate": 9.833026806648415e-06, + "loss": 0.3863, + "step": 2432 + }, + { + "epoch": 0.1101154107264087, + "grad_norm": 0.7530372258702653, + "learning_rate": 9.832838929395782e-06, + "loss": 0.4044, + "step": 2433 + }, + { + "epoch": 0.11016066983480426, + "grad_norm": 0.4474166144373787, + "learning_rate": 9.832650948300284e-06, + "loss": 0.5151, + "step": 2434 + }, + { + "epoch": 0.11020592894319982, + "grad_norm": 0.9044477916013893, + "learning_rate": 9.832462863365959e-06, + "loss": 0.4523, + "step": 2435 + }, + { + "epoch": 0.11025118805159538, + "grad_norm": 0.8437894209564453, + "learning_rate": 9.83227467459685e-06, + "loss": 0.4247, + "step": 2436 + }, + { + "epoch": 0.11029644715999094, + "grad_norm": 0.7387425628334219, + "learning_rate": 9.832086381996997e-06, + "loss": 0.3963, + "step": 2437 + }, + { + "epoch": 0.11034170626838652, + "grad_norm": 0.7854114015827275, + "learning_rate": 9.83189798557045e-06, + "loss": 0.4649, + "step": 2438 + }, + { + "epoch": 0.11038696537678208, + "grad_norm": 0.827093510582746, + "learning_rate": 9.831709485321255e-06, + "loss": 0.3967, + "step": 2439 + }, + { + "epoch": 0.11043222448517764, + "grad_norm": 0.7078503797630626, + "learning_rate": 9.831520881253462e-06, + "loss": 0.4789, + "step": 2440 + }, + { + "epoch": 0.1104774835935732, + "grad_norm": 0.6430101363692668, + "learning_rate": 9.831332173371125e-06, + "loss": 0.4161, + "step": 2441 + }, + { + "epoch": 0.11052274270196877, + "grad_norm": 0.685885874237446, + "learning_rate": 9.831143361678299e-06, + "loss": 0.4383, + "step": 2442 + }, + { + "epoch": 0.11056800181036433, + "grad_norm": 0.7105514565656513, + "learning_rate": 9.830954446179037e-06, + "loss": 0.432, + "step": 2443 + }, + { + "epoch": 0.1106132609187599, + "grad_norm": 0.48220270099095647, + "learning_rate": 9.830765426877404e-06, + "loss": 0.5381, + "step": 2444 + }, + { + "epoch": 0.11065852002715547, + "grad_norm": 0.6741979700234679, + "learning_rate": 9.830576303777456e-06, + "loss": 0.4586, + "step": 2445 + }, + { + "epoch": 0.11070377913555103, + "grad_norm": 0.7119060669901887, + "learning_rate": 9.83038707688326e-06, + "loss": 0.4608, + "step": 2446 + }, + { + "epoch": 0.1107490382439466, + "grad_norm": 0.8096309822720701, + "learning_rate": 9.830197746198882e-06, + "loss": 0.4664, + "step": 2447 + }, + { + "epoch": 0.11079429735234216, + "grad_norm": 0.8196824978383348, + "learning_rate": 9.83000831172839e-06, + "loss": 0.4452, + "step": 2448 + }, + { + "epoch": 0.11083955646073772, + "grad_norm": 0.7556365021830586, + "learning_rate": 9.829818773475852e-06, + "loss": 0.4527, + "step": 2449 + }, + { + "epoch": 0.1108848155691333, + "grad_norm": 0.8006816242158974, + "learning_rate": 9.829629131445342e-06, + "loss": 0.4201, + "step": 2450 + }, + { + "epoch": 0.11093007467752886, + "grad_norm": 0.7554350680724521, + "learning_rate": 9.829439385640936e-06, + "loss": 0.4824, + "step": 2451 + }, + { + "epoch": 0.11097533378592442, + "grad_norm": 0.664479601846548, + "learning_rate": 9.82924953606671e-06, + "loss": 0.3956, + "step": 2452 + }, + { + "epoch": 0.11102059289431998, + "grad_norm": 0.6857058477647321, + "learning_rate": 9.829059582726743e-06, + "loss": 0.4114, + "step": 2453 + }, + { + "epoch": 0.11106585200271554, + "grad_norm": 0.7722700935916896, + "learning_rate": 9.828869525625118e-06, + "loss": 0.4213, + "step": 2454 + }, + { + "epoch": 0.1111111111111111, + "grad_norm": 0.7138037254822772, + "learning_rate": 9.828679364765917e-06, + "loss": 0.4329, + "step": 2455 + }, + { + "epoch": 0.11115637021950668, + "grad_norm": 0.779255191989751, + "learning_rate": 9.828489100153224e-06, + "loss": 0.4003, + "step": 2456 + }, + { + "epoch": 0.11120162932790224, + "grad_norm": 0.9137826989883181, + "learning_rate": 9.828298731791133e-06, + "loss": 0.4325, + "step": 2457 + }, + { + "epoch": 0.1112468884362978, + "grad_norm": 0.7227084991013153, + "learning_rate": 9.82810825968373e-06, + "loss": 0.4099, + "step": 2458 + }, + { + "epoch": 0.11129214754469337, + "grad_norm": 0.7269166807445072, + "learning_rate": 9.827917683835109e-06, + "loss": 0.3982, + "step": 2459 + }, + { + "epoch": 0.11133740665308893, + "grad_norm": 0.6259376766075867, + "learning_rate": 9.827727004249366e-06, + "loss": 0.5181, + "step": 2460 + }, + { + "epoch": 0.11138266576148449, + "grad_norm": 0.7294144278342447, + "learning_rate": 9.827536220930596e-06, + "loss": 0.4308, + "step": 2461 + }, + { + "epoch": 0.11142792486988007, + "grad_norm": 0.7849374307877185, + "learning_rate": 9.827345333882898e-06, + "loss": 0.4454, + "step": 2462 + }, + { + "epoch": 0.11147318397827563, + "grad_norm": 0.8153581247572056, + "learning_rate": 9.827154343110376e-06, + "loss": 0.4257, + "step": 2463 + }, + { + "epoch": 0.11151844308667119, + "grad_norm": 0.6611837576004533, + "learning_rate": 9.826963248617133e-06, + "loss": 0.4247, + "step": 2464 + }, + { + "epoch": 0.11156370219506676, + "grad_norm": 0.7848171203104205, + "learning_rate": 9.826772050407273e-06, + "loss": 0.4558, + "step": 2465 + }, + { + "epoch": 0.11160896130346232, + "grad_norm": 2.1110926208357843, + "learning_rate": 9.826580748484908e-06, + "loss": 0.3769, + "step": 2466 + }, + { + "epoch": 0.11165422041185788, + "grad_norm": 0.8213480521010255, + "learning_rate": 9.826389342854146e-06, + "loss": 0.5348, + "step": 2467 + }, + { + "epoch": 0.11169947952025346, + "grad_norm": 0.7099766095535562, + "learning_rate": 9.8261978335191e-06, + "loss": 0.4407, + "step": 2468 + }, + { + "epoch": 0.11174473862864902, + "grad_norm": 0.7630263984746236, + "learning_rate": 9.826006220483886e-06, + "loss": 0.425, + "step": 2469 + }, + { + "epoch": 0.11178999773704458, + "grad_norm": 0.6950195430682758, + "learning_rate": 9.825814503752618e-06, + "loss": 0.4686, + "step": 2470 + }, + { + "epoch": 0.11183525684544014, + "grad_norm": 0.7677560822279192, + "learning_rate": 9.825622683329419e-06, + "loss": 0.4228, + "step": 2471 + }, + { + "epoch": 0.1118805159538357, + "grad_norm": 0.7758707129168558, + "learning_rate": 9.82543075921841e-06, + "loss": 0.4136, + "step": 2472 + }, + { + "epoch": 0.11192577506223128, + "grad_norm": 0.46941180467558136, + "learning_rate": 9.825238731423713e-06, + "loss": 0.501, + "step": 2473 + }, + { + "epoch": 0.11197103417062684, + "grad_norm": 0.7360325669571167, + "learning_rate": 9.825046599949455e-06, + "loss": 0.4119, + "step": 2474 + }, + { + "epoch": 0.1120162932790224, + "grad_norm": 0.7760259047685877, + "learning_rate": 9.824854364799766e-06, + "loss": 0.4081, + "step": 2475 + }, + { + "epoch": 0.11206155238741797, + "grad_norm": 0.764283254991474, + "learning_rate": 9.824662025978774e-06, + "loss": 0.4486, + "step": 2476 + }, + { + "epoch": 0.11210681149581353, + "grad_norm": 0.702833413633441, + "learning_rate": 9.824469583490612e-06, + "loss": 0.4288, + "step": 2477 + }, + { + "epoch": 0.11215207060420909, + "grad_norm": 0.40113438821656633, + "learning_rate": 9.824277037339419e-06, + "loss": 0.5195, + "step": 2478 + }, + { + "epoch": 0.11219732971260467, + "grad_norm": 0.38158292572294616, + "learning_rate": 9.824084387529326e-06, + "loss": 0.5168, + "step": 2479 + }, + { + "epoch": 0.11224258882100023, + "grad_norm": 0.956080719114455, + "learning_rate": 9.823891634064478e-06, + "loss": 0.4531, + "step": 2480 + }, + { + "epoch": 0.11228784792939579, + "grad_norm": 0.7235253068311136, + "learning_rate": 9.823698776949011e-06, + "loss": 0.4813, + "step": 2481 + }, + { + "epoch": 0.11233310703779135, + "grad_norm": 0.7522682244822607, + "learning_rate": 9.823505816187076e-06, + "loss": 0.4315, + "step": 2482 + }, + { + "epoch": 0.11237836614618692, + "grad_norm": 0.8176435014549138, + "learning_rate": 9.823312751782812e-06, + "loss": 0.43, + "step": 2483 + }, + { + "epoch": 0.11242362525458248, + "grad_norm": 0.718478291493913, + "learning_rate": 9.823119583740373e-06, + "loss": 0.4744, + "step": 2484 + }, + { + "epoch": 0.11246888436297806, + "grad_norm": 0.6502726205028461, + "learning_rate": 9.822926312063905e-06, + "loss": 0.4179, + "step": 2485 + }, + { + "epoch": 0.11251414347137362, + "grad_norm": 0.7305782785378717, + "learning_rate": 9.822732936757564e-06, + "loss": 0.4687, + "step": 2486 + }, + { + "epoch": 0.11255940257976918, + "grad_norm": 0.6004507126829316, + "learning_rate": 9.822539457825505e-06, + "loss": 0.531, + "step": 2487 + }, + { + "epoch": 0.11260466168816474, + "grad_norm": 0.7823041938016477, + "learning_rate": 9.822345875271884e-06, + "loss": 0.4376, + "step": 2488 + }, + { + "epoch": 0.1126499207965603, + "grad_norm": 0.7453882450020083, + "learning_rate": 9.82215218910086e-06, + "loss": 0.4314, + "step": 2489 + }, + { + "epoch": 0.11269517990495587, + "grad_norm": 0.6997393893692536, + "learning_rate": 9.821958399316595e-06, + "loss": 0.3992, + "step": 2490 + }, + { + "epoch": 0.11274043901335144, + "grad_norm": 0.8002430016187357, + "learning_rate": 9.821764505923257e-06, + "loss": 0.4732, + "step": 2491 + }, + { + "epoch": 0.112785698121747, + "grad_norm": 0.7487575906480041, + "learning_rate": 9.821570508925005e-06, + "loss": 0.4273, + "step": 2492 + }, + { + "epoch": 0.11283095723014257, + "grad_norm": 0.741383909506027, + "learning_rate": 9.821376408326013e-06, + "loss": 0.4093, + "step": 2493 + }, + { + "epoch": 0.11287621633853813, + "grad_norm": 0.8072793420604332, + "learning_rate": 9.821182204130448e-06, + "loss": 0.4081, + "step": 2494 + }, + { + "epoch": 0.11292147544693369, + "grad_norm": 0.4724509747117798, + "learning_rate": 9.820987896342487e-06, + "loss": 0.515, + "step": 2495 + }, + { + "epoch": 0.11296673455532925, + "grad_norm": 0.758529766742308, + "learning_rate": 9.8207934849663e-06, + "loss": 0.4594, + "step": 2496 + }, + { + "epoch": 0.11301199366372483, + "grad_norm": 0.7146459027236057, + "learning_rate": 9.820598970006068e-06, + "loss": 0.4138, + "step": 2497 + }, + { + "epoch": 0.11305725277212039, + "grad_norm": 0.34575479257833636, + "learning_rate": 9.82040435146597e-06, + "loss": 0.5035, + "step": 2498 + }, + { + "epoch": 0.11310251188051595, + "grad_norm": 0.3175300244710189, + "learning_rate": 9.820209629350189e-06, + "loss": 0.4976, + "step": 2499 + }, + { + "epoch": 0.11314777098891152, + "grad_norm": 0.9579112274642932, + "learning_rate": 9.820014803662905e-06, + "loss": 0.4725, + "step": 2500 + }, + { + "epoch": 0.11319303009730708, + "grad_norm": 0.42437979117341523, + "learning_rate": 9.819819874408306e-06, + "loss": 0.5118, + "step": 2501 + }, + { + "epoch": 0.11323828920570264, + "grad_norm": 1.0258124340930102, + "learning_rate": 9.81962484159058e-06, + "loss": 0.4211, + "step": 2502 + }, + { + "epoch": 0.11328354831409822, + "grad_norm": 0.7774769252865491, + "learning_rate": 9.819429705213922e-06, + "loss": 0.4202, + "step": 2503 + }, + { + "epoch": 0.11332880742249378, + "grad_norm": 0.7262055087963113, + "learning_rate": 9.819234465282518e-06, + "loss": 0.4285, + "step": 2504 + }, + { + "epoch": 0.11337406653088934, + "grad_norm": 0.8528024190792679, + "learning_rate": 9.819039121800568e-06, + "loss": 0.4395, + "step": 2505 + }, + { + "epoch": 0.1134193256392849, + "grad_norm": 0.723040071029397, + "learning_rate": 9.818843674772268e-06, + "loss": 0.4621, + "step": 2506 + }, + { + "epoch": 0.11346458474768047, + "grad_norm": 0.6986040317293818, + "learning_rate": 9.818648124201817e-06, + "loss": 0.4238, + "step": 2507 + }, + { + "epoch": 0.11350984385607603, + "grad_norm": 0.8031078796455979, + "learning_rate": 9.818452470093416e-06, + "loss": 0.463, + "step": 2508 + }, + { + "epoch": 0.1135551029644716, + "grad_norm": 0.7185941171883956, + "learning_rate": 9.818256712451272e-06, + "loss": 0.4244, + "step": 2509 + }, + { + "epoch": 0.11360036207286717, + "grad_norm": 0.7064335790434774, + "learning_rate": 9.81806085127959e-06, + "loss": 0.422, + "step": 2510 + }, + { + "epoch": 0.11364562118126273, + "grad_norm": 0.658313320908971, + "learning_rate": 9.817864886582575e-06, + "loss": 0.4074, + "step": 2511 + }, + { + "epoch": 0.11369088028965829, + "grad_norm": 0.5048450850733868, + "learning_rate": 9.817668818364441e-06, + "loss": 0.5352, + "step": 2512 + }, + { + "epoch": 0.11373613939805385, + "grad_norm": 0.8615294240399992, + "learning_rate": 9.817472646629403e-06, + "loss": 0.4123, + "step": 2513 + }, + { + "epoch": 0.11378139850644943, + "grad_norm": 0.3868215144673393, + "learning_rate": 9.817276371381671e-06, + "loss": 0.4918, + "step": 2514 + }, + { + "epoch": 0.11382665761484499, + "grad_norm": 0.72087500047661, + "learning_rate": 9.817079992625467e-06, + "loss": 0.4139, + "step": 2515 + }, + { + "epoch": 0.11387191672324055, + "grad_norm": 0.3519348388728298, + "learning_rate": 9.816883510365007e-06, + "loss": 0.5143, + "step": 2516 + }, + { + "epoch": 0.11391717583163612, + "grad_norm": 0.37694064143955736, + "learning_rate": 9.816686924604515e-06, + "loss": 0.5142, + "step": 2517 + }, + { + "epoch": 0.11396243494003168, + "grad_norm": 0.9441190145175116, + "learning_rate": 9.816490235348215e-06, + "loss": 0.4321, + "step": 2518 + }, + { + "epoch": 0.11400769404842724, + "grad_norm": 0.77127837366527, + "learning_rate": 9.816293442600331e-06, + "loss": 0.4461, + "step": 2519 + }, + { + "epoch": 0.11405295315682282, + "grad_norm": 0.8364582358922202, + "learning_rate": 9.816096546365094e-06, + "loss": 0.3901, + "step": 2520 + }, + { + "epoch": 0.11409821226521838, + "grad_norm": 0.500008347179003, + "learning_rate": 9.815899546646734e-06, + "loss": 0.5407, + "step": 2521 + }, + { + "epoch": 0.11414347137361394, + "grad_norm": 0.8221660850937721, + "learning_rate": 9.815702443449482e-06, + "loss": 0.428, + "step": 2522 + }, + { + "epoch": 0.1141887304820095, + "grad_norm": 0.869718534293267, + "learning_rate": 9.815505236777576e-06, + "loss": 0.4608, + "step": 2523 + }, + { + "epoch": 0.11423398959040507, + "grad_norm": 0.38870622655972636, + "learning_rate": 9.815307926635252e-06, + "loss": 0.5186, + "step": 2524 + }, + { + "epoch": 0.11427924869880063, + "grad_norm": 0.8050537795361368, + "learning_rate": 9.815110513026749e-06, + "loss": 0.3684, + "step": 2525 + }, + { + "epoch": 0.1143245078071962, + "grad_norm": 0.9066359801731271, + "learning_rate": 9.814912995956311e-06, + "loss": 0.4784, + "step": 2526 + }, + { + "epoch": 0.11436976691559177, + "grad_norm": 0.780943610306695, + "learning_rate": 9.814715375428181e-06, + "loss": 0.4326, + "step": 2527 + }, + { + "epoch": 0.11441502602398733, + "grad_norm": 0.7916808022497258, + "learning_rate": 9.814517651446603e-06, + "loss": 0.4624, + "step": 2528 + }, + { + "epoch": 0.11446028513238289, + "grad_norm": 0.803747106010139, + "learning_rate": 9.814319824015827e-06, + "loss": 0.4368, + "step": 2529 + }, + { + "epoch": 0.11450554424077845, + "grad_norm": 0.764847693389444, + "learning_rate": 9.814121893140105e-06, + "loss": 0.4765, + "step": 2530 + }, + { + "epoch": 0.11455080334917402, + "grad_norm": 0.6383253482048825, + "learning_rate": 9.81392385882369e-06, + "loss": 0.4087, + "step": 2531 + }, + { + "epoch": 0.11459606245756959, + "grad_norm": 0.821489295199945, + "learning_rate": 9.813725721070834e-06, + "loss": 0.4072, + "step": 2532 + }, + { + "epoch": 0.11464132156596515, + "grad_norm": 0.8707012364160911, + "learning_rate": 9.813527479885799e-06, + "loss": 0.4296, + "step": 2533 + }, + { + "epoch": 0.11468658067436072, + "grad_norm": 1.3106924634509085, + "learning_rate": 9.813329135272841e-06, + "loss": 0.4377, + "step": 2534 + }, + { + "epoch": 0.11473183978275628, + "grad_norm": 0.5267145937741655, + "learning_rate": 9.813130687236222e-06, + "loss": 0.5184, + "step": 2535 + }, + { + "epoch": 0.11477709889115184, + "grad_norm": 0.7352462805393354, + "learning_rate": 9.81293213578021e-06, + "loss": 0.4098, + "step": 2536 + }, + { + "epoch": 0.1148223579995474, + "grad_norm": 0.8631311864144396, + "learning_rate": 9.812733480909065e-06, + "loss": 0.4135, + "step": 2537 + }, + { + "epoch": 0.11486761710794298, + "grad_norm": 0.4587714355663383, + "learning_rate": 9.812534722627058e-06, + "loss": 0.5288, + "step": 2538 + }, + { + "epoch": 0.11491287621633854, + "grad_norm": 0.7901169569777019, + "learning_rate": 9.812335860938462e-06, + "loss": 0.4404, + "step": 2539 + }, + { + "epoch": 0.1149581353247341, + "grad_norm": 0.361868647097338, + "learning_rate": 9.812136895847548e-06, + "loss": 0.5265, + "step": 2540 + }, + { + "epoch": 0.11500339443312967, + "grad_norm": 0.7596945627994101, + "learning_rate": 9.811937827358592e-06, + "loss": 0.4571, + "step": 2541 + }, + { + "epoch": 0.11504865354152523, + "grad_norm": 0.7906186891160457, + "learning_rate": 9.81173865547587e-06, + "loss": 0.4267, + "step": 2542 + }, + { + "epoch": 0.11509391264992079, + "grad_norm": 0.4117514950499734, + "learning_rate": 9.811539380203663e-06, + "loss": 0.4849, + "step": 2543 + }, + { + "epoch": 0.11513917175831637, + "grad_norm": 0.7657511338022506, + "learning_rate": 9.811340001546252e-06, + "loss": 0.4434, + "step": 2544 + }, + { + "epoch": 0.11518443086671193, + "grad_norm": 0.7789309960897834, + "learning_rate": 9.811140519507922e-06, + "loss": 0.4494, + "step": 2545 + }, + { + "epoch": 0.11522968997510749, + "grad_norm": 0.6871882671752764, + "learning_rate": 9.810940934092958e-06, + "loss": 0.4671, + "step": 2546 + }, + { + "epoch": 0.11527494908350305, + "grad_norm": 0.7825805615576867, + "learning_rate": 9.810741245305649e-06, + "loss": 0.4138, + "step": 2547 + }, + { + "epoch": 0.11532020819189862, + "grad_norm": 0.45342424459144637, + "learning_rate": 9.810541453150286e-06, + "loss": 0.4827, + "step": 2548 + }, + { + "epoch": 0.11536546730029418, + "grad_norm": 0.7997696049758634, + "learning_rate": 9.810341557631161e-06, + "loss": 0.4261, + "step": 2549 + }, + { + "epoch": 0.11541072640868975, + "grad_norm": 0.3354195057845609, + "learning_rate": 9.81014155875257e-06, + "loss": 0.5186, + "step": 2550 + }, + { + "epoch": 0.11545598551708532, + "grad_norm": 0.3518734423957318, + "learning_rate": 9.80994145651881e-06, + "loss": 0.5159, + "step": 2551 + }, + { + "epoch": 0.11550124462548088, + "grad_norm": 0.8518932818441349, + "learning_rate": 9.809741250934182e-06, + "loss": 0.4127, + "step": 2552 + }, + { + "epoch": 0.11554650373387644, + "grad_norm": 0.8049676685657544, + "learning_rate": 9.809540942002984e-06, + "loss": 0.47, + "step": 2553 + }, + { + "epoch": 0.115591762842272, + "grad_norm": 1.1333619096212444, + "learning_rate": 9.809340529729523e-06, + "loss": 0.4076, + "step": 2554 + }, + { + "epoch": 0.11563702195066757, + "grad_norm": 0.4749602483194078, + "learning_rate": 9.809140014118106e-06, + "loss": 0.4991, + "step": 2555 + }, + { + "epoch": 0.11568228105906314, + "grad_norm": 0.8929118199192368, + "learning_rate": 9.80893939517304e-06, + "loss": 0.4566, + "step": 2556 + }, + { + "epoch": 0.1157275401674587, + "grad_norm": 1.0534167992870758, + "learning_rate": 9.808738672898637e-06, + "loss": 0.4217, + "step": 2557 + }, + { + "epoch": 0.11577279927585427, + "grad_norm": 0.9090595000677438, + "learning_rate": 9.808537847299206e-06, + "loss": 0.43, + "step": 2558 + }, + { + "epoch": 0.11581805838424983, + "grad_norm": 0.7642568276547604, + "learning_rate": 9.808336918379068e-06, + "loss": 0.4057, + "step": 2559 + }, + { + "epoch": 0.11586331749264539, + "grad_norm": 0.752331667095118, + "learning_rate": 9.808135886142536e-06, + "loss": 0.4222, + "step": 2560 + }, + { + "epoch": 0.11590857660104097, + "grad_norm": 0.7131841249575264, + "learning_rate": 9.80793475059393e-06, + "loss": 0.4255, + "step": 2561 + }, + { + "epoch": 0.11595383570943653, + "grad_norm": 1.0519989552101878, + "learning_rate": 9.807733511737574e-06, + "loss": 0.4236, + "step": 2562 + }, + { + "epoch": 0.11599909481783209, + "grad_norm": 0.5128610138385369, + "learning_rate": 9.80753216957779e-06, + "loss": 0.4961, + "step": 2563 + }, + { + "epoch": 0.11604435392622765, + "grad_norm": 0.45318445672664937, + "learning_rate": 9.807330724118906e-06, + "loss": 0.5104, + "step": 2564 + }, + { + "epoch": 0.11608961303462322, + "grad_norm": 0.9444688018175134, + "learning_rate": 9.807129175365248e-06, + "loss": 0.4216, + "step": 2565 + }, + { + "epoch": 0.11613487214301878, + "grad_norm": 0.7334542392066108, + "learning_rate": 9.806927523321148e-06, + "loss": 0.4404, + "step": 2566 + }, + { + "epoch": 0.11618013125141435, + "grad_norm": 0.667130718816056, + "learning_rate": 9.806725767990938e-06, + "loss": 0.3966, + "step": 2567 + }, + { + "epoch": 0.11622539035980992, + "grad_norm": 0.7872579252818356, + "learning_rate": 9.806523909378956e-06, + "loss": 0.4384, + "step": 2568 + }, + { + "epoch": 0.11627064946820548, + "grad_norm": 0.8402962544898573, + "learning_rate": 9.806321947489537e-06, + "loss": 0.4756, + "step": 2569 + }, + { + "epoch": 0.11631590857660104, + "grad_norm": 0.8202872754958558, + "learning_rate": 9.806119882327019e-06, + "loss": 0.448, + "step": 2570 + }, + { + "epoch": 0.1163611676849966, + "grad_norm": 0.7107492495403155, + "learning_rate": 9.805917713895748e-06, + "loss": 0.4321, + "step": 2571 + }, + { + "epoch": 0.11640642679339216, + "grad_norm": 0.771093224734539, + "learning_rate": 9.805715442200065e-06, + "loss": 0.5144, + "step": 2572 + }, + { + "epoch": 0.11645168590178774, + "grad_norm": 0.9386984016833877, + "learning_rate": 9.805513067244316e-06, + "loss": 0.4454, + "step": 2573 + }, + { + "epoch": 0.1164969450101833, + "grad_norm": 0.8371963863890383, + "learning_rate": 9.80531058903285e-06, + "loss": 0.4083, + "step": 2574 + }, + { + "epoch": 0.11654220411857887, + "grad_norm": 0.7628481457269319, + "learning_rate": 9.805108007570019e-06, + "loss": 0.4178, + "step": 2575 + }, + { + "epoch": 0.11658746322697443, + "grad_norm": 0.7629279641165384, + "learning_rate": 9.804905322860174e-06, + "loss": 0.3906, + "step": 2576 + }, + { + "epoch": 0.11663272233536999, + "grad_norm": 0.7876862625935663, + "learning_rate": 9.80470253490767e-06, + "loss": 0.4435, + "step": 2577 + }, + { + "epoch": 0.11667798144376555, + "grad_norm": 0.8393703723647782, + "learning_rate": 9.804499643716866e-06, + "loss": 0.427, + "step": 2578 + }, + { + "epoch": 0.11672324055216113, + "grad_norm": 0.8430332725251674, + "learning_rate": 9.804296649292119e-06, + "loss": 0.4125, + "step": 2579 + }, + { + "epoch": 0.11676849966055669, + "grad_norm": 0.5868369641884476, + "learning_rate": 9.804093551637794e-06, + "loss": 0.5417, + "step": 2580 + }, + { + "epoch": 0.11681375876895225, + "grad_norm": 0.7528081549316857, + "learning_rate": 9.803890350758253e-06, + "loss": 0.4395, + "step": 2581 + }, + { + "epoch": 0.11685901787734781, + "grad_norm": 0.812847278681468, + "learning_rate": 9.803687046657863e-06, + "loss": 0.4565, + "step": 2582 + }, + { + "epoch": 0.11690427698574338, + "grad_norm": 0.3822252791723251, + "learning_rate": 9.80348363934099e-06, + "loss": 0.5108, + "step": 2583 + }, + { + "epoch": 0.11694953609413894, + "grad_norm": 0.81855950148487, + "learning_rate": 9.803280128812009e-06, + "loss": 0.4828, + "step": 2584 + }, + { + "epoch": 0.11699479520253452, + "grad_norm": 0.38534699136789974, + "learning_rate": 9.803076515075288e-06, + "loss": 0.5013, + "step": 2585 + }, + { + "epoch": 0.11704005431093008, + "grad_norm": 0.8995071789486414, + "learning_rate": 9.802872798135205e-06, + "loss": 0.3777, + "step": 2586 + }, + { + "epoch": 0.11708531341932564, + "grad_norm": 0.6654776493366726, + "learning_rate": 9.802668977996134e-06, + "loss": 0.4325, + "step": 2587 + }, + { + "epoch": 0.1171305725277212, + "grad_norm": 0.40645015656442596, + "learning_rate": 9.80246505466246e-06, + "loss": 0.5202, + "step": 2588 + }, + { + "epoch": 0.11717583163611676, + "grad_norm": 0.4328728192860413, + "learning_rate": 9.802261028138563e-06, + "loss": 0.4921, + "step": 2589 + }, + { + "epoch": 0.11722109074451233, + "grad_norm": 0.8520383668317281, + "learning_rate": 9.802056898428823e-06, + "loss": 0.4322, + "step": 2590 + }, + { + "epoch": 0.1172663498529079, + "grad_norm": 0.8162455760932941, + "learning_rate": 9.801852665537628e-06, + "loss": 0.4772, + "step": 2591 + }, + { + "epoch": 0.11731160896130347, + "grad_norm": 0.715639339968077, + "learning_rate": 9.801648329469368e-06, + "loss": 0.4096, + "step": 2592 + }, + { + "epoch": 0.11735686806969903, + "grad_norm": 0.7674946740030191, + "learning_rate": 9.801443890228433e-06, + "loss": 0.4204, + "step": 2593 + }, + { + "epoch": 0.11740212717809459, + "grad_norm": 0.8687384401260149, + "learning_rate": 9.801239347819213e-06, + "loss": 0.4088, + "step": 2594 + }, + { + "epoch": 0.11744738628649015, + "grad_norm": 0.776904357959414, + "learning_rate": 9.801034702246109e-06, + "loss": 0.519, + "step": 2595 + }, + { + "epoch": 0.11749264539488571, + "grad_norm": 0.7081209701798946, + "learning_rate": 9.80082995351351e-06, + "loss": 0.4869, + "step": 2596 + }, + { + "epoch": 0.11753790450328129, + "grad_norm": 0.8995320362421774, + "learning_rate": 9.800625101625823e-06, + "loss": 0.4077, + "step": 2597 + }, + { + "epoch": 0.11758316361167685, + "grad_norm": 0.8018640403584185, + "learning_rate": 9.800420146587446e-06, + "loss": 0.4211, + "step": 2598 + }, + { + "epoch": 0.11762842272007241, + "grad_norm": 0.7980549659130014, + "learning_rate": 9.800215088402785e-06, + "loss": 0.4222, + "step": 2599 + }, + { + "epoch": 0.11767368182846798, + "grad_norm": 0.4214767758524068, + "learning_rate": 9.800009927076242e-06, + "loss": 0.5301, + "step": 2600 + }, + { + "epoch": 0.11771894093686354, + "grad_norm": 0.8670626633745894, + "learning_rate": 9.79980466261223e-06, + "loss": 0.4073, + "step": 2601 + }, + { + "epoch": 0.1177642000452591, + "grad_norm": 0.7999794305874037, + "learning_rate": 9.799599295015154e-06, + "loss": 0.4164, + "step": 2602 + }, + { + "epoch": 0.11780945915365468, + "grad_norm": 0.38042470528204886, + "learning_rate": 9.799393824289432e-06, + "loss": 0.4855, + "step": 2603 + }, + { + "epoch": 0.11785471826205024, + "grad_norm": 0.7736755748456349, + "learning_rate": 9.799188250439477e-06, + "loss": 0.4406, + "step": 2604 + }, + { + "epoch": 0.1178999773704458, + "grad_norm": 0.9131934705355554, + "learning_rate": 9.798982573469706e-06, + "loss": 0.4272, + "step": 2605 + }, + { + "epoch": 0.11794523647884136, + "grad_norm": 0.7246504193592824, + "learning_rate": 9.79877679338454e-06, + "loss": 0.4511, + "step": 2606 + }, + { + "epoch": 0.11799049558723693, + "grad_norm": 0.7085435596280111, + "learning_rate": 9.798570910188396e-06, + "loss": 0.4068, + "step": 2607 + }, + { + "epoch": 0.1180357546956325, + "grad_norm": 0.7441094134118903, + "learning_rate": 9.798364923885703e-06, + "loss": 0.4549, + "step": 2608 + }, + { + "epoch": 0.11808101380402806, + "grad_norm": 0.7898290377759148, + "learning_rate": 9.798158834480883e-06, + "loss": 0.4463, + "step": 2609 + }, + { + "epoch": 0.11812627291242363, + "grad_norm": 0.6696170261673059, + "learning_rate": 9.797952641978368e-06, + "loss": 0.3952, + "step": 2610 + }, + { + "epoch": 0.11817153202081919, + "grad_norm": 0.84637099325106, + "learning_rate": 9.797746346382586e-06, + "loss": 0.5207, + "step": 2611 + }, + { + "epoch": 0.11821679112921475, + "grad_norm": 0.7675882546910711, + "learning_rate": 9.797539947697969e-06, + "loss": 0.4498, + "step": 2612 + }, + { + "epoch": 0.11826205023761031, + "grad_norm": 0.761430885948382, + "learning_rate": 9.797333445928954e-06, + "loss": 0.4515, + "step": 2613 + }, + { + "epoch": 0.11830730934600589, + "grad_norm": 0.7188207060892471, + "learning_rate": 9.797126841079979e-06, + "loss": 0.4042, + "step": 2614 + }, + { + "epoch": 0.11835256845440145, + "grad_norm": 0.7496719771480668, + "learning_rate": 9.796920133155479e-06, + "loss": 0.4564, + "step": 2615 + }, + { + "epoch": 0.11839782756279701, + "grad_norm": 1.0304731986930586, + "learning_rate": 9.796713322159897e-06, + "loss": 0.4532, + "step": 2616 + }, + { + "epoch": 0.11844308667119258, + "grad_norm": 0.9970062290722393, + "learning_rate": 9.796506408097679e-06, + "loss": 0.4328, + "step": 2617 + }, + { + "epoch": 0.11848834577958814, + "grad_norm": 0.7185759231957992, + "learning_rate": 9.79629939097327e-06, + "loss": 0.469, + "step": 2618 + }, + { + "epoch": 0.1185336048879837, + "grad_norm": 0.6205298876025276, + "learning_rate": 9.796092270791118e-06, + "loss": 0.5458, + "step": 2619 + }, + { + "epoch": 0.11857886399637928, + "grad_norm": 0.9922965858266375, + "learning_rate": 9.795885047555673e-06, + "loss": 0.4199, + "step": 2620 + }, + { + "epoch": 0.11862412310477484, + "grad_norm": 0.7396088827374019, + "learning_rate": 9.795677721271388e-06, + "loss": 0.4193, + "step": 2621 + }, + { + "epoch": 0.1186693822131704, + "grad_norm": 0.6860612044528022, + "learning_rate": 9.795470291942717e-06, + "loss": 0.4015, + "step": 2622 + }, + { + "epoch": 0.11871464132156596, + "grad_norm": 0.9072366386992914, + "learning_rate": 9.795262759574117e-06, + "loss": 0.4456, + "step": 2623 + }, + { + "epoch": 0.11875990042996153, + "grad_norm": 0.8086548745386506, + "learning_rate": 9.795055124170047e-06, + "loss": 0.4396, + "step": 2624 + }, + { + "epoch": 0.11880515953835709, + "grad_norm": 0.8949857372498993, + "learning_rate": 9.79484738573497e-06, + "loss": 0.4149, + "step": 2625 + }, + { + "epoch": 0.11885041864675266, + "grad_norm": 0.7416475609651193, + "learning_rate": 9.794639544273352e-06, + "loss": 0.4285, + "step": 2626 + }, + { + "epoch": 0.11889567775514823, + "grad_norm": 0.7588753079782493, + "learning_rate": 9.794431599789653e-06, + "loss": 0.4561, + "step": 2627 + }, + { + "epoch": 0.11894093686354379, + "grad_norm": 0.790029553819229, + "learning_rate": 9.794223552288344e-06, + "loss": 0.4243, + "step": 2628 + }, + { + "epoch": 0.11898619597193935, + "grad_norm": 0.712822624737303, + "learning_rate": 9.794015401773896e-06, + "loss": 0.3943, + "step": 2629 + }, + { + "epoch": 0.11903145508033491, + "grad_norm": 0.8630365063043336, + "learning_rate": 9.79380714825078e-06, + "loss": 0.5127, + "step": 2630 + }, + { + "epoch": 0.11907671418873048, + "grad_norm": 0.9709964026202484, + "learning_rate": 9.793598791723471e-06, + "loss": 0.4413, + "step": 2631 + }, + { + "epoch": 0.11912197329712605, + "grad_norm": 0.7388955468482264, + "learning_rate": 9.793390332196448e-06, + "loss": 0.4456, + "step": 2632 + }, + { + "epoch": 0.11916723240552161, + "grad_norm": 0.7321239683277707, + "learning_rate": 9.793181769674186e-06, + "loss": 0.4058, + "step": 2633 + }, + { + "epoch": 0.11921249151391718, + "grad_norm": 1.2012183009430955, + "learning_rate": 9.792973104161172e-06, + "loss": 0.4178, + "step": 2634 + }, + { + "epoch": 0.11925775062231274, + "grad_norm": 1.0114217801817902, + "learning_rate": 9.792764335661885e-06, + "loss": 0.4096, + "step": 2635 + }, + { + "epoch": 0.1193030097307083, + "grad_norm": 0.6756070247331848, + "learning_rate": 9.792555464180813e-06, + "loss": 0.5141, + "step": 2636 + }, + { + "epoch": 0.11934826883910386, + "grad_norm": 0.5561437651032958, + "learning_rate": 9.792346489722443e-06, + "loss": 0.5274, + "step": 2637 + }, + { + "epoch": 0.11939352794749944, + "grad_norm": 0.9687338809941708, + "learning_rate": 9.792137412291265e-06, + "loss": 0.4768, + "step": 2638 + }, + { + "epoch": 0.119438787055895, + "grad_norm": 0.7698369750400488, + "learning_rate": 9.791928231891771e-06, + "loss": 0.4102, + "step": 2639 + }, + { + "epoch": 0.11948404616429056, + "grad_norm": 0.7731517828178811, + "learning_rate": 9.791718948528457e-06, + "loss": 0.4375, + "step": 2640 + }, + { + "epoch": 0.11952930527268613, + "grad_norm": 0.92130600172214, + "learning_rate": 9.79150956220582e-06, + "loss": 0.4676, + "step": 2641 + }, + { + "epoch": 0.11957456438108169, + "grad_norm": 0.6881212305908873, + "learning_rate": 9.79130007292836e-06, + "loss": 0.5164, + "step": 2642 + }, + { + "epoch": 0.11961982348947725, + "grad_norm": 0.7206591094332322, + "learning_rate": 9.791090480700575e-06, + "loss": 0.4386, + "step": 2643 + }, + { + "epoch": 0.11966508259787283, + "grad_norm": 0.4618535399153771, + "learning_rate": 9.790880785526971e-06, + "loss": 0.5285, + "step": 2644 + }, + { + "epoch": 0.11971034170626839, + "grad_norm": 0.746839294471897, + "learning_rate": 9.790670987412052e-06, + "loss": 0.4009, + "step": 2645 + }, + { + "epoch": 0.11975560081466395, + "grad_norm": 0.7155483593476287, + "learning_rate": 9.790461086360327e-06, + "loss": 0.3695, + "step": 2646 + }, + { + "epoch": 0.11980085992305951, + "grad_norm": 0.7112119393094498, + "learning_rate": 9.790251082376308e-06, + "loss": 0.4128, + "step": 2647 + }, + { + "epoch": 0.11984611903145508, + "grad_norm": 0.7763810167513379, + "learning_rate": 9.790040975464503e-06, + "loss": 0.4237, + "step": 2648 + }, + { + "epoch": 0.11989137813985065, + "grad_norm": 0.712552760136062, + "learning_rate": 9.78983076562943e-06, + "loss": 0.4416, + "step": 2649 + }, + { + "epoch": 0.11993663724824621, + "grad_norm": 0.7682964438579254, + "learning_rate": 9.789620452875605e-06, + "loss": 0.4608, + "step": 2650 + }, + { + "epoch": 0.11998189635664178, + "grad_norm": 0.87596034032508, + "learning_rate": 9.789410037207546e-06, + "loss": 0.5363, + "step": 2651 + }, + { + "epoch": 0.12002715546503734, + "grad_norm": 0.7109720872914763, + "learning_rate": 9.789199518629774e-06, + "loss": 0.4195, + "step": 2652 + }, + { + "epoch": 0.1200724145734329, + "grad_norm": 0.7260005596256348, + "learning_rate": 9.788988897146814e-06, + "loss": 0.446, + "step": 2653 + }, + { + "epoch": 0.12011767368182846, + "grad_norm": 0.7833435501815497, + "learning_rate": 9.788778172763191e-06, + "loss": 0.4292, + "step": 2654 + }, + { + "epoch": 0.12016293279022404, + "grad_norm": 0.3969348371479244, + "learning_rate": 9.788567345483434e-06, + "loss": 0.5069, + "step": 2655 + }, + { + "epoch": 0.1202081918986196, + "grad_norm": 0.7136665781135838, + "learning_rate": 9.78835641531207e-06, + "loss": 0.4012, + "step": 2656 + }, + { + "epoch": 0.12025345100701516, + "grad_norm": 0.715615027824156, + "learning_rate": 9.788145382253633e-06, + "loss": 0.4371, + "step": 2657 + }, + { + "epoch": 0.12029871011541073, + "grad_norm": 0.507504666798424, + "learning_rate": 9.787934246312657e-06, + "loss": 0.5096, + "step": 2658 + }, + { + "epoch": 0.12034396922380629, + "grad_norm": 0.4527911516398708, + "learning_rate": 9.787723007493681e-06, + "loss": 0.5422, + "step": 2659 + }, + { + "epoch": 0.12038922833220185, + "grad_norm": 0.7400944870716484, + "learning_rate": 9.787511665801242e-06, + "loss": 0.3803, + "step": 2660 + }, + { + "epoch": 0.12043448744059743, + "grad_norm": 0.6825565016927304, + "learning_rate": 9.78730022123988e-06, + "loss": 0.4075, + "step": 2661 + }, + { + "epoch": 0.12047974654899299, + "grad_norm": 0.4016213987042365, + "learning_rate": 9.787088673814137e-06, + "loss": 0.491, + "step": 2662 + }, + { + "epoch": 0.12052500565738855, + "grad_norm": 0.6816641941164435, + "learning_rate": 9.786877023528564e-06, + "loss": 0.3884, + "step": 2663 + }, + { + "epoch": 0.12057026476578411, + "grad_norm": 0.49056929924742526, + "learning_rate": 9.786665270387706e-06, + "loss": 0.5096, + "step": 2664 + }, + { + "epoch": 0.12061552387417968, + "grad_norm": 0.40521515766664484, + "learning_rate": 9.78645341439611e-06, + "loss": 0.4994, + "step": 2665 + }, + { + "epoch": 0.12066078298257524, + "grad_norm": 0.8613575720890704, + "learning_rate": 9.786241455558332e-06, + "loss": 0.4777, + "step": 2666 + }, + { + "epoch": 0.12070604209097081, + "grad_norm": 0.7503037569130684, + "learning_rate": 9.786029393878925e-06, + "loss": 0.4586, + "step": 2667 + }, + { + "epoch": 0.12075130119936638, + "grad_norm": 0.6666433288490968, + "learning_rate": 9.785817229362445e-06, + "loss": 0.3881, + "step": 2668 + }, + { + "epoch": 0.12079656030776194, + "grad_norm": 0.895421914416902, + "learning_rate": 9.78560496201345e-06, + "loss": 0.399, + "step": 2669 + }, + { + "epoch": 0.1208418194161575, + "grad_norm": 0.7827897135331325, + "learning_rate": 9.785392591836504e-06, + "loss": 0.4544, + "step": 2670 + }, + { + "epoch": 0.12088707852455306, + "grad_norm": 0.7347360025993976, + "learning_rate": 9.785180118836169e-06, + "loss": 0.455, + "step": 2671 + }, + { + "epoch": 0.12093233763294862, + "grad_norm": 0.828298118763274, + "learning_rate": 9.784967543017008e-06, + "loss": 0.4535, + "step": 2672 + }, + { + "epoch": 0.1209775967413442, + "grad_norm": 0.7050553279671258, + "learning_rate": 9.784754864383593e-06, + "loss": 0.3994, + "step": 2673 + }, + { + "epoch": 0.12102285584973976, + "grad_norm": 0.7759845857752202, + "learning_rate": 9.784542082940488e-06, + "loss": 0.4549, + "step": 2674 + }, + { + "epoch": 0.12106811495813533, + "grad_norm": 0.9078020537580649, + "learning_rate": 9.784329198692269e-06, + "loss": 0.4441, + "step": 2675 + }, + { + "epoch": 0.12111337406653089, + "grad_norm": 0.7967655768014362, + "learning_rate": 9.78411621164351e-06, + "loss": 0.4718, + "step": 2676 + }, + { + "epoch": 0.12115863317492645, + "grad_norm": 0.6867947079019111, + "learning_rate": 9.783903121798787e-06, + "loss": 0.436, + "step": 2677 + }, + { + "epoch": 0.12120389228332201, + "grad_norm": 0.7372737057900989, + "learning_rate": 9.783689929162679e-06, + "loss": 0.4426, + "step": 2678 + }, + { + "epoch": 0.12124915139171759, + "grad_norm": 0.6993752887877634, + "learning_rate": 9.783476633739766e-06, + "loss": 0.4679, + "step": 2679 + }, + { + "epoch": 0.12129441050011315, + "grad_norm": 0.6940593830233261, + "learning_rate": 9.783263235534632e-06, + "loss": 0.4123, + "step": 2680 + }, + { + "epoch": 0.12133966960850871, + "grad_norm": 0.7090396183152797, + "learning_rate": 9.783049734551861e-06, + "loss": 0.398, + "step": 2681 + }, + { + "epoch": 0.12138492871690428, + "grad_norm": 0.6781535914506774, + "learning_rate": 9.78283613079604e-06, + "loss": 0.4313, + "step": 2682 + }, + { + "epoch": 0.12143018782529984, + "grad_norm": 1.229670729375344, + "learning_rate": 9.782622424271761e-06, + "loss": 0.5487, + "step": 2683 + }, + { + "epoch": 0.1214754469336954, + "grad_norm": 0.8239341407113727, + "learning_rate": 9.782408614983616e-06, + "loss": 0.3965, + "step": 2684 + }, + { + "epoch": 0.12152070604209098, + "grad_norm": 0.7395028627363661, + "learning_rate": 9.782194702936198e-06, + "loss": 0.4073, + "step": 2685 + }, + { + "epoch": 0.12156596515048654, + "grad_norm": 0.6849031343236605, + "learning_rate": 9.781980688134102e-06, + "loss": 0.4154, + "step": 2686 + }, + { + "epoch": 0.1216112242588821, + "grad_norm": 1.2292072746561793, + "learning_rate": 9.781766570581927e-06, + "loss": 0.4413, + "step": 2687 + }, + { + "epoch": 0.12165648336727766, + "grad_norm": 0.7494350410309273, + "learning_rate": 9.781552350284275e-06, + "loss": 0.4313, + "step": 2688 + }, + { + "epoch": 0.12170174247567322, + "grad_norm": 0.7544210475047712, + "learning_rate": 9.78133802724575e-06, + "loss": 0.4419, + "step": 2689 + }, + { + "epoch": 0.12174700158406879, + "grad_norm": 0.8333559848106804, + "learning_rate": 9.781123601470953e-06, + "loss": 0.4334, + "step": 2690 + }, + { + "epoch": 0.12179226069246436, + "grad_norm": 1.3548197867903617, + "learning_rate": 9.780909072964497e-06, + "loss": 0.5363, + "step": 2691 + }, + { + "epoch": 0.12183751980085993, + "grad_norm": 0.8618630036683134, + "learning_rate": 9.780694441730987e-06, + "loss": 0.5295, + "step": 2692 + }, + { + "epoch": 0.12188277890925549, + "grad_norm": 0.45892450903463605, + "learning_rate": 9.780479707775035e-06, + "loss": 0.4854, + "step": 2693 + }, + { + "epoch": 0.12192803801765105, + "grad_norm": 0.7846049894036222, + "learning_rate": 9.780264871101256e-06, + "loss": 0.438, + "step": 2694 + }, + { + "epoch": 0.12197329712604661, + "grad_norm": 0.7605031550438223, + "learning_rate": 9.78004993171427e-06, + "loss": 0.4153, + "step": 2695 + }, + { + "epoch": 0.12201855623444219, + "grad_norm": 0.7357793468390382, + "learning_rate": 9.77983488961869e-06, + "loss": 0.4399, + "step": 2696 + }, + { + "epoch": 0.12206381534283775, + "grad_norm": 0.7580043201789151, + "learning_rate": 9.779619744819136e-06, + "loss": 0.4408, + "step": 2697 + }, + { + "epoch": 0.12210907445123331, + "grad_norm": 2.103868880774699, + "learning_rate": 9.779404497320236e-06, + "loss": 0.5704, + "step": 2698 + }, + { + "epoch": 0.12215433355962887, + "grad_norm": 0.7871003114405238, + "learning_rate": 9.77918914712661e-06, + "loss": 0.3799, + "step": 2699 + }, + { + "epoch": 0.12219959266802444, + "grad_norm": 0.7841364790364305, + "learning_rate": 9.778973694242888e-06, + "loss": 0.474, + "step": 2700 + }, + { + "epoch": 0.12224485177642, + "grad_norm": 0.8486597643275121, + "learning_rate": 9.7787581386737e-06, + "loss": 0.4336, + "step": 2701 + }, + { + "epoch": 0.12229011088481558, + "grad_norm": 0.7196137791219471, + "learning_rate": 9.778542480423677e-06, + "loss": 0.4111, + "step": 2702 + }, + { + "epoch": 0.12233536999321114, + "grad_norm": 0.7347694160748849, + "learning_rate": 9.77832671949745e-06, + "loss": 0.4042, + "step": 2703 + }, + { + "epoch": 0.1223806291016067, + "grad_norm": 0.9636687858399887, + "learning_rate": 9.778110855899659e-06, + "loss": 0.5408, + "step": 2704 + }, + { + "epoch": 0.12242588821000226, + "grad_norm": 0.8111963342960673, + "learning_rate": 9.777894889634939e-06, + "loss": 0.4039, + "step": 2705 + }, + { + "epoch": 0.12247114731839782, + "grad_norm": 0.6695899517416207, + "learning_rate": 9.777678820707932e-06, + "loss": 0.5425, + "step": 2706 + }, + { + "epoch": 0.12251640642679339, + "grad_norm": 0.7610453505809822, + "learning_rate": 9.777462649123281e-06, + "loss": 0.3837, + "step": 2707 + }, + { + "epoch": 0.12256166553518896, + "grad_norm": 0.7326549088785459, + "learning_rate": 9.777246374885631e-06, + "loss": 0.3737, + "step": 2708 + }, + { + "epoch": 0.12260692464358453, + "grad_norm": 0.6840927367574374, + "learning_rate": 9.77702999799963e-06, + "loss": 0.3914, + "step": 2709 + }, + { + "epoch": 0.12265218375198009, + "grad_norm": 0.9382804953482135, + "learning_rate": 9.776813518469924e-06, + "loss": 0.5241, + "step": 2710 + }, + { + "epoch": 0.12269744286037565, + "grad_norm": 0.8627904070576863, + "learning_rate": 9.776596936301168e-06, + "loss": 0.5221, + "step": 2711 + }, + { + "epoch": 0.12274270196877121, + "grad_norm": 0.6784951736072385, + "learning_rate": 9.776380251498013e-06, + "loss": 0.4575, + "step": 2712 + }, + { + "epoch": 0.12278796107716677, + "grad_norm": 0.6935729007977068, + "learning_rate": 9.776163464065115e-06, + "loss": 0.4268, + "step": 2713 + }, + { + "epoch": 0.12283322018556235, + "grad_norm": 0.7215493944586941, + "learning_rate": 9.775946574007133e-06, + "loss": 0.4375, + "step": 2714 + }, + { + "epoch": 0.12287847929395791, + "grad_norm": 0.791182864674943, + "learning_rate": 9.775729581328728e-06, + "loss": 0.4137, + "step": 2715 + }, + { + "epoch": 0.12292373840235347, + "grad_norm": 0.69097511820002, + "learning_rate": 9.775512486034564e-06, + "loss": 0.4204, + "step": 2716 + }, + { + "epoch": 0.12296899751074904, + "grad_norm": 0.7109609228395887, + "learning_rate": 9.775295288129301e-06, + "loss": 0.4299, + "step": 2717 + }, + { + "epoch": 0.1230142566191446, + "grad_norm": 0.6922140625025477, + "learning_rate": 9.775077987617609e-06, + "loss": 0.4669, + "step": 2718 + }, + { + "epoch": 0.12305951572754016, + "grad_norm": 0.7454249071139529, + "learning_rate": 9.774860584504156e-06, + "loss": 0.4484, + "step": 2719 + }, + { + "epoch": 0.12310477483593574, + "grad_norm": 0.6929325920676569, + "learning_rate": 9.774643078793616e-06, + "loss": 0.3958, + "step": 2720 + }, + { + "epoch": 0.1231500339443313, + "grad_norm": 0.6871179296450526, + "learning_rate": 9.774425470490657e-06, + "loss": 0.4551, + "step": 2721 + }, + { + "epoch": 0.12319529305272686, + "grad_norm": 0.6682448994056809, + "learning_rate": 9.774207759599961e-06, + "loss": 0.446, + "step": 2722 + }, + { + "epoch": 0.12324055216112242, + "grad_norm": 0.6988690432746627, + "learning_rate": 9.773989946126202e-06, + "loss": 0.4169, + "step": 2723 + }, + { + "epoch": 0.12328581126951799, + "grad_norm": 0.6891060920685302, + "learning_rate": 9.773772030074062e-06, + "loss": 0.3707, + "step": 2724 + }, + { + "epoch": 0.12333107037791355, + "grad_norm": 0.6759094580335833, + "learning_rate": 9.773554011448221e-06, + "loss": 0.4012, + "step": 2725 + }, + { + "epoch": 0.12337632948630912, + "grad_norm": 0.6820553553816232, + "learning_rate": 9.773335890253367e-06, + "loss": 0.3912, + "step": 2726 + }, + { + "epoch": 0.12342158859470469, + "grad_norm": 0.6928170927079416, + "learning_rate": 9.773117666494183e-06, + "loss": 0.4546, + "step": 2727 + }, + { + "epoch": 0.12346684770310025, + "grad_norm": 0.7628843441717775, + "learning_rate": 9.772899340175362e-06, + "loss": 0.4215, + "step": 2728 + }, + { + "epoch": 0.12351210681149581, + "grad_norm": 0.7729846245333306, + "learning_rate": 9.772680911301592e-06, + "loss": 0.4508, + "step": 2729 + }, + { + "epoch": 0.12355736591989137, + "grad_norm": 1.8521663449637993, + "learning_rate": 9.772462379877566e-06, + "loss": 0.5501, + "step": 2730 + }, + { + "epoch": 0.12360262502828694, + "grad_norm": 0.7956300766318737, + "learning_rate": 9.772243745907983e-06, + "loss": 0.4294, + "step": 2731 + }, + { + "epoch": 0.12364788413668251, + "grad_norm": 0.7989248544289832, + "learning_rate": 9.772025009397538e-06, + "loss": 0.427, + "step": 2732 + }, + { + "epoch": 0.12369314324507807, + "grad_norm": 0.6440191845101102, + "learning_rate": 9.771806170350931e-06, + "loss": 0.5273, + "step": 2733 + }, + { + "epoch": 0.12373840235347364, + "grad_norm": 0.8104647497365935, + "learning_rate": 9.771587228772866e-06, + "loss": 0.3975, + "step": 2734 + }, + { + "epoch": 0.1237836614618692, + "grad_norm": 0.7902673158685333, + "learning_rate": 9.771368184668046e-06, + "loss": 0.5167, + "step": 2735 + }, + { + "epoch": 0.12382892057026476, + "grad_norm": 1.0776498695175314, + "learning_rate": 9.771149038041177e-06, + "loss": 0.4532, + "step": 2736 + }, + { + "epoch": 0.12387417967866034, + "grad_norm": 0.7327511682649216, + "learning_rate": 9.77092978889697e-06, + "loss": 0.4391, + "step": 2737 + }, + { + "epoch": 0.1239194387870559, + "grad_norm": 1.005928049604739, + "learning_rate": 9.770710437240134e-06, + "loss": 0.5269, + "step": 2738 + }, + { + "epoch": 0.12396469789545146, + "grad_norm": 0.8360074267582877, + "learning_rate": 9.770490983075383e-06, + "loss": 0.457, + "step": 2739 + }, + { + "epoch": 0.12400995700384702, + "grad_norm": 0.8991652457166528, + "learning_rate": 9.770271426407432e-06, + "loss": 0.4054, + "step": 2740 + }, + { + "epoch": 0.12405521611224259, + "grad_norm": 0.7664564769263479, + "learning_rate": 9.770051767241e-06, + "loss": 0.3936, + "step": 2741 + }, + { + "epoch": 0.12410047522063815, + "grad_norm": 0.77113558498915, + "learning_rate": 9.769832005580804e-06, + "loss": 0.5359, + "step": 2742 + }, + { + "epoch": 0.12414573432903372, + "grad_norm": 0.8478107407330637, + "learning_rate": 9.769612141431568e-06, + "loss": 0.4158, + "step": 2743 + }, + { + "epoch": 0.12419099343742929, + "grad_norm": 0.795928760884458, + "learning_rate": 9.769392174798017e-06, + "loss": 0.3909, + "step": 2744 + }, + { + "epoch": 0.12423625254582485, + "grad_norm": 0.9940043746867019, + "learning_rate": 9.769172105684875e-06, + "loss": 0.4278, + "step": 2745 + }, + { + "epoch": 0.12428151165422041, + "grad_norm": 0.7112341670412834, + "learning_rate": 9.76895193409687e-06, + "loss": 0.4406, + "step": 2746 + }, + { + "epoch": 0.12432677076261597, + "grad_norm": 0.7573968135300648, + "learning_rate": 9.768731660038737e-06, + "loss": 0.4156, + "step": 2747 + }, + { + "epoch": 0.12437202987101154, + "grad_norm": 1.113628090493707, + "learning_rate": 9.768511283515207e-06, + "loss": 0.4301, + "step": 2748 + }, + { + "epoch": 0.12441728897940711, + "grad_norm": 0.6786265356978682, + "learning_rate": 9.768290804531013e-06, + "loss": 0.4271, + "step": 2749 + }, + { + "epoch": 0.12446254808780267, + "grad_norm": 0.7577079928644646, + "learning_rate": 9.768070223090896e-06, + "loss": 0.5163, + "step": 2750 + }, + { + "epoch": 0.12450780719619824, + "grad_norm": 0.5699591505059829, + "learning_rate": 9.767849539199594e-06, + "loss": 0.5325, + "step": 2751 + }, + { + "epoch": 0.1245530663045938, + "grad_norm": 1.6891404275229212, + "learning_rate": 9.767628752861848e-06, + "loss": 0.4455, + "step": 2752 + }, + { + "epoch": 0.12459832541298936, + "grad_norm": 0.7397212184485615, + "learning_rate": 9.767407864082404e-06, + "loss": 0.4223, + "step": 2753 + }, + { + "epoch": 0.12464358452138492, + "grad_norm": 0.7178882499970353, + "learning_rate": 9.767186872866004e-06, + "loss": 0.3905, + "step": 2754 + }, + { + "epoch": 0.1246888436297805, + "grad_norm": 0.7992228039565072, + "learning_rate": 9.766965779217401e-06, + "loss": 0.4232, + "step": 2755 + }, + { + "epoch": 0.12473410273817606, + "grad_norm": 0.7976996216847373, + "learning_rate": 9.766744583141345e-06, + "loss": 0.4512, + "step": 2756 + }, + { + "epoch": 0.12477936184657162, + "grad_norm": 0.6992639657248829, + "learning_rate": 9.766523284642588e-06, + "loss": 0.4165, + "step": 2757 + }, + { + "epoch": 0.12482462095496719, + "grad_norm": 0.6921757743659276, + "learning_rate": 9.766301883725884e-06, + "loss": 0.4365, + "step": 2758 + }, + { + "epoch": 0.12486988006336275, + "grad_norm": 0.7561530071180924, + "learning_rate": 9.76608038039599e-06, + "loss": 0.4301, + "step": 2759 + }, + { + "epoch": 0.12491513917175831, + "grad_norm": 0.8004041588160753, + "learning_rate": 9.765858774657669e-06, + "loss": 0.4177, + "step": 2760 + }, + { + "epoch": 0.12496039828015389, + "grad_norm": 1.0769918767267206, + "learning_rate": 9.76563706651568e-06, + "loss": 0.5095, + "step": 2761 + }, + { + "epoch": 0.12500565738854943, + "grad_norm": 0.8997733531140661, + "learning_rate": 9.765415255974784e-06, + "loss": 0.5023, + "step": 2762 + }, + { + "epoch": 0.125050916496945, + "grad_norm": 0.433269329782108, + "learning_rate": 9.765193343039751e-06, + "loss": 0.5207, + "step": 2763 + }, + { + "epoch": 0.1250961756053406, + "grad_norm": 0.9853752511046728, + "learning_rate": 9.76497132771535e-06, + "loss": 0.4668, + "step": 2764 + }, + { + "epoch": 0.12514143471373615, + "grad_norm": 0.8148443072198143, + "learning_rate": 9.764749210006348e-06, + "loss": 0.4301, + "step": 2765 + }, + { + "epoch": 0.1251866938221317, + "grad_norm": 0.8802776513421972, + "learning_rate": 9.76452698991752e-06, + "loss": 0.4973, + "step": 2766 + }, + { + "epoch": 0.12523195293052727, + "grad_norm": 0.855492498623336, + "learning_rate": 9.76430466745364e-06, + "loss": 0.4407, + "step": 2767 + }, + { + "epoch": 0.12527721203892284, + "grad_norm": 0.8785273791521889, + "learning_rate": 9.764082242619485e-06, + "loss": 0.3817, + "step": 2768 + }, + { + "epoch": 0.1253224711473184, + "grad_norm": 1.557105939671637, + "learning_rate": 9.763859715419834e-06, + "loss": 0.53, + "step": 2769 + }, + { + "epoch": 0.12536773025571396, + "grad_norm": 1.066750154692408, + "learning_rate": 9.76363708585947e-06, + "loss": 0.4435, + "step": 2770 + }, + { + "epoch": 0.12541298936410952, + "grad_norm": 1.162892927861453, + "learning_rate": 9.763414353943175e-06, + "loss": 0.5445, + "step": 2771 + }, + { + "epoch": 0.12545824847250509, + "grad_norm": 0.8886752219994062, + "learning_rate": 9.763191519675735e-06, + "loss": 0.4512, + "step": 2772 + }, + { + "epoch": 0.12550350758090065, + "grad_norm": 0.8803921244404787, + "learning_rate": 9.762968583061938e-06, + "loss": 0.458, + "step": 2773 + }, + { + "epoch": 0.1255487666892962, + "grad_norm": 0.9265125219905003, + "learning_rate": 9.762745544106576e-06, + "loss": 0.4956, + "step": 2774 + }, + { + "epoch": 0.12559402579769177, + "grad_norm": 1.0048813765796745, + "learning_rate": 9.762522402814438e-06, + "loss": 0.47, + "step": 2775 + }, + { + "epoch": 0.12563928490608736, + "grad_norm": 1.0089599105936264, + "learning_rate": 9.762299159190322e-06, + "loss": 0.5454, + "step": 2776 + }, + { + "epoch": 0.12568454401448292, + "grad_norm": 0.7490852698867788, + "learning_rate": 9.762075813239022e-06, + "loss": 0.4505, + "step": 2777 + }, + { + "epoch": 0.1257298031228785, + "grad_norm": 0.7337323557104144, + "learning_rate": 9.761852364965339e-06, + "loss": 0.4108, + "step": 2778 + }, + { + "epoch": 0.12577506223127405, + "grad_norm": 0.8230115914730641, + "learning_rate": 9.761628814374074e-06, + "loss": 0.4365, + "step": 2779 + }, + { + "epoch": 0.1258203213396696, + "grad_norm": 0.7377174974714752, + "learning_rate": 9.76140516147003e-06, + "loss": 0.4069, + "step": 2780 + }, + { + "epoch": 0.12586558044806517, + "grad_norm": 0.7771110398697952, + "learning_rate": 9.761181406258012e-06, + "loss": 0.4056, + "step": 2781 + }, + { + "epoch": 0.12591083955646074, + "grad_norm": 0.8547150246842478, + "learning_rate": 9.760957548742828e-06, + "loss": 0.4479, + "step": 2782 + }, + { + "epoch": 0.1259560986648563, + "grad_norm": 0.7135937835367194, + "learning_rate": 9.760733588929289e-06, + "loss": 0.3704, + "step": 2783 + }, + { + "epoch": 0.12600135777325186, + "grad_norm": 0.733145252105959, + "learning_rate": 9.760509526822206e-06, + "loss": 0.4439, + "step": 2784 + }, + { + "epoch": 0.12604661688164742, + "grad_norm": 0.7259614720917196, + "learning_rate": 9.760285362426397e-06, + "loss": 0.4677, + "step": 2785 + }, + { + "epoch": 0.12609187599004298, + "grad_norm": 0.7895174836944507, + "learning_rate": 9.760061095746671e-06, + "loss": 0.422, + "step": 2786 + }, + { + "epoch": 0.12613713509843857, + "grad_norm": 0.7242396310884901, + "learning_rate": 9.759836726787855e-06, + "loss": 0.4348, + "step": 2787 + }, + { + "epoch": 0.12618239420683414, + "grad_norm": 0.7492140269556657, + "learning_rate": 9.759612255554765e-06, + "loss": 0.4439, + "step": 2788 + }, + { + "epoch": 0.1262276533152297, + "grad_norm": 0.7253084120049096, + "learning_rate": 9.759387682052226e-06, + "loss": 0.4383, + "step": 2789 + }, + { + "epoch": 0.12627291242362526, + "grad_norm": 1.1116143910529677, + "learning_rate": 9.759163006285064e-06, + "loss": 0.5338, + "step": 2790 + }, + { + "epoch": 0.12631817153202082, + "grad_norm": 0.8685718392395363, + "learning_rate": 9.758938228258103e-06, + "loss": 0.4255, + "step": 2791 + }, + { + "epoch": 0.12636343064041639, + "grad_norm": 0.7273351825438547, + "learning_rate": 9.758713347976179e-06, + "loss": 0.4382, + "step": 2792 + }, + { + "epoch": 0.12640868974881195, + "grad_norm": 0.7179864020192804, + "learning_rate": 9.758488365444117e-06, + "loss": 0.4311, + "step": 2793 + }, + { + "epoch": 0.1264539488572075, + "grad_norm": 0.6649989419467585, + "learning_rate": 9.758263280666757e-06, + "loss": 0.4111, + "step": 2794 + }, + { + "epoch": 0.12649920796560307, + "grad_norm": 0.4415612740759909, + "learning_rate": 9.758038093648931e-06, + "loss": 0.5024, + "step": 2795 + }, + { + "epoch": 0.12654446707399863, + "grad_norm": 0.8351532083948827, + "learning_rate": 9.757812804395482e-06, + "loss": 0.4213, + "step": 2796 + }, + { + "epoch": 0.1265897261823942, + "grad_norm": 1.0093853854686414, + "learning_rate": 9.757587412911247e-06, + "loss": 0.4192, + "step": 2797 + }, + { + "epoch": 0.12663498529078976, + "grad_norm": 0.6988444753162972, + "learning_rate": 9.75736191920107e-06, + "loss": 0.4206, + "step": 2798 + }, + { + "epoch": 0.12668024439918535, + "grad_norm": 0.7719411662818325, + "learning_rate": 9.757136323269798e-06, + "loss": 0.4357, + "step": 2799 + }, + { + "epoch": 0.1267255035075809, + "grad_norm": 0.8921782142026669, + "learning_rate": 9.756910625122276e-06, + "loss": 0.4749, + "step": 2800 + }, + { + "epoch": 0.12677076261597647, + "grad_norm": 0.7741589039777517, + "learning_rate": 9.756684824763354e-06, + "loss": 0.4751, + "step": 2801 + }, + { + "epoch": 0.12681602172437204, + "grad_norm": 0.7571345424322213, + "learning_rate": 9.756458922197884e-06, + "loss": 0.4489, + "step": 2802 + }, + { + "epoch": 0.1268612808327676, + "grad_norm": 0.8639275063462156, + "learning_rate": 9.756232917430719e-06, + "loss": 0.5225, + "step": 2803 + }, + { + "epoch": 0.12690653994116316, + "grad_norm": 0.8443327036951966, + "learning_rate": 9.756006810466719e-06, + "loss": 0.4155, + "step": 2804 + }, + { + "epoch": 0.12695179904955872, + "grad_norm": 0.8229293203051627, + "learning_rate": 9.755780601310738e-06, + "loss": 0.4464, + "step": 2805 + }, + { + "epoch": 0.12699705815795428, + "grad_norm": 0.8798994860945353, + "learning_rate": 9.755554289967638e-06, + "loss": 0.4316, + "step": 2806 + }, + { + "epoch": 0.12704231726634985, + "grad_norm": 0.7797064513535364, + "learning_rate": 9.755327876442282e-06, + "loss": 0.4324, + "step": 2807 + }, + { + "epoch": 0.1270875763747454, + "grad_norm": 0.46667805555480824, + "learning_rate": 9.755101360739537e-06, + "loss": 0.5179, + "step": 2808 + }, + { + "epoch": 0.12713283548314097, + "grad_norm": 1.1732456802108047, + "learning_rate": 9.754874742864264e-06, + "loss": 0.4062, + "step": 2809 + }, + { + "epoch": 0.12717809459153653, + "grad_norm": 0.7681936991538912, + "learning_rate": 9.754648022821339e-06, + "loss": 0.4489, + "step": 2810 + }, + { + "epoch": 0.12722335369993212, + "grad_norm": 0.8233301252300935, + "learning_rate": 9.754421200615629e-06, + "loss": 0.4357, + "step": 2811 + }, + { + "epoch": 0.12726861280832769, + "grad_norm": 1.5611820942128543, + "learning_rate": 9.75419427625201e-06, + "loss": 0.4545, + "step": 2812 + }, + { + "epoch": 0.12731387191672325, + "grad_norm": 0.7366516657356496, + "learning_rate": 9.753967249735359e-06, + "loss": 0.4269, + "step": 2813 + }, + { + "epoch": 0.1273591310251188, + "grad_norm": 1.197383763338776, + "learning_rate": 9.753740121070552e-06, + "loss": 0.4172, + "step": 2814 + }, + { + "epoch": 0.12740439013351437, + "grad_norm": 0.5187181295570135, + "learning_rate": 9.753512890262468e-06, + "loss": 0.523, + "step": 2815 + }, + { + "epoch": 0.12744964924190993, + "grad_norm": 0.7402400139361618, + "learning_rate": 9.753285557315993e-06, + "loss": 0.4342, + "step": 2816 + }, + { + "epoch": 0.1274949083503055, + "grad_norm": 0.4179812876405542, + "learning_rate": 9.75305812223601e-06, + "loss": 0.5042, + "step": 2817 + }, + { + "epoch": 0.12754016745870106, + "grad_norm": 0.956692225707494, + "learning_rate": 9.752830585027406e-06, + "loss": 0.4321, + "step": 2818 + }, + { + "epoch": 0.12758542656709662, + "grad_norm": 0.7580316124124711, + "learning_rate": 9.752602945695068e-06, + "loss": 0.4106, + "step": 2819 + }, + { + "epoch": 0.12763068567549218, + "grad_norm": 0.6779058714280451, + "learning_rate": 9.75237520424389e-06, + "loss": 0.4584, + "step": 2820 + }, + { + "epoch": 0.12767594478388775, + "grad_norm": 0.7006360886542689, + "learning_rate": 9.752147360678767e-06, + "loss": 0.4538, + "step": 2821 + }, + { + "epoch": 0.1277212038922833, + "grad_norm": 0.4225543051078938, + "learning_rate": 9.75191941500459e-06, + "loss": 0.5084, + "step": 2822 + }, + { + "epoch": 0.1277664630006789, + "grad_norm": 0.9870591208561422, + "learning_rate": 9.75169136722626e-06, + "loss": 0.4234, + "step": 2823 + }, + { + "epoch": 0.12781172210907446, + "grad_norm": 0.38302841608917704, + "learning_rate": 9.751463217348675e-06, + "loss": 0.497, + "step": 2824 + }, + { + "epoch": 0.12785698121747002, + "grad_norm": 0.7176736840275659, + "learning_rate": 9.75123496537674e-06, + "loss": 0.4236, + "step": 2825 + }, + { + "epoch": 0.12790224032586558, + "grad_norm": 0.34689025847761207, + "learning_rate": 9.751006611315357e-06, + "loss": 0.5039, + "step": 2826 + }, + { + "epoch": 0.12794749943426115, + "grad_norm": 0.3500954668873423, + "learning_rate": 9.750778155169434e-06, + "loss": 0.4995, + "step": 2827 + }, + { + "epoch": 0.1279927585426567, + "grad_norm": 1.0915034728521975, + "learning_rate": 9.75054959694388e-06, + "loss": 0.4577, + "step": 2828 + }, + { + "epoch": 0.12803801765105227, + "grad_norm": 0.36993428839340675, + "learning_rate": 9.750320936643604e-06, + "loss": 0.5616, + "step": 2829 + }, + { + "epoch": 0.12808327675944783, + "grad_norm": 1.1635716200014408, + "learning_rate": 9.75009217427352e-06, + "loss": 0.4061, + "step": 2830 + }, + { + "epoch": 0.1281285358678434, + "grad_norm": 0.7709762211162258, + "learning_rate": 9.749863309838545e-06, + "loss": 0.4129, + "step": 2831 + }, + { + "epoch": 0.12817379497623896, + "grad_norm": 0.8127009596810594, + "learning_rate": 9.749634343343598e-06, + "loss": 0.4606, + "step": 2832 + }, + { + "epoch": 0.12821905408463452, + "grad_norm": 0.7069452105591859, + "learning_rate": 9.749405274793592e-06, + "loss": 0.432, + "step": 2833 + }, + { + "epoch": 0.1282643131930301, + "grad_norm": 0.7539224217869867, + "learning_rate": 9.749176104193456e-06, + "loss": 0.4228, + "step": 2834 + }, + { + "epoch": 0.12830957230142567, + "grad_norm": 0.682126292506584, + "learning_rate": 9.748946831548111e-06, + "loss": 0.3924, + "step": 2835 + }, + { + "epoch": 0.12835483140982124, + "grad_norm": 0.8189363607131463, + "learning_rate": 9.748717456862484e-06, + "loss": 0.4298, + "step": 2836 + }, + { + "epoch": 0.1284000905182168, + "grad_norm": 0.6526943554999189, + "learning_rate": 9.748487980141503e-06, + "loss": 0.4387, + "step": 2837 + }, + { + "epoch": 0.12844534962661236, + "grad_norm": 0.7165030693701668, + "learning_rate": 9.748258401390099e-06, + "loss": 0.4605, + "step": 2838 + }, + { + "epoch": 0.12849060873500792, + "grad_norm": 0.748171676264515, + "learning_rate": 9.748028720613206e-06, + "loss": 0.4589, + "step": 2839 + }, + { + "epoch": 0.12853586784340348, + "grad_norm": 0.6958245051079487, + "learning_rate": 9.747798937815756e-06, + "loss": 0.4405, + "step": 2840 + }, + { + "epoch": 0.12858112695179905, + "grad_norm": 0.7010544328742045, + "learning_rate": 9.74756905300269e-06, + "loss": 0.4166, + "step": 2841 + }, + { + "epoch": 0.1286263860601946, + "grad_norm": 0.6929601001488586, + "learning_rate": 9.747339066178947e-06, + "loss": 0.3861, + "step": 2842 + }, + { + "epoch": 0.12867164516859017, + "grad_norm": 0.7691628549541619, + "learning_rate": 9.747108977349466e-06, + "loss": 0.4512, + "step": 2843 + }, + { + "epoch": 0.12871690427698573, + "grad_norm": 1.5952758417935748, + "learning_rate": 9.746878786519195e-06, + "loss": 0.4443, + "step": 2844 + }, + { + "epoch": 0.1287621633853813, + "grad_norm": 0.7012408391747204, + "learning_rate": 9.746648493693076e-06, + "loss": 0.3726, + "step": 2845 + }, + { + "epoch": 0.12880742249377689, + "grad_norm": 0.7661600137981615, + "learning_rate": 9.74641809887606e-06, + "loss": 0.38, + "step": 2846 + }, + { + "epoch": 0.12885268160217245, + "grad_norm": 0.8000061170172621, + "learning_rate": 9.746187602073097e-06, + "loss": 0.3985, + "step": 2847 + }, + { + "epoch": 0.128897940710568, + "grad_norm": 0.7000898176833337, + "learning_rate": 9.745957003289138e-06, + "loss": 0.4487, + "step": 2848 + }, + { + "epoch": 0.12894319981896357, + "grad_norm": 0.717128506736437, + "learning_rate": 9.745726302529139e-06, + "loss": 0.4188, + "step": 2849 + }, + { + "epoch": 0.12898845892735913, + "grad_norm": 0.7935962162575106, + "learning_rate": 9.745495499798058e-06, + "loss": 0.3798, + "step": 2850 + }, + { + "epoch": 0.1290337180357547, + "grad_norm": 0.7923976032261633, + "learning_rate": 9.745264595100854e-06, + "loss": 0.4351, + "step": 2851 + }, + { + "epoch": 0.12907897714415026, + "grad_norm": 0.747883990023528, + "learning_rate": 9.745033588442487e-06, + "loss": 0.4304, + "step": 2852 + }, + { + "epoch": 0.12912423625254582, + "grad_norm": 0.7459299202346427, + "learning_rate": 9.744802479827921e-06, + "loss": 0.4162, + "step": 2853 + }, + { + "epoch": 0.12916949536094138, + "grad_norm": 0.5887077338666191, + "learning_rate": 9.744571269262122e-06, + "loss": 0.5119, + "step": 2854 + }, + { + "epoch": 0.12921475446933695, + "grad_norm": 0.6729072197590653, + "learning_rate": 9.74433995675006e-06, + "loss": 0.4359, + "step": 2855 + }, + { + "epoch": 0.1292600135777325, + "grad_norm": 0.7777456505468983, + "learning_rate": 9.744108542296702e-06, + "loss": 0.4771, + "step": 2856 + }, + { + "epoch": 0.12930527268612807, + "grad_norm": 0.7469741198993132, + "learning_rate": 9.743877025907023e-06, + "loss": 0.3983, + "step": 2857 + }, + { + "epoch": 0.12935053179452366, + "grad_norm": 0.6858749117382782, + "learning_rate": 9.743645407585994e-06, + "loss": 0.3637, + "step": 2858 + }, + { + "epoch": 0.12939579090291922, + "grad_norm": 0.7031646069476174, + "learning_rate": 9.743413687338596e-06, + "loss": 0.4184, + "step": 2859 + }, + { + "epoch": 0.12944105001131478, + "grad_norm": 0.7255909443098081, + "learning_rate": 9.743181865169806e-06, + "loss": 0.4131, + "step": 2860 + }, + { + "epoch": 0.12948630911971035, + "grad_norm": 0.7322265572281352, + "learning_rate": 9.742949941084604e-06, + "loss": 0.4202, + "step": 2861 + }, + { + "epoch": 0.1295315682281059, + "grad_norm": 0.6886194155596765, + "learning_rate": 9.742717915087978e-06, + "loss": 0.4113, + "step": 2862 + }, + { + "epoch": 0.12957682733650147, + "grad_norm": 0.7652278487384526, + "learning_rate": 9.742485787184907e-06, + "loss": 0.4339, + "step": 2863 + }, + { + "epoch": 0.12962208644489703, + "grad_norm": 0.67801745151185, + "learning_rate": 9.742253557380383e-06, + "loss": 0.4259, + "step": 2864 + }, + { + "epoch": 0.1296673455532926, + "grad_norm": 0.6355102242351469, + "learning_rate": 9.742021225679394e-06, + "loss": 0.4087, + "step": 2865 + }, + { + "epoch": 0.12971260466168816, + "grad_norm": 0.7260698872298267, + "learning_rate": 9.741788792086934e-06, + "loss": 0.502, + "step": 2866 + }, + { + "epoch": 0.12975786377008372, + "grad_norm": 1.0475091478809628, + "learning_rate": 9.741556256607996e-06, + "loss": 0.376, + "step": 2867 + }, + { + "epoch": 0.12980312287847928, + "grad_norm": 0.7174565795305505, + "learning_rate": 9.741323619247575e-06, + "loss": 0.4064, + "step": 2868 + }, + { + "epoch": 0.12984838198687487, + "grad_norm": 0.6816845441724139, + "learning_rate": 9.741090880010674e-06, + "loss": 0.4205, + "step": 2869 + }, + { + "epoch": 0.12989364109527043, + "grad_norm": 0.7404934530715019, + "learning_rate": 9.74085803890229e-06, + "loss": 0.469, + "step": 2870 + }, + { + "epoch": 0.129938900203666, + "grad_norm": 0.7107227611827823, + "learning_rate": 9.740625095927428e-06, + "loss": 0.4323, + "step": 2871 + }, + { + "epoch": 0.12998415931206156, + "grad_norm": 0.6435610487654178, + "learning_rate": 9.74039205109109e-06, + "loss": 0.4506, + "step": 2872 + }, + { + "epoch": 0.13002941842045712, + "grad_norm": 0.6905701954641099, + "learning_rate": 9.740158904398286e-06, + "loss": 0.4603, + "step": 2873 + }, + { + "epoch": 0.13007467752885268, + "grad_norm": 0.6490855543128955, + "learning_rate": 9.739925655854028e-06, + "loss": 0.3842, + "step": 2874 + }, + { + "epoch": 0.13011993663724825, + "grad_norm": 0.7628737285578185, + "learning_rate": 9.739692305463324e-06, + "loss": 0.4419, + "step": 2875 + }, + { + "epoch": 0.1301651957456438, + "grad_norm": 0.7171768424557958, + "learning_rate": 9.739458853231188e-06, + "loss": 0.4759, + "step": 2876 + }, + { + "epoch": 0.13021045485403937, + "grad_norm": 0.6877385757644017, + "learning_rate": 9.739225299162638e-06, + "loss": 0.4431, + "step": 2877 + }, + { + "epoch": 0.13025571396243493, + "grad_norm": 0.7360380510178575, + "learning_rate": 9.738991643262693e-06, + "loss": 0.4724, + "step": 2878 + }, + { + "epoch": 0.1303009730708305, + "grad_norm": 0.7086556215160046, + "learning_rate": 9.738757885536371e-06, + "loss": 0.4279, + "step": 2879 + }, + { + "epoch": 0.13034623217922606, + "grad_norm": 0.7367770757564954, + "learning_rate": 9.738524025988696e-06, + "loss": 0.4422, + "step": 2880 + }, + { + "epoch": 0.13039149128762165, + "grad_norm": 0.7325494044645873, + "learning_rate": 9.738290064624694e-06, + "loss": 0.4302, + "step": 2881 + }, + { + "epoch": 0.1304367503960172, + "grad_norm": 0.7139844892563704, + "learning_rate": 9.73805600144939e-06, + "loss": 0.5225, + "step": 2882 + }, + { + "epoch": 0.13048200950441277, + "grad_norm": 0.7369897055113472, + "learning_rate": 9.737821836467816e-06, + "loss": 0.3924, + "step": 2883 + }, + { + "epoch": 0.13052726861280833, + "grad_norm": 0.7422154925009801, + "learning_rate": 9.737587569685e-06, + "loss": 0.4555, + "step": 2884 + }, + { + "epoch": 0.1305725277212039, + "grad_norm": 0.6495721284288817, + "learning_rate": 9.737353201105978e-06, + "loss": 0.3824, + "step": 2885 + }, + { + "epoch": 0.13061778682959946, + "grad_norm": 0.3972406270595581, + "learning_rate": 9.737118730735786e-06, + "loss": 0.5372, + "step": 2886 + }, + { + "epoch": 0.13066304593799502, + "grad_norm": 0.3759637227326978, + "learning_rate": 9.73688415857946e-06, + "loss": 0.5196, + "step": 2887 + }, + { + "epoch": 0.13070830504639058, + "grad_norm": 0.38323779671317704, + "learning_rate": 9.736649484642044e-06, + "loss": 0.5211, + "step": 2888 + }, + { + "epoch": 0.13075356415478614, + "grad_norm": 0.3246091338161728, + "learning_rate": 9.736414708928576e-06, + "loss": 0.502, + "step": 2889 + }, + { + "epoch": 0.1307988232631817, + "grad_norm": 0.3367150725113058, + "learning_rate": 9.736179831444103e-06, + "loss": 0.5038, + "step": 2890 + }, + { + "epoch": 0.13084408237157727, + "grad_norm": 1.1456990720264577, + "learning_rate": 9.735944852193673e-06, + "loss": 0.4215, + "step": 2891 + }, + { + "epoch": 0.13088934147997283, + "grad_norm": 0.7204268232990344, + "learning_rate": 9.735709771182331e-06, + "loss": 0.4528, + "step": 2892 + }, + { + "epoch": 0.13093460058836842, + "grad_norm": 0.8307652317369043, + "learning_rate": 9.735474588415132e-06, + "loss": 0.4629, + "step": 2893 + }, + { + "epoch": 0.13097985969676398, + "grad_norm": 1.1668789311608647, + "learning_rate": 9.735239303897129e-06, + "loss": 0.4201, + "step": 2894 + }, + { + "epoch": 0.13102511880515955, + "grad_norm": 0.7384757066530285, + "learning_rate": 9.735003917633376e-06, + "loss": 0.4314, + "step": 2895 + }, + { + "epoch": 0.1310703779135551, + "grad_norm": 0.6199463148996311, + "learning_rate": 9.73476842962893e-06, + "loss": 0.5127, + "step": 2896 + }, + { + "epoch": 0.13111563702195067, + "grad_norm": 1.2150021320725093, + "learning_rate": 9.734532839888853e-06, + "loss": 0.4345, + "step": 2897 + }, + { + "epoch": 0.13116089613034623, + "grad_norm": 0.9311562905072975, + "learning_rate": 9.734297148418205e-06, + "loss": 0.4413, + "step": 2898 + }, + { + "epoch": 0.1312061552387418, + "grad_norm": 0.7231301939259253, + "learning_rate": 9.734061355222054e-06, + "loss": 0.4008, + "step": 2899 + }, + { + "epoch": 0.13125141434713736, + "grad_norm": 0.8095404884163195, + "learning_rate": 9.733825460305462e-06, + "loss": 0.3956, + "step": 2900 + }, + { + "epoch": 0.13129667345553292, + "grad_norm": 0.8873271958738239, + "learning_rate": 9.7335894636735e-06, + "loss": 0.4109, + "step": 2901 + }, + { + "epoch": 0.13134193256392848, + "grad_norm": 0.8089331693710592, + "learning_rate": 9.73335336533124e-06, + "loss": 0.4431, + "step": 2902 + }, + { + "epoch": 0.13138719167232404, + "grad_norm": 0.7109221137009005, + "learning_rate": 9.733117165283753e-06, + "loss": 0.4636, + "step": 2903 + }, + { + "epoch": 0.1314324507807196, + "grad_norm": 0.5918246544216307, + "learning_rate": 9.732880863536114e-06, + "loss": 0.5099, + "step": 2904 + }, + { + "epoch": 0.1314777098891152, + "grad_norm": 0.8772212127328848, + "learning_rate": 9.732644460093402e-06, + "loss": 0.4276, + "step": 2905 + }, + { + "epoch": 0.13152296899751076, + "grad_norm": 0.8671021696476754, + "learning_rate": 9.732407954960695e-06, + "loss": 0.4327, + "step": 2906 + }, + { + "epoch": 0.13156822810590632, + "grad_norm": 0.3731968179795825, + "learning_rate": 9.732171348143076e-06, + "loss": 0.5093, + "step": 2907 + }, + { + "epoch": 0.13161348721430188, + "grad_norm": 1.5183166337584588, + "learning_rate": 9.731934639645628e-06, + "loss": 0.3628, + "step": 2908 + }, + { + "epoch": 0.13165874632269745, + "grad_norm": 0.7402805392392205, + "learning_rate": 9.731697829473438e-06, + "loss": 0.4497, + "step": 2909 + }, + { + "epoch": 0.131704005431093, + "grad_norm": 0.4875191976551136, + "learning_rate": 9.731460917631594e-06, + "loss": 0.5167, + "step": 2910 + }, + { + "epoch": 0.13174926453948857, + "grad_norm": 0.8975361419222202, + "learning_rate": 9.731223904125186e-06, + "loss": 0.4501, + "step": 2911 + }, + { + "epoch": 0.13179452364788413, + "grad_norm": 0.7226982167131014, + "learning_rate": 9.730986788959308e-06, + "loss": 0.4582, + "step": 2912 + }, + { + "epoch": 0.1318397827562797, + "grad_norm": 0.7359214134878374, + "learning_rate": 9.730749572139054e-06, + "loss": 0.3826, + "step": 2913 + }, + { + "epoch": 0.13188504186467526, + "grad_norm": 0.6907853883793874, + "learning_rate": 9.730512253669523e-06, + "loss": 0.4567, + "step": 2914 + }, + { + "epoch": 0.13193030097307082, + "grad_norm": 0.7620674928519404, + "learning_rate": 9.730274833555809e-06, + "loss": 0.4673, + "step": 2915 + }, + { + "epoch": 0.1319755600814664, + "grad_norm": 0.4297711923387903, + "learning_rate": 9.730037311803017e-06, + "loss": 0.4913, + "step": 2916 + }, + { + "epoch": 0.13202081918986197, + "grad_norm": 0.3635336031936697, + "learning_rate": 9.72979968841625e-06, + "loss": 0.5311, + "step": 2917 + }, + { + "epoch": 0.13206607829825753, + "grad_norm": 0.31921531659711294, + "learning_rate": 9.729561963400616e-06, + "loss": 0.5029, + "step": 2918 + }, + { + "epoch": 0.1321113374066531, + "grad_norm": 0.8854230801627538, + "learning_rate": 9.72932413676122e-06, + "loss": 0.4328, + "step": 2919 + }, + { + "epoch": 0.13215659651504866, + "grad_norm": 0.7914974220186524, + "learning_rate": 9.729086208503174e-06, + "loss": 0.4549, + "step": 2920 + }, + { + "epoch": 0.13220185562344422, + "grad_norm": 0.6883760552004766, + "learning_rate": 9.728848178631588e-06, + "loss": 0.4069, + "step": 2921 + }, + { + "epoch": 0.13224711473183978, + "grad_norm": 1.0327106749885167, + "learning_rate": 9.72861004715158e-06, + "loss": 0.402, + "step": 2922 + }, + { + "epoch": 0.13229237384023534, + "grad_norm": 0.7813827826117568, + "learning_rate": 9.728371814068265e-06, + "loss": 0.3908, + "step": 2923 + }, + { + "epoch": 0.1323376329486309, + "grad_norm": 0.6625608201070756, + "learning_rate": 9.728133479386763e-06, + "loss": 0.4126, + "step": 2924 + }, + { + "epoch": 0.13238289205702647, + "grad_norm": 0.8204268002708216, + "learning_rate": 9.727895043112192e-06, + "loss": 0.3966, + "step": 2925 + }, + { + "epoch": 0.13242815116542203, + "grad_norm": 0.7096880863141186, + "learning_rate": 9.727656505249676e-06, + "loss": 0.5155, + "step": 2926 + }, + { + "epoch": 0.1324734102738176, + "grad_norm": 0.8209178150784526, + "learning_rate": 9.727417865804343e-06, + "loss": 0.4203, + "step": 2927 + }, + { + "epoch": 0.13251866938221318, + "grad_norm": 1.320863933337403, + "learning_rate": 9.72717912478132e-06, + "loss": 0.4437, + "step": 2928 + }, + { + "epoch": 0.13256392849060875, + "grad_norm": 0.6550034192890875, + "learning_rate": 9.726940282185734e-06, + "loss": 0.4092, + "step": 2929 + }, + { + "epoch": 0.1326091875990043, + "grad_norm": 0.7084129783400551, + "learning_rate": 9.726701338022722e-06, + "loss": 0.4351, + "step": 2930 + }, + { + "epoch": 0.13265444670739987, + "grad_norm": 0.693353211799939, + "learning_rate": 9.726462292297411e-06, + "loss": 0.3808, + "step": 2931 + }, + { + "epoch": 0.13269970581579543, + "grad_norm": 0.7701332472199786, + "learning_rate": 9.726223145014946e-06, + "loss": 0.4215, + "step": 2932 + }, + { + "epoch": 0.132744964924191, + "grad_norm": 0.7094866856303067, + "learning_rate": 9.725983896180458e-06, + "loss": 0.4252, + "step": 2933 + }, + { + "epoch": 0.13279022403258656, + "grad_norm": 0.6232236818498672, + "learning_rate": 9.725744545799093e-06, + "loss": 0.5054, + "step": 2934 + }, + { + "epoch": 0.13283548314098212, + "grad_norm": 0.9707367889864059, + "learning_rate": 9.72550509387599e-06, + "loss": 0.3881, + "step": 2935 + }, + { + "epoch": 0.13288074224937768, + "grad_norm": 0.7360273696473658, + "learning_rate": 9.725265540416296e-06, + "loss": 0.4109, + "step": 2936 + }, + { + "epoch": 0.13292600135777324, + "grad_norm": 0.7238757945473541, + "learning_rate": 9.725025885425159e-06, + "loss": 0.4553, + "step": 2937 + }, + { + "epoch": 0.1329712604661688, + "grad_norm": 0.471024966139833, + "learning_rate": 9.724786128907726e-06, + "loss": 0.5154, + "step": 2938 + }, + { + "epoch": 0.13301651957456437, + "grad_norm": 0.7205417842353122, + "learning_rate": 9.724546270869152e-06, + "loss": 0.4465, + "step": 2939 + }, + { + "epoch": 0.13306177868295996, + "grad_norm": 0.40647518704260016, + "learning_rate": 9.724306311314589e-06, + "loss": 0.5377, + "step": 2940 + }, + { + "epoch": 0.13310703779135552, + "grad_norm": 0.35852848738255716, + "learning_rate": 9.724066250249192e-06, + "loss": 0.5163, + "step": 2941 + }, + { + "epoch": 0.13315229689975108, + "grad_norm": 0.9552604293041855, + "learning_rate": 9.72382608767812e-06, + "loss": 0.4381, + "step": 2942 + }, + { + "epoch": 0.13319755600814664, + "grad_norm": 0.7275710516886414, + "learning_rate": 9.723585823606533e-06, + "loss": 0.4132, + "step": 2943 + }, + { + "epoch": 0.1332428151165422, + "grad_norm": 0.5386892240679365, + "learning_rate": 9.723345458039595e-06, + "loss": 0.5094, + "step": 2944 + }, + { + "epoch": 0.13328807422493777, + "grad_norm": 0.9176692907509812, + "learning_rate": 9.723104990982469e-06, + "loss": 0.4535, + "step": 2945 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 0.7598831526931533, + "learning_rate": 9.722864422440323e-06, + "loss": 0.4384, + "step": 2946 + }, + { + "epoch": 0.1333785924417289, + "grad_norm": 0.6886217263953099, + "learning_rate": 9.722623752418327e-06, + "loss": 0.4696, + "step": 2947 + }, + { + "epoch": 0.13342385155012446, + "grad_norm": 0.7236203182922575, + "learning_rate": 9.722382980921649e-06, + "loss": 0.4127, + "step": 2948 + }, + { + "epoch": 0.13346911065852002, + "grad_norm": 0.7866624324006298, + "learning_rate": 9.722142107955465e-06, + "loss": 0.4252, + "step": 2949 + }, + { + "epoch": 0.13351436976691558, + "grad_norm": 0.7674053504076765, + "learning_rate": 9.721901133524951e-06, + "loss": 0.427, + "step": 2950 + }, + { + "epoch": 0.13355962887531114, + "grad_norm": 0.7596085549378919, + "learning_rate": 9.721660057635284e-06, + "loss": 0.4098, + "step": 2951 + }, + { + "epoch": 0.13360488798370673, + "grad_norm": 0.7883147091388137, + "learning_rate": 9.721418880291642e-06, + "loss": 0.3913, + "step": 2952 + }, + { + "epoch": 0.1336501470921023, + "grad_norm": 0.7018500754706035, + "learning_rate": 9.72117760149921e-06, + "loss": 0.384, + "step": 2953 + }, + { + "epoch": 0.13369540620049786, + "grad_norm": 0.6791992191941137, + "learning_rate": 9.720936221263174e-06, + "loss": 0.4403, + "step": 2954 + }, + { + "epoch": 0.13374066530889342, + "grad_norm": 0.7247397511156864, + "learning_rate": 9.720694739588714e-06, + "loss": 0.4036, + "step": 2955 + }, + { + "epoch": 0.13378592441728898, + "grad_norm": 0.7815257711538628, + "learning_rate": 9.720453156481023e-06, + "loss": 0.4059, + "step": 2956 + }, + { + "epoch": 0.13383118352568454, + "grad_norm": 0.7438749116316435, + "learning_rate": 9.720211471945293e-06, + "loss": 0.4463, + "step": 2957 + }, + { + "epoch": 0.1338764426340801, + "grad_norm": 0.7237329330053868, + "learning_rate": 9.719969685986714e-06, + "loss": 0.4146, + "step": 2958 + }, + { + "epoch": 0.13392170174247567, + "grad_norm": 0.9573335901450759, + "learning_rate": 9.719727798610483e-06, + "loss": 0.442, + "step": 2959 + }, + { + "epoch": 0.13396696085087123, + "grad_norm": 0.9656716511929738, + "learning_rate": 9.719485809821799e-06, + "loss": 0.4202, + "step": 2960 + }, + { + "epoch": 0.1340122199592668, + "grad_norm": 0.8338597559468536, + "learning_rate": 9.719243719625857e-06, + "loss": 0.3783, + "step": 2961 + }, + { + "epoch": 0.13405747906766236, + "grad_norm": 0.6863750264693458, + "learning_rate": 9.719001528027863e-06, + "loss": 0.5274, + "step": 2962 + }, + { + "epoch": 0.13410273817605795, + "grad_norm": 0.7985367660393604, + "learning_rate": 9.71875923503302e-06, + "loss": 0.4137, + "step": 2963 + }, + { + "epoch": 0.1341479972844535, + "grad_norm": 0.9118946431924335, + "learning_rate": 9.718516840646533e-06, + "loss": 0.4091, + "step": 2964 + }, + { + "epoch": 0.13419325639284907, + "grad_norm": 0.7934119150445078, + "learning_rate": 9.71827434487361e-06, + "loss": 0.4263, + "step": 2965 + }, + { + "epoch": 0.13423851550124463, + "grad_norm": 0.7735298280481725, + "learning_rate": 9.718031747719465e-06, + "loss": 0.4382, + "step": 2966 + }, + { + "epoch": 0.1342837746096402, + "grad_norm": 0.9513330875037518, + "learning_rate": 9.717789049189306e-06, + "loss": 0.4435, + "step": 2967 + }, + { + "epoch": 0.13432903371803576, + "grad_norm": 0.6909982648821564, + "learning_rate": 9.71754624928835e-06, + "loss": 0.4049, + "step": 2968 + }, + { + "epoch": 0.13437429282643132, + "grad_norm": 0.8679915662733991, + "learning_rate": 9.717303348021814e-06, + "loss": 0.395, + "step": 2969 + }, + { + "epoch": 0.13441955193482688, + "grad_norm": 0.8847599037454432, + "learning_rate": 9.717060345394917e-06, + "loss": 0.4044, + "step": 2970 + }, + { + "epoch": 0.13446481104322244, + "grad_norm": 0.7118812380117908, + "learning_rate": 9.716817241412882e-06, + "loss": 0.3957, + "step": 2971 + }, + { + "epoch": 0.134510070151618, + "grad_norm": 0.6306224960599156, + "learning_rate": 9.71657403608093e-06, + "loss": 0.5091, + "step": 2972 + }, + { + "epoch": 0.13455532926001357, + "grad_norm": 0.7622301601596669, + "learning_rate": 9.716330729404287e-06, + "loss": 0.4114, + "step": 2973 + }, + { + "epoch": 0.13460058836840913, + "grad_norm": 0.7484186601210657, + "learning_rate": 9.716087321388184e-06, + "loss": 0.3871, + "step": 2974 + }, + { + "epoch": 0.13464584747680472, + "grad_norm": 0.7625419668353308, + "learning_rate": 9.715843812037846e-06, + "loss": 0.4587, + "step": 2975 + }, + { + "epoch": 0.13469110658520028, + "grad_norm": 0.35951379933644734, + "learning_rate": 9.71560020135851e-06, + "loss": 0.4977, + "step": 2976 + }, + { + "epoch": 0.13473636569359584, + "grad_norm": 0.3442069853028463, + "learning_rate": 9.715356489355408e-06, + "loss": 0.5211, + "step": 2977 + }, + { + "epoch": 0.1347816248019914, + "grad_norm": 0.7327349225883127, + "learning_rate": 9.715112676033777e-06, + "loss": 0.3668, + "step": 2978 + }, + { + "epoch": 0.13482688391038697, + "grad_norm": 0.6963321159595294, + "learning_rate": 9.714868761398856e-06, + "loss": 0.4397, + "step": 2979 + }, + { + "epoch": 0.13487214301878253, + "grad_norm": 0.7512751684699605, + "learning_rate": 9.714624745455885e-06, + "loss": 0.4309, + "step": 2980 + }, + { + "epoch": 0.1349174021271781, + "grad_norm": 0.6640385044151759, + "learning_rate": 9.71438062821011e-06, + "loss": 0.4231, + "step": 2981 + }, + { + "epoch": 0.13496266123557366, + "grad_norm": 0.7347924559093464, + "learning_rate": 9.714136409666773e-06, + "loss": 0.3837, + "step": 2982 + }, + { + "epoch": 0.13500792034396922, + "grad_norm": 0.7153839496940779, + "learning_rate": 9.713892089831122e-06, + "loss": 0.3813, + "step": 2983 + }, + { + "epoch": 0.13505317945236478, + "grad_norm": 0.6314896443694339, + "learning_rate": 9.71364766870841e-06, + "loss": 0.5242, + "step": 2984 + }, + { + "epoch": 0.13509843856076034, + "grad_norm": 0.7938527436789027, + "learning_rate": 9.713403146303885e-06, + "loss": 0.3912, + "step": 2985 + }, + { + "epoch": 0.1351436976691559, + "grad_norm": 0.7578285470729604, + "learning_rate": 9.713158522622804e-06, + "loss": 0.4341, + "step": 2986 + }, + { + "epoch": 0.1351889567775515, + "grad_norm": 0.7256290252232681, + "learning_rate": 9.71291379767042e-06, + "loss": 0.4769, + "step": 2987 + }, + { + "epoch": 0.13523421588594706, + "grad_norm": 0.705979909773582, + "learning_rate": 9.712668971451996e-06, + "loss": 0.4145, + "step": 2988 + }, + { + "epoch": 0.13527947499434262, + "grad_norm": 0.7603561663218684, + "learning_rate": 9.712424043972786e-06, + "loss": 0.4247, + "step": 2989 + }, + { + "epoch": 0.13532473410273818, + "grad_norm": 0.7209131814522719, + "learning_rate": 9.712179015238058e-06, + "loss": 0.4629, + "step": 2990 + }, + { + "epoch": 0.13536999321113374, + "grad_norm": 0.685193750110945, + "learning_rate": 9.711933885253076e-06, + "loss": 0.443, + "step": 2991 + }, + { + "epoch": 0.1354152523195293, + "grad_norm": 0.6841066749635569, + "learning_rate": 9.711688654023105e-06, + "loss": 0.4122, + "step": 2992 + }, + { + "epoch": 0.13546051142792487, + "grad_norm": 0.5869559059563596, + "learning_rate": 9.711443321553415e-06, + "loss": 0.4883, + "step": 2993 + }, + { + "epoch": 0.13550577053632043, + "grad_norm": 0.7553318168260519, + "learning_rate": 9.71119788784928e-06, + "loss": 0.4483, + "step": 2994 + }, + { + "epoch": 0.135551029644716, + "grad_norm": 0.7287364896534111, + "learning_rate": 9.71095235291597e-06, + "loss": 0.4336, + "step": 2995 + }, + { + "epoch": 0.13559628875311155, + "grad_norm": 0.7214137149091241, + "learning_rate": 9.710706716758765e-06, + "loss": 0.4295, + "step": 2996 + }, + { + "epoch": 0.13564154786150712, + "grad_norm": 0.7463034267148326, + "learning_rate": 9.710460979382938e-06, + "loss": 0.411, + "step": 2997 + }, + { + "epoch": 0.13568680696990268, + "grad_norm": 0.7686412440469463, + "learning_rate": 9.710215140793774e-06, + "loss": 0.3998, + "step": 2998 + }, + { + "epoch": 0.13573206607829827, + "grad_norm": 0.9299126331208909, + "learning_rate": 9.709969200996551e-06, + "loss": 0.3754, + "step": 2999 + }, + { + "epoch": 0.13577732518669383, + "grad_norm": 0.7395976891382515, + "learning_rate": 9.709723159996556e-06, + "loss": 0.443, + "step": 3000 + }, + { + "epoch": 0.1358225842950894, + "grad_norm": 0.4746397495528787, + "learning_rate": 9.709477017799076e-06, + "loss": 0.5077, + "step": 3001 + }, + { + "epoch": 0.13586784340348496, + "grad_norm": 0.7794278734782368, + "learning_rate": 9.709230774409397e-06, + "loss": 0.4328, + "step": 3002 + }, + { + "epoch": 0.13591310251188052, + "grad_norm": 0.649400693345909, + "learning_rate": 9.708984429832815e-06, + "loss": 0.3932, + "step": 3003 + }, + { + "epoch": 0.13595836162027608, + "grad_norm": 0.7243280492242261, + "learning_rate": 9.708737984074616e-06, + "loss": 0.425, + "step": 3004 + }, + { + "epoch": 0.13600362072867164, + "grad_norm": 0.7567664983915044, + "learning_rate": 9.708491437140103e-06, + "loss": 0.4414, + "step": 3005 + }, + { + "epoch": 0.1360488798370672, + "grad_norm": 0.7045563573152411, + "learning_rate": 9.708244789034568e-06, + "loss": 0.438, + "step": 3006 + }, + { + "epoch": 0.13609413894546277, + "grad_norm": 0.6520952845285576, + "learning_rate": 9.707998039763315e-06, + "loss": 0.4046, + "step": 3007 + }, + { + "epoch": 0.13613939805385833, + "grad_norm": 0.521269484352832, + "learning_rate": 9.707751189331642e-06, + "loss": 0.5201, + "step": 3008 + }, + { + "epoch": 0.1361846571622539, + "grad_norm": 0.7179054327234664, + "learning_rate": 9.707504237744854e-06, + "loss": 0.4759, + "step": 3009 + }, + { + "epoch": 0.13622991627064948, + "grad_norm": 0.6624657232936156, + "learning_rate": 9.707257185008259e-06, + "loss": 0.4593, + "step": 3010 + }, + { + "epoch": 0.13627517537904504, + "grad_norm": 0.9881809471220743, + "learning_rate": 9.707010031127164e-06, + "loss": 0.4301, + "step": 3011 + }, + { + "epoch": 0.1363204344874406, + "grad_norm": 0.7301209156664, + "learning_rate": 9.70676277610688e-06, + "loss": 0.4417, + "step": 3012 + }, + { + "epoch": 0.13636569359583617, + "grad_norm": 0.9904205272743063, + "learning_rate": 9.70651541995272e-06, + "loss": 0.4054, + "step": 3013 + }, + { + "epoch": 0.13641095270423173, + "grad_norm": 0.470084247855341, + "learning_rate": 9.706267962669999e-06, + "loss": 0.5211, + "step": 3014 + }, + { + "epoch": 0.1364562118126273, + "grad_norm": 0.7041348282498282, + "learning_rate": 9.706020404264033e-06, + "loss": 0.4135, + "step": 3015 + }, + { + "epoch": 0.13650147092102285, + "grad_norm": 0.6653132246514796, + "learning_rate": 9.705772744740142e-06, + "loss": 0.4016, + "step": 3016 + }, + { + "epoch": 0.13654673002941842, + "grad_norm": 0.6466986946363746, + "learning_rate": 9.705524984103647e-06, + "loss": 0.4183, + "step": 3017 + }, + { + "epoch": 0.13659198913781398, + "grad_norm": 0.6928442879758144, + "learning_rate": 9.705277122359871e-06, + "loss": 0.4253, + "step": 3018 + }, + { + "epoch": 0.13663724824620954, + "grad_norm": 0.6640542362916704, + "learning_rate": 9.705029159514143e-06, + "loss": 0.4113, + "step": 3019 + }, + { + "epoch": 0.1366825073546051, + "grad_norm": 0.6814633476218547, + "learning_rate": 9.704781095571788e-06, + "loss": 0.4088, + "step": 3020 + }, + { + "epoch": 0.13672776646300067, + "grad_norm": 0.706195642860411, + "learning_rate": 9.704532930538137e-06, + "loss": 0.3909, + "step": 3021 + }, + { + "epoch": 0.13677302557139626, + "grad_norm": 0.8893079768742019, + "learning_rate": 9.704284664418521e-06, + "loss": 0.397, + "step": 3022 + }, + { + "epoch": 0.13681828467979182, + "grad_norm": 0.4988443481670754, + "learning_rate": 9.704036297218278e-06, + "loss": 0.4932, + "step": 3023 + }, + { + "epoch": 0.13686354378818738, + "grad_norm": 0.3946010923966299, + "learning_rate": 9.70378782894274e-06, + "loss": 0.5055, + "step": 3024 + }, + { + "epoch": 0.13690880289658294, + "grad_norm": 0.7658283547259195, + "learning_rate": 9.70353925959725e-06, + "loss": 0.438, + "step": 3025 + }, + { + "epoch": 0.1369540620049785, + "grad_norm": 0.7081223389869151, + "learning_rate": 9.703290589187146e-06, + "loss": 0.3792, + "step": 3026 + }, + { + "epoch": 0.13699932111337407, + "grad_norm": 0.7429936786885113, + "learning_rate": 9.703041817717773e-06, + "loss": 0.4005, + "step": 3027 + }, + { + "epoch": 0.13704458022176963, + "grad_norm": 0.9040521372243512, + "learning_rate": 9.702792945194475e-06, + "loss": 0.4689, + "step": 3028 + }, + { + "epoch": 0.1370898393301652, + "grad_norm": 0.7365739004941558, + "learning_rate": 9.7025439716226e-06, + "loss": 0.4277, + "step": 3029 + }, + { + "epoch": 0.13713509843856075, + "grad_norm": 0.6711554139649331, + "learning_rate": 9.702294897007499e-06, + "loss": 0.3984, + "step": 3030 + }, + { + "epoch": 0.13718035754695632, + "grad_norm": 0.7033004233347675, + "learning_rate": 9.702045721354521e-06, + "loss": 0.5139, + "step": 3031 + }, + { + "epoch": 0.13722561665535188, + "grad_norm": 0.9480372201213745, + "learning_rate": 9.701796444669022e-06, + "loss": 0.4476, + "step": 3032 + }, + { + "epoch": 0.13727087576374744, + "grad_norm": 0.7158231088813505, + "learning_rate": 9.701547066956359e-06, + "loss": 0.4125, + "step": 3033 + }, + { + "epoch": 0.13731613487214303, + "grad_norm": 0.7652456510075898, + "learning_rate": 9.701297588221888e-06, + "loss": 0.3784, + "step": 3034 + }, + { + "epoch": 0.1373613939805386, + "grad_norm": 0.9602938075922123, + "learning_rate": 9.701048008470972e-06, + "loss": 0.4084, + "step": 3035 + }, + { + "epoch": 0.13740665308893416, + "grad_norm": 0.7415008906843575, + "learning_rate": 9.700798327708972e-06, + "loss": 0.423, + "step": 3036 + }, + { + "epoch": 0.13745191219732972, + "grad_norm": 0.6903867395699205, + "learning_rate": 9.700548545941253e-06, + "loss": 0.43, + "step": 3037 + }, + { + "epoch": 0.13749717130572528, + "grad_norm": 0.6979826967977066, + "learning_rate": 9.700298663173183e-06, + "loss": 0.4955, + "step": 3038 + }, + { + "epoch": 0.13754243041412084, + "grad_norm": 0.7044161732381952, + "learning_rate": 9.70004867941013e-06, + "loss": 0.421, + "step": 3039 + }, + { + "epoch": 0.1375876895225164, + "grad_norm": 0.37286787245742103, + "learning_rate": 9.699798594657464e-06, + "loss": 0.5152, + "step": 3040 + }, + { + "epoch": 0.13763294863091197, + "grad_norm": 0.9123876179707686, + "learning_rate": 9.699548408920563e-06, + "loss": 0.445, + "step": 3041 + }, + { + "epoch": 0.13767820773930753, + "grad_norm": 0.6151008314597627, + "learning_rate": 9.6992981222048e-06, + "loss": 0.5323, + "step": 3042 + }, + { + "epoch": 0.1377234668477031, + "grad_norm": 0.8045341700582606, + "learning_rate": 9.699047734515554e-06, + "loss": 0.4556, + "step": 3043 + }, + { + "epoch": 0.13776872595609865, + "grad_norm": 0.8138702592007475, + "learning_rate": 9.698797245858202e-06, + "loss": 0.443, + "step": 3044 + }, + { + "epoch": 0.13781398506449424, + "grad_norm": 0.5004662711445704, + "learning_rate": 9.69854665623813e-06, + "loss": 0.4995, + "step": 3045 + }, + { + "epoch": 0.1378592441728898, + "grad_norm": 0.4342575012009011, + "learning_rate": 9.698295965660721e-06, + "loss": 0.5132, + "step": 3046 + }, + { + "epoch": 0.13790450328128537, + "grad_norm": 0.8695634845187036, + "learning_rate": 9.69804517413136e-06, + "loss": 0.389, + "step": 3047 + }, + { + "epoch": 0.13794976238968093, + "grad_norm": 0.7739147195274749, + "learning_rate": 9.697794281655439e-06, + "loss": 0.4401, + "step": 3048 + }, + { + "epoch": 0.1379950214980765, + "grad_norm": 0.7119861534065948, + "learning_rate": 9.697543288238345e-06, + "loss": 0.3981, + "step": 3049 + }, + { + "epoch": 0.13804028060647205, + "grad_norm": 0.6663963626978507, + "learning_rate": 9.697292193885475e-06, + "loss": 0.4072, + "step": 3050 + }, + { + "epoch": 0.13808553971486762, + "grad_norm": 0.781265903071915, + "learning_rate": 9.69704099860222e-06, + "loss": 0.3964, + "step": 3051 + }, + { + "epoch": 0.13813079882326318, + "grad_norm": 1.2176129605873554, + "learning_rate": 9.696789702393982e-06, + "loss": 0.4457, + "step": 3052 + }, + { + "epoch": 0.13817605793165874, + "grad_norm": 0.9365927829037233, + "learning_rate": 9.69653830526616e-06, + "loss": 0.5298, + "step": 3053 + }, + { + "epoch": 0.1382213170400543, + "grad_norm": 0.5873324580483902, + "learning_rate": 9.696286807224151e-06, + "loss": 0.4898, + "step": 3054 + }, + { + "epoch": 0.13826657614844987, + "grad_norm": 0.7170843817829563, + "learning_rate": 9.696035208273363e-06, + "loss": 0.4818, + "step": 3055 + }, + { + "epoch": 0.13831183525684543, + "grad_norm": 0.7445503826919057, + "learning_rate": 9.6957835084192e-06, + "loss": 0.4018, + "step": 3056 + }, + { + "epoch": 0.13835709436524102, + "grad_norm": 0.6868032484108987, + "learning_rate": 9.695531707667073e-06, + "loss": 0.4229, + "step": 3057 + }, + { + "epoch": 0.13840235347363658, + "grad_norm": 0.7126566217183055, + "learning_rate": 9.695279806022391e-06, + "loss": 0.4223, + "step": 3058 + }, + { + "epoch": 0.13844761258203214, + "grad_norm": 1.6047999058425257, + "learning_rate": 9.695027803490565e-06, + "loss": 0.5042, + "step": 3059 + }, + { + "epoch": 0.1384928716904277, + "grad_norm": 1.1171172770190672, + "learning_rate": 9.694775700077013e-06, + "loss": 0.5175, + "step": 3060 + }, + { + "epoch": 0.13853813079882327, + "grad_norm": 0.9032744039406752, + "learning_rate": 9.694523495787149e-06, + "loss": 0.3975, + "step": 3061 + }, + { + "epoch": 0.13858338990721883, + "grad_norm": 0.7173000975750978, + "learning_rate": 9.694271190626393e-06, + "loss": 0.4484, + "step": 3062 + }, + { + "epoch": 0.1386286490156144, + "grad_norm": 1.0967957259191934, + "learning_rate": 9.694018784600166e-06, + "loss": 0.5279, + "step": 3063 + }, + { + "epoch": 0.13867390812400995, + "grad_norm": 1.1085504083237756, + "learning_rate": 9.693766277713893e-06, + "loss": 0.5089, + "step": 3064 + }, + { + "epoch": 0.13871916723240552, + "grad_norm": 1.0817725333092245, + "learning_rate": 9.693513669972999e-06, + "loss": 0.4073, + "step": 3065 + }, + { + "epoch": 0.13876442634080108, + "grad_norm": 0.7458369430458078, + "learning_rate": 9.69326096138291e-06, + "loss": 0.382, + "step": 3066 + }, + { + "epoch": 0.13880968544919664, + "grad_norm": 0.6444122363483363, + "learning_rate": 9.693008151949058e-06, + "loss": 0.531, + "step": 3067 + }, + { + "epoch": 0.1388549445575922, + "grad_norm": 0.8259422450902474, + "learning_rate": 9.692755241676874e-06, + "loss": 0.4223, + "step": 3068 + }, + { + "epoch": 0.1389002036659878, + "grad_norm": 0.8397951737371382, + "learning_rate": 9.692502230571792e-06, + "loss": 0.4282, + "step": 3069 + }, + { + "epoch": 0.13894546277438335, + "grad_norm": 0.7945570794817268, + "learning_rate": 9.69224911863925e-06, + "loss": 0.3981, + "step": 3070 + }, + { + "epoch": 0.13899072188277892, + "grad_norm": 0.7121357050394432, + "learning_rate": 9.691995905884684e-06, + "loss": 0.4371, + "step": 3071 + }, + { + "epoch": 0.13903598099117448, + "grad_norm": 0.7504348353548871, + "learning_rate": 9.691742592313537e-06, + "loss": 0.4514, + "step": 3072 + }, + { + "epoch": 0.13908124009957004, + "grad_norm": 0.8473511032416616, + "learning_rate": 9.691489177931253e-06, + "loss": 0.419, + "step": 3073 + }, + { + "epoch": 0.1391264992079656, + "grad_norm": 0.7824499897745099, + "learning_rate": 9.691235662743273e-06, + "loss": 0.4036, + "step": 3074 + }, + { + "epoch": 0.13917175831636117, + "grad_norm": 0.6989322866124807, + "learning_rate": 9.690982046755048e-06, + "loss": 0.4274, + "step": 3075 + }, + { + "epoch": 0.13921701742475673, + "grad_norm": 0.6853412184971148, + "learning_rate": 9.690728329972025e-06, + "loss": 0.4167, + "step": 3076 + }, + { + "epoch": 0.1392622765331523, + "grad_norm": 1.4691170124006308, + "learning_rate": 9.690474512399658e-06, + "loss": 0.5224, + "step": 3077 + }, + { + "epoch": 0.13930753564154785, + "grad_norm": 0.8608378530218217, + "learning_rate": 9.690220594043399e-06, + "loss": 0.4327, + "step": 3078 + }, + { + "epoch": 0.13935279474994341, + "grad_norm": 0.8612975549057106, + "learning_rate": 9.689966574908704e-06, + "loss": 0.4128, + "step": 3079 + }, + { + "epoch": 0.13939805385833898, + "grad_norm": 1.1479647773489714, + "learning_rate": 9.689712455001031e-06, + "loss": 0.3769, + "step": 3080 + }, + { + "epoch": 0.13944331296673457, + "grad_norm": 0.7404215174065047, + "learning_rate": 9.689458234325842e-06, + "loss": 0.4226, + "step": 3081 + }, + { + "epoch": 0.13948857207513013, + "grad_norm": 0.7714942920613758, + "learning_rate": 9.689203912888597e-06, + "loss": 0.5254, + "step": 3082 + }, + { + "epoch": 0.1395338311835257, + "grad_norm": 0.8908715997971788, + "learning_rate": 9.688949490694762e-06, + "loss": 0.3953, + "step": 3083 + }, + { + "epoch": 0.13957909029192125, + "grad_norm": 0.8204552348832636, + "learning_rate": 9.688694967749804e-06, + "loss": 0.4468, + "step": 3084 + }, + { + "epoch": 0.13962434940031682, + "grad_norm": 0.7103680809500149, + "learning_rate": 9.68844034405919e-06, + "loss": 0.4333, + "step": 3085 + }, + { + "epoch": 0.13966960850871238, + "grad_norm": 0.6542833279183922, + "learning_rate": 9.688185619628395e-06, + "loss": 0.5296, + "step": 3086 + }, + { + "epoch": 0.13971486761710794, + "grad_norm": 0.7607580376314098, + "learning_rate": 9.687930794462887e-06, + "loss": 0.4291, + "step": 3087 + }, + { + "epoch": 0.1397601267255035, + "grad_norm": 0.731342029185371, + "learning_rate": 9.687675868568145e-06, + "loss": 0.426, + "step": 3088 + }, + { + "epoch": 0.13980538583389907, + "grad_norm": 0.729612946517145, + "learning_rate": 9.687420841949646e-06, + "loss": 0.4535, + "step": 3089 + }, + { + "epoch": 0.13985064494229463, + "grad_norm": 0.4080284721261495, + "learning_rate": 9.68716571461287e-06, + "loss": 0.5057, + "step": 3090 + }, + { + "epoch": 0.1398959040506902, + "grad_norm": 0.6791576858736899, + "learning_rate": 9.686910486563297e-06, + "loss": 0.431, + "step": 3091 + }, + { + "epoch": 0.13994116315908578, + "grad_norm": 0.6886144579501806, + "learning_rate": 9.686655157806412e-06, + "loss": 0.4299, + "step": 3092 + }, + { + "epoch": 0.13998642226748134, + "grad_norm": 0.717296049557901, + "learning_rate": 9.686399728347704e-06, + "loss": 0.418, + "step": 3093 + }, + { + "epoch": 0.1400316813758769, + "grad_norm": 0.4554757341055788, + "learning_rate": 9.686144198192658e-06, + "loss": 0.5322, + "step": 3094 + }, + { + "epoch": 0.14007694048427247, + "grad_norm": 0.8239705370084248, + "learning_rate": 9.685888567346765e-06, + "loss": 0.4378, + "step": 3095 + }, + { + "epoch": 0.14012219959266803, + "grad_norm": 0.7019855219895466, + "learning_rate": 9.685632835815519e-06, + "loss": 0.3947, + "step": 3096 + }, + { + "epoch": 0.1401674587010636, + "grad_norm": 0.6641989821498935, + "learning_rate": 9.685377003604412e-06, + "loss": 0.4222, + "step": 3097 + }, + { + "epoch": 0.14021271780945915, + "grad_norm": 0.7060878628909356, + "learning_rate": 9.685121070718946e-06, + "loss": 0.418, + "step": 3098 + }, + { + "epoch": 0.14025797691785472, + "grad_norm": 0.7055236199159629, + "learning_rate": 9.684865037164616e-06, + "loss": 0.4459, + "step": 3099 + }, + { + "epoch": 0.14030323602625028, + "grad_norm": 0.7136009838012457, + "learning_rate": 9.684608902946926e-06, + "loss": 0.3899, + "step": 3100 + }, + { + "epoch": 0.14034849513464584, + "grad_norm": 0.7280440062086467, + "learning_rate": 9.684352668071378e-06, + "loss": 0.417, + "step": 3101 + }, + { + "epoch": 0.1403937542430414, + "grad_norm": 0.7027541777851652, + "learning_rate": 9.684096332543477e-06, + "loss": 0.415, + "step": 3102 + }, + { + "epoch": 0.14043901335143696, + "grad_norm": 0.686899495440445, + "learning_rate": 9.683839896368732e-06, + "loss": 0.4969, + "step": 3103 + }, + { + "epoch": 0.14048427245983255, + "grad_norm": 0.8245957668931218, + "learning_rate": 9.683583359552654e-06, + "loss": 0.4609, + "step": 3104 + }, + { + "epoch": 0.14052953156822812, + "grad_norm": 0.7556686305076334, + "learning_rate": 9.683326722100753e-06, + "loss": 0.4388, + "step": 3105 + }, + { + "epoch": 0.14057479067662368, + "grad_norm": 0.5379909049373236, + "learning_rate": 9.683069984018545e-06, + "loss": 0.4911, + "step": 3106 + }, + { + "epoch": 0.14062004978501924, + "grad_norm": 0.8009003920741268, + "learning_rate": 9.682813145311547e-06, + "loss": 0.4007, + "step": 3107 + }, + { + "epoch": 0.1406653088934148, + "grad_norm": 0.7335679247958816, + "learning_rate": 9.682556205985274e-06, + "loss": 0.4309, + "step": 3108 + }, + { + "epoch": 0.14071056800181037, + "grad_norm": 0.730711292524861, + "learning_rate": 9.682299166045252e-06, + "loss": 0.4242, + "step": 3109 + }, + { + "epoch": 0.14075582711020593, + "grad_norm": 0.7004543383866226, + "learning_rate": 9.682042025497001e-06, + "loss": 0.3755, + "step": 3110 + }, + { + "epoch": 0.1408010862186015, + "grad_norm": 1.519595889804459, + "learning_rate": 9.681784784346047e-06, + "loss": 0.4541, + "step": 3111 + }, + { + "epoch": 0.14084634532699705, + "grad_norm": 0.661638334038799, + "learning_rate": 9.681527442597916e-06, + "loss": 0.3909, + "step": 3112 + }, + { + "epoch": 0.14089160443539261, + "grad_norm": 0.6977427321641013, + "learning_rate": 9.681270000258138e-06, + "loss": 0.447, + "step": 3113 + }, + { + "epoch": 0.14093686354378818, + "grad_norm": 0.7069246418810469, + "learning_rate": 9.681012457332247e-06, + "loss": 0.3864, + "step": 3114 + }, + { + "epoch": 0.14098212265218374, + "grad_norm": 0.7266791455837109, + "learning_rate": 9.680754813825774e-06, + "loss": 0.4387, + "step": 3115 + }, + { + "epoch": 0.14102738176057933, + "grad_norm": 0.7571786820538708, + "learning_rate": 9.680497069744254e-06, + "loss": 0.4425, + "step": 3116 + }, + { + "epoch": 0.1410726408689749, + "grad_norm": 0.7102976525155017, + "learning_rate": 9.68023922509323e-06, + "loss": 0.439, + "step": 3117 + }, + { + "epoch": 0.14111789997737045, + "grad_norm": 0.6973062459504656, + "learning_rate": 9.67998127987824e-06, + "loss": 0.4311, + "step": 3118 + }, + { + "epoch": 0.14116315908576602, + "grad_norm": 0.6736480913668162, + "learning_rate": 9.679723234104822e-06, + "loss": 0.4194, + "step": 3119 + }, + { + "epoch": 0.14120841819416158, + "grad_norm": 0.6712826154773708, + "learning_rate": 9.679465087778526e-06, + "loss": 0.3818, + "step": 3120 + }, + { + "epoch": 0.14125367730255714, + "grad_norm": 0.7766965611884528, + "learning_rate": 9.679206840904898e-06, + "loss": 0.4403, + "step": 3121 + }, + { + "epoch": 0.1412989364109527, + "grad_norm": 0.7349713706275844, + "learning_rate": 9.678948493489485e-06, + "loss": 0.4386, + "step": 3122 + }, + { + "epoch": 0.14134419551934826, + "grad_norm": 0.6508673734652729, + "learning_rate": 9.67869004553784e-06, + "loss": 0.4208, + "step": 3123 + }, + { + "epoch": 0.14138945462774383, + "grad_norm": 0.728103346866895, + "learning_rate": 9.678431497055515e-06, + "loss": 0.5208, + "step": 3124 + }, + { + "epoch": 0.1414347137361394, + "grad_norm": 0.45853081090677567, + "learning_rate": 9.678172848048067e-06, + "loss": 0.5069, + "step": 3125 + }, + { + "epoch": 0.14147997284453495, + "grad_norm": 0.804412565112675, + "learning_rate": 9.677914098521051e-06, + "loss": 0.3767, + "step": 3126 + }, + { + "epoch": 0.1415252319529305, + "grad_norm": 0.6728901882341966, + "learning_rate": 9.677655248480026e-06, + "loss": 0.42, + "step": 3127 + }, + { + "epoch": 0.1415704910613261, + "grad_norm": 0.7435907240723189, + "learning_rate": 9.67739629793056e-06, + "loss": 0.3903, + "step": 3128 + }, + { + "epoch": 0.14161575016972167, + "grad_norm": 0.7145965926168072, + "learning_rate": 9.677137246878212e-06, + "loss": 0.4231, + "step": 3129 + }, + { + "epoch": 0.14166100927811723, + "grad_norm": 0.6718267845844599, + "learning_rate": 9.676878095328547e-06, + "loss": 0.4187, + "step": 3130 + }, + { + "epoch": 0.1417062683865128, + "grad_norm": 0.7073341669717274, + "learning_rate": 9.67661884328714e-06, + "loss": 0.4173, + "step": 3131 + }, + { + "epoch": 0.14175152749490835, + "grad_norm": 0.7355489401608707, + "learning_rate": 9.676359490759554e-06, + "loss": 0.4201, + "step": 3132 + }, + { + "epoch": 0.14179678660330391, + "grad_norm": 1.6407994213832153, + "learning_rate": 9.676100037751366e-06, + "loss": 0.5338, + "step": 3133 + }, + { + "epoch": 0.14184204571169948, + "grad_norm": 0.7249211925149227, + "learning_rate": 9.675840484268149e-06, + "loss": 0.4435, + "step": 3134 + }, + { + "epoch": 0.14188730482009504, + "grad_norm": 0.6720350118302825, + "learning_rate": 9.675580830315481e-06, + "loss": 0.5272, + "step": 3135 + }, + { + "epoch": 0.1419325639284906, + "grad_norm": 0.7299979256540658, + "learning_rate": 9.67532107589894e-06, + "loss": 0.4302, + "step": 3136 + }, + { + "epoch": 0.14197782303688616, + "grad_norm": 0.727738005887431, + "learning_rate": 9.67506122102411e-06, + "loss": 0.3973, + "step": 3137 + }, + { + "epoch": 0.14202308214528173, + "grad_norm": 0.8311652702626384, + "learning_rate": 9.674801265696572e-06, + "loss": 0.423, + "step": 3138 + }, + { + "epoch": 0.14206834125367732, + "grad_norm": 0.6585685880370094, + "learning_rate": 9.674541209921913e-06, + "loss": 0.3822, + "step": 3139 + }, + { + "epoch": 0.14211360036207288, + "grad_norm": 0.7377137503636694, + "learning_rate": 9.674281053705719e-06, + "loss": 0.4129, + "step": 3140 + }, + { + "epoch": 0.14215885947046844, + "grad_norm": 0.7171166966408776, + "learning_rate": 9.67402079705358e-06, + "loss": 0.4275, + "step": 3141 + }, + { + "epoch": 0.142204118578864, + "grad_norm": 0.7429829000995922, + "learning_rate": 9.673760439971091e-06, + "loss": 0.4481, + "step": 3142 + }, + { + "epoch": 0.14224937768725956, + "grad_norm": 0.7112414502918661, + "learning_rate": 9.673499982463846e-06, + "loss": 0.4229, + "step": 3143 + }, + { + "epoch": 0.14229463679565513, + "grad_norm": 0.6799300213562083, + "learning_rate": 9.673239424537437e-06, + "loss": 0.4215, + "step": 3144 + }, + { + "epoch": 0.1423398959040507, + "grad_norm": 2.4514558013874717, + "learning_rate": 9.672978766197468e-06, + "loss": 0.5697, + "step": 3145 + }, + { + "epoch": 0.14238515501244625, + "grad_norm": 0.8073367634530231, + "learning_rate": 9.672718007449535e-06, + "loss": 0.3968, + "step": 3146 + }, + { + "epoch": 0.14243041412084181, + "grad_norm": 0.7186657373222725, + "learning_rate": 9.672457148299245e-06, + "loss": 0.3786, + "step": 3147 + }, + { + "epoch": 0.14247567322923738, + "grad_norm": 0.7936502277208761, + "learning_rate": 9.672196188752201e-06, + "loss": 0.5066, + "step": 3148 + }, + { + "epoch": 0.14252093233763294, + "grad_norm": 0.7367266440609374, + "learning_rate": 9.67193512881401e-06, + "loss": 0.3956, + "step": 3149 + }, + { + "epoch": 0.1425661914460285, + "grad_norm": 0.86974007930535, + "learning_rate": 9.671673968490281e-06, + "loss": 0.51, + "step": 3150 + }, + { + "epoch": 0.1426114505544241, + "grad_norm": 0.7339821934903883, + "learning_rate": 9.671412707786628e-06, + "loss": 0.3905, + "step": 3151 + }, + { + "epoch": 0.14265670966281965, + "grad_norm": 0.7741642555970408, + "learning_rate": 9.67115134670866e-06, + "loss": 0.4156, + "step": 3152 + }, + { + "epoch": 0.14270196877121522, + "grad_norm": 0.6771007640808087, + "learning_rate": 9.670889885262e-06, + "loss": 0.4323, + "step": 3153 + }, + { + "epoch": 0.14274722787961078, + "grad_norm": 1.4497561146456408, + "learning_rate": 9.670628323452259e-06, + "loss": 0.5543, + "step": 3154 + }, + { + "epoch": 0.14279248698800634, + "grad_norm": 2.3808597659867896, + "learning_rate": 9.670366661285061e-06, + "loss": 0.4272, + "step": 3155 + }, + { + "epoch": 0.1428377460964019, + "grad_norm": 0.6572621235218521, + "learning_rate": 9.670104898766028e-06, + "loss": 0.389, + "step": 3156 + }, + { + "epoch": 0.14288300520479746, + "grad_norm": 0.6716930824211316, + "learning_rate": 9.669843035900783e-06, + "loss": 0.4043, + "step": 3157 + }, + { + "epoch": 0.14292826431319303, + "grad_norm": 0.7747608617896153, + "learning_rate": 9.669581072694954e-06, + "loss": 0.4865, + "step": 3158 + }, + { + "epoch": 0.1429735234215886, + "grad_norm": 0.7170303310627186, + "learning_rate": 9.669319009154169e-06, + "loss": 0.4336, + "step": 3159 + }, + { + "epoch": 0.14301878252998415, + "grad_norm": 0.7309134726102187, + "learning_rate": 9.66905684528406e-06, + "loss": 0.4216, + "step": 3160 + }, + { + "epoch": 0.1430640416383797, + "grad_norm": 0.7802958711128894, + "learning_rate": 9.668794581090257e-06, + "loss": 0.4163, + "step": 3161 + }, + { + "epoch": 0.14310930074677528, + "grad_norm": 0.8650002567846453, + "learning_rate": 9.6685322165784e-06, + "loss": 0.4519, + "step": 3162 + }, + { + "epoch": 0.14315455985517087, + "grad_norm": 0.6585549158195979, + "learning_rate": 9.668269751754123e-06, + "loss": 0.3696, + "step": 3163 + }, + { + "epoch": 0.14319981896356643, + "grad_norm": 0.7047364200382954, + "learning_rate": 9.668007186623068e-06, + "loss": 0.3826, + "step": 3164 + }, + { + "epoch": 0.143245078071962, + "grad_norm": 0.6752872962284534, + "learning_rate": 9.667744521190873e-06, + "loss": 0.4091, + "step": 3165 + }, + { + "epoch": 0.14329033718035755, + "grad_norm": 0.7053220031969246, + "learning_rate": 9.667481755463183e-06, + "loss": 0.3918, + "step": 3166 + }, + { + "epoch": 0.14333559628875311, + "grad_norm": 0.7531808398595325, + "learning_rate": 9.66721888944565e-06, + "loss": 0.4624, + "step": 3167 + }, + { + "epoch": 0.14338085539714868, + "grad_norm": 0.7065600576391988, + "learning_rate": 9.666955923143912e-06, + "loss": 0.4121, + "step": 3168 + }, + { + "epoch": 0.14342611450554424, + "grad_norm": 0.7669130699470585, + "learning_rate": 9.666692856563628e-06, + "loss": 0.4169, + "step": 3169 + }, + { + "epoch": 0.1434713736139398, + "grad_norm": 1.1338249346765497, + "learning_rate": 9.666429689710447e-06, + "loss": 0.5071, + "step": 3170 + }, + { + "epoch": 0.14351663272233536, + "grad_norm": 0.6905545961200613, + "learning_rate": 9.666166422590024e-06, + "loss": 0.4471, + "step": 3171 + }, + { + "epoch": 0.14356189183073093, + "grad_norm": 0.6918574529763974, + "learning_rate": 9.665903055208013e-06, + "loss": 0.4686, + "step": 3172 + }, + { + "epoch": 0.1436071509391265, + "grad_norm": 0.725100808369826, + "learning_rate": 9.665639587570079e-06, + "loss": 0.4353, + "step": 3173 + }, + { + "epoch": 0.14365241004752205, + "grad_norm": 0.7448320533413242, + "learning_rate": 9.665376019681876e-06, + "loss": 0.4346, + "step": 3174 + }, + { + "epoch": 0.14369766915591764, + "grad_norm": 0.7240294172659404, + "learning_rate": 9.665112351549074e-06, + "loss": 0.3947, + "step": 3175 + }, + { + "epoch": 0.1437429282643132, + "grad_norm": 0.7927000090096381, + "learning_rate": 9.664848583177335e-06, + "loss": 0.4177, + "step": 3176 + }, + { + "epoch": 0.14378818737270876, + "grad_norm": 0.7320172279898588, + "learning_rate": 9.664584714572326e-06, + "loss": 0.4181, + "step": 3177 + }, + { + "epoch": 0.14383344648110433, + "grad_norm": 0.5698166164116315, + "learning_rate": 9.664320745739717e-06, + "loss": 0.5135, + "step": 3178 + }, + { + "epoch": 0.1438787055894999, + "grad_norm": 0.7539947870246922, + "learning_rate": 9.664056676685183e-06, + "loss": 0.4467, + "step": 3179 + }, + { + "epoch": 0.14392396469789545, + "grad_norm": 0.7213958856579106, + "learning_rate": 9.663792507414393e-06, + "loss": 0.4245, + "step": 3180 + }, + { + "epoch": 0.143969223806291, + "grad_norm": 0.6726300749608423, + "learning_rate": 9.663528237933027e-06, + "loss": 0.4099, + "step": 3181 + }, + { + "epoch": 0.14401448291468658, + "grad_norm": 0.4822560169478301, + "learning_rate": 9.663263868246762e-06, + "loss": 0.5279, + "step": 3182 + }, + { + "epoch": 0.14405974202308214, + "grad_norm": 0.9085236049734731, + "learning_rate": 9.662999398361278e-06, + "loss": 0.4152, + "step": 3183 + }, + { + "epoch": 0.1441050011314777, + "grad_norm": 0.7829003770567539, + "learning_rate": 9.662734828282258e-06, + "loss": 0.4515, + "step": 3184 + }, + { + "epoch": 0.14415026023987326, + "grad_norm": 0.6575551419625164, + "learning_rate": 9.66247015801539e-06, + "loss": 0.4002, + "step": 3185 + }, + { + "epoch": 0.14419551934826885, + "grad_norm": 0.7181233616587022, + "learning_rate": 9.662205387566355e-06, + "loss": 0.4125, + "step": 3186 + }, + { + "epoch": 0.14424077845666441, + "grad_norm": 0.6990078657681448, + "learning_rate": 9.661940516940846e-06, + "loss": 0.41, + "step": 3187 + }, + { + "epoch": 0.14428603756505998, + "grad_norm": 0.4046658946943539, + "learning_rate": 9.661675546144553e-06, + "loss": 0.4915, + "step": 3188 + }, + { + "epoch": 0.14433129667345554, + "grad_norm": 0.7822597863670415, + "learning_rate": 9.661410475183169e-06, + "loss": 0.4238, + "step": 3189 + }, + { + "epoch": 0.1443765557818511, + "grad_norm": 0.691259239024519, + "learning_rate": 9.661145304062391e-06, + "loss": 0.3847, + "step": 3190 + }, + { + "epoch": 0.14442181489024666, + "grad_norm": 0.7603748825768698, + "learning_rate": 9.660880032787917e-06, + "loss": 0.4286, + "step": 3191 + }, + { + "epoch": 0.14446707399864223, + "grad_norm": 0.7192307911925878, + "learning_rate": 9.660614661365446e-06, + "loss": 0.407, + "step": 3192 + }, + { + "epoch": 0.1445123331070378, + "grad_norm": 0.7306554858820006, + "learning_rate": 9.660349189800678e-06, + "loss": 0.4582, + "step": 3193 + }, + { + "epoch": 0.14455759221543335, + "grad_norm": 0.35736569073643326, + "learning_rate": 9.660083618099321e-06, + "loss": 0.5332, + "step": 3194 + }, + { + "epoch": 0.1446028513238289, + "grad_norm": 0.7395746670458239, + "learning_rate": 9.659817946267079e-06, + "loss": 0.4104, + "step": 3195 + }, + { + "epoch": 0.14464811043222447, + "grad_norm": 0.7604286179825007, + "learning_rate": 9.65955217430966e-06, + "loss": 0.4173, + "step": 3196 + }, + { + "epoch": 0.14469336954062004, + "grad_norm": 0.34860121685691725, + "learning_rate": 9.659286302232776e-06, + "loss": 0.4974, + "step": 3197 + }, + { + "epoch": 0.14473862864901563, + "grad_norm": 0.691904119059187, + "learning_rate": 9.659020330042139e-06, + "loss": 0.4527, + "step": 3198 + }, + { + "epoch": 0.1447838877574112, + "grad_norm": 0.697285444271822, + "learning_rate": 9.658754257743465e-06, + "loss": 0.4309, + "step": 3199 + }, + { + "epoch": 0.14482914686580675, + "grad_norm": 0.3251444549614559, + "learning_rate": 9.65848808534247e-06, + "loss": 0.5187, + "step": 3200 + }, + { + "epoch": 0.1448744059742023, + "grad_norm": 0.6903935648469879, + "learning_rate": 9.658221812844872e-06, + "loss": 0.425, + "step": 3201 + }, + { + "epoch": 0.14491966508259788, + "grad_norm": 0.6953138537555082, + "learning_rate": 9.657955440256396e-06, + "loss": 0.4405, + "step": 3202 + }, + { + "epoch": 0.14496492419099344, + "grad_norm": 0.726536205730736, + "learning_rate": 9.657688967582762e-06, + "loss": 0.4221, + "step": 3203 + }, + { + "epoch": 0.145010183299389, + "grad_norm": 0.7074917191163478, + "learning_rate": 9.657422394829697e-06, + "loss": 0.4541, + "step": 3204 + }, + { + "epoch": 0.14505544240778456, + "grad_norm": 0.3587028380035552, + "learning_rate": 9.65715572200293e-06, + "loss": 0.5251, + "step": 3205 + }, + { + "epoch": 0.14510070151618013, + "grad_norm": 0.5153669271065352, + "learning_rate": 9.65688894910819e-06, + "loss": 0.5109, + "step": 3206 + }, + { + "epoch": 0.1451459606245757, + "grad_norm": 0.7331308798946778, + "learning_rate": 9.656622076151208e-06, + "loss": 0.4372, + "step": 3207 + }, + { + "epoch": 0.14519121973297125, + "grad_norm": 0.731716823903968, + "learning_rate": 9.65635510313772e-06, + "loss": 0.437, + "step": 3208 + }, + { + "epoch": 0.1452364788413668, + "grad_norm": 0.6501948418255162, + "learning_rate": 9.656088030073462e-06, + "loss": 0.3799, + "step": 3209 + }, + { + "epoch": 0.1452817379497624, + "grad_norm": 0.6802252677586819, + "learning_rate": 9.655820856964171e-06, + "loss": 0.4147, + "step": 3210 + }, + { + "epoch": 0.14532699705815796, + "grad_norm": 0.669078224193458, + "learning_rate": 9.65555358381559e-06, + "loss": 0.4373, + "step": 3211 + }, + { + "epoch": 0.14537225616655353, + "grad_norm": 0.7840548343002391, + "learning_rate": 9.65528621063346e-06, + "loss": 0.3962, + "step": 3212 + }, + { + "epoch": 0.1454175152749491, + "grad_norm": 0.6714606210120766, + "learning_rate": 9.655018737423529e-06, + "loss": 0.4113, + "step": 3213 + }, + { + "epoch": 0.14546277438334465, + "grad_norm": 0.764637062360135, + "learning_rate": 9.65475116419154e-06, + "loss": 0.435, + "step": 3214 + }, + { + "epoch": 0.1455080334917402, + "grad_norm": 0.6940196373497772, + "learning_rate": 9.654483490943245e-06, + "loss": 0.3845, + "step": 3215 + }, + { + "epoch": 0.14555329260013578, + "grad_norm": 0.6979860285067682, + "learning_rate": 9.654215717684397e-06, + "loss": 0.3827, + "step": 3216 + }, + { + "epoch": 0.14559855170853134, + "grad_norm": 0.6450860678926025, + "learning_rate": 9.653947844420744e-06, + "loss": 0.4263, + "step": 3217 + }, + { + "epoch": 0.1456438108169269, + "grad_norm": 0.4631569753271721, + "learning_rate": 9.653679871158048e-06, + "loss": 0.4863, + "step": 3218 + }, + { + "epoch": 0.14568906992532246, + "grad_norm": 0.6504102546655056, + "learning_rate": 9.653411797902063e-06, + "loss": 0.4481, + "step": 3219 + }, + { + "epoch": 0.14573432903371802, + "grad_norm": 0.8385292768004025, + "learning_rate": 9.65314362465855e-06, + "loss": 0.4089, + "step": 3220 + }, + { + "epoch": 0.14577958814211361, + "grad_norm": 0.3677785772048963, + "learning_rate": 9.652875351433272e-06, + "loss": 0.5351, + "step": 3221 + }, + { + "epoch": 0.14582484725050918, + "grad_norm": 0.7357209315563944, + "learning_rate": 9.652606978231994e-06, + "loss": 0.4031, + "step": 3222 + }, + { + "epoch": 0.14587010635890474, + "grad_norm": 0.9786863345150582, + "learning_rate": 9.65233850506048e-06, + "loss": 0.4362, + "step": 3223 + }, + { + "epoch": 0.1459153654673003, + "grad_norm": 0.7184510643808878, + "learning_rate": 9.6520699319245e-06, + "loss": 0.4401, + "step": 3224 + }, + { + "epoch": 0.14596062457569586, + "grad_norm": 0.7416426760575285, + "learning_rate": 9.651801258829827e-06, + "loss": 0.3851, + "step": 3225 + }, + { + "epoch": 0.14600588368409143, + "grad_norm": 0.6576989128660552, + "learning_rate": 9.651532485782231e-06, + "loss": 0.4249, + "step": 3226 + }, + { + "epoch": 0.146051142792487, + "grad_norm": 0.7296653820837178, + "learning_rate": 9.651263612787487e-06, + "loss": 0.379, + "step": 3227 + }, + { + "epoch": 0.14609640190088255, + "grad_norm": 0.47286884291343695, + "learning_rate": 9.650994639851375e-06, + "loss": 0.4942, + "step": 3228 + }, + { + "epoch": 0.1461416610092781, + "grad_norm": 0.41271948404533615, + "learning_rate": 9.650725566979671e-06, + "loss": 0.5121, + "step": 3229 + }, + { + "epoch": 0.14618692011767367, + "grad_norm": 0.8261333798102216, + "learning_rate": 9.650456394178157e-06, + "loss": 0.442, + "step": 3230 + }, + { + "epoch": 0.14623217922606924, + "grad_norm": 0.6917499919499427, + "learning_rate": 9.65018712145262e-06, + "loss": 0.426, + "step": 3231 + }, + { + "epoch": 0.1462774383344648, + "grad_norm": 0.7170816180353979, + "learning_rate": 9.649917748808844e-06, + "loss": 0.4061, + "step": 3232 + }, + { + "epoch": 0.1463226974428604, + "grad_norm": 0.6987359541063993, + "learning_rate": 9.649648276252614e-06, + "loss": 0.4426, + "step": 3233 + }, + { + "epoch": 0.14636795655125595, + "grad_norm": 0.8070158387970268, + "learning_rate": 9.649378703789724e-06, + "loss": 0.4252, + "step": 3234 + }, + { + "epoch": 0.1464132156596515, + "grad_norm": 0.6080123077930029, + "learning_rate": 9.649109031425968e-06, + "loss": 0.4893, + "step": 3235 + }, + { + "epoch": 0.14645847476804708, + "grad_norm": 0.6925974737374918, + "learning_rate": 9.648839259167135e-06, + "loss": 0.4406, + "step": 3236 + }, + { + "epoch": 0.14650373387644264, + "grad_norm": 0.7773373297030651, + "learning_rate": 9.648569387019025e-06, + "loss": 0.3996, + "step": 3237 + }, + { + "epoch": 0.1465489929848382, + "grad_norm": 0.7564126557863732, + "learning_rate": 9.648299414987434e-06, + "loss": 0.4379, + "step": 3238 + }, + { + "epoch": 0.14659425209323376, + "grad_norm": 0.4150638456805394, + "learning_rate": 9.648029343078167e-06, + "loss": 0.5252, + "step": 3239 + }, + { + "epoch": 0.14663951120162932, + "grad_norm": 0.3622096334292757, + "learning_rate": 9.647759171297024e-06, + "loss": 0.5406, + "step": 3240 + }, + { + "epoch": 0.1466847703100249, + "grad_norm": 0.6823350984312218, + "learning_rate": 9.64748889964981e-06, + "loss": 0.3972, + "step": 3241 + }, + { + "epoch": 0.14673002941842045, + "grad_norm": 0.7071456722892714, + "learning_rate": 9.647218528142333e-06, + "loss": 0.4485, + "step": 3242 + }, + { + "epoch": 0.146775288526816, + "grad_norm": 0.6869486414541307, + "learning_rate": 9.646948056780403e-06, + "loss": 0.4102, + "step": 3243 + }, + { + "epoch": 0.14682054763521157, + "grad_norm": 0.6827200494044977, + "learning_rate": 9.646677485569834e-06, + "loss": 0.3975, + "step": 3244 + }, + { + "epoch": 0.14686580674360716, + "grad_norm": 0.6883900751807166, + "learning_rate": 9.646406814516434e-06, + "loss": 0.4023, + "step": 3245 + }, + { + "epoch": 0.14691106585200273, + "grad_norm": 0.5684018991219494, + "learning_rate": 9.646136043626023e-06, + "loss": 0.4943, + "step": 3246 + }, + { + "epoch": 0.1469563249603983, + "grad_norm": 0.6624837027292081, + "learning_rate": 9.645865172904418e-06, + "loss": 0.4079, + "step": 3247 + }, + { + "epoch": 0.14700158406879385, + "grad_norm": 0.6713274806582772, + "learning_rate": 9.645594202357438e-06, + "loss": 0.4176, + "step": 3248 + }, + { + "epoch": 0.1470468431771894, + "grad_norm": 0.34889750817078513, + "learning_rate": 9.645323131990908e-06, + "loss": 0.4997, + "step": 3249 + }, + { + "epoch": 0.14709210228558497, + "grad_norm": 0.7653840453006011, + "learning_rate": 9.64505196181065e-06, + "loss": 0.4072, + "step": 3250 + }, + { + "epoch": 0.14713736139398054, + "grad_norm": 0.6156596928615148, + "learning_rate": 9.644780691822491e-06, + "loss": 0.3849, + "step": 3251 + }, + { + "epoch": 0.1471826205023761, + "grad_norm": 0.7840515047666305, + "learning_rate": 9.644509322032262e-06, + "loss": 0.4479, + "step": 3252 + }, + { + "epoch": 0.14722787961077166, + "grad_norm": 0.6974849515726319, + "learning_rate": 9.644237852445792e-06, + "loss": 0.3878, + "step": 3253 + }, + { + "epoch": 0.14727313871916722, + "grad_norm": 0.5227456682682667, + "learning_rate": 9.643966283068912e-06, + "loss": 0.4938, + "step": 3254 + }, + { + "epoch": 0.14731839782756279, + "grad_norm": 0.7538320838930355, + "learning_rate": 9.643694613907461e-06, + "loss": 0.4184, + "step": 3255 + }, + { + "epoch": 0.14736365693595835, + "grad_norm": 0.6633093812499693, + "learning_rate": 9.643422844967274e-06, + "loss": 0.4429, + "step": 3256 + }, + { + "epoch": 0.14740891604435394, + "grad_norm": 0.38889087430455327, + "learning_rate": 9.643150976254192e-06, + "loss": 0.5149, + "step": 3257 + }, + { + "epoch": 0.1474541751527495, + "grad_norm": 0.6689124157664896, + "learning_rate": 9.642879007774058e-06, + "loss": 0.4621, + "step": 3258 + }, + { + "epoch": 0.14749943426114506, + "grad_norm": 0.6469967659217317, + "learning_rate": 9.64260693953271e-06, + "loss": 0.3693, + "step": 3259 + }, + { + "epoch": 0.14754469336954062, + "grad_norm": 0.7225777422722433, + "learning_rate": 9.642334771536e-06, + "loss": 0.461, + "step": 3260 + }, + { + "epoch": 0.1475899524779362, + "grad_norm": 0.663154906716596, + "learning_rate": 9.642062503789772e-06, + "loss": 0.3842, + "step": 3261 + }, + { + "epoch": 0.14763521158633175, + "grad_norm": 0.4600695882779264, + "learning_rate": 9.641790136299877e-06, + "loss": 0.5037, + "step": 3262 + }, + { + "epoch": 0.1476804706947273, + "grad_norm": 0.7247436825298432, + "learning_rate": 9.641517669072171e-06, + "loss": 0.4413, + "step": 3263 + }, + { + "epoch": 0.14772572980312287, + "grad_norm": 0.3658974333913344, + "learning_rate": 9.641245102112503e-06, + "loss": 0.5163, + "step": 3264 + }, + { + "epoch": 0.14777098891151844, + "grad_norm": 0.679325315229181, + "learning_rate": 9.640972435426734e-06, + "loss": 0.4003, + "step": 3265 + }, + { + "epoch": 0.147816248019914, + "grad_norm": 0.6799213139722677, + "learning_rate": 9.640699669020721e-06, + "loss": 0.4247, + "step": 3266 + }, + { + "epoch": 0.14786150712830956, + "grad_norm": 0.7157540734235226, + "learning_rate": 9.640426802900325e-06, + "loss": 0.3924, + "step": 3267 + }, + { + "epoch": 0.14790676623670515, + "grad_norm": 0.4449196343882221, + "learning_rate": 9.640153837071407e-06, + "loss": 0.5094, + "step": 3268 + }, + { + "epoch": 0.1479520253451007, + "grad_norm": 0.38711013174344855, + "learning_rate": 9.639880771539836e-06, + "loss": 0.4969, + "step": 3269 + }, + { + "epoch": 0.14799728445349628, + "grad_norm": 0.7658134774111022, + "learning_rate": 9.639607606311477e-06, + "loss": 0.4226, + "step": 3270 + }, + { + "epoch": 0.14804254356189184, + "grad_norm": 0.3612525389646945, + "learning_rate": 9.6393343413922e-06, + "loss": 0.5179, + "step": 3271 + }, + { + "epoch": 0.1480878026702874, + "grad_norm": 0.6473237930596188, + "learning_rate": 9.639060976787878e-06, + "loss": 0.3766, + "step": 3272 + }, + { + "epoch": 0.14813306177868296, + "grad_norm": 0.7181412797772644, + "learning_rate": 9.638787512504382e-06, + "loss": 0.3788, + "step": 3273 + }, + { + "epoch": 0.14817832088707852, + "grad_norm": 0.6567385369804504, + "learning_rate": 9.63851394854759e-06, + "loss": 0.3702, + "step": 3274 + }, + { + "epoch": 0.1482235799954741, + "grad_norm": 0.7640669550629611, + "learning_rate": 9.638240284923377e-06, + "loss": 0.445, + "step": 3275 + }, + { + "epoch": 0.14826883910386965, + "grad_norm": 0.6652970779894702, + "learning_rate": 9.637966521637628e-06, + "loss": 0.424, + "step": 3276 + }, + { + "epoch": 0.1483140982122652, + "grad_norm": 0.6624015894203762, + "learning_rate": 9.637692658696222e-06, + "loss": 0.404, + "step": 3277 + }, + { + "epoch": 0.14835935732066077, + "grad_norm": 0.6559214372418471, + "learning_rate": 9.637418696105043e-06, + "loss": 0.4228, + "step": 3278 + }, + { + "epoch": 0.14840461642905634, + "grad_norm": 0.7299149369216319, + "learning_rate": 9.63714463386998e-06, + "loss": 0.4156, + "step": 3279 + }, + { + "epoch": 0.14844987553745193, + "grad_norm": 0.4885908240246616, + "learning_rate": 9.636870471996923e-06, + "loss": 0.4854, + "step": 3280 + }, + { + "epoch": 0.1484951346458475, + "grad_norm": 0.6781850874150899, + "learning_rate": 9.63659621049176e-06, + "loss": 0.386, + "step": 3281 + }, + { + "epoch": 0.14854039375424305, + "grad_norm": 0.3620606036338175, + "learning_rate": 9.636321849360382e-06, + "loss": 0.512, + "step": 3282 + }, + { + "epoch": 0.1485856528626386, + "grad_norm": 0.7136500943838553, + "learning_rate": 9.63604738860869e-06, + "loss": 0.3859, + "step": 3283 + }, + { + "epoch": 0.14863091197103417, + "grad_norm": 0.7297957271328588, + "learning_rate": 9.635772828242575e-06, + "loss": 0.423, + "step": 3284 + }, + { + "epoch": 0.14867617107942974, + "grad_norm": 0.3745810356349233, + "learning_rate": 9.63549816826794e-06, + "loss": 0.4979, + "step": 3285 + }, + { + "epoch": 0.1487214301878253, + "grad_norm": 0.356023359325862, + "learning_rate": 9.635223408690688e-06, + "loss": 0.5067, + "step": 3286 + }, + { + "epoch": 0.14876668929622086, + "grad_norm": 0.6842405407097039, + "learning_rate": 9.63494854951672e-06, + "loss": 0.416, + "step": 3287 + }, + { + "epoch": 0.14881194840461642, + "grad_norm": 0.7189774734887591, + "learning_rate": 9.634673590751944e-06, + "loss": 0.4349, + "step": 3288 + }, + { + "epoch": 0.14885720751301199, + "grad_norm": 0.7186667769161001, + "learning_rate": 9.634398532402264e-06, + "loss": 0.4225, + "step": 3289 + }, + { + "epoch": 0.14890246662140755, + "grad_norm": 0.7200939913489343, + "learning_rate": 9.634123374473596e-06, + "loss": 0.3895, + "step": 3290 + }, + { + "epoch": 0.1489477257298031, + "grad_norm": 0.6927317189414052, + "learning_rate": 9.633848116971849e-06, + "loss": 0.3925, + "step": 3291 + }, + { + "epoch": 0.1489929848381987, + "grad_norm": 1.166576042660753, + "learning_rate": 9.633572759902936e-06, + "loss": 0.4118, + "step": 3292 + }, + { + "epoch": 0.14903824394659426, + "grad_norm": 0.7492381420963506, + "learning_rate": 9.633297303272777e-06, + "loss": 0.4184, + "step": 3293 + }, + { + "epoch": 0.14908350305498982, + "grad_norm": 0.7223441248858142, + "learning_rate": 9.633021747087288e-06, + "loss": 0.3735, + "step": 3294 + }, + { + "epoch": 0.1491287621633854, + "grad_norm": 0.6917643370279334, + "learning_rate": 9.632746091352393e-06, + "loss": 0.4248, + "step": 3295 + }, + { + "epoch": 0.14917402127178095, + "grad_norm": 0.7034104999065266, + "learning_rate": 9.632470336074009e-06, + "loss": 0.3917, + "step": 3296 + }, + { + "epoch": 0.1492192803801765, + "grad_norm": 0.6831009361510235, + "learning_rate": 9.632194481258069e-06, + "loss": 0.4178, + "step": 3297 + }, + { + "epoch": 0.14926453948857207, + "grad_norm": 0.5716097893116413, + "learning_rate": 9.631918526910493e-06, + "loss": 0.5524, + "step": 3298 + }, + { + "epoch": 0.14930979859696764, + "grad_norm": 0.6941509909598592, + "learning_rate": 9.631642473037216e-06, + "loss": 0.3869, + "step": 3299 + }, + { + "epoch": 0.1493550577053632, + "grad_norm": 0.7320256723128162, + "learning_rate": 9.631366319644167e-06, + "loss": 0.4733, + "step": 3300 + }, + { + "epoch": 0.14940031681375876, + "grad_norm": 0.7026666178027788, + "learning_rate": 9.631090066737278e-06, + "loss": 0.4246, + "step": 3301 + }, + { + "epoch": 0.14944557592215432, + "grad_norm": 1.0725104038960356, + "learning_rate": 9.630813714322488e-06, + "loss": 0.481, + "step": 3302 + }, + { + "epoch": 0.14949083503054988, + "grad_norm": 0.6730109808516953, + "learning_rate": 9.630537262405735e-06, + "loss": 0.3949, + "step": 3303 + }, + { + "epoch": 0.14953609413894547, + "grad_norm": 0.7169901020211172, + "learning_rate": 9.630260710992956e-06, + "loss": 0.4161, + "step": 3304 + }, + { + "epoch": 0.14958135324734104, + "grad_norm": 0.6953568784611611, + "learning_rate": 9.629984060090097e-06, + "loss": 0.3782, + "step": 3305 + }, + { + "epoch": 0.1496266123557366, + "grad_norm": 0.723483016156664, + "learning_rate": 9.629707309703099e-06, + "loss": 0.4203, + "step": 3306 + }, + { + "epoch": 0.14967187146413216, + "grad_norm": 0.6857172507609567, + "learning_rate": 9.629430459837909e-06, + "loss": 0.3877, + "step": 3307 + }, + { + "epoch": 0.14971713057252772, + "grad_norm": 0.700995997510525, + "learning_rate": 9.629153510500478e-06, + "loss": 0.4093, + "step": 3308 + }, + { + "epoch": 0.14976238968092329, + "grad_norm": 0.6784060071160138, + "learning_rate": 9.628876461696754e-06, + "loss": 0.3871, + "step": 3309 + }, + { + "epoch": 0.14980764878931885, + "grad_norm": 0.5473550274042821, + "learning_rate": 9.628599313432694e-06, + "loss": 0.5218, + "step": 3310 + }, + { + "epoch": 0.1498529078977144, + "grad_norm": 0.42281332962853047, + "learning_rate": 9.628322065714248e-06, + "loss": 0.5336, + "step": 3311 + }, + { + "epoch": 0.14989816700610997, + "grad_norm": 0.7061183170557235, + "learning_rate": 9.628044718547379e-06, + "loss": 0.4277, + "step": 3312 + }, + { + "epoch": 0.14994342611450553, + "grad_norm": 0.7485224328430917, + "learning_rate": 9.62776727193804e-06, + "loss": 0.4106, + "step": 3313 + }, + { + "epoch": 0.1499886852229011, + "grad_norm": 0.49668806668198767, + "learning_rate": 9.627489725892195e-06, + "loss": 0.5306, + "step": 3314 + }, + { + "epoch": 0.1500339443312967, + "grad_norm": 0.5014177021586415, + "learning_rate": 9.627212080415808e-06, + "loss": 0.5247, + "step": 3315 + }, + { + "epoch": 0.15007920343969225, + "grad_norm": 0.7862004631444977, + "learning_rate": 9.626934335514847e-06, + "loss": 0.4317, + "step": 3316 + }, + { + "epoch": 0.1501244625480878, + "grad_norm": 0.7304331450450849, + "learning_rate": 9.626656491195277e-06, + "loss": 0.4345, + "step": 3317 + }, + { + "epoch": 0.15016972165648337, + "grad_norm": 0.7099372085651383, + "learning_rate": 9.626378547463067e-06, + "loss": 0.4388, + "step": 3318 + }, + { + "epoch": 0.15021498076487894, + "grad_norm": 0.6846526401291254, + "learning_rate": 9.626100504324194e-06, + "loss": 0.4032, + "step": 3319 + }, + { + "epoch": 0.1502602398732745, + "grad_norm": 0.8179284569944197, + "learning_rate": 9.625822361784626e-06, + "loss": 0.4003, + "step": 3320 + }, + { + "epoch": 0.15030549898167006, + "grad_norm": 0.7223286737584297, + "learning_rate": 9.625544119850344e-06, + "loss": 0.3899, + "step": 3321 + }, + { + "epoch": 0.15035075809006562, + "grad_norm": 0.6603367788079666, + "learning_rate": 9.625265778527325e-06, + "loss": 0.3849, + "step": 3322 + }, + { + "epoch": 0.15039601719846118, + "grad_norm": 0.7607035253944717, + "learning_rate": 9.62498733782155e-06, + "loss": 0.4401, + "step": 3323 + }, + { + "epoch": 0.15044127630685675, + "grad_norm": 0.7541225861262272, + "learning_rate": 9.624708797739002e-06, + "loss": 0.4311, + "step": 3324 + }, + { + "epoch": 0.1504865354152523, + "grad_norm": 0.7014753507801132, + "learning_rate": 9.624430158285664e-06, + "loss": 0.4407, + "step": 3325 + }, + { + "epoch": 0.15053179452364787, + "grad_norm": 0.6942835461363718, + "learning_rate": 9.624151419467527e-06, + "loss": 0.3872, + "step": 3326 + }, + { + "epoch": 0.15057705363204346, + "grad_norm": 0.7138036553160272, + "learning_rate": 9.623872581290576e-06, + "loss": 0.4314, + "step": 3327 + }, + { + "epoch": 0.15062231274043902, + "grad_norm": 0.8268866607473049, + "learning_rate": 9.623593643760805e-06, + "loss": 0.5235, + "step": 3328 + }, + { + "epoch": 0.15066757184883459, + "grad_norm": 0.7084738223029766, + "learning_rate": 9.623314606884207e-06, + "loss": 0.4128, + "step": 3329 + }, + { + "epoch": 0.15071283095723015, + "grad_norm": 0.7136920204031075, + "learning_rate": 9.623035470666778e-06, + "loss": 0.443, + "step": 3330 + }, + { + "epoch": 0.1507580900656257, + "grad_norm": 0.6631487881509494, + "learning_rate": 9.622756235114515e-06, + "loss": 0.423, + "step": 3331 + }, + { + "epoch": 0.15080334917402127, + "grad_norm": 0.7237223774625545, + "learning_rate": 9.622476900233417e-06, + "loss": 0.4024, + "step": 3332 + }, + { + "epoch": 0.15084860828241684, + "grad_norm": 0.7136308557874039, + "learning_rate": 9.622197466029488e-06, + "loss": 0.4373, + "step": 3333 + }, + { + "epoch": 0.1508938673908124, + "grad_norm": 0.6607160453705881, + "learning_rate": 9.621917932508733e-06, + "loss": 0.5152, + "step": 3334 + }, + { + "epoch": 0.15093912649920796, + "grad_norm": 0.6643946238580568, + "learning_rate": 9.621638299677157e-06, + "loss": 0.4072, + "step": 3335 + }, + { + "epoch": 0.15098438560760352, + "grad_norm": 0.7875523683733319, + "learning_rate": 9.621358567540766e-06, + "loss": 0.4121, + "step": 3336 + }, + { + "epoch": 0.15102964471599908, + "grad_norm": 0.6171204450407061, + "learning_rate": 9.621078736105573e-06, + "loss": 0.3895, + "step": 3337 + }, + { + "epoch": 0.15107490382439465, + "grad_norm": 0.6658184861266957, + "learning_rate": 9.620798805377592e-06, + "loss": 0.3854, + "step": 3338 + }, + { + "epoch": 0.15112016293279024, + "grad_norm": 0.404686142066214, + "learning_rate": 9.620518775362835e-06, + "loss": 0.4777, + "step": 3339 + }, + { + "epoch": 0.1511654220411858, + "grad_norm": 0.7400084138254952, + "learning_rate": 9.620238646067322e-06, + "loss": 0.4257, + "step": 3340 + }, + { + "epoch": 0.15121068114958136, + "grad_norm": 0.8422360462884343, + "learning_rate": 9.619958417497069e-06, + "loss": 0.4365, + "step": 3341 + }, + { + "epoch": 0.15125594025797692, + "grad_norm": 0.9105430523186688, + "learning_rate": 9.619678089658097e-06, + "loss": 0.4329, + "step": 3342 + }, + { + "epoch": 0.15130119936637249, + "grad_norm": 0.41325749692787855, + "learning_rate": 9.619397662556434e-06, + "loss": 0.5005, + "step": 3343 + }, + { + "epoch": 0.15134645847476805, + "grad_norm": 0.8607971073288683, + "learning_rate": 9.619117136198101e-06, + "loss": 0.4102, + "step": 3344 + }, + { + "epoch": 0.1513917175831636, + "grad_norm": 0.7294942692583787, + "learning_rate": 9.61883651058913e-06, + "loss": 0.4079, + "step": 3345 + }, + { + "epoch": 0.15143697669155917, + "grad_norm": 0.682562825235258, + "learning_rate": 9.618555785735546e-06, + "loss": 0.4413, + "step": 3346 + }, + { + "epoch": 0.15148223579995473, + "grad_norm": 0.6883105556066248, + "learning_rate": 9.618274961643384e-06, + "loss": 0.3879, + "step": 3347 + }, + { + "epoch": 0.1515274949083503, + "grad_norm": 0.6450651111810509, + "learning_rate": 9.617994038318675e-06, + "loss": 0.4119, + "step": 3348 + }, + { + "epoch": 0.15157275401674586, + "grad_norm": 0.7116436770471571, + "learning_rate": 9.617713015767457e-06, + "loss": 0.3703, + "step": 3349 + }, + { + "epoch": 0.15161801312514142, + "grad_norm": 0.7377554108903471, + "learning_rate": 9.617431893995771e-06, + "loss": 0.3761, + "step": 3350 + }, + { + "epoch": 0.151663272233537, + "grad_norm": 0.6646223645221694, + "learning_rate": 9.617150673009654e-06, + "loss": 0.4193, + "step": 3351 + }, + { + "epoch": 0.15170853134193257, + "grad_norm": 0.45223626911500664, + "learning_rate": 9.61686935281515e-06, + "loss": 0.5071, + "step": 3352 + }, + { + "epoch": 0.15175379045032814, + "grad_norm": 0.3798005018025008, + "learning_rate": 9.616587933418302e-06, + "loss": 0.5095, + "step": 3353 + }, + { + "epoch": 0.1517990495587237, + "grad_norm": 0.789312374773116, + "learning_rate": 9.616306414825158e-06, + "loss": 0.4139, + "step": 3354 + }, + { + "epoch": 0.15184430866711926, + "grad_norm": 0.6956206272305917, + "learning_rate": 9.616024797041769e-06, + "loss": 0.4142, + "step": 3355 + }, + { + "epoch": 0.15188956777551482, + "grad_norm": 0.6648217865219, + "learning_rate": 9.615743080074183e-06, + "loss": 0.4318, + "step": 3356 + }, + { + "epoch": 0.15193482688391038, + "grad_norm": 0.5935656105926574, + "learning_rate": 9.615461263928454e-06, + "loss": 0.5229, + "step": 3357 + }, + { + "epoch": 0.15198008599230595, + "grad_norm": 0.7476404386102188, + "learning_rate": 9.615179348610638e-06, + "loss": 0.4273, + "step": 3358 + }, + { + "epoch": 0.1520253451007015, + "grad_norm": 0.7061949866038572, + "learning_rate": 9.614897334126791e-06, + "loss": 0.4166, + "step": 3359 + }, + { + "epoch": 0.15207060420909707, + "grad_norm": 0.40998979916945805, + "learning_rate": 9.614615220482976e-06, + "loss": 0.5397, + "step": 3360 + }, + { + "epoch": 0.15211586331749263, + "grad_norm": 0.8891488917747642, + "learning_rate": 9.614333007685253e-06, + "loss": 0.4159, + "step": 3361 + }, + { + "epoch": 0.15216112242588822, + "grad_norm": 0.7086492645793606, + "learning_rate": 9.614050695739683e-06, + "loss": 0.3637, + "step": 3362 + }, + { + "epoch": 0.15220638153428379, + "grad_norm": 0.41334132030452503, + "learning_rate": 9.613768284652336e-06, + "loss": 0.4981, + "step": 3363 + }, + { + "epoch": 0.15225164064267935, + "grad_norm": 0.7341079457582734, + "learning_rate": 9.613485774429279e-06, + "loss": 0.3899, + "step": 3364 + }, + { + "epoch": 0.1522968997510749, + "grad_norm": 0.7868870876356426, + "learning_rate": 9.61320316507658e-06, + "loss": 0.4445, + "step": 3365 + }, + { + "epoch": 0.15234215885947047, + "grad_norm": 0.7311498728838661, + "learning_rate": 9.612920456600317e-06, + "loss": 0.4189, + "step": 3366 + }, + { + "epoch": 0.15238741796786603, + "grad_norm": 0.7843591853548378, + "learning_rate": 9.612637649006557e-06, + "loss": 0.3834, + "step": 3367 + }, + { + "epoch": 0.1524326770762616, + "grad_norm": 0.7826692283832626, + "learning_rate": 9.612354742301381e-06, + "loss": 0.4292, + "step": 3368 + }, + { + "epoch": 0.15247793618465716, + "grad_norm": 0.6647315439646727, + "learning_rate": 9.61207173649087e-06, + "loss": 0.3619, + "step": 3369 + }, + { + "epoch": 0.15252319529305272, + "grad_norm": 0.6557323897622711, + "learning_rate": 9.6117886315811e-06, + "loss": 0.3907, + "step": 3370 + }, + { + "epoch": 0.15256845440144828, + "grad_norm": 0.7221361141961096, + "learning_rate": 9.611505427578159e-06, + "loss": 0.4113, + "step": 3371 + }, + { + "epoch": 0.15261371350984385, + "grad_norm": 0.7099770327135274, + "learning_rate": 9.611222124488126e-06, + "loss": 0.3876, + "step": 3372 + }, + { + "epoch": 0.1526589726182394, + "grad_norm": 0.4333483735128783, + "learning_rate": 9.610938722317095e-06, + "loss": 0.4772, + "step": 3373 + }, + { + "epoch": 0.152704231726635, + "grad_norm": 0.7950935256983794, + "learning_rate": 9.61065522107115e-06, + "loss": 0.4274, + "step": 3374 + }, + { + "epoch": 0.15274949083503056, + "grad_norm": 0.3544158593732371, + "learning_rate": 9.610371620756385e-06, + "loss": 0.5014, + "step": 3375 + }, + { + "epoch": 0.15279474994342612, + "grad_norm": 0.7494143798836961, + "learning_rate": 9.610087921378895e-06, + "loss": 0.4031, + "step": 3376 + }, + { + "epoch": 0.15284000905182168, + "grad_norm": 0.32936531220215204, + "learning_rate": 9.609804122944774e-06, + "loss": 0.5054, + "step": 3377 + }, + { + "epoch": 0.15288526816021725, + "grad_norm": 0.6970342828940654, + "learning_rate": 9.60952022546012e-06, + "loss": 0.3913, + "step": 3378 + }, + { + "epoch": 0.1529305272686128, + "grad_norm": 0.3542374522374869, + "learning_rate": 9.609236228931033e-06, + "loss": 0.5125, + "step": 3379 + }, + { + "epoch": 0.15297578637700837, + "grad_norm": 0.37112645861116433, + "learning_rate": 9.608952133363616e-06, + "loss": 0.5181, + "step": 3380 + }, + { + "epoch": 0.15302104548540393, + "grad_norm": 0.7934526387588287, + "learning_rate": 9.608667938763974e-06, + "loss": 0.3955, + "step": 3381 + }, + { + "epoch": 0.1530663045937995, + "grad_norm": 0.7638806885248461, + "learning_rate": 9.60838364513821e-06, + "loss": 0.3943, + "step": 3382 + }, + { + "epoch": 0.15311156370219506, + "grad_norm": 0.6629446396210611, + "learning_rate": 9.608099252492437e-06, + "loss": 0.4013, + "step": 3383 + }, + { + "epoch": 0.15315682281059062, + "grad_norm": 0.8507887664141085, + "learning_rate": 9.607814760832764e-06, + "loss": 0.3968, + "step": 3384 + }, + { + "epoch": 0.15320208191898618, + "grad_norm": 0.4667875313224176, + "learning_rate": 9.607530170165302e-06, + "loss": 0.5105, + "step": 3385 + }, + { + "epoch": 0.15324734102738177, + "grad_norm": 0.7202328958788586, + "learning_rate": 9.607245480496168e-06, + "loss": 0.4417, + "step": 3386 + }, + { + "epoch": 0.15329260013577733, + "grad_norm": 0.7011725011282022, + "learning_rate": 9.60696069183148e-06, + "loss": 0.4114, + "step": 3387 + }, + { + "epoch": 0.1533378592441729, + "grad_norm": 0.7338372700466135, + "learning_rate": 9.606675804177355e-06, + "loss": 0.4384, + "step": 3388 + }, + { + "epoch": 0.15338311835256846, + "grad_norm": 0.6826376956547067, + "learning_rate": 9.606390817539915e-06, + "loss": 0.451, + "step": 3389 + }, + { + "epoch": 0.15342837746096402, + "grad_norm": 0.6496624803686839, + "learning_rate": 9.606105731925284e-06, + "loss": 0.436, + "step": 3390 + }, + { + "epoch": 0.15347363656935958, + "grad_norm": 0.7179188848683196, + "learning_rate": 9.605820547339585e-06, + "loss": 0.4005, + "step": 3391 + }, + { + "epoch": 0.15351889567775515, + "grad_norm": 0.6822218854988548, + "learning_rate": 9.605535263788952e-06, + "loss": 0.4075, + "step": 3392 + }, + { + "epoch": 0.1535641547861507, + "grad_norm": 0.3730441185268565, + "learning_rate": 9.60524988127951e-06, + "loss": 0.53, + "step": 3393 + }, + { + "epoch": 0.15360941389454627, + "grad_norm": 0.6636620437666377, + "learning_rate": 9.604964399817392e-06, + "loss": 0.4018, + "step": 3394 + }, + { + "epoch": 0.15365467300294183, + "grad_norm": 0.7268505905368877, + "learning_rate": 9.60467881940873e-06, + "loss": 0.4387, + "step": 3395 + }, + { + "epoch": 0.1536999321113374, + "grad_norm": 0.6961618085585248, + "learning_rate": 9.604393140059666e-06, + "loss": 0.3756, + "step": 3396 + }, + { + "epoch": 0.15374519121973299, + "grad_norm": 0.3179645190143338, + "learning_rate": 9.604107361776331e-06, + "loss": 0.4851, + "step": 3397 + }, + { + "epoch": 0.15379045032812855, + "grad_norm": 0.662873080346535, + "learning_rate": 9.603821484564873e-06, + "loss": 0.3963, + "step": 3398 + }, + { + "epoch": 0.1538357094365241, + "grad_norm": 0.6926916308137251, + "learning_rate": 9.603535508431428e-06, + "loss": 0.3792, + "step": 3399 + }, + { + "epoch": 0.15388096854491967, + "grad_norm": 0.7226052047552491, + "learning_rate": 9.603249433382145e-06, + "loss": 0.4637, + "step": 3400 + }, + { + "epoch": 0.15392622765331523, + "grad_norm": 0.7045172676686957, + "learning_rate": 9.602963259423168e-06, + "loss": 0.4246, + "step": 3401 + }, + { + "epoch": 0.1539714867617108, + "grad_norm": 0.7305812318197507, + "learning_rate": 9.602676986560649e-06, + "loss": 0.4106, + "step": 3402 + }, + { + "epoch": 0.15401674587010636, + "grad_norm": 0.6517290131883109, + "learning_rate": 9.602390614800737e-06, + "loss": 0.3772, + "step": 3403 + }, + { + "epoch": 0.15406200497850192, + "grad_norm": 0.6246818631587848, + "learning_rate": 9.602104144149587e-06, + "loss": 0.4714, + "step": 3404 + }, + { + "epoch": 0.15410726408689748, + "grad_norm": 0.6493350034733257, + "learning_rate": 9.601817574613352e-06, + "loss": 0.4017, + "step": 3405 + }, + { + "epoch": 0.15415252319529305, + "grad_norm": 0.69897511909871, + "learning_rate": 9.60153090619819e-06, + "loss": 0.4399, + "step": 3406 + }, + { + "epoch": 0.1541977823036886, + "grad_norm": 0.6498934389942151, + "learning_rate": 9.601244138910262e-06, + "loss": 0.3845, + "step": 3407 + }, + { + "epoch": 0.15424304141208417, + "grad_norm": 0.7266953270385536, + "learning_rate": 9.60095727275573e-06, + "loss": 0.4543, + "step": 3408 + }, + { + "epoch": 0.15428830052047976, + "grad_norm": 0.4488563542322566, + "learning_rate": 9.600670307740755e-06, + "loss": 0.5156, + "step": 3409 + }, + { + "epoch": 0.15433355962887532, + "grad_norm": 0.965869209891706, + "learning_rate": 9.600383243871508e-06, + "loss": 0.3851, + "step": 3410 + }, + { + "epoch": 0.15437881873727088, + "grad_norm": 0.7504707497764602, + "learning_rate": 9.600096081154151e-06, + "loss": 0.4307, + "step": 3411 + }, + { + "epoch": 0.15442407784566645, + "grad_norm": 0.6613917357422878, + "learning_rate": 9.59980881959486e-06, + "loss": 0.3978, + "step": 3412 + }, + { + "epoch": 0.154469336954062, + "grad_norm": 0.7520780460741235, + "learning_rate": 9.599521459199803e-06, + "loss": 0.4667, + "step": 3413 + }, + { + "epoch": 0.15451459606245757, + "grad_norm": 0.8176268578974006, + "learning_rate": 9.599233999975156e-06, + "loss": 0.4274, + "step": 3414 + }, + { + "epoch": 0.15455985517085313, + "grad_norm": 0.6840665310353456, + "learning_rate": 9.598946441927097e-06, + "loss": 0.4168, + "step": 3415 + }, + { + "epoch": 0.1546051142792487, + "grad_norm": 0.7683252535016913, + "learning_rate": 9.598658785061803e-06, + "loss": 0.4372, + "step": 3416 + }, + { + "epoch": 0.15465037338764426, + "grad_norm": 0.6823793322842577, + "learning_rate": 9.598371029385455e-06, + "loss": 0.3983, + "step": 3417 + }, + { + "epoch": 0.15469563249603982, + "grad_norm": 0.44229167249114293, + "learning_rate": 9.598083174904235e-06, + "loss": 0.4959, + "step": 3418 + }, + { + "epoch": 0.15474089160443538, + "grad_norm": 0.7338618242173148, + "learning_rate": 9.597795221624334e-06, + "loss": 0.4095, + "step": 3419 + }, + { + "epoch": 0.15478615071283094, + "grad_norm": 0.32181814585633023, + "learning_rate": 9.59750716955193e-06, + "loss": 0.5028, + "step": 3420 + }, + { + "epoch": 0.15483140982122653, + "grad_norm": 0.7202648707296, + "learning_rate": 9.59721901869322e-06, + "loss": 0.4154, + "step": 3421 + }, + { + "epoch": 0.1548766689296221, + "grad_norm": 0.7033701337578779, + "learning_rate": 9.596930769054391e-06, + "loss": 0.4178, + "step": 3422 + }, + { + "epoch": 0.15492192803801766, + "grad_norm": 0.9866248658193294, + "learning_rate": 9.59664242064164e-06, + "loss": 0.407, + "step": 3423 + }, + { + "epoch": 0.15496718714641322, + "grad_norm": 0.7775425621832033, + "learning_rate": 9.59635397346116e-06, + "loss": 0.3774, + "step": 3424 + }, + { + "epoch": 0.15501244625480878, + "grad_norm": 0.6732563516972015, + "learning_rate": 9.596065427519149e-06, + "loss": 0.4344, + "step": 3425 + }, + { + "epoch": 0.15505770536320435, + "grad_norm": 0.691794745822921, + "learning_rate": 9.595776782821807e-06, + "loss": 0.421, + "step": 3426 + }, + { + "epoch": 0.1551029644715999, + "grad_norm": 0.6687992237129932, + "learning_rate": 9.595488039375338e-06, + "loss": 0.4408, + "step": 3427 + }, + { + "epoch": 0.15514822357999547, + "grad_norm": 0.7624800700828009, + "learning_rate": 9.595199197185944e-06, + "loss": 0.4267, + "step": 3428 + }, + { + "epoch": 0.15519348268839103, + "grad_norm": 0.7863688543009814, + "learning_rate": 9.594910256259834e-06, + "loss": 0.4503, + "step": 3429 + }, + { + "epoch": 0.1552387417967866, + "grad_norm": 0.6196129348860103, + "learning_rate": 9.594621216603215e-06, + "loss": 0.5111, + "step": 3430 + }, + { + "epoch": 0.15528400090518216, + "grad_norm": 0.7464054819999745, + "learning_rate": 9.594332078222296e-06, + "loss": 0.3929, + "step": 3431 + }, + { + "epoch": 0.15532926001357772, + "grad_norm": 0.660553397731443, + "learning_rate": 9.594042841123291e-06, + "loss": 0.4165, + "step": 3432 + }, + { + "epoch": 0.1553745191219733, + "grad_norm": 0.6912744419593565, + "learning_rate": 9.593753505312415e-06, + "loss": 0.4312, + "step": 3433 + }, + { + "epoch": 0.15541977823036887, + "grad_norm": 0.700543306891848, + "learning_rate": 9.593464070795887e-06, + "loss": 0.4068, + "step": 3434 + }, + { + "epoch": 0.15546503733876443, + "grad_norm": 0.6372973202578978, + "learning_rate": 9.593174537579921e-06, + "loss": 0.3695, + "step": 3435 + }, + { + "epoch": 0.15551029644716, + "grad_norm": 0.7077294435391664, + "learning_rate": 9.592884905670742e-06, + "loss": 0.3848, + "step": 3436 + }, + { + "epoch": 0.15555555555555556, + "grad_norm": 2.966558214938584, + "learning_rate": 9.592595175074573e-06, + "loss": 0.3867, + "step": 3437 + }, + { + "epoch": 0.15560081466395112, + "grad_norm": 0.6559110571739295, + "learning_rate": 9.592305345797636e-06, + "loss": 0.5055, + "step": 3438 + }, + { + "epoch": 0.15564607377234668, + "grad_norm": 0.7035205428029994, + "learning_rate": 9.592015417846166e-06, + "loss": 0.4145, + "step": 3439 + }, + { + "epoch": 0.15569133288074224, + "grad_norm": 0.7079837650347159, + "learning_rate": 9.591725391226383e-06, + "loss": 0.3844, + "step": 3440 + }, + { + "epoch": 0.1557365919891378, + "grad_norm": 0.32253785644702726, + "learning_rate": 9.591435265944527e-06, + "loss": 0.4867, + "step": 3441 + }, + { + "epoch": 0.15578185109753337, + "grad_norm": 0.8604594320238467, + "learning_rate": 9.591145042006829e-06, + "loss": 0.4029, + "step": 3442 + }, + { + "epoch": 0.15582711020592893, + "grad_norm": 0.8566153255616973, + "learning_rate": 9.590854719419522e-06, + "loss": 0.4557, + "step": 3443 + }, + { + "epoch": 0.15587236931432452, + "grad_norm": 0.6991748547257743, + "learning_rate": 9.59056429818885e-06, + "loss": 0.4254, + "step": 3444 + }, + { + "epoch": 0.15591762842272008, + "grad_norm": 0.6833290597384973, + "learning_rate": 9.590273778321048e-06, + "loss": 0.3911, + "step": 3445 + }, + { + "epoch": 0.15596288753111565, + "grad_norm": 0.7384819888520089, + "learning_rate": 9.58998315982236e-06, + "loss": 0.4306, + "step": 3446 + }, + { + "epoch": 0.1560081466395112, + "grad_norm": 0.7408386652212549, + "learning_rate": 9.589692442699033e-06, + "loss": 0.4349, + "step": 3447 + }, + { + "epoch": 0.15605340574790677, + "grad_norm": 0.7202578582855139, + "learning_rate": 9.589401626957309e-06, + "loss": 0.4633, + "step": 3448 + }, + { + "epoch": 0.15609866485630233, + "grad_norm": 0.73345358302742, + "learning_rate": 9.589110712603442e-06, + "loss": 0.4691, + "step": 3449 + }, + { + "epoch": 0.1561439239646979, + "grad_norm": 0.6367108198509374, + "learning_rate": 9.588819699643677e-06, + "loss": 0.536, + "step": 3450 + }, + { + "epoch": 0.15618918307309346, + "grad_norm": 0.4522720428613994, + "learning_rate": 9.588528588084272e-06, + "loss": 0.5085, + "step": 3451 + }, + { + "epoch": 0.15623444218148902, + "grad_norm": 0.7553918774782068, + "learning_rate": 9.588237377931482e-06, + "loss": 0.439, + "step": 3452 + }, + { + "epoch": 0.15627970128988458, + "grad_norm": 0.7313305103265421, + "learning_rate": 9.587946069191561e-06, + "loss": 0.4457, + "step": 3453 + }, + { + "epoch": 0.15632496039828014, + "grad_norm": 0.6840203653682762, + "learning_rate": 9.58765466187077e-06, + "loss": 0.4285, + "step": 3454 + }, + { + "epoch": 0.1563702195066757, + "grad_norm": 0.7561467717478978, + "learning_rate": 9.587363155975367e-06, + "loss": 0.3913, + "step": 3455 + }, + { + "epoch": 0.1564154786150713, + "grad_norm": 0.8080672045005288, + "learning_rate": 9.587071551511621e-06, + "loss": 0.4189, + "step": 3456 + }, + { + "epoch": 0.15646073772346686, + "grad_norm": 0.7731559050053085, + "learning_rate": 9.586779848485797e-06, + "loss": 0.3409, + "step": 3457 + }, + { + "epoch": 0.15650599683186242, + "grad_norm": 0.6965717472553806, + "learning_rate": 9.58648804690416e-06, + "loss": 0.4369, + "step": 3458 + }, + { + "epoch": 0.15655125594025798, + "grad_norm": 0.8440351872536466, + "learning_rate": 9.586196146772982e-06, + "loss": 0.4422, + "step": 3459 + }, + { + "epoch": 0.15659651504865355, + "grad_norm": 0.7661592744550679, + "learning_rate": 9.585904148098532e-06, + "loss": 0.4561, + "step": 3460 + }, + { + "epoch": 0.1566417741570491, + "grad_norm": 1.3733560238638691, + "learning_rate": 9.58561205088709e-06, + "loss": 0.4997, + "step": 3461 + }, + { + "epoch": 0.15668703326544467, + "grad_norm": 0.7280290741276498, + "learning_rate": 9.585319855144926e-06, + "loss": 0.4139, + "step": 3462 + }, + { + "epoch": 0.15673229237384023, + "grad_norm": 0.7531301358903874, + "learning_rate": 9.585027560878322e-06, + "loss": 0.4493, + "step": 3463 + }, + { + "epoch": 0.1567775514822358, + "grad_norm": 0.6604910087043134, + "learning_rate": 9.584735168093557e-06, + "loss": 0.4061, + "step": 3464 + }, + { + "epoch": 0.15682281059063136, + "grad_norm": 0.7322442914769552, + "learning_rate": 9.584442676796915e-06, + "loss": 0.4581, + "step": 3465 + }, + { + "epoch": 0.15686806969902692, + "grad_norm": 0.7073063990760776, + "learning_rate": 9.584150086994678e-06, + "loss": 0.4086, + "step": 3466 + }, + { + "epoch": 0.15691332880742248, + "grad_norm": 0.7190082010411192, + "learning_rate": 9.583857398693137e-06, + "loss": 0.3896, + "step": 3467 + }, + { + "epoch": 0.15695858791581807, + "grad_norm": 0.7584845386772335, + "learning_rate": 9.583564611898577e-06, + "loss": 0.4143, + "step": 3468 + }, + { + "epoch": 0.15700384702421363, + "grad_norm": 0.7108815510865111, + "learning_rate": 9.583271726617293e-06, + "loss": 0.4264, + "step": 3469 + }, + { + "epoch": 0.1570491061326092, + "grad_norm": 0.7277968624062817, + "learning_rate": 9.582978742855575e-06, + "loss": 0.4339, + "step": 3470 + }, + { + "epoch": 0.15709436524100476, + "grad_norm": 0.7515655120521509, + "learning_rate": 9.582685660619718e-06, + "loss": 0.4065, + "step": 3471 + }, + { + "epoch": 0.15713962434940032, + "grad_norm": 0.6949952504423228, + "learning_rate": 9.582392479916023e-06, + "loss": 0.4211, + "step": 3472 + }, + { + "epoch": 0.15718488345779588, + "grad_norm": 0.7550078700494964, + "learning_rate": 9.582099200750784e-06, + "loss": 0.4322, + "step": 3473 + }, + { + "epoch": 0.15723014256619144, + "grad_norm": 0.6658476068153182, + "learning_rate": 9.58180582313031e-06, + "loss": 0.3894, + "step": 3474 + }, + { + "epoch": 0.157275401674587, + "grad_norm": 0.7200657052746892, + "learning_rate": 9.581512347060899e-06, + "loss": 0.4394, + "step": 3475 + }, + { + "epoch": 0.15732066078298257, + "grad_norm": 0.6551082332498498, + "learning_rate": 9.58121877254886e-06, + "loss": 0.4125, + "step": 3476 + }, + { + "epoch": 0.15736591989137813, + "grad_norm": 0.8696623317849659, + "learning_rate": 9.580925099600497e-06, + "loss": 0.4245, + "step": 3477 + }, + { + "epoch": 0.1574111789997737, + "grad_norm": 1.3728801852345183, + "learning_rate": 9.580631328222124e-06, + "loss": 0.5327, + "step": 3478 + }, + { + "epoch": 0.15745643810816926, + "grad_norm": 0.7613061786692783, + "learning_rate": 9.580337458420052e-06, + "loss": 0.3812, + "step": 3479 + }, + { + "epoch": 0.15750169721656485, + "grad_norm": 0.6733684603028954, + "learning_rate": 9.580043490200597e-06, + "loss": 0.4271, + "step": 3480 + }, + { + "epoch": 0.1575469563249604, + "grad_norm": 0.7118128617274064, + "learning_rate": 9.579749423570072e-06, + "loss": 0.3765, + "step": 3481 + }, + { + "epoch": 0.15759221543335597, + "grad_norm": 0.8008041863113271, + "learning_rate": 9.579455258534798e-06, + "loss": 0.4328, + "step": 3482 + }, + { + "epoch": 0.15763747454175153, + "grad_norm": 0.7067609703059613, + "learning_rate": 9.579160995101095e-06, + "loss": 0.4616, + "step": 3483 + }, + { + "epoch": 0.1576827336501471, + "grad_norm": 0.6948951876700356, + "learning_rate": 9.578866633275289e-06, + "loss": 0.4095, + "step": 3484 + }, + { + "epoch": 0.15772799275854266, + "grad_norm": 1.030356858024951, + "learning_rate": 9.578572173063698e-06, + "loss": 0.5231, + "step": 3485 + }, + { + "epoch": 0.15777325186693822, + "grad_norm": 0.731783601918967, + "learning_rate": 9.578277614472655e-06, + "loss": 0.4501, + "step": 3486 + }, + { + "epoch": 0.15781851097533378, + "grad_norm": 0.6664453441880271, + "learning_rate": 9.577982957508488e-06, + "loss": 0.3953, + "step": 3487 + }, + { + "epoch": 0.15786377008372934, + "grad_norm": 0.6950749453384258, + "learning_rate": 9.577688202177525e-06, + "loss": 0.4274, + "step": 3488 + }, + { + "epoch": 0.1579090291921249, + "grad_norm": 1.0097198471065796, + "learning_rate": 9.577393348486104e-06, + "loss": 0.4375, + "step": 3489 + }, + { + "epoch": 0.15795428830052047, + "grad_norm": 0.7139937096631528, + "learning_rate": 9.577098396440557e-06, + "loss": 0.4217, + "step": 3490 + }, + { + "epoch": 0.15799954740891606, + "grad_norm": 0.5875788879196998, + "learning_rate": 9.576803346047223e-06, + "loss": 0.5023, + "step": 3491 + }, + { + "epoch": 0.15804480651731162, + "grad_norm": 0.6762842201383202, + "learning_rate": 9.576508197312441e-06, + "loss": 0.4239, + "step": 3492 + }, + { + "epoch": 0.15809006562570718, + "grad_norm": 0.37114250178621616, + "learning_rate": 9.576212950242554e-06, + "loss": 0.4967, + "step": 3493 + }, + { + "epoch": 0.15813532473410274, + "grad_norm": 0.6747086799824057, + "learning_rate": 9.575917604843907e-06, + "loss": 0.443, + "step": 3494 + }, + { + "epoch": 0.1581805838424983, + "grad_norm": 0.6714700623457361, + "learning_rate": 9.575622161122843e-06, + "loss": 0.4158, + "step": 3495 + }, + { + "epoch": 0.15822584295089387, + "grad_norm": 0.7040860890844103, + "learning_rate": 9.575326619085713e-06, + "loss": 0.4436, + "step": 3496 + }, + { + "epoch": 0.15827110205928943, + "grad_norm": 0.8520999125563331, + "learning_rate": 9.575030978738865e-06, + "loss": 0.4327, + "step": 3497 + }, + { + "epoch": 0.158316361167685, + "grad_norm": 0.742115121650574, + "learning_rate": 9.574735240088652e-06, + "loss": 0.3907, + "step": 3498 + }, + { + "epoch": 0.15836162027608056, + "grad_norm": 0.6498680949772371, + "learning_rate": 9.574439403141431e-06, + "loss": 0.4895, + "step": 3499 + }, + { + "epoch": 0.15840687938447612, + "grad_norm": 0.7487791434196598, + "learning_rate": 9.574143467903554e-06, + "loss": 0.3604, + "step": 3500 + }, + { + "epoch": 0.15845213849287168, + "grad_norm": 0.7048832865090792, + "learning_rate": 9.573847434381382e-06, + "loss": 0.4797, + "step": 3501 + }, + { + "epoch": 0.15849739760126724, + "grad_norm": 0.6976048296528936, + "learning_rate": 9.573551302581279e-06, + "loss": 0.4349, + "step": 3502 + }, + { + "epoch": 0.15854265670966283, + "grad_norm": 0.7027842537035046, + "learning_rate": 9.573255072509604e-06, + "loss": 0.3949, + "step": 3503 + }, + { + "epoch": 0.1585879158180584, + "grad_norm": 0.35742746636337064, + "learning_rate": 9.572958744172722e-06, + "loss": 0.5011, + "step": 3504 + }, + { + "epoch": 0.15863317492645396, + "grad_norm": 0.6404956434831562, + "learning_rate": 9.572662317577002e-06, + "loss": 0.4505, + "step": 3505 + }, + { + "epoch": 0.15867843403484952, + "grad_norm": 0.7493153738563227, + "learning_rate": 9.572365792728812e-06, + "loss": 0.4209, + "step": 3506 + }, + { + "epoch": 0.15872369314324508, + "grad_norm": 0.6490770161540544, + "learning_rate": 9.572069169634526e-06, + "loss": 0.402, + "step": 3507 + }, + { + "epoch": 0.15876895225164064, + "grad_norm": 0.48412819221920733, + "learning_rate": 9.571772448300514e-06, + "loss": 0.5375, + "step": 3508 + }, + { + "epoch": 0.1588142113600362, + "grad_norm": 0.691642616032123, + "learning_rate": 9.571475628733153e-06, + "loss": 0.3852, + "step": 3509 + }, + { + "epoch": 0.15885947046843177, + "grad_norm": 0.6706389167853233, + "learning_rate": 9.571178710938823e-06, + "loss": 0.421, + "step": 3510 + }, + { + "epoch": 0.15890472957682733, + "grad_norm": 0.8253546949943985, + "learning_rate": 9.570881694923899e-06, + "loss": 0.4211, + "step": 3511 + }, + { + "epoch": 0.1589499886852229, + "grad_norm": 0.6671583142880999, + "learning_rate": 9.570584580694768e-06, + "loss": 0.4265, + "step": 3512 + }, + { + "epoch": 0.15899524779361845, + "grad_norm": 0.9602296597346538, + "learning_rate": 9.570287368257811e-06, + "loss": 0.4102, + "step": 3513 + }, + { + "epoch": 0.15904050690201402, + "grad_norm": 0.7444960285872806, + "learning_rate": 9.569990057619414e-06, + "loss": 0.4406, + "step": 3514 + }, + { + "epoch": 0.1590857660104096, + "grad_norm": 0.6599282051949944, + "learning_rate": 9.569692648785967e-06, + "loss": 0.4068, + "step": 3515 + }, + { + "epoch": 0.15913102511880517, + "grad_norm": 0.7741719356633847, + "learning_rate": 9.56939514176386e-06, + "loss": 0.415, + "step": 3516 + }, + { + "epoch": 0.15917628422720073, + "grad_norm": 0.7258449526309476, + "learning_rate": 9.569097536559486e-06, + "loss": 0.4139, + "step": 3517 + }, + { + "epoch": 0.1592215433355963, + "grad_norm": 0.7239432327363277, + "learning_rate": 9.568799833179238e-06, + "loss": 0.4041, + "step": 3518 + }, + { + "epoch": 0.15926680244399186, + "grad_norm": 0.56606812255434, + "learning_rate": 9.568502031629513e-06, + "loss": 0.5377, + "step": 3519 + }, + { + "epoch": 0.15931206155238742, + "grad_norm": 0.6611254951118147, + "learning_rate": 9.568204131916712e-06, + "loss": 0.3549, + "step": 3520 + }, + { + "epoch": 0.15935732066078298, + "grad_norm": 0.7134987779635908, + "learning_rate": 9.567906134047233e-06, + "loss": 0.413, + "step": 3521 + }, + { + "epoch": 0.15940257976917854, + "grad_norm": 0.6931576955652552, + "learning_rate": 9.567608038027481e-06, + "loss": 0.4342, + "step": 3522 + }, + { + "epoch": 0.1594478388775741, + "grad_norm": 0.7093232644238057, + "learning_rate": 9.567309843863862e-06, + "loss": 0.3926, + "step": 3523 + }, + { + "epoch": 0.15949309798596967, + "grad_norm": 0.7141010617990805, + "learning_rate": 9.56701155156278e-06, + "loss": 0.476, + "step": 3524 + }, + { + "epoch": 0.15953835709436523, + "grad_norm": 0.6637371625249113, + "learning_rate": 9.566713161130646e-06, + "loss": 0.4099, + "step": 3525 + }, + { + "epoch": 0.1595836162027608, + "grad_norm": 0.7357520258636889, + "learning_rate": 9.566414672573873e-06, + "loss": 0.391, + "step": 3526 + }, + { + "epoch": 0.15962887531115638, + "grad_norm": 0.7564330883531917, + "learning_rate": 9.566116085898872e-06, + "loss": 0.4638, + "step": 3527 + }, + { + "epoch": 0.15967413441955194, + "grad_norm": 0.49859478913625926, + "learning_rate": 9.565817401112061e-06, + "loss": 0.5584, + "step": 3528 + }, + { + "epoch": 0.1597193935279475, + "grad_norm": 0.3963057852996049, + "learning_rate": 9.565518618219857e-06, + "loss": 0.4982, + "step": 3529 + }, + { + "epoch": 0.15976465263634307, + "grad_norm": 0.7049571051495215, + "learning_rate": 9.56521973722868e-06, + "loss": 0.4088, + "step": 3530 + }, + { + "epoch": 0.15980991174473863, + "grad_norm": 0.6898393168730423, + "learning_rate": 9.564920758144951e-06, + "loss": 0.3681, + "step": 3531 + }, + { + "epoch": 0.1598551708531342, + "grad_norm": 0.8196229163508992, + "learning_rate": 9.564621680975095e-06, + "loss": 0.3881, + "step": 3532 + }, + { + "epoch": 0.15990042996152976, + "grad_norm": 0.7760658972671267, + "learning_rate": 9.564322505725539e-06, + "loss": 0.4025, + "step": 3533 + }, + { + "epoch": 0.15994568906992532, + "grad_norm": 0.7553452613369879, + "learning_rate": 9.56402323240271e-06, + "loss": 0.4105, + "step": 3534 + }, + { + "epoch": 0.15999094817832088, + "grad_norm": 0.7444146630985331, + "learning_rate": 9.563723861013039e-06, + "loss": 0.3949, + "step": 3535 + }, + { + "epoch": 0.16003620728671644, + "grad_norm": 0.634783786367562, + "learning_rate": 9.563424391562958e-06, + "loss": 0.375, + "step": 3536 + }, + { + "epoch": 0.160081466395112, + "grad_norm": 0.6960440889965599, + "learning_rate": 9.563124824058905e-06, + "loss": 0.4077, + "step": 3537 + }, + { + "epoch": 0.1601267255035076, + "grad_norm": 0.6738085708575341, + "learning_rate": 9.562825158507311e-06, + "loss": 0.4072, + "step": 3538 + }, + { + "epoch": 0.16017198461190316, + "grad_norm": 0.730674294084897, + "learning_rate": 9.562525394914621e-06, + "loss": 0.4379, + "step": 3539 + }, + { + "epoch": 0.16021724372029872, + "grad_norm": 0.8099966068598785, + "learning_rate": 9.562225533287271e-06, + "loss": 0.4621, + "step": 3540 + }, + { + "epoch": 0.16026250282869428, + "grad_norm": 0.8510329125183358, + "learning_rate": 9.561925573631706e-06, + "loss": 0.5253, + "step": 3541 + }, + { + "epoch": 0.16030776193708984, + "grad_norm": 0.7692233573042879, + "learning_rate": 9.561625515954372e-06, + "loss": 0.3987, + "step": 3542 + }, + { + "epoch": 0.1603530210454854, + "grad_norm": 0.7289333091370424, + "learning_rate": 9.561325360261714e-06, + "loss": 0.4236, + "step": 3543 + }, + { + "epoch": 0.16039828015388097, + "grad_norm": 0.6737579331807066, + "learning_rate": 9.561025106560184e-06, + "loss": 0.39, + "step": 3544 + }, + { + "epoch": 0.16044353926227653, + "grad_norm": 1.0999296662435172, + "learning_rate": 9.560724754856234e-06, + "loss": 0.5043, + "step": 3545 + }, + { + "epoch": 0.1604887983706721, + "grad_norm": 0.8174191146577231, + "learning_rate": 9.560424305156314e-06, + "loss": 0.4529, + "step": 3546 + }, + { + "epoch": 0.16053405747906765, + "grad_norm": 0.7134271369346344, + "learning_rate": 9.560123757466885e-06, + "loss": 0.4264, + "step": 3547 + }, + { + "epoch": 0.16057931658746322, + "grad_norm": 0.7892566606627814, + "learning_rate": 9.5598231117944e-06, + "loss": 0.4666, + "step": 3548 + }, + { + "epoch": 0.16062457569585878, + "grad_norm": 0.6618307576773829, + "learning_rate": 9.559522368145319e-06, + "loss": 0.4007, + "step": 3549 + }, + { + "epoch": 0.16066983480425437, + "grad_norm": 0.5561735482323752, + "learning_rate": 9.55922152652611e-06, + "loss": 0.5258, + "step": 3550 + }, + { + "epoch": 0.16071509391264993, + "grad_norm": 0.45960926418367365, + "learning_rate": 9.55892058694323e-06, + "loss": 0.5249, + "step": 3551 + }, + { + "epoch": 0.1607603530210455, + "grad_norm": 1.039650118403853, + "learning_rate": 9.558619549403148e-06, + "loss": 0.3931, + "step": 3552 + }, + { + "epoch": 0.16080561212944106, + "grad_norm": 0.7145501228608365, + "learning_rate": 9.558318413912333e-06, + "loss": 0.42, + "step": 3553 + }, + { + "epoch": 0.16085087123783662, + "grad_norm": 0.7942813553983717, + "learning_rate": 9.558017180477256e-06, + "loss": 0.4074, + "step": 3554 + }, + { + "epoch": 0.16089613034623218, + "grad_norm": 1.2037104862401944, + "learning_rate": 9.557715849104388e-06, + "loss": 0.3903, + "step": 3555 + }, + { + "epoch": 0.16094138945462774, + "grad_norm": 0.6698408169445956, + "learning_rate": 9.557414419800204e-06, + "loss": 0.3849, + "step": 3556 + }, + { + "epoch": 0.1609866485630233, + "grad_norm": 0.6827869819031588, + "learning_rate": 9.557112892571182e-06, + "loss": 0.4154, + "step": 3557 + }, + { + "epoch": 0.16103190767141887, + "grad_norm": 0.7589222223962308, + "learning_rate": 9.5568112674238e-06, + "loss": 0.4312, + "step": 3558 + }, + { + "epoch": 0.16107716677981443, + "grad_norm": 0.7177871268316959, + "learning_rate": 9.556509544364538e-06, + "loss": 0.4274, + "step": 3559 + }, + { + "epoch": 0.16112242588821, + "grad_norm": 0.8385093075093688, + "learning_rate": 9.556207723399882e-06, + "loss": 0.4941, + "step": 3560 + }, + { + "epoch": 0.16116768499660555, + "grad_norm": 0.7822093267945871, + "learning_rate": 9.555905804536315e-06, + "loss": 0.4128, + "step": 3561 + }, + { + "epoch": 0.16121294410500114, + "grad_norm": 0.8118716712352124, + "learning_rate": 9.555603787780321e-06, + "loss": 0.4083, + "step": 3562 + }, + { + "epoch": 0.1612582032133967, + "grad_norm": 0.709902451425543, + "learning_rate": 9.555301673138397e-06, + "loss": 0.4207, + "step": 3563 + }, + { + "epoch": 0.16130346232179227, + "grad_norm": 0.4298801720977222, + "learning_rate": 9.55499946061703e-06, + "loss": 0.5039, + "step": 3564 + }, + { + "epoch": 0.16134872143018783, + "grad_norm": 0.7351410622515675, + "learning_rate": 9.554697150222713e-06, + "loss": 0.4508, + "step": 3565 + }, + { + "epoch": 0.1613939805385834, + "grad_norm": 0.4190010950162947, + "learning_rate": 9.554394741961944e-06, + "loss": 0.5276, + "step": 3566 + }, + { + "epoch": 0.16143923964697895, + "grad_norm": 0.7005793374248309, + "learning_rate": 9.554092235841219e-06, + "loss": 0.4163, + "step": 3567 + }, + { + "epoch": 0.16148449875537452, + "grad_norm": 0.7101329404583272, + "learning_rate": 9.553789631867039e-06, + "loss": 0.4009, + "step": 3568 + }, + { + "epoch": 0.16152975786377008, + "grad_norm": 0.671422559631563, + "learning_rate": 9.553486930045906e-06, + "loss": 0.4007, + "step": 3569 + }, + { + "epoch": 0.16157501697216564, + "grad_norm": 0.8008267532909126, + "learning_rate": 9.553184130384324e-06, + "loss": 0.4549, + "step": 3570 + }, + { + "epoch": 0.1616202760805612, + "grad_norm": 0.7376105220646921, + "learning_rate": 9.5528812328888e-06, + "loss": 0.4455, + "step": 3571 + }, + { + "epoch": 0.16166553518895677, + "grad_norm": 0.6704041835203152, + "learning_rate": 9.552578237565839e-06, + "loss": 0.4112, + "step": 3572 + }, + { + "epoch": 0.16171079429735236, + "grad_norm": 0.6929611158994938, + "learning_rate": 9.552275144421953e-06, + "loss": 0.4417, + "step": 3573 + }, + { + "epoch": 0.16175605340574792, + "grad_norm": 0.6915671452152358, + "learning_rate": 9.551971953463659e-06, + "loss": 0.4215, + "step": 3574 + }, + { + "epoch": 0.16180131251414348, + "grad_norm": 1.136766335665436, + "learning_rate": 9.551668664697467e-06, + "loss": 0.5025, + "step": 3575 + }, + { + "epoch": 0.16184657162253904, + "grad_norm": 0.6674529201333661, + "learning_rate": 9.551365278129894e-06, + "loss": 0.3585, + "step": 3576 + }, + { + "epoch": 0.1618918307309346, + "grad_norm": 0.8954324093302272, + "learning_rate": 9.55106179376746e-06, + "loss": 0.4171, + "step": 3577 + }, + { + "epoch": 0.16193708983933017, + "grad_norm": 0.78892785286471, + "learning_rate": 9.550758211616684e-06, + "loss": 0.4321, + "step": 3578 + }, + { + "epoch": 0.16198234894772573, + "grad_norm": 0.6293828498610229, + "learning_rate": 9.550454531684092e-06, + "loss": 0.3647, + "step": 3579 + }, + { + "epoch": 0.1620276080561213, + "grad_norm": 0.7028121697581299, + "learning_rate": 9.550150753976209e-06, + "loss": 0.4225, + "step": 3580 + }, + { + "epoch": 0.16207286716451685, + "grad_norm": 0.6885948868606172, + "learning_rate": 9.54984687849956e-06, + "loss": 0.4268, + "step": 3581 + }, + { + "epoch": 0.16211812627291242, + "grad_norm": 0.6863217671163234, + "learning_rate": 9.549542905260674e-06, + "loss": 0.4105, + "step": 3582 + }, + { + "epoch": 0.16216338538130798, + "grad_norm": 0.7042879685578601, + "learning_rate": 9.549238834266086e-06, + "loss": 0.3946, + "step": 3583 + }, + { + "epoch": 0.16220864448970354, + "grad_norm": 0.6589248712522245, + "learning_rate": 9.548934665522325e-06, + "loss": 0.3693, + "step": 3584 + }, + { + "epoch": 0.16225390359809913, + "grad_norm": 0.7011875986352972, + "learning_rate": 9.548630399035931e-06, + "loss": 0.3809, + "step": 3585 + }, + { + "epoch": 0.1622991627064947, + "grad_norm": 0.7806831482841735, + "learning_rate": 9.54832603481344e-06, + "loss": 0.4031, + "step": 3586 + }, + { + "epoch": 0.16234442181489026, + "grad_norm": 0.7346212570439461, + "learning_rate": 9.54802157286139e-06, + "loss": 0.4171, + "step": 3587 + }, + { + "epoch": 0.16238968092328582, + "grad_norm": 0.6962142519779313, + "learning_rate": 9.547717013186326e-06, + "loss": 0.4172, + "step": 3588 + }, + { + "epoch": 0.16243494003168138, + "grad_norm": 0.6451915080977847, + "learning_rate": 9.547412355794789e-06, + "loss": 0.4122, + "step": 3589 + }, + { + "epoch": 0.16248019914007694, + "grad_norm": 0.6863066177902849, + "learning_rate": 9.547107600693328e-06, + "loss": 0.4314, + "step": 3590 + }, + { + "epoch": 0.1625254582484725, + "grad_norm": 0.6418986780368956, + "learning_rate": 9.54680274788849e-06, + "loss": 0.4208, + "step": 3591 + }, + { + "epoch": 0.16257071735686807, + "grad_norm": 0.9359545702071064, + "learning_rate": 9.546497797386824e-06, + "loss": 0.552, + "step": 3592 + }, + { + "epoch": 0.16261597646526363, + "grad_norm": 0.7327979098491275, + "learning_rate": 9.546192749194885e-06, + "loss": 0.4228, + "step": 3593 + }, + { + "epoch": 0.1626612355736592, + "grad_norm": 0.8076526403856159, + "learning_rate": 9.545887603319228e-06, + "loss": 0.4029, + "step": 3594 + }, + { + "epoch": 0.16270649468205475, + "grad_norm": 0.7663084724651145, + "learning_rate": 9.545582359766405e-06, + "loss": 0.3929, + "step": 3595 + }, + { + "epoch": 0.16275175379045032, + "grad_norm": 0.6985073830450215, + "learning_rate": 9.54527701854298e-06, + "loss": 0.3769, + "step": 3596 + }, + { + "epoch": 0.1627970128988459, + "grad_norm": 0.7097929957097808, + "learning_rate": 9.544971579655512e-06, + "loss": 0.4669, + "step": 3597 + }, + { + "epoch": 0.16284227200724147, + "grad_norm": 0.6590480282146434, + "learning_rate": 9.544666043110562e-06, + "loss": 0.4041, + "step": 3598 + }, + { + "epoch": 0.16288753111563703, + "grad_norm": 0.6845449427550792, + "learning_rate": 9.544360408914696e-06, + "loss": 0.4011, + "step": 3599 + }, + { + "epoch": 0.1629327902240326, + "grad_norm": 0.7318456776258486, + "learning_rate": 9.544054677074483e-06, + "loss": 0.4056, + "step": 3600 + }, + { + "epoch": 0.16297804933242815, + "grad_norm": 0.7210936459362473, + "learning_rate": 9.543748847596491e-06, + "loss": 0.4206, + "step": 3601 + }, + { + "epoch": 0.16302330844082372, + "grad_norm": 0.7099193412366627, + "learning_rate": 9.543442920487291e-06, + "loss": 0.3923, + "step": 3602 + }, + { + "epoch": 0.16306856754921928, + "grad_norm": 0.7374127538432773, + "learning_rate": 9.543136895753458e-06, + "loss": 0.4666, + "step": 3603 + }, + { + "epoch": 0.16311382665761484, + "grad_norm": 0.6380376806590465, + "learning_rate": 9.542830773401564e-06, + "loss": 0.3662, + "step": 3604 + }, + { + "epoch": 0.1631590857660104, + "grad_norm": 0.7137586537082149, + "learning_rate": 9.54252455343819e-06, + "loss": 0.4414, + "step": 3605 + }, + { + "epoch": 0.16320434487440597, + "grad_norm": 0.9055787596951604, + "learning_rate": 9.542218235869915e-06, + "loss": 0.4986, + "step": 3606 + }, + { + "epoch": 0.16324960398280153, + "grad_norm": 0.8021776941432398, + "learning_rate": 9.54191182070332e-06, + "loss": 0.4135, + "step": 3607 + }, + { + "epoch": 0.1632948630911971, + "grad_norm": 0.5627123166001644, + "learning_rate": 9.54160530794499e-06, + "loss": 0.5387, + "step": 3608 + }, + { + "epoch": 0.16334012219959268, + "grad_norm": 0.7214389595222919, + "learning_rate": 9.541298697601508e-06, + "loss": 0.4124, + "step": 3609 + }, + { + "epoch": 0.16338538130798824, + "grad_norm": 0.7582642791339812, + "learning_rate": 9.540991989679468e-06, + "loss": 0.4447, + "step": 3610 + }, + { + "epoch": 0.1634306404163838, + "grad_norm": 0.7113602407039508, + "learning_rate": 9.540685184185455e-06, + "loss": 0.4116, + "step": 3611 + }, + { + "epoch": 0.16347589952477937, + "grad_norm": 0.7172389497873176, + "learning_rate": 9.540378281126064e-06, + "loss": 0.4665, + "step": 3612 + }, + { + "epoch": 0.16352115863317493, + "grad_norm": 0.7156189181787637, + "learning_rate": 9.540071280507887e-06, + "loss": 0.4648, + "step": 3613 + }, + { + "epoch": 0.1635664177415705, + "grad_norm": 0.7428520280649665, + "learning_rate": 9.539764182337523e-06, + "loss": 0.3967, + "step": 3614 + }, + { + "epoch": 0.16361167684996605, + "grad_norm": 0.6564775779238287, + "learning_rate": 9.539456986621568e-06, + "loss": 0.4227, + "step": 3615 + }, + { + "epoch": 0.16365693595836162, + "grad_norm": 0.5938727255118851, + "learning_rate": 9.539149693366628e-06, + "loss": 0.4373, + "step": 3616 + }, + { + "epoch": 0.16370219506675718, + "grad_norm": 0.8369196659255341, + "learning_rate": 9.538842302579299e-06, + "loss": 0.3769, + "step": 3617 + }, + { + "epoch": 0.16374745417515274, + "grad_norm": 1.165080626912052, + "learning_rate": 9.538534814266187e-06, + "loss": 0.5097, + "step": 3618 + }, + { + "epoch": 0.1637927132835483, + "grad_norm": 0.8593480539402252, + "learning_rate": 9.538227228433905e-06, + "loss": 0.4292, + "step": 3619 + }, + { + "epoch": 0.1638379723919439, + "grad_norm": 0.7962035527894968, + "learning_rate": 9.537919545089057e-06, + "loss": 0.3933, + "step": 3620 + }, + { + "epoch": 0.16388323150033945, + "grad_norm": 0.6133916320357296, + "learning_rate": 9.537611764238253e-06, + "loss": 0.5197, + "step": 3621 + }, + { + "epoch": 0.16392849060873502, + "grad_norm": 0.45833468804418676, + "learning_rate": 9.53730388588811e-06, + "loss": 0.4907, + "step": 3622 + }, + { + "epoch": 0.16397374971713058, + "grad_norm": 0.6799233073262908, + "learning_rate": 9.536995910045241e-06, + "loss": 0.3854, + "step": 3623 + }, + { + "epoch": 0.16401900882552614, + "grad_norm": 0.7132031385597974, + "learning_rate": 9.536687836716265e-06, + "loss": 0.4124, + "step": 3624 + }, + { + "epoch": 0.1640642679339217, + "grad_norm": 1.1479222305246017, + "learning_rate": 9.536379665907801e-06, + "loss": 0.4351, + "step": 3625 + }, + { + "epoch": 0.16410952704231727, + "grad_norm": 0.8723725440454708, + "learning_rate": 9.53607139762647e-06, + "loss": 0.5495, + "step": 3626 + }, + { + "epoch": 0.16415478615071283, + "grad_norm": 0.7988608216779318, + "learning_rate": 9.535763031878895e-06, + "loss": 0.4412, + "step": 3627 + }, + { + "epoch": 0.1642000452591084, + "grad_norm": 0.6804044503637056, + "learning_rate": 9.535454568671705e-06, + "loss": 0.4246, + "step": 3628 + }, + { + "epoch": 0.16424530436750395, + "grad_norm": 0.7159799420828592, + "learning_rate": 9.535146008011524e-06, + "loss": 0.4234, + "step": 3629 + }, + { + "epoch": 0.16429056347589951, + "grad_norm": 0.8517435555100842, + "learning_rate": 9.534837349904986e-06, + "loss": 0.3846, + "step": 3630 + }, + { + "epoch": 0.16433582258429508, + "grad_norm": 0.7481983359140346, + "learning_rate": 9.534528594358718e-06, + "loss": 0.4253, + "step": 3631 + }, + { + "epoch": 0.16438108169269067, + "grad_norm": 0.746705182354197, + "learning_rate": 9.53421974137936e-06, + "loss": 0.4291, + "step": 3632 + }, + { + "epoch": 0.16442634080108623, + "grad_norm": 0.656268854264488, + "learning_rate": 9.533910790973545e-06, + "loss": 0.42, + "step": 3633 + }, + { + "epoch": 0.1644715999094818, + "grad_norm": 0.715318601638388, + "learning_rate": 9.533601743147911e-06, + "loss": 0.454, + "step": 3634 + }, + { + "epoch": 0.16451685901787735, + "grad_norm": 0.6353354533937493, + "learning_rate": 9.533292597909101e-06, + "loss": 0.4316, + "step": 3635 + }, + { + "epoch": 0.16456211812627292, + "grad_norm": 0.6869695424669242, + "learning_rate": 9.532983355263753e-06, + "loss": 0.4179, + "step": 3636 + }, + { + "epoch": 0.16460737723466848, + "grad_norm": 0.6966617156005382, + "learning_rate": 9.532674015218519e-06, + "loss": 0.3999, + "step": 3637 + }, + { + "epoch": 0.16465263634306404, + "grad_norm": 0.738112774923969, + "learning_rate": 9.532364577780039e-06, + "loss": 0.5192, + "step": 3638 + }, + { + "epoch": 0.1646978954514596, + "grad_norm": 0.7197831342194553, + "learning_rate": 9.532055042954964e-06, + "loss": 0.4315, + "step": 3639 + }, + { + "epoch": 0.16474315455985516, + "grad_norm": 0.6691610827792658, + "learning_rate": 9.531745410749946e-06, + "loss": 0.432, + "step": 3640 + }, + { + "epoch": 0.16478841366825073, + "grad_norm": 0.7336041070909316, + "learning_rate": 9.531435681171637e-06, + "loss": 0.4584, + "step": 3641 + }, + { + "epoch": 0.1648336727766463, + "grad_norm": 0.6534578008595666, + "learning_rate": 9.531125854226692e-06, + "loss": 0.4358, + "step": 3642 + }, + { + "epoch": 0.16487893188504185, + "grad_norm": 0.5270481010940409, + "learning_rate": 9.53081592992177e-06, + "loss": 0.5232, + "step": 3643 + }, + { + "epoch": 0.16492419099343744, + "grad_norm": 0.832431322687446, + "learning_rate": 9.530505908263528e-06, + "loss": 0.3747, + "step": 3644 + }, + { + "epoch": 0.164969450101833, + "grad_norm": 0.7072464593601516, + "learning_rate": 9.53019578925863e-06, + "loss": 0.4118, + "step": 3645 + }, + { + "epoch": 0.16501470921022857, + "grad_norm": 0.6158071480238312, + "learning_rate": 9.529885572913735e-06, + "loss": 0.4015, + "step": 3646 + }, + { + "epoch": 0.16505996831862413, + "grad_norm": 0.7465380445759865, + "learning_rate": 9.529575259235514e-06, + "loss": 0.4451, + "step": 3647 + }, + { + "epoch": 0.1651052274270197, + "grad_norm": 0.684587534515466, + "learning_rate": 9.52926484823063e-06, + "loss": 0.3958, + "step": 3648 + }, + { + "epoch": 0.16515048653541525, + "grad_norm": 0.636071964727268, + "learning_rate": 9.528954339905759e-06, + "loss": 0.404, + "step": 3649 + }, + { + "epoch": 0.16519574564381082, + "grad_norm": 0.6693368657078425, + "learning_rate": 9.528643734267564e-06, + "loss": 0.4247, + "step": 3650 + }, + { + "epoch": 0.16524100475220638, + "grad_norm": 0.8203539046854412, + "learning_rate": 9.528333031322728e-06, + "loss": 0.4013, + "step": 3651 + }, + { + "epoch": 0.16528626386060194, + "grad_norm": 0.6957575535631207, + "learning_rate": 9.528022231077921e-06, + "loss": 0.4628, + "step": 3652 + }, + { + "epoch": 0.1653315229689975, + "grad_norm": 0.6883990276108745, + "learning_rate": 9.527711333539821e-06, + "loss": 0.4181, + "step": 3653 + }, + { + "epoch": 0.16537678207739306, + "grad_norm": 0.6621880635568219, + "learning_rate": 9.527400338715112e-06, + "loss": 0.4216, + "step": 3654 + }, + { + "epoch": 0.16542204118578863, + "grad_norm": 0.5905985298350221, + "learning_rate": 9.527089246610475e-06, + "loss": 0.5367, + "step": 3655 + }, + { + "epoch": 0.16546730029418422, + "grad_norm": 0.66555066514175, + "learning_rate": 9.526778057232595e-06, + "loss": 0.4306, + "step": 3656 + }, + { + "epoch": 0.16551255940257978, + "grad_norm": 0.6431260588652664, + "learning_rate": 9.526466770588156e-06, + "loss": 0.3738, + "step": 3657 + }, + { + "epoch": 0.16555781851097534, + "grad_norm": 0.7130443154083126, + "learning_rate": 9.526155386683848e-06, + "loss": 0.4557, + "step": 3658 + }, + { + "epoch": 0.1656030776193709, + "grad_norm": 0.6961582015719266, + "learning_rate": 9.525843905526361e-06, + "loss": 0.397, + "step": 3659 + }, + { + "epoch": 0.16564833672776647, + "grad_norm": 0.623855766689215, + "learning_rate": 9.525532327122391e-06, + "loss": 0.4043, + "step": 3660 + }, + { + "epoch": 0.16569359583616203, + "grad_norm": 0.4348115038481914, + "learning_rate": 9.525220651478628e-06, + "loss": 0.505, + "step": 3661 + }, + { + "epoch": 0.1657388549445576, + "grad_norm": 0.7124951405659162, + "learning_rate": 9.524908878601773e-06, + "loss": 0.4289, + "step": 3662 + }, + { + "epoch": 0.16578411405295315, + "grad_norm": 0.3185914241502027, + "learning_rate": 9.524597008498522e-06, + "loss": 0.4992, + "step": 3663 + }, + { + "epoch": 0.16582937316134871, + "grad_norm": 0.7086305378925704, + "learning_rate": 9.524285041175578e-06, + "loss": 0.4552, + "step": 3664 + }, + { + "epoch": 0.16587463226974428, + "grad_norm": 0.8279029973754335, + "learning_rate": 9.523972976639645e-06, + "loss": 0.3899, + "step": 3665 + }, + { + "epoch": 0.16591989137813984, + "grad_norm": 0.3352919478515927, + "learning_rate": 9.523660814897426e-06, + "loss": 0.4951, + "step": 3666 + }, + { + "epoch": 0.16596515048653543, + "grad_norm": 0.727907832487834, + "learning_rate": 9.52334855595563e-06, + "loss": 0.4349, + "step": 3667 + }, + { + "epoch": 0.166010409594931, + "grad_norm": 0.6549909987507452, + "learning_rate": 9.523036199820964e-06, + "loss": 0.377, + "step": 3668 + }, + { + "epoch": 0.16605566870332655, + "grad_norm": 0.626733270750146, + "learning_rate": 9.522723746500144e-06, + "loss": 0.4197, + "step": 3669 + }, + { + "epoch": 0.16610092781172212, + "grad_norm": 0.7155877450445944, + "learning_rate": 9.522411195999879e-06, + "loss": 0.4233, + "step": 3670 + }, + { + "epoch": 0.16614618692011768, + "grad_norm": 0.6985169309325515, + "learning_rate": 9.522098548326888e-06, + "loss": 0.4286, + "step": 3671 + }, + { + "epoch": 0.16619144602851324, + "grad_norm": 0.7018155538709772, + "learning_rate": 9.521785803487888e-06, + "loss": 0.4138, + "step": 3672 + }, + { + "epoch": 0.1662367051369088, + "grad_norm": 0.6868291461365117, + "learning_rate": 9.5214729614896e-06, + "loss": 0.3932, + "step": 3673 + }, + { + "epoch": 0.16628196424530436, + "grad_norm": 0.770746088836217, + "learning_rate": 9.521160022338742e-06, + "loss": 0.405, + "step": 3674 + }, + { + "epoch": 0.16632722335369993, + "grad_norm": 0.7083686368080435, + "learning_rate": 9.520846986042043e-06, + "loss": 0.3715, + "step": 3675 + }, + { + "epoch": 0.1663724824620955, + "grad_norm": 0.7256127301256134, + "learning_rate": 9.520533852606226e-06, + "loss": 0.4242, + "step": 3676 + }, + { + "epoch": 0.16641774157049105, + "grad_norm": 0.7874565138391614, + "learning_rate": 9.520220622038019e-06, + "loss": 0.4022, + "step": 3677 + }, + { + "epoch": 0.1664630006788866, + "grad_norm": 0.663211610255665, + "learning_rate": 9.519907294344155e-06, + "loss": 0.4053, + "step": 3678 + }, + { + "epoch": 0.1665082597872822, + "grad_norm": 0.7512651019964488, + "learning_rate": 9.519593869531366e-06, + "loss": 0.4084, + "step": 3679 + }, + { + "epoch": 0.16655351889567777, + "grad_norm": 0.6856705100857937, + "learning_rate": 9.519280347606383e-06, + "loss": 0.4363, + "step": 3680 + }, + { + "epoch": 0.16659877800407333, + "grad_norm": 0.820504135650576, + "learning_rate": 9.518966728575947e-06, + "loss": 0.4354, + "step": 3681 + }, + { + "epoch": 0.1666440371124689, + "grad_norm": 0.5359567082935988, + "learning_rate": 9.518653012446794e-06, + "loss": 0.4859, + "step": 3682 + }, + { + "epoch": 0.16668929622086445, + "grad_norm": 0.42768864998571654, + "learning_rate": 9.518339199225668e-06, + "loss": 0.4734, + "step": 3683 + }, + { + "epoch": 0.16673455532926001, + "grad_norm": 0.6977386931322633, + "learning_rate": 9.518025288919307e-06, + "loss": 0.4493, + "step": 3684 + }, + { + "epoch": 0.16677981443765558, + "grad_norm": 0.7312921685664348, + "learning_rate": 9.51771128153446e-06, + "loss": 0.4551, + "step": 3685 + }, + { + "epoch": 0.16682507354605114, + "grad_norm": 0.8006426810646926, + "learning_rate": 9.517397177077874e-06, + "loss": 0.3496, + "step": 3686 + }, + { + "epoch": 0.1668703326544467, + "grad_norm": 0.6467847843733492, + "learning_rate": 9.517082975556294e-06, + "loss": 0.4105, + "step": 3687 + }, + { + "epoch": 0.16691559176284226, + "grad_norm": 0.7434533319298859, + "learning_rate": 9.516768676976476e-06, + "loss": 0.3715, + "step": 3688 + }, + { + "epoch": 0.16696085087123783, + "grad_norm": 0.7565063603778319, + "learning_rate": 9.51645428134517e-06, + "loss": 0.4101, + "step": 3689 + }, + { + "epoch": 0.1670061099796334, + "grad_norm": 0.6583406605473527, + "learning_rate": 9.516139788669133e-06, + "loss": 0.4008, + "step": 3690 + }, + { + "epoch": 0.16705136908802898, + "grad_norm": 0.9488362943271945, + "learning_rate": 9.515825198955122e-06, + "loss": 0.5161, + "step": 3691 + }, + { + "epoch": 0.16709662819642454, + "grad_norm": 0.74207752543715, + "learning_rate": 9.515510512209898e-06, + "loss": 0.4167, + "step": 3692 + }, + { + "epoch": 0.1671418873048201, + "grad_norm": 0.688311061847171, + "learning_rate": 9.515195728440221e-06, + "loss": 0.421, + "step": 3693 + }, + { + "epoch": 0.16718714641321566, + "grad_norm": 0.7076735124909065, + "learning_rate": 9.514880847652855e-06, + "loss": 0.4215, + "step": 3694 + }, + { + "epoch": 0.16723240552161123, + "grad_norm": 0.4649065122917939, + "learning_rate": 9.514565869854566e-06, + "loss": 0.5269, + "step": 3695 + }, + { + "epoch": 0.1672776646300068, + "grad_norm": 0.37744810277788543, + "learning_rate": 9.51425079505212e-06, + "loss": 0.4916, + "step": 3696 + }, + { + "epoch": 0.16732292373840235, + "grad_norm": 0.35435769341967316, + "learning_rate": 9.513935623252292e-06, + "loss": 0.4882, + "step": 3697 + }, + { + "epoch": 0.1673681828467979, + "grad_norm": 0.8444424239920386, + "learning_rate": 9.51362035446185e-06, + "loss": 0.3775, + "step": 3698 + }, + { + "epoch": 0.16741344195519348, + "grad_norm": 0.49136085657274564, + "learning_rate": 9.513304988687568e-06, + "loss": 0.5053, + "step": 3699 + }, + { + "epoch": 0.16745870106358904, + "grad_norm": 0.48488017714940024, + "learning_rate": 9.512989525936223e-06, + "loss": 0.5036, + "step": 3700 + }, + { + "epoch": 0.1675039601719846, + "grad_norm": 0.7566271474139192, + "learning_rate": 9.512673966214597e-06, + "loss": 0.4554, + "step": 3701 + }, + { + "epoch": 0.16754921928038016, + "grad_norm": 0.8793616289280668, + "learning_rate": 9.512358309529463e-06, + "loss": 0.3939, + "step": 3702 + }, + { + "epoch": 0.16759447838877575, + "grad_norm": 0.6267765901631913, + "learning_rate": 9.51204255588761e-06, + "loss": 0.3886, + "step": 3703 + }, + { + "epoch": 0.16763973749717132, + "grad_norm": 0.7177662264806949, + "learning_rate": 9.51172670529582e-06, + "loss": 0.42, + "step": 3704 + }, + { + "epoch": 0.16768499660556688, + "grad_norm": 0.7540731952293713, + "learning_rate": 9.511410757760878e-06, + "loss": 0.4347, + "step": 3705 + }, + { + "epoch": 0.16773025571396244, + "grad_norm": 1.175057370829924, + "learning_rate": 9.511094713289575e-06, + "loss": 0.4344, + "step": 3706 + }, + { + "epoch": 0.167775514822358, + "grad_norm": 0.6673807601229402, + "learning_rate": 9.510778571888704e-06, + "loss": 0.3923, + "step": 3707 + }, + { + "epoch": 0.16782077393075356, + "grad_norm": 0.6639737685542729, + "learning_rate": 9.510462333565052e-06, + "loss": 0.4127, + "step": 3708 + }, + { + "epoch": 0.16786603303914913, + "grad_norm": 0.6623937707899048, + "learning_rate": 9.510145998325419e-06, + "loss": 0.3768, + "step": 3709 + }, + { + "epoch": 0.1679112921475447, + "grad_norm": 0.7189384687642437, + "learning_rate": 9.509829566176601e-06, + "loss": 0.4426, + "step": 3710 + }, + { + "epoch": 0.16795655125594025, + "grad_norm": 0.7553610441482141, + "learning_rate": 9.509513037125395e-06, + "loss": 0.4116, + "step": 3711 + }, + { + "epoch": 0.1680018103643358, + "grad_norm": 0.6739738444460805, + "learning_rate": 9.509196411178605e-06, + "loss": 0.4017, + "step": 3712 + }, + { + "epoch": 0.16804706947273138, + "grad_norm": 0.8404052913139409, + "learning_rate": 9.508879688343033e-06, + "loss": 0.4114, + "step": 3713 + }, + { + "epoch": 0.16809232858112697, + "grad_norm": 0.7005051307524329, + "learning_rate": 9.508562868625484e-06, + "loss": 0.4285, + "step": 3714 + }, + { + "epoch": 0.16813758768952253, + "grad_norm": 0.6861061710619839, + "learning_rate": 9.508245952032765e-06, + "loss": 0.3782, + "step": 3715 + }, + { + "epoch": 0.1681828467979181, + "grad_norm": 0.7290703589658392, + "learning_rate": 9.507928938571689e-06, + "loss": 0.4325, + "step": 3716 + }, + { + "epoch": 0.16822810590631365, + "grad_norm": 0.6613589739454301, + "learning_rate": 9.507611828249062e-06, + "loss": 0.3857, + "step": 3717 + }, + { + "epoch": 0.16827336501470921, + "grad_norm": 0.675217399005107, + "learning_rate": 9.507294621071702e-06, + "loss": 0.4095, + "step": 3718 + }, + { + "epoch": 0.16831862412310478, + "grad_norm": 0.7494715356484618, + "learning_rate": 9.506977317046424e-06, + "loss": 0.4322, + "step": 3719 + }, + { + "epoch": 0.16836388323150034, + "grad_norm": 0.6606116250864447, + "learning_rate": 9.506659916180046e-06, + "loss": 0.4673, + "step": 3720 + }, + { + "epoch": 0.1684091423398959, + "grad_norm": 0.7163806350258065, + "learning_rate": 9.506342418479388e-06, + "loss": 0.4361, + "step": 3721 + }, + { + "epoch": 0.16845440144829146, + "grad_norm": 0.626221536747177, + "learning_rate": 9.50602482395127e-06, + "loss": 0.4085, + "step": 3722 + }, + { + "epoch": 0.16849966055668703, + "grad_norm": 0.7311666290265683, + "learning_rate": 9.50570713260252e-06, + "loss": 0.4388, + "step": 3723 + }, + { + "epoch": 0.1685449196650826, + "grad_norm": 0.6904737034485492, + "learning_rate": 9.50538934443996e-06, + "loss": 0.3982, + "step": 3724 + }, + { + "epoch": 0.16859017877347815, + "grad_norm": 0.6646357804123364, + "learning_rate": 9.50507145947042e-06, + "loss": 0.4169, + "step": 3725 + }, + { + "epoch": 0.16863543788187374, + "grad_norm": 0.6716345872070512, + "learning_rate": 9.504753477700731e-06, + "loss": 0.4108, + "step": 3726 + }, + { + "epoch": 0.1686806969902693, + "grad_norm": 0.6807055677303921, + "learning_rate": 9.504435399137726e-06, + "loss": 0.41, + "step": 3727 + }, + { + "epoch": 0.16872595609866486, + "grad_norm": 0.7083670984942221, + "learning_rate": 9.504117223788238e-06, + "loss": 0.3943, + "step": 3728 + }, + { + "epoch": 0.16877121520706043, + "grad_norm": 0.6568251151354582, + "learning_rate": 9.503798951659104e-06, + "loss": 0.4139, + "step": 3729 + }, + { + "epoch": 0.168816474315456, + "grad_norm": 0.6810666065376982, + "learning_rate": 9.503480582757163e-06, + "loss": 0.4098, + "step": 3730 + }, + { + "epoch": 0.16886173342385155, + "grad_norm": 0.987821729528041, + "learning_rate": 9.503162117089256e-06, + "loss": 0.518, + "step": 3731 + }, + { + "epoch": 0.1689069925322471, + "grad_norm": 0.6956385502760074, + "learning_rate": 9.502843554662225e-06, + "loss": 0.4107, + "step": 3732 + }, + { + "epoch": 0.16895225164064268, + "grad_norm": 0.7239870623317979, + "learning_rate": 9.502524895482917e-06, + "loss": 0.3878, + "step": 3733 + }, + { + "epoch": 0.16899751074903824, + "grad_norm": 0.6494967848122583, + "learning_rate": 9.502206139558175e-06, + "loss": 0.4095, + "step": 3734 + }, + { + "epoch": 0.1690427698574338, + "grad_norm": 0.7057674434549391, + "learning_rate": 9.501887286894852e-06, + "loss": 0.405, + "step": 3735 + }, + { + "epoch": 0.16908802896582936, + "grad_norm": 0.7003729702583934, + "learning_rate": 9.501568337499798e-06, + "loss": 0.4078, + "step": 3736 + }, + { + "epoch": 0.16913328807422492, + "grad_norm": 0.6829548547414105, + "learning_rate": 9.501249291379865e-06, + "loss": 0.393, + "step": 3737 + }, + { + "epoch": 0.16917854718262051, + "grad_norm": 0.74769762989029, + "learning_rate": 9.50093014854191e-06, + "loss": 0.4413, + "step": 3738 + }, + { + "epoch": 0.16922380629101608, + "grad_norm": 0.7410832274031145, + "learning_rate": 9.500610908992788e-06, + "loss": 0.4369, + "step": 3739 + }, + { + "epoch": 0.16926906539941164, + "grad_norm": 0.9351541665911204, + "learning_rate": 9.500291572739362e-06, + "loss": 0.5246, + "step": 3740 + }, + { + "epoch": 0.1693143245078072, + "grad_norm": 0.7186639929072621, + "learning_rate": 9.49997213978849e-06, + "loss": 0.5083, + "step": 3741 + }, + { + "epoch": 0.16935958361620276, + "grad_norm": 0.7107391517242865, + "learning_rate": 9.49965261014704e-06, + "loss": 0.43, + "step": 3742 + }, + { + "epoch": 0.16940484272459833, + "grad_norm": 0.6242561143083231, + "learning_rate": 9.499332983821873e-06, + "loss": 0.4075, + "step": 3743 + }, + { + "epoch": 0.1694501018329939, + "grad_norm": 0.6358146911217676, + "learning_rate": 9.49901326081986e-06, + "loss": 0.4052, + "step": 3744 + }, + { + "epoch": 0.16949536094138945, + "grad_norm": 0.7088530729448421, + "learning_rate": 9.498693441147868e-06, + "loss": 0.4071, + "step": 3745 + }, + { + "epoch": 0.169540620049785, + "grad_norm": 1.0440527962553254, + "learning_rate": 9.498373524812771e-06, + "loss": 0.5321, + "step": 3746 + }, + { + "epoch": 0.16958587915818057, + "grad_norm": 0.7200570581024353, + "learning_rate": 9.498053511821445e-06, + "loss": 0.3925, + "step": 3747 + }, + { + "epoch": 0.16963113826657614, + "grad_norm": 0.6562807866803275, + "learning_rate": 9.497733402180761e-06, + "loss": 0.4265, + "step": 3748 + }, + { + "epoch": 0.16967639737497173, + "grad_norm": 0.6703611503814182, + "learning_rate": 9.497413195897601e-06, + "loss": 0.4132, + "step": 3749 + }, + { + "epoch": 0.1697216564833673, + "grad_norm": 0.6892556857897737, + "learning_rate": 9.497092892978844e-06, + "loss": 0.518, + "step": 3750 + }, + { + "epoch": 0.16976691559176285, + "grad_norm": 0.6611965128166901, + "learning_rate": 9.496772493431373e-06, + "loss": 0.4001, + "step": 3751 + }, + { + "epoch": 0.1698121747001584, + "grad_norm": 0.7793165941706258, + "learning_rate": 9.496451997262071e-06, + "loss": 0.4128, + "step": 3752 + }, + { + "epoch": 0.16985743380855398, + "grad_norm": 0.47410875781920114, + "learning_rate": 9.496131404477826e-06, + "loss": 0.4902, + "step": 3753 + }, + { + "epoch": 0.16990269291694954, + "grad_norm": 0.6863007768927061, + "learning_rate": 9.495810715085526e-06, + "loss": 0.393, + "step": 3754 + }, + { + "epoch": 0.1699479520253451, + "grad_norm": 0.7225011596239164, + "learning_rate": 9.495489929092062e-06, + "loss": 0.3979, + "step": 3755 + }, + { + "epoch": 0.16999321113374066, + "grad_norm": 0.7322895998589937, + "learning_rate": 9.495169046504325e-06, + "loss": 0.391, + "step": 3756 + }, + { + "epoch": 0.17003847024213622, + "grad_norm": 0.6541370202035405, + "learning_rate": 9.494848067329211e-06, + "loss": 0.3956, + "step": 3757 + }, + { + "epoch": 0.1700837293505318, + "grad_norm": 0.643158696186771, + "learning_rate": 9.494526991573619e-06, + "loss": 0.3263, + "step": 3758 + }, + { + "epoch": 0.17012898845892735, + "grad_norm": 0.6596094479839292, + "learning_rate": 9.494205819244444e-06, + "loss": 0.4137, + "step": 3759 + }, + { + "epoch": 0.1701742475673229, + "grad_norm": 0.6836657205327861, + "learning_rate": 9.493884550348589e-06, + "loss": 0.4406, + "step": 3760 + }, + { + "epoch": 0.1702195066757185, + "grad_norm": 0.6867349047506497, + "learning_rate": 9.493563184892958e-06, + "loss": 0.4254, + "step": 3761 + }, + { + "epoch": 0.17026476578411406, + "grad_norm": 0.6815726649750058, + "learning_rate": 9.493241722884454e-06, + "loss": 0.4609, + "step": 3762 + }, + { + "epoch": 0.17031002489250963, + "grad_norm": 0.6885133337543684, + "learning_rate": 9.492920164329985e-06, + "loss": 0.3385, + "step": 3763 + }, + { + "epoch": 0.1703552840009052, + "grad_norm": 1.0212362444151246, + "learning_rate": 9.492598509236461e-06, + "loss": 0.5036, + "step": 3764 + }, + { + "epoch": 0.17040054310930075, + "grad_norm": 0.774041865443583, + "learning_rate": 9.492276757610795e-06, + "loss": 0.3948, + "step": 3765 + }, + { + "epoch": 0.1704458022176963, + "grad_norm": 0.6134765089458971, + "learning_rate": 9.491954909459895e-06, + "loss": 0.4832, + "step": 3766 + }, + { + "epoch": 0.17049106132609188, + "grad_norm": 0.36207745015444576, + "learning_rate": 9.491632964790683e-06, + "loss": 0.5457, + "step": 3767 + }, + { + "epoch": 0.17053632043448744, + "grad_norm": 0.7729859301025481, + "learning_rate": 9.491310923610071e-06, + "loss": 0.419, + "step": 3768 + }, + { + "epoch": 0.170581579542883, + "grad_norm": 0.6830124066237396, + "learning_rate": 9.490988785924983e-06, + "loss": 0.4085, + "step": 3769 + }, + { + "epoch": 0.17062683865127856, + "grad_norm": 0.7352583131801282, + "learning_rate": 9.490666551742338e-06, + "loss": 0.4953, + "step": 3770 + }, + { + "epoch": 0.17067209775967412, + "grad_norm": 0.6727503824450569, + "learning_rate": 9.490344221069062e-06, + "loss": 0.3845, + "step": 3771 + }, + { + "epoch": 0.1707173568680697, + "grad_norm": 0.7477892470274518, + "learning_rate": 9.490021793912079e-06, + "loss": 0.4165, + "step": 3772 + }, + { + "epoch": 0.17076261597646528, + "grad_norm": 0.7280098699277786, + "learning_rate": 9.489699270278316e-06, + "loss": 0.3909, + "step": 3773 + }, + { + "epoch": 0.17080787508486084, + "grad_norm": 0.662921243277628, + "learning_rate": 9.489376650174708e-06, + "loss": 0.4372, + "step": 3774 + }, + { + "epoch": 0.1708531341932564, + "grad_norm": 0.9470274488208026, + "learning_rate": 9.489053933608182e-06, + "loss": 0.5428, + "step": 3775 + }, + { + "epoch": 0.17089839330165196, + "grad_norm": 0.6684584842570336, + "learning_rate": 9.488731120585675e-06, + "loss": 0.4145, + "step": 3776 + }, + { + "epoch": 0.17094365241004753, + "grad_norm": 0.7104320385344154, + "learning_rate": 9.488408211114121e-06, + "loss": 0.3872, + "step": 3777 + }, + { + "epoch": 0.1709889115184431, + "grad_norm": 0.6115588397093513, + "learning_rate": 9.48808520520046e-06, + "loss": 0.3681, + "step": 3778 + }, + { + "epoch": 0.17103417062683865, + "grad_norm": 0.7587750055088698, + "learning_rate": 9.487762102851631e-06, + "loss": 0.4817, + "step": 3779 + }, + { + "epoch": 0.1710794297352342, + "grad_norm": 0.6573454769461418, + "learning_rate": 9.487438904074581e-06, + "loss": 0.3939, + "step": 3780 + }, + { + "epoch": 0.17112468884362977, + "grad_norm": 0.6807681638602197, + "learning_rate": 9.48711560887625e-06, + "loss": 0.3964, + "step": 3781 + }, + { + "epoch": 0.17116994795202534, + "grad_norm": 0.6528819830811584, + "learning_rate": 9.486792217263584e-06, + "loss": 0.4169, + "step": 3782 + }, + { + "epoch": 0.1712152070604209, + "grad_norm": 0.6456444671915634, + "learning_rate": 9.486468729243533e-06, + "loss": 0.3778, + "step": 3783 + }, + { + "epoch": 0.17126046616881646, + "grad_norm": 0.685917141382002, + "learning_rate": 9.48614514482305e-06, + "loss": 0.4021, + "step": 3784 + }, + { + "epoch": 0.17130572527721205, + "grad_norm": 0.7035389881359554, + "learning_rate": 9.485821464009084e-06, + "loss": 0.3866, + "step": 3785 + }, + { + "epoch": 0.1713509843856076, + "grad_norm": 0.6293192536212908, + "learning_rate": 9.485497686808594e-06, + "loss": 0.5125, + "step": 3786 + }, + { + "epoch": 0.17139624349400318, + "grad_norm": 0.6996237899447874, + "learning_rate": 9.485173813228535e-06, + "loss": 0.4342, + "step": 3787 + }, + { + "epoch": 0.17144150260239874, + "grad_norm": 0.7228737991665823, + "learning_rate": 9.484849843275863e-06, + "loss": 0.4426, + "step": 3788 + }, + { + "epoch": 0.1714867617107943, + "grad_norm": 0.6784925368979711, + "learning_rate": 9.484525776957544e-06, + "loss": 0.4401, + "step": 3789 + }, + { + "epoch": 0.17153202081918986, + "grad_norm": 0.6955890630411466, + "learning_rate": 9.484201614280539e-06, + "loss": 0.4187, + "step": 3790 + }, + { + "epoch": 0.17157727992758542, + "grad_norm": 0.720595862616324, + "learning_rate": 9.483877355251814e-06, + "loss": 0.3606, + "step": 3791 + }, + { + "epoch": 0.171622539035981, + "grad_norm": 0.6916800542587127, + "learning_rate": 9.483552999878335e-06, + "loss": 0.4037, + "step": 3792 + }, + { + "epoch": 0.17166779814437655, + "grad_norm": 0.694954500461271, + "learning_rate": 9.483228548167075e-06, + "loss": 0.4423, + "step": 3793 + }, + { + "epoch": 0.1717130572527721, + "grad_norm": 0.3969421838470267, + "learning_rate": 9.482904000124998e-06, + "loss": 0.5163, + "step": 3794 + }, + { + "epoch": 0.17175831636116767, + "grad_norm": 0.33452167181396353, + "learning_rate": 9.482579355759085e-06, + "loss": 0.5129, + "step": 3795 + }, + { + "epoch": 0.17180357546956326, + "grad_norm": 0.6967667263051072, + "learning_rate": 9.482254615076307e-06, + "loss": 0.455, + "step": 3796 + }, + { + "epoch": 0.17184883457795883, + "grad_norm": 0.3339040070467812, + "learning_rate": 9.481929778083646e-06, + "loss": 0.5311, + "step": 3797 + }, + { + "epoch": 0.1718940936863544, + "grad_norm": 0.36808453193924556, + "learning_rate": 9.481604844788078e-06, + "loss": 0.5197, + "step": 3798 + }, + { + "epoch": 0.17193935279474995, + "grad_norm": 0.35561700516571404, + "learning_rate": 9.481279815196587e-06, + "loss": 0.4952, + "step": 3799 + }, + { + "epoch": 0.1719846119031455, + "grad_norm": 1.0893008139643745, + "learning_rate": 9.480954689316155e-06, + "loss": 0.4168, + "step": 3800 + }, + { + "epoch": 0.17202987101154107, + "grad_norm": 0.4053512638244079, + "learning_rate": 9.480629467153768e-06, + "loss": 0.5244, + "step": 3801 + }, + { + "epoch": 0.17207513011993664, + "grad_norm": 0.40523098581740213, + "learning_rate": 9.480304148716418e-06, + "loss": 0.4998, + "step": 3802 + }, + { + "epoch": 0.1721203892283322, + "grad_norm": 0.8014392539434086, + "learning_rate": 9.479978734011089e-06, + "loss": 0.4028, + "step": 3803 + }, + { + "epoch": 0.17216564833672776, + "grad_norm": 0.7256290606944289, + "learning_rate": 9.479653223044776e-06, + "loss": 0.4377, + "step": 3804 + }, + { + "epoch": 0.17221090744512332, + "grad_norm": 0.6517262142803637, + "learning_rate": 9.479327615824476e-06, + "loss": 0.359, + "step": 3805 + }, + { + "epoch": 0.17225616655351889, + "grad_norm": 0.7758678317801353, + "learning_rate": 9.479001912357181e-06, + "loss": 0.4047, + "step": 3806 + }, + { + "epoch": 0.17230142566191445, + "grad_norm": 0.6734929141109003, + "learning_rate": 9.478676112649892e-06, + "loss": 0.4003, + "step": 3807 + }, + { + "epoch": 0.17234668477031004, + "grad_norm": 0.6460118289975646, + "learning_rate": 9.478350216709609e-06, + "loss": 0.4209, + "step": 3808 + }, + { + "epoch": 0.1723919438787056, + "grad_norm": 0.7400410273732939, + "learning_rate": 9.478024224543332e-06, + "loss": 0.4035, + "step": 3809 + }, + { + "epoch": 0.17243720298710116, + "grad_norm": 0.6620153485178493, + "learning_rate": 9.477698136158068e-06, + "loss": 0.4006, + "step": 3810 + }, + { + "epoch": 0.17248246209549672, + "grad_norm": 0.674849489646353, + "learning_rate": 9.477371951560825e-06, + "loss": 0.3948, + "step": 3811 + }, + { + "epoch": 0.1725277212038923, + "grad_norm": 0.6830113481329078, + "learning_rate": 9.477045670758609e-06, + "loss": 0.3879, + "step": 3812 + }, + { + "epoch": 0.17257298031228785, + "grad_norm": 0.6779094831298704, + "learning_rate": 9.476719293758431e-06, + "loss": 0.3828, + "step": 3813 + }, + { + "epoch": 0.1726182394206834, + "grad_norm": 0.66722862120204, + "learning_rate": 9.476392820567306e-06, + "loss": 0.399, + "step": 3814 + }, + { + "epoch": 0.17266349852907897, + "grad_norm": 0.6684781940195049, + "learning_rate": 9.476066251192248e-06, + "loss": 0.3661, + "step": 3815 + }, + { + "epoch": 0.17270875763747454, + "grad_norm": 0.49516822571517094, + "learning_rate": 9.475739585640272e-06, + "loss": 0.4783, + "step": 3816 + }, + { + "epoch": 0.1727540167458701, + "grad_norm": 0.624620814684528, + "learning_rate": 9.475412823918398e-06, + "loss": 0.3734, + "step": 3817 + }, + { + "epoch": 0.17279927585426566, + "grad_norm": 0.3758624282498899, + "learning_rate": 9.475085966033649e-06, + "loss": 0.4856, + "step": 3818 + }, + { + "epoch": 0.17284453496266122, + "grad_norm": 0.7495092258007775, + "learning_rate": 9.474759011993045e-06, + "loss": 0.4685, + "step": 3819 + }, + { + "epoch": 0.1728897940710568, + "grad_norm": 0.7028451617869309, + "learning_rate": 9.474431961803615e-06, + "loss": 0.4345, + "step": 3820 + }, + { + "epoch": 0.17293505317945237, + "grad_norm": 0.6838612365680022, + "learning_rate": 9.474104815472382e-06, + "loss": 0.3717, + "step": 3821 + }, + { + "epoch": 0.17298031228784794, + "grad_norm": 0.6690480173586114, + "learning_rate": 9.47377757300638e-06, + "loss": 0.4126, + "step": 3822 + }, + { + "epoch": 0.1730255713962435, + "grad_norm": 0.6459824004506101, + "learning_rate": 9.473450234412638e-06, + "loss": 0.3475, + "step": 3823 + }, + { + "epoch": 0.17307083050463906, + "grad_norm": 0.6605687360932971, + "learning_rate": 9.473122799698189e-06, + "loss": 0.4021, + "step": 3824 + }, + { + "epoch": 0.17311608961303462, + "grad_norm": 0.7336982972593437, + "learning_rate": 9.472795268870068e-06, + "loss": 0.3697, + "step": 3825 + }, + { + "epoch": 0.17316134872143019, + "grad_norm": 0.6807569738448235, + "learning_rate": 9.472467641935314e-06, + "loss": 0.4196, + "step": 3826 + }, + { + "epoch": 0.17320660782982575, + "grad_norm": 0.6598634075011642, + "learning_rate": 9.472139918900969e-06, + "loss": 0.5147, + "step": 3827 + }, + { + "epoch": 0.1732518669382213, + "grad_norm": 0.6445631249211847, + "learning_rate": 9.47181209977407e-06, + "loss": 0.4068, + "step": 3828 + }, + { + "epoch": 0.17329712604661687, + "grad_norm": 0.4750175521736982, + "learning_rate": 9.471484184561664e-06, + "loss": 0.5171, + "step": 3829 + }, + { + "epoch": 0.17334238515501244, + "grad_norm": 0.3584003360435236, + "learning_rate": 9.471156173270796e-06, + "loss": 0.4917, + "step": 3830 + }, + { + "epoch": 0.173387644263408, + "grad_norm": 0.7073047056713918, + "learning_rate": 9.470828065908512e-06, + "loss": 0.3914, + "step": 3831 + }, + { + "epoch": 0.1734329033718036, + "grad_norm": 0.7026671316830553, + "learning_rate": 9.470499862481867e-06, + "loss": 0.4363, + "step": 3832 + }, + { + "epoch": 0.17347816248019915, + "grad_norm": 0.6446855918509418, + "learning_rate": 9.470171562997908e-06, + "loss": 0.3664, + "step": 3833 + }, + { + "epoch": 0.1735234215885947, + "grad_norm": 0.6653694374505091, + "learning_rate": 9.469843167463692e-06, + "loss": 0.3864, + "step": 3834 + }, + { + "epoch": 0.17356868069699027, + "grad_norm": 0.6814010028261566, + "learning_rate": 9.469514675886276e-06, + "loss": 0.399, + "step": 3835 + }, + { + "epoch": 0.17361393980538584, + "grad_norm": 0.7220147662032761, + "learning_rate": 9.469186088272714e-06, + "loss": 0.3474, + "step": 3836 + }, + { + "epoch": 0.1736591989137814, + "grad_norm": 0.6498031930000514, + "learning_rate": 9.468857404630069e-06, + "loss": 0.4211, + "step": 3837 + }, + { + "epoch": 0.17370445802217696, + "grad_norm": 0.6605078113760698, + "learning_rate": 9.468528624965406e-06, + "loss": 0.4029, + "step": 3838 + }, + { + "epoch": 0.17374971713057252, + "grad_norm": 0.686696816542692, + "learning_rate": 9.468199749285785e-06, + "loss": 0.3802, + "step": 3839 + }, + { + "epoch": 0.17379497623896809, + "grad_norm": 0.6838184982820498, + "learning_rate": 9.467870777598274e-06, + "loss": 0.4076, + "step": 3840 + }, + { + "epoch": 0.17384023534736365, + "grad_norm": 1.603580102321151, + "learning_rate": 9.467541709909942e-06, + "loss": 0.5383, + "step": 3841 + }, + { + "epoch": 0.1738854944557592, + "grad_norm": 0.7090022793113754, + "learning_rate": 9.46721254622786e-06, + "loss": 0.4162, + "step": 3842 + }, + { + "epoch": 0.1739307535641548, + "grad_norm": 0.6742737656798072, + "learning_rate": 9.466883286559102e-06, + "loss": 0.4254, + "step": 3843 + }, + { + "epoch": 0.17397601267255036, + "grad_norm": 0.7230508206818501, + "learning_rate": 9.46655393091074e-06, + "loss": 0.3856, + "step": 3844 + }, + { + "epoch": 0.17402127178094592, + "grad_norm": 0.812199433596557, + "learning_rate": 9.466224479289851e-06, + "loss": 0.4029, + "step": 3845 + }, + { + "epoch": 0.1740665308893415, + "grad_norm": 0.706257234178393, + "learning_rate": 9.465894931703517e-06, + "loss": 0.3809, + "step": 3846 + }, + { + "epoch": 0.17411178999773705, + "grad_norm": 0.7939136340507305, + "learning_rate": 9.465565288158815e-06, + "loss": 0.3825, + "step": 3847 + }, + { + "epoch": 0.1741570491061326, + "grad_norm": 0.7038949961513123, + "learning_rate": 9.46523554866283e-06, + "loss": 0.3804, + "step": 3848 + }, + { + "epoch": 0.17420230821452817, + "grad_norm": 0.6547818451610412, + "learning_rate": 9.464905713222648e-06, + "loss": 0.3839, + "step": 3849 + }, + { + "epoch": 0.17424756732292374, + "grad_norm": 0.7689604401981761, + "learning_rate": 9.464575781845355e-06, + "loss": 0.3985, + "step": 3850 + }, + { + "epoch": 0.1742928264313193, + "grad_norm": 0.6932882122137568, + "learning_rate": 9.46424575453804e-06, + "loss": 0.437, + "step": 3851 + }, + { + "epoch": 0.17433808553971486, + "grad_norm": 0.6653382774110156, + "learning_rate": 9.463915631307795e-06, + "loss": 0.3566, + "step": 3852 + }, + { + "epoch": 0.17438334464811042, + "grad_norm": 0.7126119345383991, + "learning_rate": 9.463585412161712e-06, + "loss": 0.4094, + "step": 3853 + }, + { + "epoch": 0.17442860375650598, + "grad_norm": 0.7299262826317278, + "learning_rate": 9.463255097106888e-06, + "loss": 0.4516, + "step": 3854 + }, + { + "epoch": 0.17447386286490157, + "grad_norm": 0.6814962311461914, + "learning_rate": 9.462924686150419e-06, + "loss": 0.4126, + "step": 3855 + }, + { + "epoch": 0.17451912197329714, + "grad_norm": 0.6083842476390211, + "learning_rate": 9.462594179299408e-06, + "loss": 0.3564, + "step": 3856 + }, + { + "epoch": 0.1745643810816927, + "grad_norm": 0.6648655332817587, + "learning_rate": 9.462263576560951e-06, + "loss": 0.3934, + "step": 3857 + }, + { + "epoch": 0.17460964019008826, + "grad_norm": 0.6540476263210379, + "learning_rate": 9.461932877942154e-06, + "loss": 0.4531, + "step": 3858 + }, + { + "epoch": 0.17465489929848382, + "grad_norm": 0.6841250761641118, + "learning_rate": 9.461602083450126e-06, + "loss": 0.4127, + "step": 3859 + }, + { + "epoch": 0.17470015840687939, + "grad_norm": 0.6339962178288016, + "learning_rate": 9.461271193091971e-06, + "loss": 0.4099, + "step": 3860 + }, + { + "epoch": 0.17474541751527495, + "grad_norm": 0.7159915950097685, + "learning_rate": 9.4609402068748e-06, + "loss": 0.4309, + "step": 3861 + }, + { + "epoch": 0.1747906766236705, + "grad_norm": 0.7058571671469768, + "learning_rate": 9.460609124805724e-06, + "loss": 0.4109, + "step": 3862 + }, + { + "epoch": 0.17483593573206607, + "grad_norm": 0.6230601643244665, + "learning_rate": 9.460277946891859e-06, + "loss": 0.3917, + "step": 3863 + }, + { + "epoch": 0.17488119484046163, + "grad_norm": 0.6702894866068349, + "learning_rate": 9.459946673140317e-06, + "loss": 0.3789, + "step": 3864 + }, + { + "epoch": 0.1749264539488572, + "grad_norm": 0.6890944216862027, + "learning_rate": 9.45961530355822e-06, + "loss": 0.4154, + "step": 3865 + }, + { + "epoch": 0.17497171305725276, + "grad_norm": 1.3068604598743, + "learning_rate": 9.459283838152686e-06, + "loss": 0.5401, + "step": 3866 + }, + { + "epoch": 0.17501697216564835, + "grad_norm": 1.1368248257270164, + "learning_rate": 9.45895227693084e-06, + "loss": 0.518, + "step": 3867 + }, + { + "epoch": 0.1750622312740439, + "grad_norm": 0.7575971192564259, + "learning_rate": 9.458620619899803e-06, + "loss": 0.4324, + "step": 3868 + }, + { + "epoch": 0.17510749038243947, + "grad_norm": 0.7079932207926797, + "learning_rate": 9.458288867066702e-06, + "loss": 0.4319, + "step": 3869 + }, + { + "epoch": 0.17515274949083504, + "grad_norm": 0.7920654492018414, + "learning_rate": 9.457957018438668e-06, + "loss": 0.5161, + "step": 3870 + }, + { + "epoch": 0.1751980085992306, + "grad_norm": 0.7178653393777417, + "learning_rate": 9.457625074022827e-06, + "loss": 0.4424, + "step": 3871 + }, + { + "epoch": 0.17524326770762616, + "grad_norm": 0.6877880291711488, + "learning_rate": 9.457293033826314e-06, + "loss": 0.4109, + "step": 3872 + }, + { + "epoch": 0.17528852681602172, + "grad_norm": 0.6855375864869679, + "learning_rate": 9.456960897856264e-06, + "loss": 0.3978, + "step": 3873 + }, + { + "epoch": 0.17533378592441728, + "grad_norm": 0.6933572812811226, + "learning_rate": 9.456628666119812e-06, + "loss": 0.4077, + "step": 3874 + }, + { + "epoch": 0.17537904503281285, + "grad_norm": 1.5072854612067466, + "learning_rate": 9.456296338624098e-06, + "loss": 0.5352, + "step": 3875 + }, + { + "epoch": 0.1754243041412084, + "grad_norm": 0.6863337744350352, + "learning_rate": 9.455963915376262e-06, + "loss": 0.4084, + "step": 3876 + }, + { + "epoch": 0.17546956324960397, + "grad_norm": 0.8083693280407686, + "learning_rate": 9.455631396383446e-06, + "loss": 0.3907, + "step": 3877 + }, + { + "epoch": 0.17551482235799953, + "grad_norm": 1.1066296499903405, + "learning_rate": 9.455298781652797e-06, + "loss": 0.5165, + "step": 3878 + }, + { + "epoch": 0.17556008146639512, + "grad_norm": 0.8388078483952227, + "learning_rate": 9.454966071191461e-06, + "loss": 0.4957, + "step": 3879 + }, + { + "epoch": 0.17560534057479069, + "grad_norm": 0.7459585566669298, + "learning_rate": 9.454633265006585e-06, + "loss": 0.4132, + "step": 3880 + }, + { + "epoch": 0.17565059968318625, + "grad_norm": 0.7000355787238284, + "learning_rate": 9.454300363105323e-06, + "loss": 0.4443, + "step": 3881 + }, + { + "epoch": 0.1756958587915818, + "grad_norm": 0.6348413063134372, + "learning_rate": 9.453967365494824e-06, + "loss": 0.4895, + "step": 3882 + }, + { + "epoch": 0.17574111789997737, + "grad_norm": 0.7044484596842852, + "learning_rate": 9.453634272182249e-06, + "loss": 0.4248, + "step": 3883 + }, + { + "epoch": 0.17578637700837293, + "grad_norm": 0.7131839130118517, + "learning_rate": 9.45330108317475e-06, + "loss": 0.4257, + "step": 3884 + }, + { + "epoch": 0.1758316361167685, + "grad_norm": 0.6633999287098569, + "learning_rate": 9.45296779847949e-06, + "loss": 0.4329, + "step": 3885 + }, + { + "epoch": 0.17587689522516406, + "grad_norm": 0.6678411587728268, + "learning_rate": 9.452634418103626e-06, + "loss": 0.4161, + "step": 3886 + }, + { + "epoch": 0.17592215433355962, + "grad_norm": 0.6953785685795281, + "learning_rate": 9.452300942054324e-06, + "loss": 0.4185, + "step": 3887 + }, + { + "epoch": 0.17596741344195518, + "grad_norm": 1.179121220652229, + "learning_rate": 9.451967370338747e-06, + "loss": 0.5453, + "step": 3888 + }, + { + "epoch": 0.17601267255035075, + "grad_norm": 0.7069774255329982, + "learning_rate": 9.451633702964067e-06, + "loss": 0.4168, + "step": 3889 + }, + { + "epoch": 0.17605793165874634, + "grad_norm": 0.7127050767647245, + "learning_rate": 9.45129993993745e-06, + "loss": 0.3923, + "step": 3890 + }, + { + "epoch": 0.1761031907671419, + "grad_norm": 0.9695002630974516, + "learning_rate": 9.450966081266069e-06, + "loss": 0.4977, + "step": 3891 + }, + { + "epoch": 0.17614844987553746, + "grad_norm": 0.8269460584043274, + "learning_rate": 9.450632126957098e-06, + "loss": 0.526, + "step": 3892 + }, + { + "epoch": 0.17619370898393302, + "grad_norm": 0.5904239742667041, + "learning_rate": 9.45029807701771e-06, + "loss": 0.5192, + "step": 3893 + }, + { + "epoch": 0.17623896809232859, + "grad_norm": 0.7518798449772747, + "learning_rate": 9.449963931455084e-06, + "loss": 0.3919, + "step": 3894 + }, + { + "epoch": 0.17628422720072415, + "grad_norm": 0.7435713399453958, + "learning_rate": 9.449629690276401e-06, + "loss": 0.4009, + "step": 3895 + }, + { + "epoch": 0.1763294863091197, + "grad_norm": 0.5816424605119339, + "learning_rate": 9.44929535348884e-06, + "loss": 0.3547, + "step": 3896 + }, + { + "epoch": 0.17637474541751527, + "grad_norm": 0.821270546116312, + "learning_rate": 9.44896092109959e-06, + "loss": 0.367, + "step": 3897 + }, + { + "epoch": 0.17642000452591083, + "grad_norm": 0.9931850336613028, + "learning_rate": 9.448626393115833e-06, + "loss": 0.4123, + "step": 3898 + }, + { + "epoch": 0.1764652636343064, + "grad_norm": 0.6533003091582168, + "learning_rate": 9.448291769544758e-06, + "loss": 0.3862, + "step": 3899 + }, + { + "epoch": 0.17651052274270196, + "grad_norm": 0.6816255014141562, + "learning_rate": 9.447957050393552e-06, + "loss": 0.4037, + "step": 3900 + }, + { + "epoch": 0.17655578185109752, + "grad_norm": 0.7414261462013415, + "learning_rate": 9.447622235669412e-06, + "loss": 0.4018, + "step": 3901 + }, + { + "epoch": 0.1766010409594931, + "grad_norm": 0.7064509154674706, + "learning_rate": 9.44728732537953e-06, + "loss": 0.3837, + "step": 3902 + }, + { + "epoch": 0.17664630006788867, + "grad_norm": 0.6743859732965065, + "learning_rate": 9.446952319531102e-06, + "loss": 0.3788, + "step": 3903 + }, + { + "epoch": 0.17669155917628424, + "grad_norm": 2.4314369942165257, + "learning_rate": 9.446617218131326e-06, + "loss": 0.5672, + "step": 3904 + }, + { + "epoch": 0.1767368182846798, + "grad_norm": 0.7027033907421777, + "learning_rate": 9.446282021187403e-06, + "loss": 0.4037, + "step": 3905 + }, + { + "epoch": 0.17678207739307536, + "grad_norm": 0.6764659734044738, + "learning_rate": 9.445946728706535e-06, + "loss": 0.4069, + "step": 3906 + }, + { + "epoch": 0.17682733650147092, + "grad_norm": 1.2756689798521164, + "learning_rate": 9.445611340695926e-06, + "loss": 0.5385, + "step": 3907 + }, + { + "epoch": 0.17687259560986648, + "grad_norm": 2.5327412146331674, + "learning_rate": 9.445275857162784e-06, + "loss": 0.3839, + "step": 3908 + }, + { + "epoch": 0.17691785471826205, + "grad_norm": 0.6686919910303133, + "learning_rate": 9.444940278114316e-06, + "loss": 0.406, + "step": 3909 + }, + { + "epoch": 0.1769631138266576, + "grad_norm": 0.8168236937312541, + "learning_rate": 9.444604603557733e-06, + "loss": 0.3656, + "step": 3910 + }, + { + "epoch": 0.17700837293505317, + "grad_norm": 0.5818948523437871, + "learning_rate": 9.444268833500247e-06, + "loss": 0.5121, + "step": 3911 + }, + { + "epoch": 0.17705363204344873, + "grad_norm": 0.6705505609317879, + "learning_rate": 9.443932967949074e-06, + "loss": 0.4032, + "step": 3912 + }, + { + "epoch": 0.1770988911518443, + "grad_norm": 0.756973477548209, + "learning_rate": 9.443597006911432e-06, + "loss": 0.5175, + "step": 3913 + }, + { + "epoch": 0.17714415026023989, + "grad_norm": 0.720833225924833, + "learning_rate": 9.443260950394535e-06, + "loss": 0.4031, + "step": 3914 + }, + { + "epoch": 0.17718940936863545, + "grad_norm": 0.7253115963736714, + "learning_rate": 9.442924798405605e-06, + "loss": 0.3792, + "step": 3915 + }, + { + "epoch": 0.177234668477031, + "grad_norm": 0.7770778836748186, + "learning_rate": 9.44258855095187e-06, + "loss": 0.5194, + "step": 3916 + }, + { + "epoch": 0.17727992758542657, + "grad_norm": 0.7620354620331995, + "learning_rate": 9.442252208040551e-06, + "loss": 0.3729, + "step": 3917 + }, + { + "epoch": 0.17732518669382213, + "grad_norm": 0.7620836046469401, + "learning_rate": 9.441915769678874e-06, + "loss": 0.4391, + "step": 3918 + }, + { + "epoch": 0.1773704458022177, + "grad_norm": 0.6579862798048236, + "learning_rate": 9.44157923587407e-06, + "loss": 0.4095, + "step": 3919 + }, + { + "epoch": 0.17741570491061326, + "grad_norm": 0.8267059990269631, + "learning_rate": 9.441242606633369e-06, + "loss": 0.4239, + "step": 3920 + }, + { + "epoch": 0.17746096401900882, + "grad_norm": 0.7353518313708115, + "learning_rate": 9.440905881964007e-06, + "loss": 0.3951, + "step": 3921 + }, + { + "epoch": 0.17750622312740438, + "grad_norm": 0.6659406803623616, + "learning_rate": 9.440569061873213e-06, + "loss": 0.3894, + "step": 3922 + }, + { + "epoch": 0.17755148223579995, + "grad_norm": 0.7242839163193702, + "learning_rate": 9.44023214636823e-06, + "loss": 0.3994, + "step": 3923 + }, + { + "epoch": 0.1775967413441955, + "grad_norm": 0.6445084964739258, + "learning_rate": 9.439895135456297e-06, + "loss": 0.535, + "step": 3924 + }, + { + "epoch": 0.17764200045259107, + "grad_norm": 0.7747303062389351, + "learning_rate": 9.43955802914465e-06, + "loss": 0.4296, + "step": 3925 + }, + { + "epoch": 0.17768725956098666, + "grad_norm": 0.6792800098985597, + "learning_rate": 9.439220827440539e-06, + "loss": 0.421, + "step": 3926 + }, + { + "epoch": 0.17773251866938222, + "grad_norm": 0.7261120584253042, + "learning_rate": 9.438883530351207e-06, + "loss": 0.432, + "step": 3927 + }, + { + "epoch": 0.17777777777777778, + "grad_norm": 0.39904041842961024, + "learning_rate": 9.438546137883898e-06, + "loss": 0.5249, + "step": 3928 + }, + { + "epoch": 0.17782303688617335, + "grad_norm": 0.8107503680750474, + "learning_rate": 9.438208650045866e-06, + "loss": 0.3769, + "step": 3929 + }, + { + "epoch": 0.1778682959945689, + "grad_norm": 0.7203559014405553, + "learning_rate": 9.43787106684436e-06, + "loss": 0.4014, + "step": 3930 + }, + { + "epoch": 0.17791355510296447, + "grad_norm": 0.962397168004711, + "learning_rate": 9.437533388286635e-06, + "loss": 0.3732, + "step": 3931 + }, + { + "epoch": 0.17795881421136003, + "grad_norm": 0.45413655885885273, + "learning_rate": 9.437195614379947e-06, + "loss": 0.5248, + "step": 3932 + }, + { + "epoch": 0.1780040733197556, + "grad_norm": 0.817919637821282, + "learning_rate": 9.436857745131553e-06, + "loss": 0.3741, + "step": 3933 + }, + { + "epoch": 0.17804933242815116, + "grad_norm": 0.682119639232124, + "learning_rate": 9.436519780548712e-06, + "loss": 0.4316, + "step": 3934 + }, + { + "epoch": 0.17809459153654672, + "grad_norm": 0.6600377596146887, + "learning_rate": 9.436181720638688e-06, + "loss": 0.3644, + "step": 3935 + }, + { + "epoch": 0.17813985064494228, + "grad_norm": 0.4001893548138264, + "learning_rate": 9.435843565408742e-06, + "loss": 0.4959, + "step": 3936 + }, + { + "epoch": 0.17818510975333787, + "grad_norm": 0.8448226633705256, + "learning_rate": 9.435505314866143e-06, + "loss": 0.4095, + "step": 3937 + }, + { + "epoch": 0.17823036886173343, + "grad_norm": 0.7752472854802783, + "learning_rate": 9.435166969018158e-06, + "loss": 0.4406, + "step": 3938 + }, + { + "epoch": 0.178275627970129, + "grad_norm": 0.6748292726243518, + "learning_rate": 9.434828527872052e-06, + "loss": 0.422, + "step": 3939 + }, + { + "epoch": 0.17832088707852456, + "grad_norm": 0.7678223720734237, + "learning_rate": 9.434489991435106e-06, + "loss": 0.4333, + "step": 3940 + }, + { + "epoch": 0.17836614618692012, + "grad_norm": 0.8010333175192554, + "learning_rate": 9.434151359714587e-06, + "loss": 0.3947, + "step": 3941 + }, + { + "epoch": 0.17841140529531568, + "grad_norm": 0.4352994652107656, + "learning_rate": 9.433812632717776e-06, + "loss": 0.5114, + "step": 3942 + }, + { + "epoch": 0.17845666440371125, + "grad_norm": 0.7307693833726469, + "learning_rate": 9.433473810451947e-06, + "loss": 0.4461, + "step": 3943 + }, + { + "epoch": 0.1785019235121068, + "grad_norm": 0.6569377227188848, + "learning_rate": 9.433134892924383e-06, + "loss": 0.3852, + "step": 3944 + }, + { + "epoch": 0.17854718262050237, + "grad_norm": 0.7588640467049291, + "learning_rate": 9.432795880142366e-06, + "loss": 0.4646, + "step": 3945 + }, + { + "epoch": 0.17859244172889793, + "grad_norm": 0.7047461181700752, + "learning_rate": 9.432456772113179e-06, + "loss": 0.3859, + "step": 3946 + }, + { + "epoch": 0.1786377008372935, + "grad_norm": 0.6265443772082004, + "learning_rate": 9.43211756884411e-06, + "loss": 0.4089, + "step": 3947 + }, + { + "epoch": 0.17868295994568906, + "grad_norm": 0.709027381706952, + "learning_rate": 9.431778270342447e-06, + "loss": 0.4275, + "step": 3948 + }, + { + "epoch": 0.17872821905408465, + "grad_norm": 0.7120230598819207, + "learning_rate": 9.431438876615478e-06, + "loss": 0.4154, + "step": 3949 + }, + { + "epoch": 0.1787734781624802, + "grad_norm": 0.6887782725922001, + "learning_rate": 9.4310993876705e-06, + "loss": 0.4286, + "step": 3950 + }, + { + "epoch": 0.17881873727087577, + "grad_norm": 0.710693416079211, + "learning_rate": 9.430759803514802e-06, + "loss": 0.4382, + "step": 3951 + }, + { + "epoch": 0.17886399637927133, + "grad_norm": 0.6188621557116822, + "learning_rate": 9.430420124155687e-06, + "loss": 0.3749, + "step": 3952 + }, + { + "epoch": 0.1789092554876669, + "grad_norm": 0.6738178241809274, + "learning_rate": 9.43008034960045e-06, + "loss": 0.436, + "step": 3953 + }, + { + "epoch": 0.17895451459606246, + "grad_norm": 0.6825365111947984, + "learning_rate": 9.42974047985639e-06, + "loss": 0.4149, + "step": 3954 + }, + { + "epoch": 0.17899977370445802, + "grad_norm": 0.6605285605975528, + "learning_rate": 9.429400514930815e-06, + "loss": 0.4023, + "step": 3955 + }, + { + "epoch": 0.17904503281285358, + "grad_norm": 0.7096638896075035, + "learning_rate": 9.429060454831026e-06, + "loss": 0.4472, + "step": 3956 + }, + { + "epoch": 0.17909029192124915, + "grad_norm": 0.415451298677631, + "learning_rate": 9.42872029956433e-06, + "loss": 0.5404, + "step": 3957 + }, + { + "epoch": 0.1791355510296447, + "grad_norm": 0.7667486909261592, + "learning_rate": 9.428380049138038e-06, + "loss": 0.4114, + "step": 3958 + }, + { + "epoch": 0.17918081013804027, + "grad_norm": 0.6663120328599494, + "learning_rate": 9.428039703559458e-06, + "loss": 0.4269, + "step": 3959 + }, + { + "epoch": 0.17922606924643583, + "grad_norm": 0.7142005965525264, + "learning_rate": 9.427699262835904e-06, + "loss": 0.4392, + "step": 3960 + }, + { + "epoch": 0.17927132835483142, + "grad_norm": 0.8063258085038403, + "learning_rate": 9.427358726974693e-06, + "loss": 0.4144, + "step": 3961 + }, + { + "epoch": 0.17931658746322698, + "grad_norm": 0.6875786825371241, + "learning_rate": 9.42701809598314e-06, + "loss": 0.3895, + "step": 3962 + }, + { + "epoch": 0.17936184657162255, + "grad_norm": 0.38949801700994485, + "learning_rate": 9.426677369868564e-06, + "loss": 0.4787, + "step": 3963 + }, + { + "epoch": 0.1794071056800181, + "grad_norm": 0.7008172907799324, + "learning_rate": 9.426336548638287e-06, + "loss": 0.3878, + "step": 3964 + }, + { + "epoch": 0.17945236478841367, + "grad_norm": 0.6497017033257975, + "learning_rate": 9.425995632299631e-06, + "loss": 0.4116, + "step": 3965 + }, + { + "epoch": 0.17949762389680923, + "grad_norm": 0.3271747753890301, + "learning_rate": 9.425654620859923e-06, + "loss": 0.5049, + "step": 3966 + }, + { + "epoch": 0.1795428830052048, + "grad_norm": 0.7075592309298688, + "learning_rate": 9.425313514326491e-06, + "loss": 0.3762, + "step": 3967 + }, + { + "epoch": 0.17958814211360036, + "grad_norm": 1.1093788268170846, + "learning_rate": 9.424972312706663e-06, + "loss": 0.4145, + "step": 3968 + }, + { + "epoch": 0.17963340122199592, + "grad_norm": 0.69564273713801, + "learning_rate": 9.424631016007768e-06, + "loss": 0.4049, + "step": 3969 + }, + { + "epoch": 0.17967866033039148, + "grad_norm": 0.6730428805768747, + "learning_rate": 9.424289624237143e-06, + "loss": 0.357, + "step": 3970 + }, + { + "epoch": 0.17972391943878704, + "grad_norm": 0.753946831911797, + "learning_rate": 9.423948137402123e-06, + "loss": 0.4044, + "step": 3971 + }, + { + "epoch": 0.17976917854718263, + "grad_norm": 0.6356412800721248, + "learning_rate": 9.423606555510043e-06, + "loss": 0.3898, + "step": 3972 + }, + { + "epoch": 0.1798144376555782, + "grad_norm": 0.6725822799634429, + "learning_rate": 9.423264878568246e-06, + "loss": 0.3978, + "step": 3973 + }, + { + "epoch": 0.17985969676397376, + "grad_norm": 0.7231602834064391, + "learning_rate": 9.42292310658407e-06, + "loss": 0.4201, + "step": 3974 + }, + { + "epoch": 0.17990495587236932, + "grad_norm": 0.3432254737661334, + "learning_rate": 9.422581239564861e-06, + "loss": 0.5007, + "step": 3975 + }, + { + "epoch": 0.17995021498076488, + "grad_norm": 0.7604507555915985, + "learning_rate": 9.422239277517964e-06, + "loss": 0.4049, + "step": 3976 + }, + { + "epoch": 0.17999547408916045, + "grad_norm": 0.6473006192279481, + "learning_rate": 9.421897220450728e-06, + "loss": 0.3911, + "step": 3977 + }, + { + "epoch": 0.180040733197556, + "grad_norm": 0.6406786856957288, + "learning_rate": 9.4215550683705e-06, + "loss": 0.3843, + "step": 3978 + }, + { + "epoch": 0.18008599230595157, + "grad_norm": 0.7426731979212589, + "learning_rate": 9.421212821284633e-06, + "loss": 0.4037, + "step": 3979 + }, + { + "epoch": 0.18013125141434713, + "grad_norm": 0.6165518680048144, + "learning_rate": 9.420870479200483e-06, + "loss": 0.4113, + "step": 3980 + }, + { + "epoch": 0.1801765105227427, + "grad_norm": 0.6055004238082176, + "learning_rate": 9.420528042125404e-06, + "loss": 0.3845, + "step": 3981 + }, + { + "epoch": 0.18022176963113826, + "grad_norm": 0.6829154371077608, + "learning_rate": 9.420185510066753e-06, + "loss": 0.4331, + "step": 3982 + }, + { + "epoch": 0.18026702873953382, + "grad_norm": 0.701267887540177, + "learning_rate": 9.41984288303189e-06, + "loss": 0.4057, + "step": 3983 + }, + { + "epoch": 0.1803122878479294, + "grad_norm": 0.7412461761260488, + "learning_rate": 9.419500161028178e-06, + "loss": 0.4022, + "step": 3984 + }, + { + "epoch": 0.18035754695632497, + "grad_norm": 0.6478422974417011, + "learning_rate": 9.419157344062984e-06, + "loss": 0.3746, + "step": 3985 + }, + { + "epoch": 0.18040280606472053, + "grad_norm": 0.6463703026899874, + "learning_rate": 9.418814432143669e-06, + "loss": 0.4228, + "step": 3986 + }, + { + "epoch": 0.1804480651731161, + "grad_norm": 0.6316234750070395, + "learning_rate": 9.418471425277603e-06, + "loss": 0.4108, + "step": 3987 + }, + { + "epoch": 0.18049332428151166, + "grad_norm": 0.7962127763617338, + "learning_rate": 9.418128323472157e-06, + "loss": 0.4254, + "step": 3988 + }, + { + "epoch": 0.18053858338990722, + "grad_norm": 0.6952711404179807, + "learning_rate": 9.417785126734701e-06, + "loss": 0.4222, + "step": 3989 + }, + { + "epoch": 0.18058384249830278, + "grad_norm": 0.7285155806695047, + "learning_rate": 9.417441835072615e-06, + "loss": 0.412, + "step": 3990 + }, + { + "epoch": 0.18062910160669834, + "grad_norm": 0.6387050480157371, + "learning_rate": 9.417098448493267e-06, + "loss": 0.3712, + "step": 3991 + }, + { + "epoch": 0.1806743607150939, + "grad_norm": 0.6248158896534401, + "learning_rate": 9.41675496700404e-06, + "loss": 0.3723, + "step": 3992 + }, + { + "epoch": 0.18071961982348947, + "grad_norm": 0.6519533699295533, + "learning_rate": 9.416411390612315e-06, + "loss": 0.3747, + "step": 3993 + }, + { + "epoch": 0.18076487893188503, + "grad_norm": 0.49737740981951944, + "learning_rate": 9.416067719325472e-06, + "loss": 0.5126, + "step": 3994 + }, + { + "epoch": 0.1808101380402806, + "grad_norm": 0.6608173062896084, + "learning_rate": 9.415723953150897e-06, + "loss": 0.4085, + "step": 3995 + }, + { + "epoch": 0.18085539714867618, + "grad_norm": 0.6267983817821665, + "learning_rate": 9.415380092095976e-06, + "loss": 0.3878, + "step": 3996 + }, + { + "epoch": 0.18090065625707175, + "grad_norm": 0.7368257783544285, + "learning_rate": 9.415036136168099e-06, + "loss": 0.4488, + "step": 3997 + }, + { + "epoch": 0.1809459153654673, + "grad_norm": 0.6199378468837977, + "learning_rate": 9.414692085374654e-06, + "loss": 0.4199, + "step": 3998 + }, + { + "epoch": 0.18099117447386287, + "grad_norm": 0.5008602627173249, + "learning_rate": 9.414347939723033e-06, + "loss": 0.517, + "step": 3999 + }, + { + "epoch": 0.18103643358225843, + "grad_norm": 0.7481791294382593, + "learning_rate": 9.414003699220636e-06, + "loss": 0.4177, + "step": 4000 + }, + { + "epoch": 0.181081692690654, + "grad_norm": 0.6333229028807124, + "learning_rate": 9.413659363874855e-06, + "loss": 0.3521, + "step": 4001 + }, + { + "epoch": 0.18112695179904956, + "grad_norm": 0.6906285797805105, + "learning_rate": 9.413314933693088e-06, + "loss": 0.3738, + "step": 4002 + }, + { + "epoch": 0.18117221090744512, + "grad_norm": 0.6255253306601154, + "learning_rate": 9.41297040868274e-06, + "loss": 0.4365, + "step": 4003 + }, + { + "epoch": 0.18121747001584068, + "grad_norm": 0.6497671002311871, + "learning_rate": 9.412625788851208e-06, + "loss": 0.382, + "step": 4004 + }, + { + "epoch": 0.18126272912423624, + "grad_norm": 0.734449128993359, + "learning_rate": 9.412281074205903e-06, + "loss": 0.4666, + "step": 4005 + }, + { + "epoch": 0.1813079882326318, + "grad_norm": 0.6881308403070234, + "learning_rate": 9.41193626475423e-06, + "loss": 0.4051, + "step": 4006 + }, + { + "epoch": 0.18135324734102737, + "grad_norm": 0.4046944363293297, + "learning_rate": 9.411591360503594e-06, + "loss": 0.5014, + "step": 4007 + }, + { + "epoch": 0.18139850644942296, + "grad_norm": 0.6909008023747709, + "learning_rate": 9.41124636146141e-06, + "loss": 0.3746, + "step": 4008 + }, + { + "epoch": 0.18144376555781852, + "grad_norm": 0.7304086747129781, + "learning_rate": 9.41090126763509e-06, + "loss": 0.4352, + "step": 4009 + }, + { + "epoch": 0.18148902466621408, + "grad_norm": 0.6541757729998198, + "learning_rate": 9.410556079032049e-06, + "loss": 0.4304, + "step": 4010 + }, + { + "epoch": 0.18153428377460964, + "grad_norm": 0.6872384761920306, + "learning_rate": 9.410210795659702e-06, + "loss": 0.4321, + "step": 4011 + }, + { + "epoch": 0.1815795428830052, + "grad_norm": 0.3936706884541984, + "learning_rate": 9.409865417525473e-06, + "loss": 0.4897, + "step": 4012 + }, + { + "epoch": 0.18162480199140077, + "grad_norm": 0.6558205643441305, + "learning_rate": 9.409519944636778e-06, + "loss": 0.3871, + "step": 4013 + }, + { + "epoch": 0.18167006109979633, + "grad_norm": 0.6706625783570828, + "learning_rate": 9.409174377001043e-06, + "loss": 0.3961, + "step": 4014 + }, + { + "epoch": 0.1817153202081919, + "grad_norm": 0.6638586577515181, + "learning_rate": 9.40882871462569e-06, + "loss": 0.4043, + "step": 4015 + }, + { + "epoch": 0.18176057931658746, + "grad_norm": 0.6821026362875763, + "learning_rate": 9.408482957518152e-06, + "loss": 0.4002, + "step": 4016 + }, + { + "epoch": 0.18180583842498302, + "grad_norm": 0.6699639269128657, + "learning_rate": 9.408137105685853e-06, + "loss": 0.4185, + "step": 4017 + }, + { + "epoch": 0.18185109753337858, + "grad_norm": 0.6801453596286959, + "learning_rate": 9.407791159136226e-06, + "loss": 0.4125, + "step": 4018 + }, + { + "epoch": 0.18189635664177417, + "grad_norm": 0.7345921944079861, + "learning_rate": 9.407445117876705e-06, + "loss": 0.4349, + "step": 4019 + }, + { + "epoch": 0.18194161575016973, + "grad_norm": 0.6975348399576882, + "learning_rate": 9.407098981914726e-06, + "loss": 0.4387, + "step": 4020 + }, + { + "epoch": 0.1819868748585653, + "grad_norm": 0.653769033304466, + "learning_rate": 9.406752751257724e-06, + "loss": 0.4169, + "step": 4021 + }, + { + "epoch": 0.18203213396696086, + "grad_norm": 0.6274532282943137, + "learning_rate": 9.40640642591314e-06, + "loss": 0.3745, + "step": 4022 + }, + { + "epoch": 0.18207739307535642, + "grad_norm": 0.7483261826815685, + "learning_rate": 9.406060005888414e-06, + "loss": 0.4375, + "step": 4023 + }, + { + "epoch": 0.18212265218375198, + "grad_norm": 0.6041073913009501, + "learning_rate": 9.405713491190992e-06, + "loss": 0.5221, + "step": 4024 + }, + { + "epoch": 0.18216791129214754, + "grad_norm": 0.669360438322126, + "learning_rate": 9.405366881828317e-06, + "loss": 0.4088, + "step": 4025 + }, + { + "epoch": 0.1822131704005431, + "grad_norm": 0.6639839913216719, + "learning_rate": 9.40502017780784e-06, + "loss": 0.4042, + "step": 4026 + }, + { + "epoch": 0.18225842950893867, + "grad_norm": 0.7186389028518378, + "learning_rate": 9.404673379137007e-06, + "loss": 0.3955, + "step": 4027 + }, + { + "epoch": 0.18230368861733423, + "grad_norm": 0.7107843378506711, + "learning_rate": 9.40432648582327e-06, + "loss": 0.4038, + "step": 4028 + }, + { + "epoch": 0.1823489477257298, + "grad_norm": 0.38811158141052376, + "learning_rate": 9.403979497874085e-06, + "loss": 0.5146, + "step": 4029 + }, + { + "epoch": 0.18239420683412536, + "grad_norm": 0.639213961994482, + "learning_rate": 9.403632415296907e-06, + "loss": 0.4025, + "step": 4030 + }, + { + "epoch": 0.18243946594252095, + "grad_norm": 0.7539547800235694, + "learning_rate": 9.403285238099192e-06, + "loss": 0.4333, + "step": 4031 + }, + { + "epoch": 0.1824847250509165, + "grad_norm": 0.6599444003297105, + "learning_rate": 9.402937966288402e-06, + "loss": 0.3992, + "step": 4032 + }, + { + "epoch": 0.18252998415931207, + "grad_norm": 0.7890073548455298, + "learning_rate": 9.402590599871994e-06, + "loss": 0.3805, + "step": 4033 + }, + { + "epoch": 0.18257524326770763, + "grad_norm": 0.6702074136415639, + "learning_rate": 9.402243138857439e-06, + "loss": 0.4398, + "step": 4034 + }, + { + "epoch": 0.1826205023761032, + "grad_norm": 0.7131108641193631, + "learning_rate": 9.401895583252198e-06, + "loss": 0.4137, + "step": 4035 + }, + { + "epoch": 0.18266576148449876, + "grad_norm": 0.6868999795018752, + "learning_rate": 9.40154793306374e-06, + "loss": 0.432, + "step": 4036 + }, + { + "epoch": 0.18271102059289432, + "grad_norm": 1.1847528882430005, + "learning_rate": 9.401200188299538e-06, + "loss": 0.4423, + "step": 4037 + }, + { + "epoch": 0.18275627970128988, + "grad_norm": 0.6741045662787118, + "learning_rate": 9.40085234896706e-06, + "loss": 0.4007, + "step": 4038 + }, + { + "epoch": 0.18280153880968544, + "grad_norm": 0.7622032017541412, + "learning_rate": 9.400504415073781e-06, + "loss": 0.4238, + "step": 4039 + }, + { + "epoch": 0.182846797918081, + "grad_norm": 0.6220923109649558, + "learning_rate": 9.400156386627177e-06, + "loss": 0.3761, + "step": 4040 + }, + { + "epoch": 0.18289205702647657, + "grad_norm": 0.8052312403575104, + "learning_rate": 9.399808263634725e-06, + "loss": 0.4273, + "step": 4041 + }, + { + "epoch": 0.18293731613487213, + "grad_norm": 0.684150835282518, + "learning_rate": 9.399460046103908e-06, + "loss": 0.4268, + "step": 4042 + }, + { + "epoch": 0.18298257524326772, + "grad_norm": 0.4843734116041665, + "learning_rate": 9.399111734042206e-06, + "loss": 0.488, + "step": 4043 + }, + { + "epoch": 0.18302783435166328, + "grad_norm": 0.7435265409800369, + "learning_rate": 9.398763327457104e-06, + "loss": 0.3692, + "step": 4044 + }, + { + "epoch": 0.18307309346005884, + "grad_norm": 0.6868701918588811, + "learning_rate": 9.398414826356088e-06, + "loss": 0.3993, + "step": 4045 + }, + { + "epoch": 0.1831183525684544, + "grad_norm": 0.9011937083746742, + "learning_rate": 9.398066230746645e-06, + "loss": 0.3478, + "step": 4046 + }, + { + "epoch": 0.18316361167684997, + "grad_norm": 0.714107170767041, + "learning_rate": 9.397717540636268e-06, + "loss": 0.3849, + "step": 4047 + }, + { + "epoch": 0.18320887078524553, + "grad_norm": 0.6470444070084024, + "learning_rate": 9.397368756032445e-06, + "loss": 0.3682, + "step": 4048 + }, + { + "epoch": 0.1832541298936411, + "grad_norm": 0.4175690544272301, + "learning_rate": 9.397019876942675e-06, + "loss": 0.5435, + "step": 4049 + }, + { + "epoch": 0.18329938900203666, + "grad_norm": 0.7044682525903221, + "learning_rate": 9.396670903374452e-06, + "loss": 0.4174, + "step": 4050 + }, + { + "epoch": 0.18334464811043222, + "grad_norm": 0.7265792303979158, + "learning_rate": 9.396321835335274e-06, + "loss": 0.4232, + "step": 4051 + }, + { + "epoch": 0.18338990721882778, + "grad_norm": 0.6683492011615667, + "learning_rate": 9.395972672832642e-06, + "loss": 0.4212, + "step": 4052 + }, + { + "epoch": 0.18343516632722334, + "grad_norm": 0.8017287999678097, + "learning_rate": 9.39562341587406e-06, + "loss": 0.4166, + "step": 4053 + }, + { + "epoch": 0.1834804254356189, + "grad_norm": 0.7425769217227569, + "learning_rate": 9.39527406446703e-06, + "loss": 0.4377, + "step": 4054 + }, + { + "epoch": 0.1835256845440145, + "grad_norm": 0.32643590782763204, + "learning_rate": 9.394924618619059e-06, + "loss": 0.4994, + "step": 4055 + }, + { + "epoch": 0.18357094365241006, + "grad_norm": 0.6471436602045438, + "learning_rate": 9.394575078337657e-06, + "loss": 0.4105, + "step": 4056 + }, + { + "epoch": 0.18361620276080562, + "grad_norm": 0.4833246761409749, + "learning_rate": 9.394225443630332e-06, + "loss": 0.4927, + "step": 4057 + }, + { + "epoch": 0.18366146186920118, + "grad_norm": 0.6848428054800386, + "learning_rate": 9.393875714504598e-06, + "loss": 0.3934, + "step": 4058 + }, + { + "epoch": 0.18370672097759674, + "grad_norm": 0.655694304334304, + "learning_rate": 9.393525890967971e-06, + "loss": 0.4049, + "step": 4059 + }, + { + "epoch": 0.1837519800859923, + "grad_norm": 0.3594581111319001, + "learning_rate": 9.393175973027967e-06, + "loss": 0.4795, + "step": 4060 + }, + { + "epoch": 0.18379723919438787, + "grad_norm": 0.7635421681342424, + "learning_rate": 9.392825960692103e-06, + "loss": 0.3944, + "step": 4061 + }, + { + "epoch": 0.18384249830278343, + "grad_norm": 0.6727442618433395, + "learning_rate": 9.3924758539679e-06, + "loss": 0.4417, + "step": 4062 + }, + { + "epoch": 0.183887757411179, + "grad_norm": 0.6499968211176823, + "learning_rate": 9.392125652862881e-06, + "loss": 0.3782, + "step": 4063 + }, + { + "epoch": 0.18393301651957455, + "grad_norm": 0.6338590652010262, + "learning_rate": 9.391775357384571e-06, + "loss": 0.4408, + "step": 4064 + }, + { + "epoch": 0.18397827562797012, + "grad_norm": 0.6456796294022185, + "learning_rate": 9.3914249675405e-06, + "loss": 0.3867, + "step": 4065 + }, + { + "epoch": 0.1840235347363657, + "grad_norm": 0.4560740638541108, + "learning_rate": 9.39107448333819e-06, + "loss": 0.5238, + "step": 4066 + }, + { + "epoch": 0.18406879384476127, + "grad_norm": 0.3738339016612427, + "learning_rate": 9.390723904785178e-06, + "loss": 0.5169, + "step": 4067 + }, + { + "epoch": 0.18411405295315683, + "grad_norm": 0.8041291215714814, + "learning_rate": 9.390373231888991e-06, + "loss": 0.4241, + "step": 4068 + }, + { + "epoch": 0.1841593120615524, + "grad_norm": 0.6659286018838321, + "learning_rate": 9.39002246465717e-06, + "loss": 0.4115, + "step": 4069 + }, + { + "epoch": 0.18420457116994796, + "grad_norm": 0.6382235578815533, + "learning_rate": 9.389671603097248e-06, + "loss": 0.4536, + "step": 4070 + }, + { + "epoch": 0.18424983027834352, + "grad_norm": 0.7681460436470686, + "learning_rate": 9.389320647216767e-06, + "loss": 0.3774, + "step": 4071 + }, + { + "epoch": 0.18429508938673908, + "grad_norm": 0.7688218133575312, + "learning_rate": 9.388969597023265e-06, + "loss": 0.399, + "step": 4072 + }, + { + "epoch": 0.18434034849513464, + "grad_norm": 0.6452861606689477, + "learning_rate": 9.388618452524285e-06, + "loss": 0.3793, + "step": 4073 + }, + { + "epoch": 0.1843856076035302, + "grad_norm": 0.703849544538944, + "learning_rate": 9.388267213727373e-06, + "loss": 0.4177, + "step": 4074 + }, + { + "epoch": 0.18443086671192577, + "grad_norm": 0.6851811677788411, + "learning_rate": 9.387915880640077e-06, + "loss": 0.4469, + "step": 4075 + }, + { + "epoch": 0.18447612582032133, + "grad_norm": 0.6926954407735016, + "learning_rate": 9.387564453269945e-06, + "loss": 0.4019, + "step": 4076 + }, + { + "epoch": 0.1845213849287169, + "grad_norm": 0.6532267417161022, + "learning_rate": 9.38721293162453e-06, + "loss": 0.4304, + "step": 4077 + }, + { + "epoch": 0.18456664403711248, + "grad_norm": 0.7015512523650694, + "learning_rate": 9.386861315711382e-06, + "loss": 0.5237, + "step": 4078 + }, + { + "epoch": 0.18461190314550804, + "grad_norm": 0.5024324484532633, + "learning_rate": 9.386509605538057e-06, + "loss": 0.5247, + "step": 4079 + }, + { + "epoch": 0.1846571622539036, + "grad_norm": 0.8009254479377098, + "learning_rate": 9.386157801112112e-06, + "loss": 0.4297, + "step": 4080 + }, + { + "epoch": 0.18470242136229917, + "grad_norm": 0.7143862676068782, + "learning_rate": 9.385805902441109e-06, + "loss": 0.4324, + "step": 4081 + }, + { + "epoch": 0.18474768047069473, + "grad_norm": 0.7113426206348091, + "learning_rate": 9.385453909532606e-06, + "loss": 0.3778, + "step": 4082 + }, + { + "epoch": 0.1847929395790903, + "grad_norm": 0.7426225523157818, + "learning_rate": 9.385101822394167e-06, + "loss": 0.3976, + "step": 4083 + }, + { + "epoch": 0.18483819868748586, + "grad_norm": 0.7347993942730485, + "learning_rate": 9.384749641033358e-06, + "loss": 0.3931, + "step": 4084 + }, + { + "epoch": 0.18488345779588142, + "grad_norm": 0.7249770199361875, + "learning_rate": 9.384397365457747e-06, + "loss": 0.4235, + "step": 4085 + }, + { + "epoch": 0.18492871690427698, + "grad_norm": 0.7628881619671343, + "learning_rate": 9.3840449956749e-06, + "loss": 0.3643, + "step": 4086 + }, + { + "epoch": 0.18497397601267254, + "grad_norm": 0.6992424817628683, + "learning_rate": 9.383692531692392e-06, + "loss": 0.4394, + "step": 4087 + }, + { + "epoch": 0.1850192351210681, + "grad_norm": 0.6819527112357563, + "learning_rate": 9.383339973517796e-06, + "loss": 0.4066, + "step": 4088 + }, + { + "epoch": 0.18506449422946367, + "grad_norm": 0.7248904254009867, + "learning_rate": 9.382987321158686e-06, + "loss": 0.42, + "step": 4089 + }, + { + "epoch": 0.18510975333785926, + "grad_norm": 1.0207624752501492, + "learning_rate": 9.382634574622637e-06, + "loss": 0.4014, + "step": 4090 + }, + { + "epoch": 0.18515501244625482, + "grad_norm": 0.6421552433584352, + "learning_rate": 9.382281733917235e-06, + "loss": 0.4049, + "step": 4091 + }, + { + "epoch": 0.18520027155465038, + "grad_norm": 0.6737615415897059, + "learning_rate": 9.381928799050054e-06, + "loss": 0.3951, + "step": 4092 + }, + { + "epoch": 0.18524553066304594, + "grad_norm": 0.6744702758862204, + "learning_rate": 9.381575770028684e-06, + "loss": 0.3851, + "step": 4093 + }, + { + "epoch": 0.1852907897714415, + "grad_norm": 1.5943439842998557, + "learning_rate": 9.381222646860708e-06, + "loss": 0.5285, + "step": 4094 + }, + { + "epoch": 0.18533604887983707, + "grad_norm": 0.7502753234602005, + "learning_rate": 9.380869429553712e-06, + "loss": 0.4299, + "step": 4095 + }, + { + "epoch": 0.18538130798823263, + "grad_norm": 0.6712999824888282, + "learning_rate": 9.380516118115287e-06, + "loss": 0.399, + "step": 4096 + }, + { + "epoch": 0.1854265670966282, + "grad_norm": 0.6842794981481997, + "learning_rate": 9.380162712553024e-06, + "loss": 0.3814, + "step": 4097 + }, + { + "epoch": 0.18547182620502375, + "grad_norm": 0.7185421908678674, + "learning_rate": 9.379809212874517e-06, + "loss": 0.3983, + "step": 4098 + }, + { + "epoch": 0.18551708531341932, + "grad_norm": 0.6614546240803848, + "learning_rate": 9.379455619087361e-06, + "loss": 0.3495, + "step": 4099 + }, + { + "epoch": 0.18556234442181488, + "grad_norm": 0.6541861454625707, + "learning_rate": 9.379101931199154e-06, + "loss": 0.4067, + "step": 4100 + }, + { + "epoch": 0.18560760353021044, + "grad_norm": 1.0781073128655911, + "learning_rate": 9.378748149217498e-06, + "loss": 0.5088, + "step": 4101 + }, + { + "epoch": 0.18565286263860603, + "grad_norm": 0.690712285204404, + "learning_rate": 9.378394273149992e-06, + "loss": 0.3794, + "step": 4102 + }, + { + "epoch": 0.1856981217470016, + "grad_norm": 0.6801659428571151, + "learning_rate": 9.37804030300424e-06, + "loss": 0.3863, + "step": 4103 + }, + { + "epoch": 0.18574338085539716, + "grad_norm": 0.6462483308953831, + "learning_rate": 9.377686238787848e-06, + "loss": 0.5028, + "step": 4104 + }, + { + "epoch": 0.18578863996379272, + "grad_norm": 0.6980783750964051, + "learning_rate": 9.377332080508423e-06, + "loss": 0.3842, + "step": 4105 + }, + { + "epoch": 0.18583389907218828, + "grad_norm": 0.6791753952826917, + "learning_rate": 9.376977828173576e-06, + "loss": 0.4278, + "step": 4106 + }, + { + "epoch": 0.18587915818058384, + "grad_norm": 0.5805388440208535, + "learning_rate": 9.376623481790918e-06, + "loss": 0.4883, + "step": 4107 + }, + { + "epoch": 0.1859244172889794, + "grad_norm": 0.6774348967610799, + "learning_rate": 9.376269041368063e-06, + "loss": 0.3824, + "step": 4108 + }, + { + "epoch": 0.18596967639737497, + "grad_norm": 0.7046508039451562, + "learning_rate": 9.375914506912628e-06, + "loss": 0.4184, + "step": 4109 + }, + { + "epoch": 0.18601493550577053, + "grad_norm": 0.6349773226277379, + "learning_rate": 9.37555987843223e-06, + "loss": 0.5237, + "step": 4110 + }, + { + "epoch": 0.1860601946141661, + "grad_norm": 0.7162257440503814, + "learning_rate": 9.375205155934488e-06, + "loss": 0.3783, + "step": 4111 + }, + { + "epoch": 0.18610545372256165, + "grad_norm": 0.7165412489731983, + "learning_rate": 9.374850339427024e-06, + "loss": 0.4266, + "step": 4112 + }, + { + "epoch": 0.18615071283095724, + "grad_norm": 0.6954870860047865, + "learning_rate": 9.374495428917463e-06, + "loss": 0.3875, + "step": 4113 + }, + { + "epoch": 0.1861959719393528, + "grad_norm": 0.6981956064505662, + "learning_rate": 9.37414042441343e-06, + "loss": 0.3425, + "step": 4114 + }, + { + "epoch": 0.18624123104774837, + "grad_norm": 0.7453770808853691, + "learning_rate": 9.373785325922556e-06, + "loss": 0.4829, + "step": 4115 + }, + { + "epoch": 0.18628649015614393, + "grad_norm": 0.6556482744053577, + "learning_rate": 9.373430133452466e-06, + "loss": 0.4009, + "step": 4116 + }, + { + "epoch": 0.1863317492645395, + "grad_norm": 0.6973709463168327, + "learning_rate": 9.373074847010795e-06, + "loss": 0.4074, + "step": 4117 + }, + { + "epoch": 0.18637700837293505, + "grad_norm": 0.7096935290207299, + "learning_rate": 9.372719466605176e-06, + "loss": 0.459, + "step": 4118 + }, + { + "epoch": 0.18642226748133062, + "grad_norm": 0.626924754136172, + "learning_rate": 9.372363992243245e-06, + "loss": 0.3997, + "step": 4119 + }, + { + "epoch": 0.18646752658972618, + "grad_norm": 0.691742254232117, + "learning_rate": 9.37200842393264e-06, + "loss": 0.4543, + "step": 4120 + }, + { + "epoch": 0.18651278569812174, + "grad_norm": 0.7763991797792152, + "learning_rate": 9.371652761681006e-06, + "loss": 0.4108, + "step": 4121 + }, + { + "epoch": 0.1865580448065173, + "grad_norm": 0.6590271573620654, + "learning_rate": 9.371297005495976e-06, + "loss": 0.3566, + "step": 4122 + }, + { + "epoch": 0.18660330391491287, + "grad_norm": 0.7172524152091183, + "learning_rate": 9.3709411553852e-06, + "loss": 0.3836, + "step": 4123 + }, + { + "epoch": 0.18664856302330843, + "grad_norm": 0.6864398739450693, + "learning_rate": 9.370585211356323e-06, + "loss": 0.3759, + "step": 4124 + }, + { + "epoch": 0.18669382213170402, + "grad_norm": 0.645730820024419, + "learning_rate": 9.370229173416994e-06, + "loss": 0.378, + "step": 4125 + }, + { + "epoch": 0.18673908124009958, + "grad_norm": 0.6075348044191042, + "learning_rate": 9.36987304157486e-06, + "loss": 0.5183, + "step": 4126 + }, + { + "epoch": 0.18678434034849514, + "grad_norm": 0.78877271214592, + "learning_rate": 9.369516815837579e-06, + "loss": 0.3947, + "step": 4127 + }, + { + "epoch": 0.1868295994568907, + "grad_norm": 0.7040157005782529, + "learning_rate": 9.369160496212797e-06, + "loss": 0.4193, + "step": 4128 + }, + { + "epoch": 0.18687485856528627, + "grad_norm": 0.7747630187627644, + "learning_rate": 9.368804082708178e-06, + "loss": 0.4055, + "step": 4129 + }, + { + "epoch": 0.18692011767368183, + "grad_norm": 0.6983420726951024, + "learning_rate": 9.368447575331376e-06, + "loss": 0.4087, + "step": 4130 + }, + { + "epoch": 0.1869653767820774, + "grad_norm": 0.6971366034220329, + "learning_rate": 9.368090974090053e-06, + "loss": 0.3831, + "step": 4131 + }, + { + "epoch": 0.18701063589047295, + "grad_norm": 0.6732722620717891, + "learning_rate": 9.36773427899187e-06, + "loss": 0.4186, + "step": 4132 + }, + { + "epoch": 0.18705589499886852, + "grad_norm": 0.7045716634227874, + "learning_rate": 9.367377490044491e-06, + "loss": 0.4112, + "step": 4133 + }, + { + "epoch": 0.18710115410726408, + "grad_norm": 0.7178724111065302, + "learning_rate": 9.367020607255584e-06, + "loss": 0.4044, + "step": 4134 + }, + { + "epoch": 0.18714641321565964, + "grad_norm": 0.7229009173713574, + "learning_rate": 9.366663630632817e-06, + "loss": 0.4038, + "step": 4135 + }, + { + "epoch": 0.1871916723240552, + "grad_norm": 0.6270133182764555, + "learning_rate": 9.36630656018386e-06, + "loss": 0.3759, + "step": 4136 + }, + { + "epoch": 0.1872369314324508, + "grad_norm": 0.7126974681377124, + "learning_rate": 9.365949395916383e-06, + "loss": 0.3723, + "step": 4137 + }, + { + "epoch": 0.18728219054084635, + "grad_norm": 0.7413695555778791, + "learning_rate": 9.365592137838063e-06, + "loss": 0.4281, + "step": 4138 + }, + { + "epoch": 0.18732744964924192, + "grad_norm": 1.0872267695040065, + "learning_rate": 9.365234785956575e-06, + "loss": 0.4052, + "step": 4139 + }, + { + "epoch": 0.18737270875763748, + "grad_norm": 0.6618657909759026, + "learning_rate": 9.3648773402796e-06, + "loss": 0.395, + "step": 4140 + }, + { + "epoch": 0.18741796786603304, + "grad_norm": 0.756847238761708, + "learning_rate": 9.364519800814818e-06, + "loss": 0.3977, + "step": 4141 + }, + { + "epoch": 0.1874632269744286, + "grad_norm": 0.7104506052937238, + "learning_rate": 9.364162167569907e-06, + "loss": 0.4612, + "step": 4142 + }, + { + "epoch": 0.18750848608282417, + "grad_norm": 0.622825098297183, + "learning_rate": 9.363804440552557e-06, + "loss": 0.5194, + "step": 4143 + }, + { + "epoch": 0.18755374519121973, + "grad_norm": 0.8854425327165626, + "learning_rate": 9.363446619770452e-06, + "loss": 0.3511, + "step": 4144 + }, + { + "epoch": 0.1875990042996153, + "grad_norm": 0.7319115033311788, + "learning_rate": 9.363088705231277e-06, + "loss": 0.4249, + "step": 4145 + }, + { + "epoch": 0.18764426340801085, + "grad_norm": 0.6450997911633352, + "learning_rate": 9.36273069694273e-06, + "loss": 0.3504, + "step": 4146 + }, + { + "epoch": 0.18768952251640642, + "grad_norm": 0.46984863114291675, + "learning_rate": 9.362372594912498e-06, + "loss": 0.535, + "step": 4147 + }, + { + "epoch": 0.187734781624802, + "grad_norm": 0.4128089269608341, + "learning_rate": 9.362014399148275e-06, + "loss": 0.5204, + "step": 4148 + }, + { + "epoch": 0.18778004073319757, + "grad_norm": 0.8256883317191542, + "learning_rate": 9.361656109657761e-06, + "loss": 0.397, + "step": 4149 + }, + { + "epoch": 0.18782529984159313, + "grad_norm": 0.808810125346021, + "learning_rate": 9.361297726448656e-06, + "loss": 0.4696, + "step": 4150 + }, + { + "epoch": 0.1878705589499887, + "grad_norm": 0.3406958381061442, + "learning_rate": 9.360939249528653e-06, + "loss": 0.5191, + "step": 4151 + }, + { + "epoch": 0.18791581805838425, + "grad_norm": 0.7482057961528514, + "learning_rate": 9.360580678905462e-06, + "loss": 0.3934, + "step": 4152 + }, + { + "epoch": 0.18796107716677982, + "grad_norm": 0.3900188665176955, + "learning_rate": 9.360222014586782e-06, + "loss": 0.5091, + "step": 4153 + }, + { + "epoch": 0.18800633627517538, + "grad_norm": 0.7373474726783783, + "learning_rate": 9.359863256580326e-06, + "loss": 0.4003, + "step": 4154 + }, + { + "epoch": 0.18805159538357094, + "grad_norm": 0.7019847965214174, + "learning_rate": 9.359504404893795e-06, + "loss": 0.3899, + "step": 4155 + }, + { + "epoch": 0.1880968544919665, + "grad_norm": 0.6653724063594627, + "learning_rate": 9.359145459534906e-06, + "loss": 0.38, + "step": 4156 + }, + { + "epoch": 0.18814211360036207, + "grad_norm": 0.3733988868528731, + "learning_rate": 9.35878642051137e-06, + "loss": 0.5017, + "step": 4157 + }, + { + "epoch": 0.18818737270875763, + "grad_norm": 0.8251591439561985, + "learning_rate": 9.358427287830898e-06, + "loss": 0.4396, + "step": 4158 + }, + { + "epoch": 0.1882326318171532, + "grad_norm": 0.7482630429833076, + "learning_rate": 9.358068061501211e-06, + "loss": 0.4259, + "step": 4159 + }, + { + "epoch": 0.18827789092554878, + "grad_norm": 0.6534549270921789, + "learning_rate": 9.357708741530025e-06, + "loss": 0.3608, + "step": 4160 + }, + { + "epoch": 0.18832315003394434, + "grad_norm": 0.6465604289725814, + "learning_rate": 9.357349327925063e-06, + "loss": 0.3773, + "step": 4161 + }, + { + "epoch": 0.1883684091423399, + "grad_norm": 0.7427467905232471, + "learning_rate": 9.356989820694046e-06, + "loss": 0.3589, + "step": 4162 + }, + { + "epoch": 0.18841366825073547, + "grad_norm": 0.7248522411623955, + "learning_rate": 9.3566302198447e-06, + "loss": 0.4453, + "step": 4163 + }, + { + "epoch": 0.18845892735913103, + "grad_norm": 0.6468436050912464, + "learning_rate": 9.356270525384749e-06, + "loss": 0.3759, + "step": 4164 + }, + { + "epoch": 0.1885041864675266, + "grad_norm": 0.6606287578369291, + "learning_rate": 9.355910737321927e-06, + "loss": 0.3866, + "step": 4165 + }, + { + "epoch": 0.18854944557592215, + "grad_norm": 0.874469386000399, + "learning_rate": 9.35555085566396e-06, + "loss": 0.426, + "step": 4166 + }, + { + "epoch": 0.18859470468431772, + "grad_norm": 1.379091788268698, + "learning_rate": 9.35519088041858e-06, + "loss": 0.4193, + "step": 4167 + }, + { + "epoch": 0.18863996379271328, + "grad_norm": 0.6644904529186746, + "learning_rate": 9.354830811593527e-06, + "loss": 0.3589, + "step": 4168 + }, + { + "epoch": 0.18868522290110884, + "grad_norm": 0.6252804901163832, + "learning_rate": 9.354470649196532e-06, + "loss": 0.3771, + "step": 4169 + }, + { + "epoch": 0.1887304820095044, + "grad_norm": 0.6371878142364097, + "learning_rate": 9.354110393235339e-06, + "loss": 0.3911, + "step": 4170 + }, + { + "epoch": 0.18877574111789996, + "grad_norm": 0.7310086362067233, + "learning_rate": 9.353750043717685e-06, + "loss": 0.435, + "step": 4171 + }, + { + "epoch": 0.18882100022629555, + "grad_norm": 0.6213844537992925, + "learning_rate": 9.353389600651313e-06, + "loss": 0.3899, + "step": 4172 + }, + { + "epoch": 0.18886625933469112, + "grad_norm": 0.6700371913451663, + "learning_rate": 9.35302906404397e-06, + "loss": 0.4159, + "step": 4173 + }, + { + "epoch": 0.18891151844308668, + "grad_norm": 0.7052752239619765, + "learning_rate": 9.352668433903402e-06, + "loss": 0.3734, + "step": 4174 + }, + { + "epoch": 0.18895677755148224, + "grad_norm": 0.6566206805556328, + "learning_rate": 9.352307710237358e-06, + "loss": 0.4252, + "step": 4175 + }, + { + "epoch": 0.1890020366598778, + "grad_norm": 0.7189694032411736, + "learning_rate": 9.351946893053587e-06, + "loss": 0.4134, + "step": 4176 + }, + { + "epoch": 0.18904729576827337, + "grad_norm": 0.714903760039513, + "learning_rate": 9.351585982359845e-06, + "loss": 0.4327, + "step": 4177 + }, + { + "epoch": 0.18909255487666893, + "grad_norm": 0.9170516480758911, + "learning_rate": 9.351224978163885e-06, + "loss": 0.3823, + "step": 4178 + }, + { + "epoch": 0.1891378139850645, + "grad_norm": 0.6625988385123889, + "learning_rate": 9.350863880473462e-06, + "loss": 0.4025, + "step": 4179 + }, + { + "epoch": 0.18918307309346005, + "grad_norm": 0.6808033178232126, + "learning_rate": 9.350502689296337e-06, + "loss": 0.4382, + "step": 4180 + }, + { + "epoch": 0.18922833220185561, + "grad_norm": 0.6377075302123607, + "learning_rate": 9.350141404640273e-06, + "loss": 0.3599, + "step": 4181 + }, + { + "epoch": 0.18927359131025118, + "grad_norm": 0.4455416067005901, + "learning_rate": 9.34978002651303e-06, + "loss": 0.5079, + "step": 4182 + }, + { + "epoch": 0.18931885041864674, + "grad_norm": 0.9095683713471634, + "learning_rate": 9.349418554922371e-06, + "loss": 0.3626, + "step": 4183 + }, + { + "epoch": 0.18936410952704233, + "grad_norm": 0.7259307961524236, + "learning_rate": 9.349056989876068e-06, + "loss": 0.3844, + "step": 4184 + }, + { + "epoch": 0.1894093686354379, + "grad_norm": 0.3346786654337558, + "learning_rate": 9.348695331381887e-06, + "loss": 0.5089, + "step": 4185 + }, + { + "epoch": 0.18945462774383345, + "grad_norm": 0.3344837591888438, + "learning_rate": 9.3483335794476e-06, + "loss": 0.4931, + "step": 4186 + }, + { + "epoch": 0.18949988685222902, + "grad_norm": 0.8388219787853163, + "learning_rate": 9.347971734080978e-06, + "loss": 0.3988, + "step": 4187 + }, + { + "epoch": 0.18954514596062458, + "grad_norm": 0.7570586941523583, + "learning_rate": 9.347609795289798e-06, + "loss": 0.413, + "step": 4188 + }, + { + "epoch": 0.18959040506902014, + "grad_norm": 0.8178611564571792, + "learning_rate": 9.347247763081834e-06, + "loss": 0.3826, + "step": 4189 + }, + { + "epoch": 0.1896356641774157, + "grad_norm": 0.628098793520388, + "learning_rate": 9.346885637464871e-06, + "loss": 0.5218, + "step": 4190 + }, + { + "epoch": 0.18968092328581126, + "grad_norm": 0.7271969659510159, + "learning_rate": 9.346523418446682e-06, + "loss": 0.3973, + "step": 4191 + }, + { + "epoch": 0.18972618239420683, + "grad_norm": 0.39111820880276893, + "learning_rate": 9.346161106035056e-06, + "loss": 0.5036, + "step": 4192 + }, + { + "epoch": 0.1897714415026024, + "grad_norm": 1.1236366782758218, + "learning_rate": 9.345798700237778e-06, + "loss": 0.4423, + "step": 4193 + }, + { + "epoch": 0.18981670061099795, + "grad_norm": 0.6378386630155638, + "learning_rate": 9.34543620106263e-06, + "loss": 0.3715, + "step": 4194 + }, + { + "epoch": 0.18986195971939354, + "grad_norm": 0.7933074202178645, + "learning_rate": 9.345073608517405e-06, + "loss": 0.3934, + "step": 4195 + }, + { + "epoch": 0.1899072188277891, + "grad_norm": 0.6613598448529215, + "learning_rate": 9.344710922609893e-06, + "loss": 0.3943, + "step": 4196 + }, + { + "epoch": 0.18995247793618467, + "grad_norm": 0.7896961749002904, + "learning_rate": 9.344348143347888e-06, + "loss": 0.4128, + "step": 4197 + }, + { + "epoch": 0.18999773704458023, + "grad_norm": 0.6664564006056659, + "learning_rate": 9.343985270739184e-06, + "loss": 0.3779, + "step": 4198 + }, + { + "epoch": 0.1900429961529758, + "grad_norm": 0.6422339067166803, + "learning_rate": 9.343622304791577e-06, + "loss": 0.3793, + "step": 4199 + }, + { + "epoch": 0.19008825526137135, + "grad_norm": 0.446876644874073, + "learning_rate": 9.343259245512866e-06, + "loss": 0.4923, + "step": 4200 + }, + { + "epoch": 0.19013351436976691, + "grad_norm": 0.38858522128843226, + "learning_rate": 9.342896092910857e-06, + "loss": 0.5146, + "step": 4201 + }, + { + "epoch": 0.19017877347816248, + "grad_norm": 0.7559868937443666, + "learning_rate": 9.342532846993345e-06, + "loss": 0.4266, + "step": 4202 + }, + { + "epoch": 0.19022403258655804, + "grad_norm": 0.6361014808828712, + "learning_rate": 9.342169507768143e-06, + "loss": 0.4009, + "step": 4203 + }, + { + "epoch": 0.1902692916949536, + "grad_norm": 0.6703834528070357, + "learning_rate": 9.341806075243049e-06, + "loss": 0.3937, + "step": 4204 + }, + { + "epoch": 0.19031455080334916, + "grad_norm": 0.8034159210953544, + "learning_rate": 9.341442549425882e-06, + "loss": 0.4345, + "step": 4205 + }, + { + "epoch": 0.19035980991174473, + "grad_norm": 0.70894004080529, + "learning_rate": 9.341078930324446e-06, + "loss": 0.4077, + "step": 4206 + }, + { + "epoch": 0.19040506902014032, + "grad_norm": 0.6265618012794626, + "learning_rate": 9.340715217946557e-06, + "loss": 0.505, + "step": 4207 + }, + { + "epoch": 0.19045032812853588, + "grad_norm": 0.6867976221003556, + "learning_rate": 9.34035141230003e-06, + "loss": 0.4117, + "step": 4208 + }, + { + "epoch": 0.19049558723693144, + "grad_norm": 0.6815725037318351, + "learning_rate": 9.339987513392681e-06, + "loss": 0.3745, + "step": 4209 + }, + { + "epoch": 0.190540846345327, + "grad_norm": 0.9252713915472213, + "learning_rate": 9.33962352123233e-06, + "loss": 0.4132, + "step": 4210 + }, + { + "epoch": 0.19058610545372257, + "grad_norm": 0.35903187846953316, + "learning_rate": 9.339259435826798e-06, + "loss": 0.4936, + "step": 4211 + }, + { + "epoch": 0.19063136456211813, + "grad_norm": 0.7351904438219221, + "learning_rate": 9.338895257183907e-06, + "loss": 0.3868, + "step": 4212 + }, + { + "epoch": 0.1906766236705137, + "grad_norm": 0.666741204958213, + "learning_rate": 9.338530985311483e-06, + "loss": 0.427, + "step": 4213 + }, + { + "epoch": 0.19072188277890925, + "grad_norm": 0.6864012420285249, + "learning_rate": 9.338166620217353e-06, + "loss": 0.3899, + "step": 4214 + }, + { + "epoch": 0.19076714188730481, + "grad_norm": 0.6406318140484143, + "learning_rate": 9.337802161909344e-06, + "loss": 0.4032, + "step": 4215 + }, + { + "epoch": 0.19081240099570038, + "grad_norm": 0.6779079194311202, + "learning_rate": 9.337437610395292e-06, + "loss": 0.4443, + "step": 4216 + }, + { + "epoch": 0.19085766010409594, + "grad_norm": 0.8085914383393745, + "learning_rate": 9.337072965683026e-06, + "loss": 0.4001, + "step": 4217 + }, + { + "epoch": 0.1909029192124915, + "grad_norm": 0.8418371319390886, + "learning_rate": 9.336708227780382e-06, + "loss": 0.4139, + "step": 4218 + }, + { + "epoch": 0.1909481783208871, + "grad_norm": 0.5312634645258777, + "learning_rate": 9.336343396695197e-06, + "loss": 0.4937, + "step": 4219 + }, + { + "epoch": 0.19099343742928265, + "grad_norm": 0.8147529416185956, + "learning_rate": 9.335978472435311e-06, + "loss": 0.3672, + "step": 4220 + }, + { + "epoch": 0.19103869653767822, + "grad_norm": 0.36155580688435995, + "learning_rate": 9.335613455008565e-06, + "loss": 0.499, + "step": 4221 + }, + { + "epoch": 0.19108395564607378, + "grad_norm": 0.8797910704411804, + "learning_rate": 9.335248344422803e-06, + "loss": 0.4269, + "step": 4222 + }, + { + "epoch": 0.19112921475446934, + "grad_norm": 0.6906031892517221, + "learning_rate": 9.334883140685867e-06, + "loss": 0.4253, + "step": 4223 + }, + { + "epoch": 0.1911744738628649, + "grad_norm": 0.7501075268027417, + "learning_rate": 9.334517843805606e-06, + "loss": 0.4356, + "step": 4224 + }, + { + "epoch": 0.19121973297126046, + "grad_norm": 0.8377097360755146, + "learning_rate": 9.334152453789868e-06, + "loss": 0.4198, + "step": 4225 + }, + { + "epoch": 0.19126499207965603, + "grad_norm": 0.6989885826530646, + "learning_rate": 9.333786970646507e-06, + "loss": 0.4341, + "step": 4226 + }, + { + "epoch": 0.1913102511880516, + "grad_norm": 0.6613263318372491, + "learning_rate": 9.333421394383374e-06, + "loss": 0.3878, + "step": 4227 + }, + { + "epoch": 0.19135551029644715, + "grad_norm": 0.5685047682663741, + "learning_rate": 9.333055725008323e-06, + "loss": 0.5035, + "step": 4228 + }, + { + "epoch": 0.1914007694048427, + "grad_norm": 0.7589492207434562, + "learning_rate": 9.332689962529213e-06, + "loss": 0.4241, + "step": 4229 + }, + { + "epoch": 0.19144602851323828, + "grad_norm": 0.4250628070188152, + "learning_rate": 9.332324106953903e-06, + "loss": 0.5119, + "step": 4230 + }, + { + "epoch": 0.19149128762163387, + "grad_norm": 0.7021573180456467, + "learning_rate": 9.331958158290253e-06, + "loss": 0.4065, + "step": 4231 + }, + { + "epoch": 0.19153654673002943, + "grad_norm": 0.382512596580531, + "learning_rate": 9.331592116546128e-06, + "loss": 0.4987, + "step": 4232 + }, + { + "epoch": 0.191581805838425, + "grad_norm": 0.7330051980181821, + "learning_rate": 9.33122598172939e-06, + "loss": 0.4129, + "step": 4233 + }, + { + "epoch": 0.19162706494682055, + "grad_norm": 0.689069453015296, + "learning_rate": 9.33085975384791e-06, + "loss": 0.4418, + "step": 4234 + }, + { + "epoch": 0.19167232405521611, + "grad_norm": 0.6607414917334931, + "learning_rate": 9.330493432909553e-06, + "loss": 0.3966, + "step": 4235 + }, + { + "epoch": 0.19171758316361168, + "grad_norm": 0.6062373121085459, + "learning_rate": 9.330127018922195e-06, + "loss": 0.3827, + "step": 4236 + }, + { + "epoch": 0.19176284227200724, + "grad_norm": 0.6657482893388922, + "learning_rate": 9.329760511893703e-06, + "loss": 0.4258, + "step": 4237 + }, + { + "epoch": 0.1918081013804028, + "grad_norm": 0.76336256720969, + "learning_rate": 9.329393911831957e-06, + "loss": 0.5121, + "step": 4238 + }, + { + "epoch": 0.19185336048879836, + "grad_norm": 0.570098781814426, + "learning_rate": 9.329027218744833e-06, + "loss": 0.4937, + "step": 4239 + }, + { + "epoch": 0.19189861959719393, + "grad_norm": 0.6741787772987584, + "learning_rate": 9.328660432640211e-06, + "loss": 0.4024, + "step": 4240 + }, + { + "epoch": 0.1919438787055895, + "grad_norm": 0.6141085288648717, + "learning_rate": 9.32829355352597e-06, + "loss": 0.4197, + "step": 4241 + }, + { + "epoch": 0.19198913781398508, + "grad_norm": 0.535364704619855, + "learning_rate": 9.327926581409992e-06, + "loss": 0.5256, + "step": 4242 + }, + { + "epoch": 0.19203439692238064, + "grad_norm": 0.6452703911522668, + "learning_rate": 9.327559516300164e-06, + "loss": 0.4067, + "step": 4243 + }, + { + "epoch": 0.1920796560307762, + "grad_norm": 0.6459242711060674, + "learning_rate": 9.327192358204374e-06, + "loss": 0.3749, + "step": 4244 + }, + { + "epoch": 0.19212491513917176, + "grad_norm": 0.6934224689498821, + "learning_rate": 9.32682510713051e-06, + "loss": 0.397, + "step": 4245 + }, + { + "epoch": 0.19217017424756733, + "grad_norm": 0.7035646271268499, + "learning_rate": 9.326457763086463e-06, + "loss": 0.3713, + "step": 4246 + }, + { + "epoch": 0.1922154333559629, + "grad_norm": 0.6359014330987115, + "learning_rate": 9.326090326080129e-06, + "loss": 0.412, + "step": 4247 + }, + { + "epoch": 0.19226069246435845, + "grad_norm": 0.8562684223334093, + "learning_rate": 9.325722796119396e-06, + "loss": 0.5247, + "step": 4248 + }, + { + "epoch": 0.192305951572754, + "grad_norm": 0.668983090986784, + "learning_rate": 9.325355173212169e-06, + "loss": 0.3677, + "step": 4249 + }, + { + "epoch": 0.19235121068114958, + "grad_norm": 0.6735918825748062, + "learning_rate": 9.324987457366342e-06, + "loss": 0.4335, + "step": 4250 + }, + { + "epoch": 0.19239646978954514, + "grad_norm": 0.6111348754854273, + "learning_rate": 9.324619648589818e-06, + "loss": 0.3478, + "step": 4251 + }, + { + "epoch": 0.1924417288979407, + "grad_norm": 0.6529173510325869, + "learning_rate": 9.324251746890501e-06, + "loss": 0.3914, + "step": 4252 + }, + { + "epoch": 0.19248698800633626, + "grad_norm": 0.7176116943040441, + "learning_rate": 9.323883752276294e-06, + "loss": 0.4417, + "step": 4253 + }, + { + "epoch": 0.19253224711473185, + "grad_norm": 0.8261099452942321, + "learning_rate": 9.323515664755105e-06, + "loss": 0.3822, + "step": 4254 + }, + { + "epoch": 0.19257750622312741, + "grad_norm": 0.7602870493102301, + "learning_rate": 9.323147484334843e-06, + "loss": 0.4208, + "step": 4255 + }, + { + "epoch": 0.19262276533152298, + "grad_norm": 0.6966871082756921, + "learning_rate": 9.322779211023418e-06, + "loss": 0.431, + "step": 4256 + }, + { + "epoch": 0.19266802443991854, + "grad_norm": 0.6646502179431876, + "learning_rate": 9.322410844828747e-06, + "loss": 0.4047, + "step": 4257 + }, + { + "epoch": 0.1927132835483141, + "grad_norm": 0.7620936660471155, + "learning_rate": 9.322042385758738e-06, + "loss": 0.4006, + "step": 4258 + }, + { + "epoch": 0.19275854265670966, + "grad_norm": 0.5508551135263602, + "learning_rate": 9.321673833821316e-06, + "loss": 0.4854, + "step": 4259 + }, + { + "epoch": 0.19280380176510523, + "grad_norm": 0.7260689341128518, + "learning_rate": 9.321305189024395e-06, + "loss": 0.4336, + "step": 4260 + }, + { + "epoch": 0.1928490608735008, + "grad_norm": 0.6686325587042482, + "learning_rate": 9.320936451375896e-06, + "loss": 0.3674, + "step": 4261 + }, + { + "epoch": 0.19289431998189635, + "grad_norm": 0.6248650776883086, + "learning_rate": 9.320567620883746e-06, + "loss": 0.3821, + "step": 4262 + }, + { + "epoch": 0.1929395790902919, + "grad_norm": 0.708121072026063, + "learning_rate": 9.320198697555866e-06, + "loss": 0.407, + "step": 4263 + }, + { + "epoch": 0.19298483819868748, + "grad_norm": 0.6231187055806207, + "learning_rate": 9.319829681400185e-06, + "loss": 0.3846, + "step": 4264 + }, + { + "epoch": 0.19303009730708304, + "grad_norm": 0.43480382071748097, + "learning_rate": 9.319460572424632e-06, + "loss": 0.5126, + "step": 4265 + }, + { + "epoch": 0.19307535641547863, + "grad_norm": 0.658036330731171, + "learning_rate": 9.319091370637136e-06, + "loss": 0.4143, + "step": 4266 + }, + { + "epoch": 0.1931206155238742, + "grad_norm": 0.6675050586561079, + "learning_rate": 9.318722076045632e-06, + "loss": 0.3835, + "step": 4267 + }, + { + "epoch": 0.19316587463226975, + "grad_norm": 0.6878269185310447, + "learning_rate": 9.318352688658055e-06, + "loss": 0.389, + "step": 4268 + }, + { + "epoch": 0.19321113374066531, + "grad_norm": 0.6417992974753269, + "learning_rate": 9.317983208482342e-06, + "loss": 0.4138, + "step": 4269 + }, + { + "epoch": 0.19325639284906088, + "grad_norm": 0.7400557604489254, + "learning_rate": 9.317613635526431e-06, + "loss": 0.4442, + "step": 4270 + }, + { + "epoch": 0.19330165195745644, + "grad_norm": 0.6493969617728693, + "learning_rate": 9.317243969798263e-06, + "loss": 0.3417, + "step": 4271 + }, + { + "epoch": 0.193346911065852, + "grad_norm": 0.7276072755450002, + "learning_rate": 9.31687421130578e-06, + "loss": 0.4165, + "step": 4272 + }, + { + "epoch": 0.19339217017424756, + "grad_norm": 0.7372973315741538, + "learning_rate": 9.31650436005693e-06, + "loss": 0.4105, + "step": 4273 + }, + { + "epoch": 0.19343742928264313, + "grad_norm": 0.7257643318303959, + "learning_rate": 9.31613441605966e-06, + "loss": 0.4357, + "step": 4274 + }, + { + "epoch": 0.1934826883910387, + "grad_norm": 0.4424464950211227, + "learning_rate": 9.315764379321916e-06, + "loss": 0.4984, + "step": 4275 + }, + { + "epoch": 0.19352794749943425, + "grad_norm": 0.6614602225854239, + "learning_rate": 9.31539424985165e-06, + "loss": 0.3993, + "step": 4276 + }, + { + "epoch": 0.1935732066078298, + "grad_norm": 0.7541761185746563, + "learning_rate": 9.315024027656815e-06, + "loss": 0.3853, + "step": 4277 + }, + { + "epoch": 0.1936184657162254, + "grad_norm": 0.660166374597361, + "learning_rate": 9.314653712745368e-06, + "loss": 0.3879, + "step": 4278 + }, + { + "epoch": 0.19366372482462096, + "grad_norm": 0.6435859897251333, + "learning_rate": 9.314283305125262e-06, + "loss": 0.4038, + "step": 4279 + }, + { + "epoch": 0.19370898393301653, + "grad_norm": 0.7788972612870466, + "learning_rate": 9.313912804804459e-06, + "loss": 0.4066, + "step": 4280 + }, + { + "epoch": 0.1937542430414121, + "grad_norm": 0.6496745811186846, + "learning_rate": 9.31354221179092e-06, + "loss": 0.4215, + "step": 4281 + }, + { + "epoch": 0.19379950214980765, + "grad_norm": 0.6848040494516953, + "learning_rate": 9.313171526092606e-06, + "loss": 0.422, + "step": 4282 + }, + { + "epoch": 0.1938447612582032, + "grad_norm": 0.8014840865761476, + "learning_rate": 9.312800747717484e-06, + "loss": 0.3954, + "step": 4283 + }, + { + "epoch": 0.19389002036659878, + "grad_norm": 0.6019739424529075, + "learning_rate": 9.312429876673517e-06, + "loss": 0.3774, + "step": 4284 + }, + { + "epoch": 0.19393527947499434, + "grad_norm": 0.6340649489666178, + "learning_rate": 9.312058912968679e-06, + "loss": 0.3709, + "step": 4285 + }, + { + "epoch": 0.1939805385833899, + "grad_norm": 0.6679937885689637, + "learning_rate": 9.311687856610939e-06, + "loss": 0.4114, + "step": 4286 + }, + { + "epoch": 0.19402579769178546, + "grad_norm": 0.6207236883394622, + "learning_rate": 9.311316707608267e-06, + "loss": 0.3748, + "step": 4287 + }, + { + "epoch": 0.19407105680018102, + "grad_norm": 0.580243437551749, + "learning_rate": 9.31094546596864e-06, + "loss": 0.3919, + "step": 4288 + }, + { + "epoch": 0.19411631590857661, + "grad_norm": 0.7394004485489974, + "learning_rate": 9.310574131700036e-06, + "loss": 0.396, + "step": 4289 + }, + { + "epoch": 0.19416157501697218, + "grad_norm": 0.7232307625440043, + "learning_rate": 9.310202704810433e-06, + "loss": 0.4577, + "step": 4290 + }, + { + "epoch": 0.19420683412536774, + "grad_norm": 0.6854297919590081, + "learning_rate": 9.309831185307812e-06, + "loss": 0.381, + "step": 4291 + }, + { + "epoch": 0.1942520932337633, + "grad_norm": 0.6196581479239693, + "learning_rate": 9.309459573200154e-06, + "loss": 0.4015, + "step": 4292 + }, + { + "epoch": 0.19429735234215886, + "grad_norm": 0.6615913159694972, + "learning_rate": 9.309087868495447e-06, + "loss": 0.3935, + "step": 4293 + }, + { + "epoch": 0.19434261145055443, + "grad_norm": 0.6393318696287117, + "learning_rate": 9.308716071201676e-06, + "loss": 0.4008, + "step": 4294 + }, + { + "epoch": 0.19438787055895, + "grad_norm": 0.546204787393143, + "learning_rate": 9.308344181326829e-06, + "loss": 0.4989, + "step": 4295 + }, + { + "epoch": 0.19443312966734555, + "grad_norm": 0.6960738962155074, + "learning_rate": 9.307972198878897e-06, + "loss": 0.3923, + "step": 4296 + }, + { + "epoch": 0.1944783887757411, + "grad_norm": 0.33892437302645556, + "learning_rate": 9.307600123865874e-06, + "loss": 0.4821, + "step": 4297 + }, + { + "epoch": 0.19452364788413667, + "grad_norm": 0.6390442836312251, + "learning_rate": 9.307227956295754e-06, + "loss": 0.3633, + "step": 4298 + }, + { + "epoch": 0.19456890699253224, + "grad_norm": 0.6792186733996218, + "learning_rate": 9.306855696176536e-06, + "loss": 0.3651, + "step": 4299 + }, + { + "epoch": 0.1946141661009278, + "grad_norm": 0.4289816986536164, + "learning_rate": 9.306483343516212e-06, + "loss": 0.4678, + "step": 4300 + }, + { + "epoch": 0.1946594252093234, + "grad_norm": 0.7078556709325038, + "learning_rate": 9.30611089832279e-06, + "loss": 0.4272, + "step": 4301 + }, + { + "epoch": 0.19470468431771895, + "grad_norm": 0.670063578620746, + "learning_rate": 9.30573836060427e-06, + "loss": 0.3604, + "step": 4302 + }, + { + "epoch": 0.1947499434261145, + "grad_norm": 0.6125312638812249, + "learning_rate": 9.305365730368658e-06, + "loss": 0.343, + "step": 4303 + }, + { + "epoch": 0.19479520253451008, + "grad_norm": 0.43965317617828314, + "learning_rate": 9.304993007623958e-06, + "loss": 0.5143, + "step": 4304 + }, + { + "epoch": 0.19484046164290564, + "grad_norm": 0.6869474154118801, + "learning_rate": 9.30462019237818e-06, + "loss": 0.4482, + "step": 4305 + }, + { + "epoch": 0.1948857207513012, + "grad_norm": 0.37417953819718464, + "learning_rate": 9.304247284639335e-06, + "loss": 0.5008, + "step": 4306 + }, + { + "epoch": 0.19493097985969676, + "grad_norm": 0.6682325190494395, + "learning_rate": 9.303874284415435e-06, + "loss": 0.3748, + "step": 4307 + }, + { + "epoch": 0.19497623896809232, + "grad_norm": 0.6462237565900745, + "learning_rate": 9.303501191714494e-06, + "loss": 0.4181, + "step": 4308 + }, + { + "epoch": 0.1950214980764879, + "grad_norm": 0.644397907031416, + "learning_rate": 9.303128006544531e-06, + "loss": 0.3434, + "step": 4309 + }, + { + "epoch": 0.19506675718488345, + "grad_norm": 0.6405159433598613, + "learning_rate": 9.302754728913563e-06, + "loss": 0.3615, + "step": 4310 + }, + { + "epoch": 0.195112016293279, + "grad_norm": 0.7136582827059487, + "learning_rate": 9.302381358829612e-06, + "loss": 0.4064, + "step": 4311 + }, + { + "epoch": 0.19515727540167457, + "grad_norm": 0.6703286149054923, + "learning_rate": 9.302007896300697e-06, + "loss": 0.4185, + "step": 4312 + }, + { + "epoch": 0.19520253451007016, + "grad_norm": 0.67624507234566, + "learning_rate": 9.301634341334846e-06, + "loss": 0.3992, + "step": 4313 + }, + { + "epoch": 0.19524779361846573, + "grad_norm": 0.6507390990718162, + "learning_rate": 9.301260693940084e-06, + "loss": 0.3774, + "step": 4314 + }, + { + "epoch": 0.1952930527268613, + "grad_norm": 0.635224165327857, + "learning_rate": 9.300886954124442e-06, + "loss": 0.391, + "step": 4315 + }, + { + "epoch": 0.19533831183525685, + "grad_norm": 0.820758878812254, + "learning_rate": 9.300513121895946e-06, + "loss": 0.4185, + "step": 4316 + }, + { + "epoch": 0.1953835709436524, + "grad_norm": 0.6567914034471967, + "learning_rate": 9.300139197262633e-06, + "loss": 0.3871, + "step": 4317 + }, + { + "epoch": 0.19542883005204797, + "grad_norm": 0.7745445471527284, + "learning_rate": 9.299765180232534e-06, + "loss": 0.4992, + "step": 4318 + }, + { + "epoch": 0.19547408916044354, + "grad_norm": 0.5686358701093436, + "learning_rate": 9.299391070813687e-06, + "loss": 0.5268, + "step": 4319 + }, + { + "epoch": 0.1955193482688391, + "grad_norm": 0.7189558651821492, + "learning_rate": 9.29901686901413e-06, + "loss": 0.3943, + "step": 4320 + }, + { + "epoch": 0.19556460737723466, + "grad_norm": 0.6913062360595764, + "learning_rate": 9.298642574841906e-06, + "loss": 0.3723, + "step": 4321 + }, + { + "epoch": 0.19560986648563022, + "grad_norm": 0.7396020547936617, + "learning_rate": 9.298268188305054e-06, + "loss": 0.373, + "step": 4322 + }, + { + "epoch": 0.19565512559402579, + "grad_norm": 0.7529357435564967, + "learning_rate": 9.29789370941162e-06, + "loss": 0.4975, + "step": 4323 + }, + { + "epoch": 0.19570038470242138, + "grad_norm": 0.7697627354205817, + "learning_rate": 9.29751913816965e-06, + "loss": 0.3901, + "step": 4324 + }, + { + "epoch": 0.19574564381081694, + "grad_norm": 0.7005308781484924, + "learning_rate": 9.297144474587193e-06, + "loss": 0.4247, + "step": 4325 + }, + { + "epoch": 0.1957909029192125, + "grad_norm": 0.6823939930425462, + "learning_rate": 9.296769718672298e-06, + "loss": 0.4269, + "step": 4326 + }, + { + "epoch": 0.19583616202760806, + "grad_norm": 0.614401669857915, + "learning_rate": 9.296394870433018e-06, + "loss": 0.5073, + "step": 4327 + }, + { + "epoch": 0.19588142113600363, + "grad_norm": 0.7837169926571875, + "learning_rate": 9.29601992987741e-06, + "loss": 0.4299, + "step": 4328 + }, + { + "epoch": 0.1959266802443992, + "grad_norm": 0.7365197541628816, + "learning_rate": 9.295644897013526e-06, + "loss": 0.4003, + "step": 4329 + }, + { + "epoch": 0.19597193935279475, + "grad_norm": 0.6291839507518706, + "learning_rate": 9.295269771849426e-06, + "loss": 0.4143, + "step": 4330 + }, + { + "epoch": 0.1960171984611903, + "grad_norm": 0.7709732047355847, + "learning_rate": 9.294894554393172e-06, + "loss": 0.4568, + "step": 4331 + }, + { + "epoch": 0.19606245756958587, + "grad_norm": 0.4735295845056872, + "learning_rate": 9.294519244652825e-06, + "loss": 0.5193, + "step": 4332 + }, + { + "epoch": 0.19610771667798144, + "grad_norm": 0.7441985176769523, + "learning_rate": 9.294143842636447e-06, + "loss": 0.4162, + "step": 4333 + }, + { + "epoch": 0.196152975786377, + "grad_norm": 0.6201583591024671, + "learning_rate": 9.293768348352106e-06, + "loss": 0.3498, + "step": 4334 + }, + { + "epoch": 0.19619823489477256, + "grad_norm": 1.1910528546319035, + "learning_rate": 9.293392761807873e-06, + "loss": 0.3856, + "step": 4335 + }, + { + "epoch": 0.19624349400316815, + "grad_norm": 0.7391466697999081, + "learning_rate": 9.293017083011814e-06, + "loss": 0.3971, + "step": 4336 + }, + { + "epoch": 0.1962887531115637, + "grad_norm": 0.679637583536768, + "learning_rate": 9.292641311972004e-06, + "loss": 0.3835, + "step": 4337 + }, + { + "epoch": 0.19633401221995928, + "grad_norm": 0.6965119342754589, + "learning_rate": 9.292265448696515e-06, + "loss": 0.4116, + "step": 4338 + }, + { + "epoch": 0.19637927132835484, + "grad_norm": 0.6969239676501272, + "learning_rate": 9.291889493193424e-06, + "loss": 0.3934, + "step": 4339 + }, + { + "epoch": 0.1964245304367504, + "grad_norm": 0.6554684903091974, + "learning_rate": 9.29151344547081e-06, + "loss": 0.4031, + "step": 4340 + }, + { + "epoch": 0.19646978954514596, + "grad_norm": 0.6461420773694276, + "learning_rate": 9.291137305536752e-06, + "loss": 0.3987, + "step": 4341 + }, + { + "epoch": 0.19651504865354152, + "grad_norm": 0.6893853761388993, + "learning_rate": 9.290761073399333e-06, + "loss": 0.3913, + "step": 4342 + }, + { + "epoch": 0.1965603077619371, + "grad_norm": 0.6926547278011074, + "learning_rate": 9.290384749066636e-06, + "loss": 0.4473, + "step": 4343 + }, + { + "epoch": 0.19660556687033265, + "grad_norm": 0.7730563566336075, + "learning_rate": 9.290008332546749e-06, + "loss": 0.3774, + "step": 4344 + }, + { + "epoch": 0.1966508259787282, + "grad_norm": 0.761834350061357, + "learning_rate": 9.289631823847758e-06, + "loss": 0.3956, + "step": 4345 + }, + { + "epoch": 0.19669608508712377, + "grad_norm": 0.6493671717411257, + "learning_rate": 9.289255222977754e-06, + "loss": 0.4139, + "step": 4346 + }, + { + "epoch": 0.19674134419551934, + "grad_norm": 0.4625826341249837, + "learning_rate": 9.288878529944827e-06, + "loss": 0.4967, + "step": 4347 + }, + { + "epoch": 0.19678660330391493, + "grad_norm": 0.7436204158165354, + "learning_rate": 9.288501744757073e-06, + "loss": 0.4225, + "step": 4348 + }, + { + "epoch": 0.1968318624123105, + "grad_norm": 0.709135533044813, + "learning_rate": 9.28812486742259e-06, + "loss": 0.4244, + "step": 4349 + }, + { + "epoch": 0.19687712152070605, + "grad_norm": 0.6641857934741487, + "learning_rate": 9.287747897949471e-06, + "loss": 0.415, + "step": 4350 + }, + { + "epoch": 0.1969223806291016, + "grad_norm": 0.714834663860521, + "learning_rate": 9.287370836345819e-06, + "loss": 0.4266, + "step": 4351 + }, + { + "epoch": 0.19696763973749717, + "grad_norm": 0.648691599092435, + "learning_rate": 9.286993682619736e-06, + "loss": 0.356, + "step": 4352 + }, + { + "epoch": 0.19701289884589274, + "grad_norm": 0.6686053436740277, + "learning_rate": 9.286616436779326e-06, + "loss": 0.3934, + "step": 4353 + }, + { + "epoch": 0.1970581579542883, + "grad_norm": 0.6551766852685967, + "learning_rate": 9.286239098832693e-06, + "loss": 0.4217, + "step": 4354 + }, + { + "epoch": 0.19710341706268386, + "grad_norm": 0.6901906038206445, + "learning_rate": 9.285861668787947e-06, + "loss": 0.4014, + "step": 4355 + }, + { + "epoch": 0.19714867617107942, + "grad_norm": 0.6851944238808891, + "learning_rate": 9.285484146653195e-06, + "loss": 0.429, + "step": 4356 + }, + { + "epoch": 0.19719393527947499, + "grad_norm": 0.5409067984664292, + "learning_rate": 9.285106532436552e-06, + "loss": 0.5184, + "step": 4357 + }, + { + "epoch": 0.19723919438787055, + "grad_norm": 0.4302281353429989, + "learning_rate": 9.28472882614613e-06, + "loss": 0.5058, + "step": 4358 + }, + { + "epoch": 0.1972844534962661, + "grad_norm": 0.7081951904028086, + "learning_rate": 9.284351027790044e-06, + "loss": 0.3741, + "step": 4359 + }, + { + "epoch": 0.1973297126046617, + "grad_norm": 0.6824997425810786, + "learning_rate": 9.283973137376414e-06, + "loss": 0.4113, + "step": 4360 + }, + { + "epoch": 0.19737497171305726, + "grad_norm": 0.7060524282807865, + "learning_rate": 9.283595154913358e-06, + "loss": 0.3592, + "step": 4361 + }, + { + "epoch": 0.19742023082145282, + "grad_norm": 0.7613516912992961, + "learning_rate": 9.283217080409e-06, + "loss": 0.4269, + "step": 4362 + }, + { + "epoch": 0.1974654899298484, + "grad_norm": 0.7603432579957048, + "learning_rate": 9.28283891387146e-06, + "loss": 0.4193, + "step": 4363 + }, + { + "epoch": 0.19751074903824395, + "grad_norm": 0.6901066095588091, + "learning_rate": 9.282460655308864e-06, + "loss": 0.4444, + "step": 4364 + }, + { + "epoch": 0.1975560081466395, + "grad_norm": 0.584544309983049, + "learning_rate": 9.282082304729343e-06, + "loss": 0.3687, + "step": 4365 + }, + { + "epoch": 0.19760126725503507, + "grad_norm": 0.7097341226175008, + "learning_rate": 9.281703862141024e-06, + "loss": 0.3928, + "step": 4366 + }, + { + "epoch": 0.19764652636343064, + "grad_norm": 0.704967973293008, + "learning_rate": 9.28132532755204e-06, + "loss": 0.3784, + "step": 4367 + }, + { + "epoch": 0.1976917854718262, + "grad_norm": 0.9059692746686824, + "learning_rate": 9.280946700970524e-06, + "loss": 0.4982, + "step": 4368 + }, + { + "epoch": 0.19773704458022176, + "grad_norm": 0.6941175981179759, + "learning_rate": 9.280567982404611e-06, + "loss": 0.3921, + "step": 4369 + }, + { + "epoch": 0.19778230368861732, + "grad_norm": 0.7020988815556046, + "learning_rate": 9.280189171862439e-06, + "loss": 0.361, + "step": 4370 + }, + { + "epoch": 0.1978275627970129, + "grad_norm": 0.733757185054716, + "learning_rate": 9.279810269352147e-06, + "loss": 0.4315, + "step": 4371 + }, + { + "epoch": 0.19787282190540847, + "grad_norm": 0.9030378119274879, + "learning_rate": 9.279431274881876e-06, + "loss": 0.3648, + "step": 4372 + }, + { + "epoch": 0.19791808101380404, + "grad_norm": 0.6822015900189836, + "learning_rate": 9.279052188459772e-06, + "loss": 0.4028, + "step": 4373 + }, + { + "epoch": 0.1979633401221996, + "grad_norm": 0.6302617832804666, + "learning_rate": 9.278673010093977e-06, + "loss": 0.5142, + "step": 4374 + }, + { + "epoch": 0.19800859923059516, + "grad_norm": 0.496785804689605, + "learning_rate": 9.278293739792642e-06, + "loss": 0.5014, + "step": 4375 + }, + { + "epoch": 0.19805385833899072, + "grad_norm": 0.7594395569593272, + "learning_rate": 9.277914377563911e-06, + "loss": 0.4004, + "step": 4376 + }, + { + "epoch": 0.19809911744738629, + "grad_norm": 0.6961484073026286, + "learning_rate": 9.277534923415941e-06, + "loss": 0.3856, + "step": 4377 + }, + { + "epoch": 0.19814437655578185, + "grad_norm": 0.752079978554707, + "learning_rate": 9.277155377356881e-06, + "loss": 0.4135, + "step": 4378 + }, + { + "epoch": 0.1981896356641774, + "grad_norm": 0.7544369391544148, + "learning_rate": 9.27677573939489e-06, + "loss": 0.3979, + "step": 4379 + }, + { + "epoch": 0.19823489477257297, + "grad_norm": 0.730897043381073, + "learning_rate": 9.276396009538122e-06, + "loss": 0.4056, + "step": 4380 + }, + { + "epoch": 0.19828015388096853, + "grad_norm": 0.6920481467125367, + "learning_rate": 9.276016187794739e-06, + "loss": 0.4194, + "step": 4381 + }, + { + "epoch": 0.1983254129893641, + "grad_norm": 0.6528896991545643, + "learning_rate": 9.275636274172901e-06, + "loss": 0.4046, + "step": 4382 + }, + { + "epoch": 0.1983706720977597, + "grad_norm": 0.7660841197828047, + "learning_rate": 9.27525626868077e-06, + "loss": 0.3838, + "step": 4383 + }, + { + "epoch": 0.19841593120615525, + "grad_norm": 0.7133729442628481, + "learning_rate": 9.274876171326514e-06, + "loss": 0.3808, + "step": 4384 + }, + { + "epoch": 0.1984611903145508, + "grad_norm": 0.6404607940509356, + "learning_rate": 9.274495982118297e-06, + "loss": 0.3779, + "step": 4385 + }, + { + "epoch": 0.19850644942294637, + "grad_norm": 0.7199080919720658, + "learning_rate": 9.27411570106429e-06, + "loss": 0.4167, + "step": 4386 + }, + { + "epoch": 0.19855170853134194, + "grad_norm": 1.2733519021150366, + "learning_rate": 9.273735328172664e-06, + "loss": 0.5265, + "step": 4387 + }, + { + "epoch": 0.1985969676397375, + "grad_norm": 0.7103108166886162, + "learning_rate": 9.273354863451589e-06, + "loss": 0.3537, + "step": 4388 + }, + { + "epoch": 0.19864222674813306, + "grad_norm": 0.7246408967666235, + "learning_rate": 9.272974306909246e-06, + "loss": 0.3739, + "step": 4389 + }, + { + "epoch": 0.19868748585652862, + "grad_norm": 0.6757107097574218, + "learning_rate": 9.272593658553806e-06, + "loss": 0.354, + "step": 4390 + }, + { + "epoch": 0.19873274496492419, + "grad_norm": 0.7067224266353317, + "learning_rate": 9.272212918393452e-06, + "loss": 0.4137, + "step": 4391 + }, + { + "epoch": 0.19877800407331975, + "grad_norm": 0.6155517958991198, + "learning_rate": 9.271832086436364e-06, + "loss": 0.3582, + "step": 4392 + }, + { + "epoch": 0.1988232631817153, + "grad_norm": 0.6805250960188165, + "learning_rate": 9.271451162690723e-06, + "loss": 0.3961, + "step": 4393 + }, + { + "epoch": 0.19886852229011087, + "grad_norm": 0.6844985043600663, + "learning_rate": 9.271070147164715e-06, + "loss": 0.375, + "step": 4394 + }, + { + "epoch": 0.19891378139850646, + "grad_norm": 0.7043381339443586, + "learning_rate": 9.270689039866528e-06, + "loss": 0.4425, + "step": 4395 + }, + { + "epoch": 0.19895904050690202, + "grad_norm": 0.6478549868387321, + "learning_rate": 9.270307840804349e-06, + "loss": 0.3539, + "step": 4396 + }, + { + "epoch": 0.1990042996152976, + "grad_norm": 0.6498360959963834, + "learning_rate": 9.26992654998637e-06, + "loss": 0.3831, + "step": 4397 + }, + { + "epoch": 0.19904955872369315, + "grad_norm": 0.674985630028175, + "learning_rate": 9.269545167420786e-06, + "loss": 0.4313, + "step": 4398 + }, + { + "epoch": 0.1990948178320887, + "grad_norm": 0.6890540926953495, + "learning_rate": 9.269163693115786e-06, + "loss": 0.4219, + "step": 4399 + }, + { + "epoch": 0.19914007694048427, + "grad_norm": 0.6898930046935953, + "learning_rate": 9.268782127079571e-06, + "loss": 0.3732, + "step": 4400 + }, + { + "epoch": 0.19918533604887984, + "grad_norm": 0.6177411270268293, + "learning_rate": 9.26840046932034e-06, + "loss": 0.3636, + "step": 4401 + }, + { + "epoch": 0.1992305951572754, + "grad_norm": 1.3825448268802494, + "learning_rate": 9.26801871984629e-06, + "loss": 0.5663, + "step": 4402 + }, + { + "epoch": 0.19927585426567096, + "grad_norm": 0.9528669719885208, + "learning_rate": 9.267636878665629e-06, + "loss": 0.5335, + "step": 4403 + }, + { + "epoch": 0.19932111337406652, + "grad_norm": 0.6940354386270224, + "learning_rate": 9.267254945786556e-06, + "loss": 0.3852, + "step": 4404 + }, + { + "epoch": 0.19936637248246208, + "grad_norm": 0.7044042232798097, + "learning_rate": 9.26687292121728e-06, + "loss": 0.3684, + "step": 4405 + }, + { + "epoch": 0.19941163159085765, + "grad_norm": 0.8272582311689283, + "learning_rate": 9.26649080496601e-06, + "loss": 0.4547, + "step": 4406 + }, + { + "epoch": 0.19945689069925324, + "grad_norm": 0.8991881377986967, + "learning_rate": 9.266108597040957e-06, + "loss": 0.4182, + "step": 4407 + }, + { + "epoch": 0.1995021498076488, + "grad_norm": 0.8684215744351997, + "learning_rate": 9.265726297450332e-06, + "loss": 0.4388, + "step": 4408 + }, + { + "epoch": 0.19954740891604436, + "grad_norm": 0.671845166331204, + "learning_rate": 9.265343906202351e-06, + "loss": 0.376, + "step": 4409 + }, + { + "epoch": 0.19959266802443992, + "grad_norm": 0.7218212880590468, + "learning_rate": 9.264961423305229e-06, + "loss": 0.4265, + "step": 4410 + }, + { + "epoch": 0.19963792713283549, + "grad_norm": 0.7227038515063788, + "learning_rate": 9.264578848767184e-06, + "loss": 0.3955, + "step": 4411 + }, + { + "epoch": 0.19968318624123105, + "grad_norm": 0.7151889788085047, + "learning_rate": 9.264196182596438e-06, + "loss": 0.4277, + "step": 4412 + }, + { + "epoch": 0.1997284453496266, + "grad_norm": 0.8204253178429187, + "learning_rate": 9.26381342480121e-06, + "loss": 0.3914, + "step": 4413 + }, + { + "epoch": 0.19977370445802217, + "grad_norm": 0.7712295837039834, + "learning_rate": 9.26343057538973e-06, + "loss": 0.3781, + "step": 4414 + }, + { + "epoch": 0.19981896356641773, + "grad_norm": 0.7006062972466253, + "learning_rate": 9.263047634370221e-06, + "loss": 0.3883, + "step": 4415 + }, + { + "epoch": 0.1998642226748133, + "grad_norm": 0.7042611085484666, + "learning_rate": 9.26266460175091e-06, + "loss": 0.4038, + "step": 4416 + }, + { + "epoch": 0.19990948178320886, + "grad_norm": 0.7092910007919806, + "learning_rate": 9.262281477540029e-06, + "loss": 0.4182, + "step": 4417 + }, + { + "epoch": 0.19995474089160445, + "grad_norm": 0.6352626827612116, + "learning_rate": 9.26189826174581e-06, + "loss": 0.4053, + "step": 4418 + }, + { + "epoch": 0.2, + "grad_norm": 0.6908034332242652, + "learning_rate": 9.261514954376487e-06, + "loss": 0.4037, + "step": 4419 + }, + { + "epoch": 0.20004525910839557, + "grad_norm": 0.6393519206087547, + "learning_rate": 9.261131555440295e-06, + "loss": 0.4285, + "step": 4420 + }, + { + "epoch": 0.20009051821679114, + "grad_norm": 0.7543033606632701, + "learning_rate": 9.260748064945473e-06, + "loss": 0.3904, + "step": 4421 + }, + { + "epoch": 0.2001357773251867, + "grad_norm": 0.8441036963952208, + "learning_rate": 9.26036448290026e-06, + "loss": 0.4251, + "step": 4422 + }, + { + "epoch": 0.20018103643358226, + "grad_norm": 0.7412155904408332, + "learning_rate": 9.259980809312901e-06, + "loss": 0.3955, + "step": 4423 + }, + { + "epoch": 0.20022629554197782, + "grad_norm": 0.7085212725378105, + "learning_rate": 9.259597044191635e-06, + "loss": 0.3939, + "step": 4424 + }, + { + "epoch": 0.20027155465037338, + "grad_norm": 3.2353281721630096, + "learning_rate": 9.259213187544714e-06, + "loss": 0.6369, + "step": 4425 + }, + { + "epoch": 0.20031681375876895, + "grad_norm": 1.81014429072996, + "learning_rate": 9.25882923938038e-06, + "loss": 0.5755, + "step": 4426 + }, + { + "epoch": 0.2003620728671645, + "grad_norm": 0.9136144605035063, + "learning_rate": 9.25844519970689e-06, + "loss": 0.4165, + "step": 4427 + }, + { + "epoch": 0.20040733197556007, + "grad_norm": 1.02823154637771, + "learning_rate": 9.258061068532487e-06, + "loss": 0.4422, + "step": 4428 + }, + { + "epoch": 0.20045259108395563, + "grad_norm": 1.1601240780768014, + "learning_rate": 9.257676845865431e-06, + "loss": 0.4443, + "step": 4429 + }, + { + "epoch": 0.20049785019235122, + "grad_norm": 2.571160437651429, + "learning_rate": 9.257292531713977e-06, + "loss": 0.5761, + "step": 4430 + }, + { + "epoch": 0.20054310930074679, + "grad_norm": 0.8266987002197626, + "learning_rate": 9.25690812608638e-06, + "loss": 0.4099, + "step": 4431 + }, + { + "epoch": 0.20058836840914235, + "grad_norm": 0.7510003168521713, + "learning_rate": 9.256523628990903e-06, + "loss": 0.3916, + "step": 4432 + }, + { + "epoch": 0.2006336275175379, + "grad_norm": 0.7427283363249146, + "learning_rate": 9.256139040435806e-06, + "loss": 0.4012, + "step": 4433 + }, + { + "epoch": 0.20067888662593347, + "grad_norm": 0.8078106575136649, + "learning_rate": 9.255754360429353e-06, + "loss": 0.3643, + "step": 4434 + }, + { + "epoch": 0.20072414573432903, + "grad_norm": 0.8547619551004447, + "learning_rate": 9.255369588979806e-06, + "loss": 0.3855, + "step": 4435 + }, + { + "epoch": 0.2007694048427246, + "grad_norm": 0.7763954730463974, + "learning_rate": 9.25498472609544e-06, + "loss": 0.3987, + "step": 4436 + }, + { + "epoch": 0.20081466395112016, + "grad_norm": 0.8427741534604029, + "learning_rate": 9.254599771784519e-06, + "loss": 0.4286, + "step": 4437 + }, + { + "epoch": 0.20085992305951572, + "grad_norm": 0.7836133705709749, + "learning_rate": 9.254214726055314e-06, + "loss": 0.3951, + "step": 4438 + }, + { + "epoch": 0.20090518216791128, + "grad_norm": 0.7298023584383054, + "learning_rate": 9.253829588916103e-06, + "loss": 0.4004, + "step": 4439 + }, + { + "epoch": 0.20095044127630685, + "grad_norm": 0.665970725982351, + "learning_rate": 9.253444360375157e-06, + "loss": 0.417, + "step": 4440 + }, + { + "epoch": 0.2009957003847024, + "grad_norm": 1.531114314437859, + "learning_rate": 9.253059040440757e-06, + "loss": 0.6065, + "step": 4441 + }, + { + "epoch": 0.201040959493098, + "grad_norm": 0.7515967029741009, + "learning_rate": 9.25267362912118e-06, + "loss": 0.4067, + "step": 4442 + }, + { + "epoch": 0.20108621860149356, + "grad_norm": 0.6829143827997587, + "learning_rate": 9.252288126424707e-06, + "loss": 0.3985, + "step": 4443 + }, + { + "epoch": 0.20113147770988912, + "grad_norm": 1.1010757231316262, + "learning_rate": 9.251902532359622e-06, + "loss": 0.5895, + "step": 4444 + }, + { + "epoch": 0.20117673681828468, + "grad_norm": 0.7326225536696734, + "learning_rate": 9.25151684693421e-06, + "loss": 0.3961, + "step": 4445 + }, + { + "epoch": 0.20122199592668025, + "grad_norm": 0.7448227710158162, + "learning_rate": 9.251131070156761e-06, + "loss": 0.4333, + "step": 4446 + }, + { + "epoch": 0.2012672550350758, + "grad_norm": 0.734666576540432, + "learning_rate": 9.250745202035558e-06, + "loss": 0.4081, + "step": 4447 + }, + { + "epoch": 0.20131251414347137, + "grad_norm": 0.6936139236742962, + "learning_rate": 9.250359242578898e-06, + "loss": 0.3982, + "step": 4448 + }, + { + "epoch": 0.20135777325186693, + "grad_norm": 0.7448189175784459, + "learning_rate": 9.249973191795072e-06, + "loss": 0.3906, + "step": 4449 + }, + { + "epoch": 0.2014030323602625, + "grad_norm": 0.7389504242537271, + "learning_rate": 9.249587049692375e-06, + "loss": 0.436, + "step": 4450 + }, + { + "epoch": 0.20144829146865806, + "grad_norm": 0.8361161553792722, + "learning_rate": 9.249200816279103e-06, + "loss": 0.5595, + "step": 4451 + }, + { + "epoch": 0.20149355057705362, + "grad_norm": 0.7152862153144932, + "learning_rate": 9.248814491563555e-06, + "loss": 0.3676, + "step": 4452 + }, + { + "epoch": 0.20153880968544918, + "grad_norm": 0.6533846117754827, + "learning_rate": 9.248428075554034e-06, + "loss": 0.3462, + "step": 4453 + }, + { + "epoch": 0.20158406879384477, + "grad_norm": 0.8677187836764106, + "learning_rate": 9.248041568258843e-06, + "loss": 0.4081, + "step": 4454 + }, + { + "epoch": 0.20162932790224034, + "grad_norm": 0.6707462106808738, + "learning_rate": 9.247654969686283e-06, + "loss": 0.4266, + "step": 4455 + }, + { + "epoch": 0.2016745870106359, + "grad_norm": 0.672407091659003, + "learning_rate": 9.247268279844666e-06, + "loss": 0.41, + "step": 4456 + }, + { + "epoch": 0.20171984611903146, + "grad_norm": 0.6815229751795202, + "learning_rate": 9.246881498742296e-06, + "loss": 0.4167, + "step": 4457 + }, + { + "epoch": 0.20176510522742702, + "grad_norm": 0.5479951855336056, + "learning_rate": 9.246494626387487e-06, + "loss": 0.5093, + "step": 4458 + }, + { + "epoch": 0.20181036433582258, + "grad_norm": 0.5161973718916856, + "learning_rate": 9.24610766278855e-06, + "loss": 0.5098, + "step": 4459 + }, + { + "epoch": 0.20185562344421815, + "grad_norm": 0.850925537761303, + "learning_rate": 9.245720607953802e-06, + "loss": 0.3833, + "step": 4460 + }, + { + "epoch": 0.2019008825526137, + "grad_norm": 0.6542363235875374, + "learning_rate": 9.245333461891555e-06, + "loss": 0.3858, + "step": 4461 + }, + { + "epoch": 0.20194614166100927, + "grad_norm": 0.6973756715606783, + "learning_rate": 9.244946224610132e-06, + "loss": 0.373, + "step": 4462 + }, + { + "epoch": 0.20199140076940483, + "grad_norm": 0.8079341217942448, + "learning_rate": 9.244558896117852e-06, + "loss": 0.4349, + "step": 4463 + }, + { + "epoch": 0.2020366598778004, + "grad_norm": 0.681137135301652, + "learning_rate": 9.244171476423037e-06, + "loss": 0.4003, + "step": 4464 + }, + { + "epoch": 0.20208191898619599, + "grad_norm": 0.7047389289061264, + "learning_rate": 9.243783965534012e-06, + "loss": 0.4466, + "step": 4465 + }, + { + "epoch": 0.20212717809459155, + "grad_norm": 0.7248341836247567, + "learning_rate": 9.243396363459104e-06, + "loss": 0.3839, + "step": 4466 + }, + { + "epoch": 0.2021724372029871, + "grad_norm": 0.6816766720407302, + "learning_rate": 9.24300867020664e-06, + "loss": 0.4275, + "step": 4467 + }, + { + "epoch": 0.20221769631138267, + "grad_norm": 0.6565503559461989, + "learning_rate": 9.242620885784952e-06, + "loss": 0.3764, + "step": 4468 + }, + { + "epoch": 0.20226295541977823, + "grad_norm": 0.7332199448787484, + "learning_rate": 9.242233010202371e-06, + "loss": 0.3909, + "step": 4469 + }, + { + "epoch": 0.2023082145281738, + "grad_norm": 0.6763839124000454, + "learning_rate": 9.241845043467232e-06, + "loss": 0.5072, + "step": 4470 + }, + { + "epoch": 0.20235347363656936, + "grad_norm": 0.691110792809285, + "learning_rate": 9.241456985587868e-06, + "loss": 0.3975, + "step": 4471 + }, + { + "epoch": 0.20239873274496492, + "grad_norm": 0.6932678792221445, + "learning_rate": 9.241068836572623e-06, + "loss": 0.4609, + "step": 4472 + }, + { + "epoch": 0.20244399185336048, + "grad_norm": 0.44079329807617135, + "learning_rate": 9.240680596429833e-06, + "loss": 0.495, + "step": 4473 + }, + { + "epoch": 0.20248925096175605, + "grad_norm": 0.655790544577791, + "learning_rate": 9.240292265167843e-06, + "loss": 0.4043, + "step": 4474 + }, + { + "epoch": 0.2025345100701516, + "grad_norm": 0.7597126537185416, + "learning_rate": 9.239903842794995e-06, + "loss": 0.361, + "step": 4475 + }, + { + "epoch": 0.20257976917854717, + "grad_norm": 0.6928027687811528, + "learning_rate": 9.239515329319633e-06, + "loss": 0.4059, + "step": 4476 + }, + { + "epoch": 0.20262502828694276, + "grad_norm": 0.6265742356763842, + "learning_rate": 9.23912672475011e-06, + "loss": 0.3775, + "step": 4477 + }, + { + "epoch": 0.20267028739533832, + "grad_norm": 0.6250366573033741, + "learning_rate": 9.238738029094771e-06, + "loss": 0.3929, + "step": 4478 + }, + { + "epoch": 0.20271554650373388, + "grad_norm": 0.6239194232238193, + "learning_rate": 9.238349242361971e-06, + "loss": 0.3946, + "step": 4479 + }, + { + "epoch": 0.20276080561212945, + "grad_norm": 0.49157297510570386, + "learning_rate": 9.237960364560063e-06, + "loss": 0.5188, + "step": 4480 + }, + { + "epoch": 0.202806064720525, + "grad_norm": 0.7051363283191563, + "learning_rate": 9.237571395697403e-06, + "loss": 0.3835, + "step": 4481 + }, + { + "epoch": 0.20285132382892057, + "grad_norm": 0.6660464023096989, + "learning_rate": 9.237182335782347e-06, + "loss": 0.4022, + "step": 4482 + }, + { + "epoch": 0.20289658293731613, + "grad_norm": 0.6843714863796969, + "learning_rate": 9.236793184823257e-06, + "loss": 0.397, + "step": 4483 + }, + { + "epoch": 0.2029418420457117, + "grad_norm": 0.6855307725350448, + "learning_rate": 9.236403942828494e-06, + "loss": 0.3815, + "step": 4484 + }, + { + "epoch": 0.20298710115410726, + "grad_norm": 0.7079043018016902, + "learning_rate": 9.236014609806421e-06, + "loss": 0.3846, + "step": 4485 + }, + { + "epoch": 0.20303236026250282, + "grad_norm": 0.6504384598720968, + "learning_rate": 9.235625185765403e-06, + "loss": 0.3855, + "step": 4486 + }, + { + "epoch": 0.20307761937089838, + "grad_norm": 0.6534159175098676, + "learning_rate": 9.235235670713808e-06, + "loss": 0.3779, + "step": 4487 + }, + { + "epoch": 0.20312287847929394, + "grad_norm": 0.41340276621194005, + "learning_rate": 9.23484606466001e-06, + "loss": 0.5256, + "step": 4488 + }, + { + "epoch": 0.20316813758768953, + "grad_norm": 0.3317961618649806, + "learning_rate": 9.234456367612373e-06, + "loss": 0.5193, + "step": 4489 + }, + { + "epoch": 0.2032133966960851, + "grad_norm": 0.7802740604366346, + "learning_rate": 9.234066579579274e-06, + "loss": 0.396, + "step": 4490 + }, + { + "epoch": 0.20325865580448066, + "grad_norm": 0.3090938209094079, + "learning_rate": 9.23367670056909e-06, + "loss": 0.465, + "step": 4491 + }, + { + "epoch": 0.20330391491287622, + "grad_norm": 0.7517899148301332, + "learning_rate": 9.233286730590195e-06, + "loss": 0.4396, + "step": 4492 + }, + { + "epoch": 0.20334917402127178, + "grad_norm": 0.3312876729542182, + "learning_rate": 9.23289666965097e-06, + "loss": 0.5304, + "step": 4493 + }, + { + "epoch": 0.20339443312966735, + "grad_norm": 0.7427229107966618, + "learning_rate": 9.232506517759797e-06, + "loss": 0.3931, + "step": 4494 + }, + { + "epoch": 0.2034396922380629, + "grad_norm": 0.6789139552402076, + "learning_rate": 9.232116274925056e-06, + "loss": 0.4469, + "step": 4495 + }, + { + "epoch": 0.20348495134645847, + "grad_norm": 0.7699554131664681, + "learning_rate": 9.231725941155133e-06, + "loss": 0.4291, + "step": 4496 + }, + { + "epoch": 0.20353021045485403, + "grad_norm": 0.6974959924396135, + "learning_rate": 9.231335516458419e-06, + "loss": 0.3907, + "step": 4497 + }, + { + "epoch": 0.2035754695632496, + "grad_norm": 0.6737647687437592, + "learning_rate": 9.2309450008433e-06, + "loss": 0.445, + "step": 4498 + }, + { + "epoch": 0.20362072867164516, + "grad_norm": 0.4044279854670894, + "learning_rate": 9.230554394318167e-06, + "loss": 0.4961, + "step": 4499 + }, + { + "epoch": 0.20366598778004075, + "grad_norm": 0.844669490129284, + "learning_rate": 9.230163696891415e-06, + "loss": 0.3862, + "step": 4500 + }, + { + "epoch": 0.2037112468884363, + "grad_norm": 0.7059403482053921, + "learning_rate": 9.229772908571435e-06, + "loss": 0.3812, + "step": 4501 + }, + { + "epoch": 0.20375650599683187, + "grad_norm": 0.6133336353944165, + "learning_rate": 9.229382029366625e-06, + "loss": 0.382, + "step": 4502 + }, + { + "epoch": 0.20380176510522743, + "grad_norm": 0.6774167500015552, + "learning_rate": 9.228991059285387e-06, + "loss": 0.3902, + "step": 4503 + }, + { + "epoch": 0.203847024213623, + "grad_norm": 0.32660098914735397, + "learning_rate": 9.228599998336119e-06, + "loss": 0.5226, + "step": 4504 + }, + { + "epoch": 0.20389228332201856, + "grad_norm": 0.8621953064122951, + "learning_rate": 9.228208846527222e-06, + "loss": 0.4481, + "step": 4505 + }, + { + "epoch": 0.20393754243041412, + "grad_norm": 0.31618272426187605, + "learning_rate": 9.227817603867106e-06, + "loss": 0.521, + "step": 4506 + }, + { + "epoch": 0.20398280153880968, + "grad_norm": 0.6973370488241651, + "learning_rate": 9.227426270364172e-06, + "loss": 0.408, + "step": 4507 + }, + { + "epoch": 0.20402806064720524, + "grad_norm": 0.7224976157253408, + "learning_rate": 9.227034846026833e-06, + "loss": 0.4008, + "step": 4508 + }, + { + "epoch": 0.2040733197556008, + "grad_norm": 0.860177658259118, + "learning_rate": 9.226643330863497e-06, + "loss": 0.4123, + "step": 4509 + }, + { + "epoch": 0.20411857886399637, + "grad_norm": 0.9113439419672582, + "learning_rate": 9.226251724882576e-06, + "loss": 0.4213, + "step": 4510 + }, + { + "epoch": 0.20416383797239193, + "grad_norm": 0.6339986185921339, + "learning_rate": 9.225860028092486e-06, + "loss": 0.3931, + "step": 4511 + }, + { + "epoch": 0.20420909708078752, + "grad_norm": 0.6893893577078529, + "learning_rate": 9.225468240501643e-06, + "loss": 0.3616, + "step": 4512 + }, + { + "epoch": 0.20425435618918308, + "grad_norm": 0.7505527891331154, + "learning_rate": 9.225076362118464e-06, + "loss": 0.4093, + "step": 4513 + }, + { + "epoch": 0.20429961529757865, + "grad_norm": 0.6566878964422919, + "learning_rate": 9.22468439295137e-06, + "loss": 0.3789, + "step": 4514 + }, + { + "epoch": 0.2043448744059742, + "grad_norm": 0.4184513712004044, + "learning_rate": 9.224292333008785e-06, + "loss": 0.5064, + "step": 4515 + }, + { + "epoch": 0.20439013351436977, + "grad_norm": 0.6435411751541094, + "learning_rate": 9.223900182299132e-06, + "loss": 0.3671, + "step": 4516 + }, + { + "epoch": 0.20443539262276533, + "grad_norm": 0.7608819934526175, + "learning_rate": 9.223507940830836e-06, + "loss": 0.3805, + "step": 4517 + }, + { + "epoch": 0.2044806517311609, + "grad_norm": 0.7234395427513599, + "learning_rate": 9.223115608612325e-06, + "loss": 0.3975, + "step": 4518 + }, + { + "epoch": 0.20452591083955646, + "grad_norm": 0.6773361232917884, + "learning_rate": 9.222723185652031e-06, + "loss": 0.3955, + "step": 4519 + }, + { + "epoch": 0.20457116994795202, + "grad_norm": 0.7176545433153804, + "learning_rate": 9.222330671958385e-06, + "loss": 0.3876, + "step": 4520 + }, + { + "epoch": 0.20461642905634758, + "grad_norm": 0.6506277456232169, + "learning_rate": 9.22193806753982e-06, + "loss": 0.4109, + "step": 4521 + }, + { + "epoch": 0.20466168816474314, + "grad_norm": 0.6114119716823015, + "learning_rate": 9.221545372404774e-06, + "loss": 0.3546, + "step": 4522 + }, + { + "epoch": 0.2047069472731387, + "grad_norm": 0.6235199644185562, + "learning_rate": 9.22115258656168e-06, + "loss": 0.384, + "step": 4523 + }, + { + "epoch": 0.2047522063815343, + "grad_norm": 0.7221677461499507, + "learning_rate": 9.220759710018984e-06, + "loss": 0.3897, + "step": 4524 + }, + { + "epoch": 0.20479746548992986, + "grad_norm": 0.6620027763210206, + "learning_rate": 9.220366742785126e-06, + "loss": 0.4044, + "step": 4525 + }, + { + "epoch": 0.20484272459832542, + "grad_norm": 0.6396484049106496, + "learning_rate": 9.219973684868546e-06, + "loss": 0.387, + "step": 4526 + }, + { + "epoch": 0.20488798370672098, + "grad_norm": 0.6500113781338169, + "learning_rate": 9.219580536277693e-06, + "loss": 0.3711, + "step": 4527 + }, + { + "epoch": 0.20493324281511655, + "grad_norm": 0.8404289788973495, + "learning_rate": 9.219187297021015e-06, + "loss": 0.3726, + "step": 4528 + }, + { + "epoch": 0.2049785019235121, + "grad_norm": 0.6387752010088015, + "learning_rate": 9.218793967106959e-06, + "loss": 0.4157, + "step": 4529 + }, + { + "epoch": 0.20502376103190767, + "grad_norm": 0.630614010902661, + "learning_rate": 9.218400546543977e-06, + "loss": 0.366, + "step": 4530 + }, + { + "epoch": 0.20506902014030323, + "grad_norm": 0.6171563864543054, + "learning_rate": 9.218007035340525e-06, + "loss": 0.4018, + "step": 4531 + }, + { + "epoch": 0.2051142792486988, + "grad_norm": 0.6403875839699732, + "learning_rate": 9.217613433505056e-06, + "loss": 0.415, + "step": 4532 + }, + { + "epoch": 0.20515953835709436, + "grad_norm": 0.650257811916877, + "learning_rate": 9.217219741046026e-06, + "loss": 0.4413, + "step": 4533 + }, + { + "epoch": 0.20520479746548992, + "grad_norm": 0.38375700790017325, + "learning_rate": 9.216825957971898e-06, + "loss": 0.4794, + "step": 4534 + }, + { + "epoch": 0.20525005657388548, + "grad_norm": 0.6478140522618808, + "learning_rate": 9.21643208429113e-06, + "loss": 0.3696, + "step": 4535 + }, + { + "epoch": 0.20529531568228107, + "grad_norm": 0.6876090924588174, + "learning_rate": 9.216038120012187e-06, + "loss": 0.4065, + "step": 4536 + }, + { + "epoch": 0.20534057479067663, + "grad_norm": 0.7690946772442716, + "learning_rate": 9.215644065143533e-06, + "loss": 0.4241, + "step": 4537 + }, + { + "epoch": 0.2053858338990722, + "grad_norm": 0.3022901550826388, + "learning_rate": 9.215249919693634e-06, + "loss": 0.4742, + "step": 4538 + }, + { + "epoch": 0.20543109300746776, + "grad_norm": 0.7201790266563611, + "learning_rate": 9.214855683670962e-06, + "loss": 0.4444, + "step": 4539 + }, + { + "epoch": 0.20547635211586332, + "grad_norm": 0.3224356216160571, + "learning_rate": 9.214461357083986e-06, + "loss": 0.5113, + "step": 4540 + }, + { + "epoch": 0.20552161122425888, + "grad_norm": 0.6949901984886445, + "learning_rate": 9.21406693994118e-06, + "loss": 0.4504, + "step": 4541 + }, + { + "epoch": 0.20556687033265444, + "grad_norm": 0.2908178295108323, + "learning_rate": 9.213672432251016e-06, + "loss": 0.5201, + "step": 4542 + }, + { + "epoch": 0.20561212944105, + "grad_norm": 0.6020446632402668, + "learning_rate": 9.213277834021975e-06, + "loss": 0.3505, + "step": 4543 + }, + { + "epoch": 0.20565738854944557, + "grad_norm": 0.6681811347078651, + "learning_rate": 9.212883145262532e-06, + "loss": 0.4143, + "step": 4544 + }, + { + "epoch": 0.20570264765784113, + "grad_norm": 0.5824281917974231, + "learning_rate": 9.212488365981169e-06, + "loss": 0.3621, + "step": 4545 + }, + { + "epoch": 0.2057479067662367, + "grad_norm": 0.6887703737480376, + "learning_rate": 9.21209349618637e-06, + "loss": 0.408, + "step": 4546 + }, + { + "epoch": 0.20579316587463228, + "grad_norm": 0.6825994800894678, + "learning_rate": 9.211698535886617e-06, + "loss": 0.4061, + "step": 4547 + }, + { + "epoch": 0.20583842498302785, + "grad_norm": 0.6205959778563953, + "learning_rate": 9.211303485090396e-06, + "loss": 0.3746, + "step": 4548 + }, + { + "epoch": 0.2058836840914234, + "grad_norm": 0.3568690622234325, + "learning_rate": 9.210908343806201e-06, + "loss": 0.5068, + "step": 4549 + }, + { + "epoch": 0.20592894319981897, + "grad_norm": 0.7072278527888455, + "learning_rate": 9.210513112042516e-06, + "loss": 0.4337, + "step": 4550 + }, + { + "epoch": 0.20597420230821453, + "grad_norm": 0.2905103607140547, + "learning_rate": 9.210117789807837e-06, + "loss": 0.4811, + "step": 4551 + }, + { + "epoch": 0.2060194614166101, + "grad_norm": 0.7029007626023053, + "learning_rate": 9.209722377110657e-06, + "loss": 0.4229, + "step": 4552 + }, + { + "epoch": 0.20606472052500566, + "grad_norm": 0.292195895010017, + "learning_rate": 9.20932687395947e-06, + "loss": 0.5031, + "step": 4553 + }, + { + "epoch": 0.20610997963340122, + "grad_norm": 0.6145259280329646, + "learning_rate": 9.20893128036278e-06, + "loss": 0.3548, + "step": 4554 + }, + { + "epoch": 0.20615523874179678, + "grad_norm": 0.7336553661516277, + "learning_rate": 9.208535596329082e-06, + "loss": 0.4202, + "step": 4555 + }, + { + "epoch": 0.20620049785019234, + "grad_norm": 0.35952433690955826, + "learning_rate": 9.20813982186688e-06, + "loss": 0.4885, + "step": 4556 + }, + { + "epoch": 0.2062457569585879, + "grad_norm": 0.3178685454909754, + "learning_rate": 9.207743956984676e-06, + "loss": 0.4909, + "step": 4557 + }, + { + "epoch": 0.20629101606698347, + "grad_norm": 0.7560864491204569, + "learning_rate": 9.20734800169098e-06, + "loss": 0.4113, + "step": 4558 + }, + { + "epoch": 0.20633627517537906, + "grad_norm": 0.6815854069387833, + "learning_rate": 9.206951955994294e-06, + "loss": 0.4012, + "step": 4559 + }, + { + "epoch": 0.20638153428377462, + "grad_norm": 0.6708607127704782, + "learning_rate": 9.206555819903132e-06, + "loss": 0.3886, + "step": 4560 + }, + { + "epoch": 0.20642679339217018, + "grad_norm": 0.6822575874755423, + "learning_rate": 9.206159593426005e-06, + "loss": 0.3947, + "step": 4561 + }, + { + "epoch": 0.20647205250056574, + "grad_norm": 0.6443314679032428, + "learning_rate": 9.205763276571429e-06, + "loss": 0.3646, + "step": 4562 + }, + { + "epoch": 0.2065173116089613, + "grad_norm": 1.2434698621002092, + "learning_rate": 9.205366869347915e-06, + "loss": 0.4032, + "step": 4563 + }, + { + "epoch": 0.20656257071735687, + "grad_norm": 0.5846776817337689, + "learning_rate": 9.204970371763984e-06, + "loss": 0.354, + "step": 4564 + }, + { + "epoch": 0.20660782982575243, + "grad_norm": 0.6663693027142774, + "learning_rate": 9.204573783828153e-06, + "loss": 0.3752, + "step": 4565 + }, + { + "epoch": 0.206653088934148, + "grad_norm": 0.6392985106616054, + "learning_rate": 9.204177105548946e-06, + "loss": 0.3578, + "step": 4566 + }, + { + "epoch": 0.20669834804254356, + "grad_norm": 0.6708860689350385, + "learning_rate": 9.203780336934885e-06, + "loss": 0.378, + "step": 4567 + }, + { + "epoch": 0.20674360715093912, + "grad_norm": 0.6624608867324993, + "learning_rate": 9.203383477994495e-06, + "loss": 0.3772, + "step": 4568 + }, + { + "epoch": 0.20678886625933468, + "grad_norm": 0.443250526541554, + "learning_rate": 9.202986528736302e-06, + "loss": 0.4944, + "step": 4569 + }, + { + "epoch": 0.20683412536773024, + "grad_norm": 0.7065139455060182, + "learning_rate": 9.20258948916884e-06, + "loss": 0.3936, + "step": 4570 + }, + { + "epoch": 0.20687938447612583, + "grad_norm": 0.6886726217442054, + "learning_rate": 9.202192359300635e-06, + "loss": 0.385, + "step": 4571 + }, + { + "epoch": 0.2069246435845214, + "grad_norm": 0.30440484021814024, + "learning_rate": 9.201795139140224e-06, + "loss": 0.4913, + "step": 4572 + }, + { + "epoch": 0.20696990269291696, + "grad_norm": 0.6461373151052034, + "learning_rate": 9.201397828696139e-06, + "loss": 0.4234, + "step": 4573 + }, + { + "epoch": 0.20701516180131252, + "grad_norm": 0.29313820021793546, + "learning_rate": 9.201000427976917e-06, + "loss": 0.4926, + "step": 4574 + }, + { + "epoch": 0.20706042090970808, + "grad_norm": 0.7446593774804321, + "learning_rate": 9.2006029369911e-06, + "loss": 0.4272, + "step": 4575 + }, + { + "epoch": 0.20710568001810364, + "grad_norm": 0.634239966775946, + "learning_rate": 9.200205355747228e-06, + "loss": 0.41, + "step": 4576 + }, + { + "epoch": 0.2071509391264992, + "grad_norm": 0.6269753464584988, + "learning_rate": 9.199807684253842e-06, + "loss": 0.4049, + "step": 4577 + }, + { + "epoch": 0.20719619823489477, + "grad_norm": 0.6626327081044174, + "learning_rate": 9.199409922519487e-06, + "loss": 0.4133, + "step": 4578 + }, + { + "epoch": 0.20724145734329033, + "grad_norm": 0.6810410460327005, + "learning_rate": 9.19901207055271e-06, + "loss": 0.4049, + "step": 4579 + }, + { + "epoch": 0.2072867164516859, + "grad_norm": 0.677169394674404, + "learning_rate": 9.198614128362062e-06, + "loss": 0.4485, + "step": 4580 + }, + { + "epoch": 0.20733197556008146, + "grad_norm": 0.6882687805510503, + "learning_rate": 9.19821609595609e-06, + "loss": 0.399, + "step": 4581 + }, + { + "epoch": 0.20737723466847702, + "grad_norm": 0.7153669943119992, + "learning_rate": 9.197817973343347e-06, + "loss": 0.3734, + "step": 4582 + }, + { + "epoch": 0.2074224937768726, + "grad_norm": 0.659508194441196, + "learning_rate": 9.197419760532389e-06, + "loss": 0.4033, + "step": 4583 + }, + { + "epoch": 0.20746775288526817, + "grad_norm": 0.6520433998599509, + "learning_rate": 9.197021457531771e-06, + "loss": 0.3872, + "step": 4584 + }, + { + "epoch": 0.20751301199366373, + "grad_norm": 0.6629252034178089, + "learning_rate": 9.196623064350054e-06, + "loss": 0.3936, + "step": 4585 + }, + { + "epoch": 0.2075582711020593, + "grad_norm": 0.8014334519954863, + "learning_rate": 9.196224580995796e-06, + "loss": 0.3918, + "step": 4586 + }, + { + "epoch": 0.20760353021045486, + "grad_norm": 0.6381738015711796, + "learning_rate": 9.19582600747756e-06, + "loss": 0.3992, + "step": 4587 + }, + { + "epoch": 0.20764878931885042, + "grad_norm": 0.6575473018865028, + "learning_rate": 9.195427343803906e-06, + "loss": 0.4203, + "step": 4588 + }, + { + "epoch": 0.20769404842724598, + "grad_norm": 0.680756389919026, + "learning_rate": 9.195028589983407e-06, + "loss": 0.4196, + "step": 4589 + }, + { + "epoch": 0.20773930753564154, + "grad_norm": 0.7390360892869532, + "learning_rate": 9.194629746024627e-06, + "loss": 0.3954, + "step": 4590 + }, + { + "epoch": 0.2077845666440371, + "grad_norm": 0.6774172823682195, + "learning_rate": 9.194230811936135e-06, + "loss": 0.4134, + "step": 4591 + }, + { + "epoch": 0.20782982575243267, + "grad_norm": 0.4994134842411283, + "learning_rate": 9.193831787726507e-06, + "loss": 0.518, + "step": 4592 + }, + { + "epoch": 0.20787508486082823, + "grad_norm": 0.8287166767605357, + "learning_rate": 9.193432673404312e-06, + "loss": 0.3993, + "step": 4593 + }, + { + "epoch": 0.20792034396922382, + "grad_norm": 0.751905987339554, + "learning_rate": 9.19303346897813e-06, + "loss": 0.41, + "step": 4594 + }, + { + "epoch": 0.20796560307761938, + "grad_norm": 0.7410296417952482, + "learning_rate": 9.192634174456536e-06, + "loss": 0.3207, + "step": 4595 + }, + { + "epoch": 0.20801086218601494, + "grad_norm": 0.6774001605097975, + "learning_rate": 9.19223478984811e-06, + "loss": 0.3675, + "step": 4596 + }, + { + "epoch": 0.2080561212944105, + "grad_norm": 0.6846876993441788, + "learning_rate": 9.191835315161432e-06, + "loss": 0.4113, + "step": 4597 + }, + { + "epoch": 0.20810138040280607, + "grad_norm": 0.6541964859436085, + "learning_rate": 9.191435750405091e-06, + "loss": 0.3714, + "step": 4598 + }, + { + "epoch": 0.20814663951120163, + "grad_norm": 0.6553196982674202, + "learning_rate": 9.191036095587667e-06, + "loss": 0.4332, + "step": 4599 + }, + { + "epoch": 0.2081918986195972, + "grad_norm": 0.69992746418274, + "learning_rate": 9.190636350717747e-06, + "loss": 0.3774, + "step": 4600 + }, + { + "epoch": 0.20823715772799276, + "grad_norm": 0.685563902486613, + "learning_rate": 9.190236515803926e-06, + "loss": 0.3841, + "step": 4601 + }, + { + "epoch": 0.20828241683638832, + "grad_norm": 0.7595242844336855, + "learning_rate": 9.18983659085479e-06, + "loss": 0.3941, + "step": 4602 + }, + { + "epoch": 0.20832767594478388, + "grad_norm": 0.6384986197619993, + "learning_rate": 9.189436575878933e-06, + "loss": 0.3761, + "step": 4603 + }, + { + "epoch": 0.20837293505317944, + "grad_norm": 0.7432963933397082, + "learning_rate": 9.189036470884951e-06, + "loss": 0.3927, + "step": 4604 + }, + { + "epoch": 0.208418194161575, + "grad_norm": 0.6954626477052497, + "learning_rate": 9.188636275881442e-06, + "loss": 0.3661, + "step": 4605 + }, + { + "epoch": 0.2084634532699706, + "grad_norm": 0.7741203657135156, + "learning_rate": 9.188235990877004e-06, + "loss": 0.4073, + "step": 4606 + }, + { + "epoch": 0.20850871237836616, + "grad_norm": 0.4754471604353748, + "learning_rate": 9.187835615880235e-06, + "loss": 0.5383, + "step": 4607 + }, + { + "epoch": 0.20855397148676172, + "grad_norm": 0.800689655749288, + "learning_rate": 9.187435150899743e-06, + "loss": 0.4326, + "step": 4608 + }, + { + "epoch": 0.20859923059515728, + "grad_norm": 0.3767292595817881, + "learning_rate": 9.187034595944131e-06, + "loss": 0.4918, + "step": 4609 + }, + { + "epoch": 0.20864448970355284, + "grad_norm": 0.6318101316229977, + "learning_rate": 9.186633951022005e-06, + "loss": 0.3991, + "step": 4610 + }, + { + "epoch": 0.2086897488119484, + "grad_norm": 0.2964277166418596, + "learning_rate": 9.186233216141972e-06, + "loss": 0.4824, + "step": 4611 + }, + { + "epoch": 0.20873500792034397, + "grad_norm": 0.32570830424870334, + "learning_rate": 9.185832391312644e-06, + "loss": 0.4822, + "step": 4612 + }, + { + "epoch": 0.20878026702873953, + "grad_norm": 0.2974554326662702, + "learning_rate": 9.185431476542635e-06, + "loss": 0.4878, + "step": 4613 + }, + { + "epoch": 0.2088255261371351, + "grad_norm": 0.7701402097321183, + "learning_rate": 9.185030471840557e-06, + "loss": 0.3986, + "step": 4614 + }, + { + "epoch": 0.20887078524553065, + "grad_norm": 0.7491767662695776, + "learning_rate": 9.184629377215028e-06, + "loss": 0.4542, + "step": 4615 + }, + { + "epoch": 0.20891604435392622, + "grad_norm": 0.7697111484129529, + "learning_rate": 9.184228192674667e-06, + "loss": 0.3708, + "step": 4616 + }, + { + "epoch": 0.20896130346232178, + "grad_norm": 0.6412765775554661, + "learning_rate": 9.183826918228092e-06, + "loss": 0.3901, + "step": 4617 + }, + { + "epoch": 0.20900656257071737, + "grad_norm": 0.719052047311315, + "learning_rate": 9.183425553883925e-06, + "loss": 0.3966, + "step": 4618 + }, + { + "epoch": 0.20905182167911293, + "grad_norm": 0.6533657663372489, + "learning_rate": 9.183024099650793e-06, + "loss": 0.3682, + "step": 4619 + }, + { + "epoch": 0.2090970807875085, + "grad_norm": 0.4063216099091764, + "learning_rate": 9.18262255553732e-06, + "loss": 0.5193, + "step": 4620 + }, + { + "epoch": 0.20914233989590406, + "grad_norm": 0.716403564936248, + "learning_rate": 9.182220921552132e-06, + "loss": 0.4351, + "step": 4621 + }, + { + "epoch": 0.20918759900429962, + "grad_norm": 0.6644833222370995, + "learning_rate": 9.181819197703864e-06, + "loss": 0.3863, + "step": 4622 + }, + { + "epoch": 0.20923285811269518, + "grad_norm": 0.7599099253584791, + "learning_rate": 9.181417384001143e-06, + "loss": 0.3436, + "step": 4623 + }, + { + "epoch": 0.20927811722109074, + "grad_norm": 0.6960306482533338, + "learning_rate": 9.181015480452607e-06, + "loss": 0.3701, + "step": 4624 + }, + { + "epoch": 0.2093233763294863, + "grad_norm": 0.6677901326076892, + "learning_rate": 9.180613487066888e-06, + "loss": 0.4453, + "step": 4625 + }, + { + "epoch": 0.20936863543788187, + "grad_norm": 0.7052036480574205, + "learning_rate": 9.180211403852623e-06, + "loss": 0.4158, + "step": 4626 + }, + { + "epoch": 0.20941389454627743, + "grad_norm": 0.6767244474943633, + "learning_rate": 9.179809230818458e-06, + "loss": 0.3774, + "step": 4627 + }, + { + "epoch": 0.209459153654673, + "grad_norm": 0.6896840117858403, + "learning_rate": 9.179406967973025e-06, + "loss": 0.423, + "step": 4628 + }, + { + "epoch": 0.20950441276306855, + "grad_norm": 1.0927703091373575, + "learning_rate": 9.179004615324976e-06, + "loss": 0.3883, + "step": 4629 + }, + { + "epoch": 0.20954967187146414, + "grad_norm": 0.3893396568527757, + "learning_rate": 9.178602172882951e-06, + "loss": 0.5055, + "step": 4630 + }, + { + "epoch": 0.2095949309798597, + "grad_norm": 0.734134054975077, + "learning_rate": 9.178199640655598e-06, + "loss": 0.3971, + "step": 4631 + }, + { + "epoch": 0.20964019008825527, + "grad_norm": 0.6996391227153452, + "learning_rate": 9.177797018651568e-06, + "loss": 0.4527, + "step": 4632 + }, + { + "epoch": 0.20968544919665083, + "grad_norm": 0.6747446956371578, + "learning_rate": 9.177394306879513e-06, + "loss": 0.4148, + "step": 4633 + }, + { + "epoch": 0.2097307083050464, + "grad_norm": 0.6681883466966753, + "learning_rate": 9.176991505348082e-06, + "loss": 0.4469, + "step": 4634 + }, + { + "epoch": 0.20977596741344195, + "grad_norm": 0.6171146969041853, + "learning_rate": 9.176588614065934e-06, + "loss": 0.3342, + "step": 4635 + }, + { + "epoch": 0.20982122652183752, + "grad_norm": 0.6505458312196792, + "learning_rate": 9.17618563304172e-06, + "loss": 0.414, + "step": 4636 + }, + { + "epoch": 0.20986648563023308, + "grad_norm": 0.6281754410034931, + "learning_rate": 9.175782562284108e-06, + "loss": 0.409, + "step": 4637 + }, + { + "epoch": 0.20991174473862864, + "grad_norm": 0.6888797123187864, + "learning_rate": 9.175379401801752e-06, + "loss": 0.4315, + "step": 4638 + }, + { + "epoch": 0.2099570038470242, + "grad_norm": 0.7493278171281029, + "learning_rate": 9.174976151603314e-06, + "loss": 0.4113, + "step": 4639 + }, + { + "epoch": 0.21000226295541977, + "grad_norm": 0.8294347486423541, + "learning_rate": 9.174572811697464e-06, + "loss": 0.3607, + "step": 4640 + }, + { + "epoch": 0.21004752206381536, + "grad_norm": 0.7000019570919632, + "learning_rate": 9.174169382092864e-06, + "loss": 0.3661, + "step": 4641 + }, + { + "epoch": 0.21009278117221092, + "grad_norm": 0.6980852810654846, + "learning_rate": 9.173765862798185e-06, + "loss": 0.4516, + "step": 4642 + }, + { + "epoch": 0.21013804028060648, + "grad_norm": 0.7211418317715546, + "learning_rate": 9.173362253822095e-06, + "loss": 0.3795, + "step": 4643 + }, + { + "epoch": 0.21018329938900204, + "grad_norm": 0.39470052963079344, + "learning_rate": 9.172958555173268e-06, + "loss": 0.4853, + "step": 4644 + }, + { + "epoch": 0.2102285584973976, + "grad_norm": 0.33231990173857073, + "learning_rate": 9.17255476686038e-06, + "loss": 0.497, + "step": 4645 + }, + { + "epoch": 0.21027381760579317, + "grad_norm": 0.6787839011727957, + "learning_rate": 9.172150888892102e-06, + "loss": 0.3966, + "step": 4646 + }, + { + "epoch": 0.21031907671418873, + "grad_norm": 0.7294657149087491, + "learning_rate": 9.171746921277116e-06, + "loss": 0.4028, + "step": 4647 + }, + { + "epoch": 0.2103643358225843, + "grad_norm": 0.7001236222785713, + "learning_rate": 9.171342864024103e-06, + "loss": 0.3603, + "step": 4648 + }, + { + "epoch": 0.21040959493097985, + "grad_norm": 0.9831069171644347, + "learning_rate": 9.17093871714174e-06, + "loss": 0.4124, + "step": 4649 + }, + { + "epoch": 0.21045485403937542, + "grad_norm": 0.6921596856617678, + "learning_rate": 9.170534480638718e-06, + "loss": 0.3952, + "step": 4650 + }, + { + "epoch": 0.21050011314777098, + "grad_norm": 0.8188302904529361, + "learning_rate": 9.170130154523715e-06, + "loss": 0.3858, + "step": 4651 + }, + { + "epoch": 0.21054537225616654, + "grad_norm": 0.6255134043307441, + "learning_rate": 9.169725738805425e-06, + "loss": 0.399, + "step": 4652 + }, + { + "epoch": 0.21059063136456213, + "grad_norm": 0.6553509538956197, + "learning_rate": 9.169321233492534e-06, + "loss": 0.3949, + "step": 4653 + }, + { + "epoch": 0.2106358904729577, + "grad_norm": 0.6746561449928478, + "learning_rate": 9.168916638593736e-06, + "loss": 0.3631, + "step": 4654 + }, + { + "epoch": 0.21068114958135326, + "grad_norm": 0.6714508923031717, + "learning_rate": 9.168511954117723e-06, + "loss": 0.3969, + "step": 4655 + }, + { + "epoch": 0.21072640868974882, + "grad_norm": 0.6108944498419144, + "learning_rate": 9.16810718007319e-06, + "loss": 0.4342, + "step": 4656 + }, + { + "epoch": 0.21077166779814438, + "grad_norm": 0.6910327522428834, + "learning_rate": 9.167702316468835e-06, + "loss": 0.4071, + "step": 4657 + }, + { + "epoch": 0.21081692690653994, + "grad_norm": 0.6512363183886198, + "learning_rate": 9.167297363313357e-06, + "loss": 0.4079, + "step": 4658 + }, + { + "epoch": 0.2108621860149355, + "grad_norm": 0.6883357018712101, + "learning_rate": 9.166892320615459e-06, + "loss": 0.371, + "step": 4659 + }, + { + "epoch": 0.21090744512333107, + "grad_norm": 0.6252312961373009, + "learning_rate": 9.166487188383841e-06, + "loss": 0.4051, + "step": 4660 + }, + { + "epoch": 0.21095270423172663, + "grad_norm": 0.6589575854416991, + "learning_rate": 9.166081966627211e-06, + "loss": 0.4222, + "step": 4661 + }, + { + "epoch": 0.2109979633401222, + "grad_norm": 0.6861814844204853, + "learning_rate": 9.165676655354274e-06, + "loss": 0.4831, + "step": 4662 + }, + { + "epoch": 0.21104322244851775, + "grad_norm": 0.6707217311626233, + "learning_rate": 9.16527125457374e-06, + "loss": 0.3438, + "step": 4663 + }, + { + "epoch": 0.21108848155691332, + "grad_norm": 0.7936046441370954, + "learning_rate": 9.16486576429432e-06, + "loss": 0.3499, + "step": 4664 + }, + { + "epoch": 0.2111337406653089, + "grad_norm": 0.7234342589495827, + "learning_rate": 9.164460184524726e-06, + "loss": 0.3742, + "step": 4665 + }, + { + "epoch": 0.21117899977370447, + "grad_norm": 0.6772999050647835, + "learning_rate": 9.16405451527367e-06, + "loss": 0.3686, + "step": 4666 + }, + { + "epoch": 0.21122425888210003, + "grad_norm": 0.6520754696457214, + "learning_rate": 9.163648756549875e-06, + "loss": 0.4921, + "step": 4667 + }, + { + "epoch": 0.2112695179904956, + "grad_norm": 0.7762013029665157, + "learning_rate": 9.163242908362053e-06, + "loss": 0.3969, + "step": 4668 + }, + { + "epoch": 0.21131477709889115, + "grad_norm": 0.6829559452971228, + "learning_rate": 9.16283697071893e-06, + "loss": 0.4119, + "step": 4669 + }, + { + "epoch": 0.21136003620728672, + "grad_norm": 0.6784654757287741, + "learning_rate": 9.162430943629224e-06, + "loss": 0.4044, + "step": 4670 + }, + { + "epoch": 0.21140529531568228, + "grad_norm": 0.336867182270296, + "learning_rate": 9.162024827101663e-06, + "loss": 0.4802, + "step": 4671 + }, + { + "epoch": 0.21145055442407784, + "grad_norm": 0.3422647615733906, + "learning_rate": 9.161618621144967e-06, + "loss": 0.5177, + "step": 4672 + }, + { + "epoch": 0.2114958135324734, + "grad_norm": 1.1008475237506572, + "learning_rate": 9.161212325767873e-06, + "loss": 0.4127, + "step": 4673 + }, + { + "epoch": 0.21154107264086897, + "grad_norm": 0.6685866425309338, + "learning_rate": 9.160805940979104e-06, + "loss": 0.38, + "step": 4674 + }, + { + "epoch": 0.21158633174926453, + "grad_norm": 0.6905136705212983, + "learning_rate": 9.160399466787392e-06, + "loss": 0.4012, + "step": 4675 + }, + { + "epoch": 0.21163159085766012, + "grad_norm": 0.9911104484428561, + "learning_rate": 9.159992903201478e-06, + "loss": 0.3776, + "step": 4676 + }, + { + "epoch": 0.21167684996605568, + "grad_norm": 0.7258200921880575, + "learning_rate": 9.15958625023009e-06, + "loss": 0.4013, + "step": 4677 + }, + { + "epoch": 0.21172210907445124, + "grad_norm": 0.40608231174106374, + "learning_rate": 9.15917950788197e-06, + "loss": 0.4963, + "step": 4678 + }, + { + "epoch": 0.2117673681828468, + "grad_norm": 0.753776177198563, + "learning_rate": 9.158772676165854e-06, + "loss": 0.3785, + "step": 4679 + }, + { + "epoch": 0.21181262729124237, + "grad_norm": 0.4500270088526661, + "learning_rate": 9.158365755090488e-06, + "loss": 0.5132, + "step": 4680 + }, + { + "epoch": 0.21185788639963793, + "grad_norm": 1.1467954943239418, + "learning_rate": 9.157958744664612e-06, + "loss": 0.4071, + "step": 4681 + }, + { + "epoch": 0.2119031455080335, + "grad_norm": 0.6675630555278697, + "learning_rate": 9.157551644896974e-06, + "loss": 0.4126, + "step": 4682 + }, + { + "epoch": 0.21194840461642905, + "grad_norm": 0.29502147340878954, + "learning_rate": 9.15714445579632e-06, + "loss": 0.4719, + "step": 4683 + }, + { + "epoch": 0.21199366372482462, + "grad_norm": 0.3252216219097457, + "learning_rate": 9.156737177371399e-06, + "loss": 0.5063, + "step": 4684 + }, + { + "epoch": 0.21203892283322018, + "grad_norm": 1.8135010055214982, + "learning_rate": 9.156329809630962e-06, + "loss": 0.4284, + "step": 4685 + }, + { + "epoch": 0.21208418194161574, + "grad_norm": 0.38930115148456806, + "learning_rate": 9.155922352583763e-06, + "loss": 0.4703, + "step": 4686 + }, + { + "epoch": 0.2121294410500113, + "grad_norm": 1.280235310925464, + "learning_rate": 9.155514806238557e-06, + "loss": 0.3811, + "step": 4687 + }, + { + "epoch": 0.2121747001584069, + "grad_norm": 0.31157176205214804, + "learning_rate": 9.1551071706041e-06, + "loss": 0.5066, + "step": 4688 + }, + { + "epoch": 0.21221995926680245, + "grad_norm": 1.170161611611983, + "learning_rate": 9.154699445689151e-06, + "loss": 0.3751, + "step": 4689 + }, + { + "epoch": 0.21226521837519802, + "grad_norm": 0.6990135258357858, + "learning_rate": 9.154291631502471e-06, + "loss": 0.4575, + "step": 4690 + }, + { + "epoch": 0.21231047748359358, + "grad_norm": 0.7945124398062862, + "learning_rate": 9.153883728052824e-06, + "loss": 0.3965, + "step": 4691 + }, + { + "epoch": 0.21235573659198914, + "grad_norm": 1.098263554852461, + "learning_rate": 9.153475735348973e-06, + "loss": 0.3956, + "step": 4692 + }, + { + "epoch": 0.2124009957003847, + "grad_norm": 0.7792909497721552, + "learning_rate": 9.153067653399684e-06, + "loss": 0.4391, + "step": 4693 + }, + { + "epoch": 0.21244625480878027, + "grad_norm": 0.7769896070381618, + "learning_rate": 9.152659482213727e-06, + "loss": 0.3678, + "step": 4694 + }, + { + "epoch": 0.21249151391717583, + "grad_norm": 0.7248722333731626, + "learning_rate": 9.152251221799871e-06, + "loss": 0.3668, + "step": 4695 + }, + { + "epoch": 0.2125367730255714, + "grad_norm": 0.7951512445368836, + "learning_rate": 9.15184287216689e-06, + "loss": 0.3748, + "step": 4696 + }, + { + "epoch": 0.21258203213396695, + "grad_norm": 0.46089270965008194, + "learning_rate": 9.151434433323556e-06, + "loss": 0.5111, + "step": 4697 + }, + { + "epoch": 0.21262729124236251, + "grad_norm": 0.7608710232314421, + "learning_rate": 9.151025905278647e-06, + "loss": 0.3889, + "step": 4698 + }, + { + "epoch": 0.21267255035075808, + "grad_norm": 0.6912646554784957, + "learning_rate": 9.15061728804094e-06, + "loss": 0.3943, + "step": 4699 + }, + { + "epoch": 0.21271780945915367, + "grad_norm": 0.6628035744904157, + "learning_rate": 9.150208581619215e-06, + "loss": 0.3742, + "step": 4700 + }, + { + "epoch": 0.21276306856754923, + "grad_norm": 0.8216102198117472, + "learning_rate": 9.149799786022256e-06, + "loss": 0.3574, + "step": 4701 + }, + { + "epoch": 0.2128083276759448, + "grad_norm": 0.7410885559035563, + "learning_rate": 9.149390901258841e-06, + "loss": 0.3828, + "step": 4702 + }, + { + "epoch": 0.21285358678434035, + "grad_norm": 0.6536436140256802, + "learning_rate": 9.14898192733776e-06, + "loss": 0.3664, + "step": 4703 + }, + { + "epoch": 0.21289884589273592, + "grad_norm": 0.6977789389528802, + "learning_rate": 9.148572864267804e-06, + "loss": 0.429, + "step": 4704 + }, + { + "epoch": 0.21294410500113148, + "grad_norm": 0.7408643479307443, + "learning_rate": 9.148163712057755e-06, + "loss": 0.4213, + "step": 4705 + }, + { + "epoch": 0.21298936410952704, + "grad_norm": 0.6933655118606328, + "learning_rate": 9.147754470716407e-06, + "loss": 0.3908, + "step": 4706 + }, + { + "epoch": 0.2130346232179226, + "grad_norm": 0.6693690607323451, + "learning_rate": 9.147345140252557e-06, + "loss": 0.3323, + "step": 4707 + }, + { + "epoch": 0.21307988232631817, + "grad_norm": 0.6021530915845484, + "learning_rate": 9.146935720674996e-06, + "loss": 0.3634, + "step": 4708 + }, + { + "epoch": 0.21312514143471373, + "grad_norm": 0.37323411086081587, + "learning_rate": 9.146526211992523e-06, + "loss": 0.4996, + "step": 4709 + }, + { + "epoch": 0.2131704005431093, + "grad_norm": 0.6934432204208978, + "learning_rate": 9.146116614213938e-06, + "loss": 0.3723, + "step": 4710 + }, + { + "epoch": 0.21321565965150485, + "grad_norm": 0.684048900725944, + "learning_rate": 9.14570692734804e-06, + "loss": 0.4022, + "step": 4711 + }, + { + "epoch": 0.21326091875990044, + "grad_norm": 0.6876883349474486, + "learning_rate": 9.145297151403631e-06, + "loss": 0.3528, + "step": 4712 + }, + { + "epoch": 0.213306177868296, + "grad_norm": 0.6900448103049897, + "learning_rate": 9.14488728638952e-06, + "loss": 0.3581, + "step": 4713 + }, + { + "epoch": 0.21335143697669157, + "grad_norm": 0.6654430713269235, + "learning_rate": 9.144477332314509e-06, + "loss": 0.3596, + "step": 4714 + }, + { + "epoch": 0.21339669608508713, + "grad_norm": 0.7570070927296473, + "learning_rate": 9.14406728918741e-06, + "loss": 0.3447, + "step": 4715 + }, + { + "epoch": 0.2134419551934827, + "grad_norm": 0.7467852955198542, + "learning_rate": 9.143657157017034e-06, + "loss": 0.4206, + "step": 4716 + }, + { + "epoch": 0.21348721430187825, + "grad_norm": 0.6630878162500251, + "learning_rate": 9.14324693581219e-06, + "loss": 0.3856, + "step": 4717 + }, + { + "epoch": 0.21353247341027382, + "grad_norm": 0.582274695283196, + "learning_rate": 9.142836625581694e-06, + "loss": 0.3687, + "step": 4718 + }, + { + "epoch": 0.21357773251866938, + "grad_norm": 0.6545244065786855, + "learning_rate": 9.142426226334365e-06, + "loss": 0.3833, + "step": 4719 + }, + { + "epoch": 0.21362299162706494, + "grad_norm": 0.6883635673957185, + "learning_rate": 9.142015738079017e-06, + "loss": 0.4312, + "step": 4720 + }, + { + "epoch": 0.2136682507354605, + "grad_norm": 0.6751076178839724, + "learning_rate": 9.141605160824473e-06, + "loss": 0.3916, + "step": 4721 + }, + { + "epoch": 0.21371350984385606, + "grad_norm": 0.5309631556880734, + "learning_rate": 9.141194494579553e-06, + "loss": 0.511, + "step": 4722 + }, + { + "epoch": 0.21375876895225165, + "grad_norm": 0.7167569087582294, + "learning_rate": 9.140783739353083e-06, + "loss": 0.3938, + "step": 4723 + }, + { + "epoch": 0.21380402806064722, + "grad_norm": 0.662087956027807, + "learning_rate": 9.140372895153887e-06, + "loss": 0.4021, + "step": 4724 + }, + { + "epoch": 0.21384928716904278, + "grad_norm": 0.6237946620606332, + "learning_rate": 9.139961961990796e-06, + "loss": 0.4173, + "step": 4725 + }, + { + "epoch": 0.21389454627743834, + "grad_norm": 0.6856754692559144, + "learning_rate": 9.139550939872635e-06, + "loss": 0.4065, + "step": 4726 + }, + { + "epoch": 0.2139398053858339, + "grad_norm": 0.7084939389228818, + "learning_rate": 9.139139828808238e-06, + "loss": 0.4086, + "step": 4727 + }, + { + "epoch": 0.21398506449422947, + "grad_norm": 0.6144425495088535, + "learning_rate": 9.13872862880644e-06, + "loss": 0.3686, + "step": 4728 + }, + { + "epoch": 0.21403032360262503, + "grad_norm": 0.6861154793751022, + "learning_rate": 9.138317339876073e-06, + "loss": 0.3772, + "step": 4729 + }, + { + "epoch": 0.2140755827110206, + "grad_norm": 0.5890343111376076, + "learning_rate": 9.137905962025977e-06, + "loss": 0.5363, + "step": 4730 + }, + { + "epoch": 0.21412084181941615, + "grad_norm": 0.7217914460843785, + "learning_rate": 9.13749449526499e-06, + "loss": 0.4486, + "step": 4731 + }, + { + "epoch": 0.21416610092781171, + "grad_norm": 0.6471609436769172, + "learning_rate": 9.137082939601953e-06, + "loss": 0.4231, + "step": 4732 + }, + { + "epoch": 0.21421136003620728, + "grad_norm": 0.6431682681393758, + "learning_rate": 9.136671295045713e-06, + "loss": 0.4308, + "step": 4733 + }, + { + "epoch": 0.21425661914460284, + "grad_norm": 0.6473156110408935, + "learning_rate": 9.13625956160511e-06, + "loss": 0.3905, + "step": 4734 + }, + { + "epoch": 0.21430187825299843, + "grad_norm": 0.6914681606814352, + "learning_rate": 9.135847739288991e-06, + "loss": 0.4215, + "step": 4735 + }, + { + "epoch": 0.214347137361394, + "grad_norm": 0.6284487613866135, + "learning_rate": 9.135435828106208e-06, + "loss": 0.3643, + "step": 4736 + }, + { + "epoch": 0.21439239646978955, + "grad_norm": 0.6258062829449805, + "learning_rate": 9.135023828065609e-06, + "loss": 0.3776, + "step": 4737 + }, + { + "epoch": 0.21443765557818512, + "grad_norm": 0.6517393869689241, + "learning_rate": 9.13461173917605e-06, + "loss": 0.3803, + "step": 4738 + }, + { + "epoch": 0.21448291468658068, + "grad_norm": 0.6343432447683893, + "learning_rate": 9.134199561446379e-06, + "loss": 0.4224, + "step": 4739 + }, + { + "epoch": 0.21452817379497624, + "grad_norm": 0.7060100945070159, + "learning_rate": 9.13378729488546e-06, + "loss": 0.3879, + "step": 4740 + }, + { + "epoch": 0.2145734329033718, + "grad_norm": 0.6590280271404857, + "learning_rate": 9.133374939502147e-06, + "loss": 0.3826, + "step": 4741 + }, + { + "epoch": 0.21461869201176736, + "grad_norm": 0.6401579046669076, + "learning_rate": 9.132962495305302e-06, + "loss": 0.373, + "step": 4742 + }, + { + "epoch": 0.21466395112016293, + "grad_norm": 0.8626848325193524, + "learning_rate": 9.132549962303786e-06, + "loss": 0.4087, + "step": 4743 + }, + { + "epoch": 0.2147092102285585, + "grad_norm": 0.6145110012458083, + "learning_rate": 9.132137340506464e-06, + "loss": 0.3839, + "step": 4744 + }, + { + "epoch": 0.21475446933695405, + "grad_norm": 0.6269187719056039, + "learning_rate": 9.131724629922199e-06, + "loss": 0.3955, + "step": 4745 + }, + { + "epoch": 0.2147997284453496, + "grad_norm": 0.6731846589000745, + "learning_rate": 9.131311830559864e-06, + "loss": 0.3995, + "step": 4746 + }, + { + "epoch": 0.2148449875537452, + "grad_norm": 0.628819585985676, + "learning_rate": 9.130898942428326e-06, + "loss": 0.3406, + "step": 4747 + }, + { + "epoch": 0.21489024666214077, + "grad_norm": 0.6643307393653383, + "learning_rate": 9.130485965536455e-06, + "loss": 0.4098, + "step": 4748 + }, + { + "epoch": 0.21493550577053633, + "grad_norm": 0.6360776379996099, + "learning_rate": 9.130072899893127e-06, + "loss": 0.4016, + "step": 4749 + }, + { + "epoch": 0.2149807648789319, + "grad_norm": 0.6744085126225986, + "learning_rate": 9.129659745507219e-06, + "loss": 0.405, + "step": 4750 + }, + { + "epoch": 0.21502602398732745, + "grad_norm": 0.6673408539577744, + "learning_rate": 9.129246502387602e-06, + "loss": 0.3999, + "step": 4751 + }, + { + "epoch": 0.21507128309572301, + "grad_norm": 0.7208579860734767, + "learning_rate": 9.128833170543164e-06, + "loss": 0.4242, + "step": 4752 + }, + { + "epoch": 0.21511654220411858, + "grad_norm": 0.5792654836484264, + "learning_rate": 9.12841974998278e-06, + "loss": 0.3537, + "step": 4753 + }, + { + "epoch": 0.21516180131251414, + "grad_norm": 0.6245147101677967, + "learning_rate": 9.128006240715335e-06, + "loss": 0.3769, + "step": 4754 + }, + { + "epoch": 0.2152070604209097, + "grad_norm": 0.7417289049341894, + "learning_rate": 9.127592642749714e-06, + "loss": 0.4466, + "step": 4755 + }, + { + "epoch": 0.21525231952930526, + "grad_norm": 0.7202075800051113, + "learning_rate": 9.127178956094805e-06, + "loss": 0.4151, + "step": 4756 + }, + { + "epoch": 0.21529757863770083, + "grad_norm": 0.6338443835750316, + "learning_rate": 9.126765180759495e-06, + "loss": 0.4143, + "step": 4757 + }, + { + "epoch": 0.2153428377460964, + "grad_norm": 0.7038352222725998, + "learning_rate": 9.126351316752677e-06, + "loss": 0.3949, + "step": 4758 + }, + { + "epoch": 0.21538809685449198, + "grad_norm": 0.7621821057855113, + "learning_rate": 9.125937364083241e-06, + "loss": 0.383, + "step": 4759 + }, + { + "epoch": 0.21543335596288754, + "grad_norm": 0.6183332780913333, + "learning_rate": 9.125523322760084e-06, + "loss": 0.3882, + "step": 4760 + }, + { + "epoch": 0.2154786150712831, + "grad_norm": 0.7084547271369006, + "learning_rate": 9.1251091927921e-06, + "loss": 0.3748, + "step": 4761 + }, + { + "epoch": 0.21552387417967867, + "grad_norm": 0.6274635436111439, + "learning_rate": 9.124694974188188e-06, + "loss": 0.4986, + "step": 4762 + }, + { + "epoch": 0.21556913328807423, + "grad_norm": 0.3987272632360564, + "learning_rate": 9.124280666957251e-06, + "loss": 0.4994, + "step": 4763 + }, + { + "epoch": 0.2156143923964698, + "grad_norm": 0.3045904274637565, + "learning_rate": 9.123866271108188e-06, + "loss": 0.4929, + "step": 4764 + }, + { + "epoch": 0.21565965150486535, + "grad_norm": 0.7009826919682061, + "learning_rate": 9.123451786649906e-06, + "loss": 0.3927, + "step": 4765 + }, + { + "epoch": 0.21570491061326091, + "grad_norm": 0.6765166384139596, + "learning_rate": 9.123037213591308e-06, + "loss": 0.4443, + "step": 4766 + }, + { + "epoch": 0.21575016972165648, + "grad_norm": 0.6274547084962255, + "learning_rate": 9.122622551941303e-06, + "loss": 0.4134, + "step": 4767 + }, + { + "epoch": 0.21579542883005204, + "grad_norm": 0.6800427324859651, + "learning_rate": 9.122207801708802e-06, + "loss": 0.4027, + "step": 4768 + }, + { + "epoch": 0.2158406879384476, + "grad_norm": 0.6689189439956374, + "learning_rate": 9.121792962902715e-06, + "loss": 0.3804, + "step": 4769 + }, + { + "epoch": 0.2158859470468432, + "grad_norm": 1.0224311576834435, + "learning_rate": 9.121378035531957e-06, + "loss": 0.3983, + "step": 4770 + }, + { + "epoch": 0.21593120615523875, + "grad_norm": 0.6625989732933459, + "learning_rate": 9.120963019605442e-06, + "loss": 0.3779, + "step": 4771 + }, + { + "epoch": 0.21597646526363432, + "grad_norm": 0.9989802934708129, + "learning_rate": 9.12054791513209e-06, + "loss": 0.5067, + "step": 4772 + }, + { + "epoch": 0.21602172437202988, + "grad_norm": 0.6473775166086109, + "learning_rate": 9.120132722120817e-06, + "loss": 0.3762, + "step": 4773 + }, + { + "epoch": 0.21606698348042544, + "grad_norm": 0.6851233182293817, + "learning_rate": 9.119717440580547e-06, + "loss": 0.4046, + "step": 4774 + }, + { + "epoch": 0.216112242588821, + "grad_norm": 0.6633570528725057, + "learning_rate": 9.1193020705202e-06, + "loss": 0.3725, + "step": 4775 + }, + { + "epoch": 0.21615750169721656, + "grad_norm": 0.38037237653395206, + "learning_rate": 9.118886611948704e-06, + "loss": 0.4849, + "step": 4776 + }, + { + "epoch": 0.21620276080561213, + "grad_norm": 0.7112060405305283, + "learning_rate": 9.118471064874985e-06, + "loss": 0.3842, + "step": 4777 + }, + { + "epoch": 0.2162480199140077, + "grad_norm": 0.5085666920474634, + "learning_rate": 9.118055429307972e-06, + "loss": 0.4991, + "step": 4778 + }, + { + "epoch": 0.21629327902240325, + "grad_norm": 0.7313166012275039, + "learning_rate": 9.117639705256595e-06, + "loss": 0.4218, + "step": 4779 + }, + { + "epoch": 0.2163385381307988, + "grad_norm": 0.5094894776139752, + "learning_rate": 9.117223892729788e-06, + "loss": 0.5184, + "step": 4780 + }, + { + "epoch": 0.21638379723919438, + "grad_norm": 0.8138243525194827, + "learning_rate": 9.116807991736483e-06, + "loss": 0.4278, + "step": 4781 + }, + { + "epoch": 0.21642905634758997, + "grad_norm": 0.7507256150215487, + "learning_rate": 9.11639200228562e-06, + "loss": 0.3727, + "step": 4782 + }, + { + "epoch": 0.21647431545598553, + "grad_norm": 0.7920366485254184, + "learning_rate": 9.115975924386133e-06, + "loss": 0.4084, + "step": 4783 + }, + { + "epoch": 0.2165195745643811, + "grad_norm": 0.6485474045722099, + "learning_rate": 9.115559758046967e-06, + "loss": 0.3937, + "step": 4784 + }, + { + "epoch": 0.21656483367277665, + "grad_norm": 0.935641414641697, + "learning_rate": 9.115143503277061e-06, + "loss": 0.4281, + "step": 4785 + }, + { + "epoch": 0.21661009278117221, + "grad_norm": 0.690370433047125, + "learning_rate": 9.11472716008536e-06, + "loss": 0.4214, + "step": 4786 + }, + { + "epoch": 0.21665535188956778, + "grad_norm": 0.6883280151352387, + "learning_rate": 9.114310728480809e-06, + "loss": 0.4211, + "step": 4787 + }, + { + "epoch": 0.21670061099796334, + "grad_norm": 0.6955993473765785, + "learning_rate": 9.113894208472357e-06, + "loss": 0.4069, + "step": 4788 + }, + { + "epoch": 0.2167458701063589, + "grad_norm": 0.9616140275679443, + "learning_rate": 9.113477600068954e-06, + "loss": 0.3835, + "step": 4789 + }, + { + "epoch": 0.21679112921475446, + "grad_norm": 0.6925944169611911, + "learning_rate": 9.11306090327955e-06, + "loss": 0.4239, + "step": 4790 + }, + { + "epoch": 0.21683638832315003, + "grad_norm": 0.6841971425614416, + "learning_rate": 9.112644118113098e-06, + "loss": 0.4307, + "step": 4791 + }, + { + "epoch": 0.2168816474315456, + "grad_norm": 0.6420255859979275, + "learning_rate": 9.112227244578557e-06, + "loss": 0.3921, + "step": 4792 + }, + { + "epoch": 0.21692690653994115, + "grad_norm": 1.0857826233798953, + "learning_rate": 9.111810282684883e-06, + "loss": 0.4979, + "step": 4793 + }, + { + "epoch": 0.21697216564833674, + "grad_norm": 0.7103692532900913, + "learning_rate": 9.111393232441033e-06, + "loss": 0.5308, + "step": 4794 + }, + { + "epoch": 0.2170174247567323, + "grad_norm": 0.31048946305929215, + "learning_rate": 9.11097609385597e-06, + "loss": 0.515, + "step": 4795 + }, + { + "epoch": 0.21706268386512786, + "grad_norm": 0.7646353820957875, + "learning_rate": 9.110558866938657e-06, + "loss": 0.3976, + "step": 4796 + }, + { + "epoch": 0.21710794297352343, + "grad_norm": 0.847772148923698, + "learning_rate": 9.110141551698058e-06, + "loss": 0.4052, + "step": 4797 + }, + { + "epoch": 0.217153202081919, + "grad_norm": 0.781170161994958, + "learning_rate": 9.10972414814314e-06, + "loss": 0.3784, + "step": 4798 + }, + { + "epoch": 0.21719846119031455, + "grad_norm": 0.7387783443504868, + "learning_rate": 9.109306656282873e-06, + "loss": 0.4047, + "step": 4799 + }, + { + "epoch": 0.2172437202987101, + "grad_norm": 0.7776877608382816, + "learning_rate": 9.108889076126226e-06, + "loss": 0.415, + "step": 4800 + }, + { + "epoch": 0.21728897940710568, + "grad_norm": 1.8493601277062992, + "learning_rate": 9.108471407682173e-06, + "loss": 0.5213, + "step": 4801 + }, + { + "epoch": 0.21733423851550124, + "grad_norm": 0.7543258013996105, + "learning_rate": 9.108053650959687e-06, + "loss": 0.401, + "step": 4802 + }, + { + "epoch": 0.2173794976238968, + "grad_norm": 0.6386692161119694, + "learning_rate": 9.107635805967746e-06, + "loss": 0.4269, + "step": 4803 + }, + { + "epoch": 0.21742475673229236, + "grad_norm": 0.7614681871635838, + "learning_rate": 9.107217872715326e-06, + "loss": 0.5302, + "step": 4804 + }, + { + "epoch": 0.21747001584068792, + "grad_norm": 0.7008794665046963, + "learning_rate": 9.10679985121141e-06, + "loss": 0.4, + "step": 4805 + }, + { + "epoch": 0.21751527494908351, + "grad_norm": 0.6508911059006347, + "learning_rate": 9.106381741464976e-06, + "loss": 0.5107, + "step": 4806 + }, + { + "epoch": 0.21756053405747908, + "grad_norm": 0.7100459278013462, + "learning_rate": 9.105963543485012e-06, + "loss": 0.3736, + "step": 4807 + }, + { + "epoch": 0.21760579316587464, + "grad_norm": 0.7069088764353862, + "learning_rate": 9.105545257280502e-06, + "loss": 0.4255, + "step": 4808 + }, + { + "epoch": 0.2176510522742702, + "grad_norm": 0.7355101644306209, + "learning_rate": 9.105126882860431e-06, + "loss": 0.4482, + "step": 4809 + }, + { + "epoch": 0.21769631138266576, + "grad_norm": 0.6537061814493169, + "learning_rate": 9.104708420233794e-06, + "loss": 0.3661, + "step": 4810 + }, + { + "epoch": 0.21774157049106133, + "grad_norm": 0.7340349633672104, + "learning_rate": 9.104289869409577e-06, + "loss": 0.3673, + "step": 4811 + }, + { + "epoch": 0.2177868295994569, + "grad_norm": 0.6970942224495815, + "learning_rate": 9.103871230396778e-06, + "loss": 0.3988, + "step": 4812 + }, + { + "epoch": 0.21783208870785245, + "grad_norm": 0.6889946495799093, + "learning_rate": 9.10345250320439e-06, + "loss": 0.3794, + "step": 4813 + }, + { + "epoch": 0.217877347816248, + "grad_norm": 1.2788229066156596, + "learning_rate": 9.103033687841412e-06, + "loss": 0.4945, + "step": 4814 + }, + { + "epoch": 0.21792260692464357, + "grad_norm": 0.6827859617043349, + "learning_rate": 9.10261478431684e-06, + "loss": 0.3967, + "step": 4815 + }, + { + "epoch": 0.21796786603303914, + "grad_norm": 0.6939430838461157, + "learning_rate": 9.102195792639677e-06, + "loss": 0.4055, + "step": 4816 + }, + { + "epoch": 0.21801312514143473, + "grad_norm": 0.6831299091964597, + "learning_rate": 9.101776712818924e-06, + "loss": 0.4031, + "step": 4817 + }, + { + "epoch": 0.2180583842498303, + "grad_norm": 0.6629483845316078, + "learning_rate": 9.101357544863589e-06, + "loss": 0.398, + "step": 4818 + }, + { + "epoch": 0.21810364335822585, + "grad_norm": 0.7171445450976456, + "learning_rate": 9.100938288782675e-06, + "loss": 0.4025, + "step": 4819 + }, + { + "epoch": 0.2181489024666214, + "grad_norm": 0.5089509713601021, + "learning_rate": 9.100518944585194e-06, + "loss": 0.4927, + "step": 4820 + }, + { + "epoch": 0.21819416157501698, + "grad_norm": 0.7030141717004029, + "learning_rate": 9.100099512280155e-06, + "loss": 0.4026, + "step": 4821 + }, + { + "epoch": 0.21823942068341254, + "grad_norm": 0.668643383248003, + "learning_rate": 9.099679991876567e-06, + "loss": 0.3793, + "step": 4822 + }, + { + "epoch": 0.2182846797918081, + "grad_norm": 0.6782888733885742, + "learning_rate": 9.09926038338345e-06, + "loss": 0.3817, + "step": 4823 + }, + { + "epoch": 0.21832993890020366, + "grad_norm": 0.6695192386556748, + "learning_rate": 9.098840686809816e-06, + "loss": 0.354, + "step": 4824 + }, + { + "epoch": 0.21837519800859923, + "grad_norm": 0.6071035795399119, + "learning_rate": 9.098420902164684e-06, + "loss": 0.5142, + "step": 4825 + }, + { + "epoch": 0.2184204571169948, + "grad_norm": 0.6713769264047075, + "learning_rate": 9.098001029457074e-06, + "loss": 0.4157, + "step": 4826 + }, + { + "epoch": 0.21846571622539035, + "grad_norm": 0.6264425918657583, + "learning_rate": 9.097581068696009e-06, + "loss": 0.3699, + "step": 4827 + }, + { + "epoch": 0.2185109753337859, + "grad_norm": 0.6349502047551863, + "learning_rate": 9.09716101989051e-06, + "loss": 0.4022, + "step": 4828 + }, + { + "epoch": 0.2185562344421815, + "grad_norm": 0.6473338464170852, + "learning_rate": 9.096740883049606e-06, + "loss": 0.3828, + "step": 4829 + }, + { + "epoch": 0.21860149355057706, + "grad_norm": 0.6804309986334778, + "learning_rate": 9.096320658182323e-06, + "loss": 0.3805, + "step": 4830 + }, + { + "epoch": 0.21864675265897263, + "grad_norm": 0.4044106437016609, + "learning_rate": 9.095900345297688e-06, + "loss": 0.5005, + "step": 4831 + }, + { + "epoch": 0.2186920117673682, + "grad_norm": 0.6281173047090177, + "learning_rate": 9.095479944404735e-06, + "loss": 0.3727, + "step": 4832 + }, + { + "epoch": 0.21873727087576375, + "grad_norm": 0.6724742320266274, + "learning_rate": 9.095059455512496e-06, + "loss": 0.4261, + "step": 4833 + }, + { + "epoch": 0.2187825299841593, + "grad_norm": 0.6443652166595981, + "learning_rate": 9.094638878630007e-06, + "loss": 0.3509, + "step": 4834 + }, + { + "epoch": 0.21882778909255488, + "grad_norm": 1.0013061561516774, + "learning_rate": 9.094218213766304e-06, + "loss": 0.3848, + "step": 4835 + }, + { + "epoch": 0.21887304820095044, + "grad_norm": 0.957768121562034, + "learning_rate": 9.093797460930426e-06, + "loss": 0.4139, + "step": 4836 + }, + { + "epoch": 0.218918307309346, + "grad_norm": 0.7983684200974046, + "learning_rate": 9.093376620131414e-06, + "loss": 0.4067, + "step": 4837 + }, + { + "epoch": 0.21896356641774156, + "grad_norm": 0.648737570449002, + "learning_rate": 9.09295569137831e-06, + "loss": 0.4392, + "step": 4838 + }, + { + "epoch": 0.21900882552613712, + "grad_norm": 0.7519313642500751, + "learning_rate": 9.092534674680158e-06, + "loss": 0.3965, + "step": 4839 + }, + { + "epoch": 0.2190540846345327, + "grad_norm": 0.6612125002526383, + "learning_rate": 9.092113570046005e-06, + "loss": 0.397, + "step": 4840 + }, + { + "epoch": 0.21909934374292828, + "grad_norm": 0.6496470297316416, + "learning_rate": 9.0916923774849e-06, + "loss": 0.3922, + "step": 4841 + }, + { + "epoch": 0.21914460285132384, + "grad_norm": 0.6049859360584547, + "learning_rate": 9.091271097005894e-06, + "loss": 0.3593, + "step": 4842 + }, + { + "epoch": 0.2191898619597194, + "grad_norm": 0.66596102844228, + "learning_rate": 9.090849728618034e-06, + "loss": 0.3995, + "step": 4843 + }, + { + "epoch": 0.21923512106811496, + "grad_norm": 0.658302161673842, + "learning_rate": 9.090428272330381e-06, + "loss": 0.397, + "step": 4844 + }, + { + "epoch": 0.21928038017651053, + "grad_norm": 0.7329682214731195, + "learning_rate": 9.090006728151986e-06, + "loss": 0.4294, + "step": 4845 + }, + { + "epoch": 0.2193256392849061, + "grad_norm": 0.7542852602708906, + "learning_rate": 9.089585096091906e-06, + "loss": 0.4163, + "step": 4846 + }, + { + "epoch": 0.21937089839330165, + "grad_norm": 0.7274274023986838, + "learning_rate": 9.089163376159205e-06, + "loss": 0.4238, + "step": 4847 + }, + { + "epoch": 0.2194161575016972, + "grad_norm": 0.6175911757542972, + "learning_rate": 9.08874156836294e-06, + "loss": 0.4075, + "step": 4848 + }, + { + "epoch": 0.21946141661009277, + "grad_norm": 0.6682924426530112, + "learning_rate": 9.088319672712179e-06, + "loss": 0.4295, + "step": 4849 + }, + { + "epoch": 0.21950667571848834, + "grad_norm": 1.1939628574369072, + "learning_rate": 9.087897689215983e-06, + "loss": 0.4059, + "step": 4850 + }, + { + "epoch": 0.2195519348268839, + "grad_norm": 0.43468805882083733, + "learning_rate": 9.087475617883419e-06, + "loss": 0.5302, + "step": 4851 + }, + { + "epoch": 0.2195971939352795, + "grad_norm": 0.6512979730787424, + "learning_rate": 9.08705345872356e-06, + "loss": 0.3914, + "step": 4852 + }, + { + "epoch": 0.21964245304367505, + "grad_norm": 0.6743507359541027, + "learning_rate": 9.086631211745474e-06, + "loss": 0.3487, + "step": 4853 + }, + { + "epoch": 0.2196877121520706, + "grad_norm": 0.35648851900559125, + "learning_rate": 9.086208876958233e-06, + "loss": 0.5257, + "step": 4854 + }, + { + "epoch": 0.21973297126046618, + "grad_norm": 0.6652147485374682, + "learning_rate": 9.085786454370915e-06, + "loss": 0.3392, + "step": 4855 + }, + { + "epoch": 0.21977823036886174, + "grad_norm": 0.7496692106275002, + "learning_rate": 9.085363943992593e-06, + "loss": 0.4144, + "step": 4856 + }, + { + "epoch": 0.2198234894772573, + "grad_norm": 0.3034966816973855, + "learning_rate": 9.084941345832348e-06, + "loss": 0.4836, + "step": 4857 + }, + { + "epoch": 0.21986874858565286, + "grad_norm": 0.6818992193702953, + "learning_rate": 9.08451865989926e-06, + "loss": 0.3607, + "step": 4858 + }, + { + "epoch": 0.21991400769404842, + "grad_norm": 0.734131224447571, + "learning_rate": 9.08409588620241e-06, + "loss": 0.4213, + "step": 4859 + }, + { + "epoch": 0.219959266802444, + "grad_norm": 0.7196822706743875, + "learning_rate": 9.083673024750882e-06, + "loss": 0.3966, + "step": 4860 + }, + { + "epoch": 0.22000452591083955, + "grad_norm": 0.7125834370384616, + "learning_rate": 9.083250075553765e-06, + "loss": 0.3849, + "step": 4861 + }, + { + "epoch": 0.2200497850192351, + "grad_norm": 0.7844549672485713, + "learning_rate": 9.082827038620143e-06, + "loss": 0.4505, + "step": 4862 + }, + { + "epoch": 0.22009504412763067, + "grad_norm": 0.6848019492200854, + "learning_rate": 9.082403913959109e-06, + "loss": 0.3821, + "step": 4863 + }, + { + "epoch": 0.22014030323602626, + "grad_norm": 0.34865430290147625, + "learning_rate": 9.08198070157975e-06, + "loss": 0.517, + "step": 4864 + }, + { + "epoch": 0.22018556234442183, + "grad_norm": 0.5992614487673076, + "learning_rate": 9.081557401491164e-06, + "loss": 0.3561, + "step": 4865 + }, + { + "epoch": 0.2202308214528174, + "grad_norm": 0.6710248014391402, + "learning_rate": 9.081134013702447e-06, + "loss": 0.3714, + "step": 4866 + }, + { + "epoch": 0.22027608056121295, + "grad_norm": 0.6860968982296224, + "learning_rate": 9.080710538222692e-06, + "loss": 0.4516, + "step": 4867 + }, + { + "epoch": 0.2203213396696085, + "grad_norm": 0.7244537982890571, + "learning_rate": 9.080286975061e-06, + "loss": 0.4373, + "step": 4868 + }, + { + "epoch": 0.22036659877800407, + "grad_norm": 0.6788330842807545, + "learning_rate": 9.079863324226473e-06, + "loss": 0.4273, + "step": 4869 + }, + { + "epoch": 0.22041185788639964, + "grad_norm": 0.6624675454264172, + "learning_rate": 9.079439585728214e-06, + "loss": 0.3791, + "step": 4870 + }, + { + "epoch": 0.2204571169947952, + "grad_norm": 0.6730079736173539, + "learning_rate": 9.079015759575327e-06, + "loss": 0.3956, + "step": 4871 + }, + { + "epoch": 0.22050237610319076, + "grad_norm": 0.699365885683548, + "learning_rate": 9.078591845776921e-06, + "loss": 0.3508, + "step": 4872 + }, + { + "epoch": 0.22054763521158632, + "grad_norm": 0.6799562739889206, + "learning_rate": 9.0781678443421e-06, + "loss": 0.4171, + "step": 4873 + }, + { + "epoch": 0.22059289431998189, + "grad_norm": 0.6385646491210312, + "learning_rate": 9.077743755279977e-06, + "loss": 0.3761, + "step": 4874 + }, + { + "epoch": 0.22063815342837745, + "grad_norm": 0.6774749292746666, + "learning_rate": 9.077319578599667e-06, + "loss": 0.3867, + "step": 4875 + }, + { + "epoch": 0.22068341253677304, + "grad_norm": 0.6287712017883627, + "learning_rate": 9.076895314310282e-06, + "loss": 0.3534, + "step": 4876 + }, + { + "epoch": 0.2207286716451686, + "grad_norm": 0.6665113821873541, + "learning_rate": 9.076470962420935e-06, + "loss": 0.3711, + "step": 4877 + }, + { + "epoch": 0.22077393075356416, + "grad_norm": 0.6834998561502025, + "learning_rate": 9.076046522940749e-06, + "loss": 0.3757, + "step": 4878 + }, + { + "epoch": 0.22081918986195972, + "grad_norm": 0.7744574841169455, + "learning_rate": 9.075621995878841e-06, + "loss": 0.4163, + "step": 4879 + }, + { + "epoch": 0.2208644489703553, + "grad_norm": 0.6814692976919767, + "learning_rate": 9.075197381244333e-06, + "loss": 0.407, + "step": 4880 + }, + { + "epoch": 0.22090970807875085, + "grad_norm": 0.3696973213244952, + "learning_rate": 9.074772679046351e-06, + "loss": 0.5034, + "step": 4881 + }, + { + "epoch": 0.2209549671871464, + "grad_norm": 0.6658175315998246, + "learning_rate": 9.074347889294017e-06, + "loss": 0.3985, + "step": 4882 + }, + { + "epoch": 0.22100022629554197, + "grad_norm": 0.7359226606161277, + "learning_rate": 9.073923011996462e-06, + "loss": 0.424, + "step": 4883 + }, + { + "epoch": 0.22104548540393754, + "grad_norm": 0.3223135860462851, + "learning_rate": 9.073498047162813e-06, + "loss": 0.496, + "step": 4884 + }, + { + "epoch": 0.2210907445123331, + "grad_norm": 0.6504920645333246, + "learning_rate": 9.073072994802202e-06, + "loss": 0.4055, + "step": 4885 + }, + { + "epoch": 0.22113600362072866, + "grad_norm": 0.7523501341301879, + "learning_rate": 9.072647854923763e-06, + "loss": 0.3905, + "step": 4886 + }, + { + "epoch": 0.22118126272912422, + "grad_norm": 0.7486959836693489, + "learning_rate": 9.072222627536627e-06, + "loss": 0.3838, + "step": 4887 + }, + { + "epoch": 0.2212265218375198, + "grad_norm": 0.6528488225448029, + "learning_rate": 9.071797312649934e-06, + "loss": 0.3548, + "step": 4888 + }, + { + "epoch": 0.22127178094591538, + "grad_norm": 0.6901906657633865, + "learning_rate": 9.071371910272823e-06, + "loss": 0.3931, + "step": 4889 + }, + { + "epoch": 0.22131704005431094, + "grad_norm": 0.7001577349443593, + "learning_rate": 9.070946420414435e-06, + "loss": 0.3866, + "step": 4890 + }, + { + "epoch": 0.2213622991627065, + "grad_norm": 0.801922623369581, + "learning_rate": 9.07052084308391e-06, + "loss": 0.392, + "step": 4891 + }, + { + "epoch": 0.22140755827110206, + "grad_norm": 0.7677849535633429, + "learning_rate": 9.070095178290394e-06, + "loss": 0.414, + "step": 4892 + }, + { + "epoch": 0.22145281737949762, + "grad_norm": 0.6805848241897616, + "learning_rate": 9.069669426043033e-06, + "loss": 0.3772, + "step": 4893 + }, + { + "epoch": 0.2214980764878932, + "grad_norm": 0.4350282351307276, + "learning_rate": 9.069243586350976e-06, + "loss": 0.5201, + "step": 4894 + }, + { + "epoch": 0.22154333559628875, + "grad_norm": 0.7355164443478349, + "learning_rate": 9.068817659223371e-06, + "loss": 0.3992, + "step": 4895 + }, + { + "epoch": 0.2215885947046843, + "grad_norm": 0.7023395182277847, + "learning_rate": 9.068391644669371e-06, + "loss": 0.406, + "step": 4896 + }, + { + "epoch": 0.22163385381307987, + "grad_norm": 0.7063160229165169, + "learning_rate": 9.067965542698129e-06, + "loss": 0.41, + "step": 4897 + }, + { + "epoch": 0.22167911292147544, + "grad_norm": 0.6950126592196099, + "learning_rate": 9.067539353318804e-06, + "loss": 0.4181, + "step": 4898 + }, + { + "epoch": 0.22172437202987103, + "grad_norm": 0.6869503147100258, + "learning_rate": 9.067113076540547e-06, + "loss": 0.4187, + "step": 4899 + }, + { + "epoch": 0.2217696311382666, + "grad_norm": 0.6871055919686695, + "learning_rate": 9.066686712372524e-06, + "loss": 0.3908, + "step": 4900 + }, + { + "epoch": 0.22181489024666215, + "grad_norm": 0.6954917745019584, + "learning_rate": 9.066260260823893e-06, + "loss": 0.369, + "step": 4901 + }, + { + "epoch": 0.2218601493550577, + "grad_norm": 0.6749196877672734, + "learning_rate": 9.065833721903817e-06, + "loss": 0.4043, + "step": 4902 + }, + { + "epoch": 0.22190540846345327, + "grad_norm": 0.6561073305341862, + "learning_rate": 9.065407095621462e-06, + "loss": 0.4153, + "step": 4903 + }, + { + "epoch": 0.22195066757184884, + "grad_norm": 0.6712022464959736, + "learning_rate": 9.064980381985993e-06, + "loss": 0.3959, + "step": 4904 + }, + { + "epoch": 0.2219959266802444, + "grad_norm": 0.8511922636720965, + "learning_rate": 9.064553581006583e-06, + "loss": 0.3698, + "step": 4905 + }, + { + "epoch": 0.22204118578863996, + "grad_norm": 0.6086633175646591, + "learning_rate": 9.064126692692397e-06, + "loss": 0.4295, + "step": 4906 + }, + { + "epoch": 0.22208644489703552, + "grad_norm": 0.5905928955912909, + "learning_rate": 9.063699717052612e-06, + "loss": 0.373, + "step": 4907 + }, + { + "epoch": 0.22213170400543109, + "grad_norm": 0.4037618393026883, + "learning_rate": 9.0632726540964e-06, + "loss": 0.5065, + "step": 4908 + }, + { + "epoch": 0.22217696311382665, + "grad_norm": 0.7951152684635662, + "learning_rate": 9.06284550383294e-06, + "loss": 0.4814, + "step": 4909 + }, + { + "epoch": 0.2222222222222222, + "grad_norm": 0.7227826564261085, + "learning_rate": 9.062418266271406e-06, + "loss": 0.3257, + "step": 4910 + }, + { + "epoch": 0.2222674813306178, + "grad_norm": 0.6299762399568244, + "learning_rate": 9.06199094142098e-06, + "loss": 0.3548, + "step": 4911 + }, + { + "epoch": 0.22231274043901336, + "grad_norm": 0.6391259383273397, + "learning_rate": 9.061563529290845e-06, + "loss": 0.3968, + "step": 4912 + }, + { + "epoch": 0.22235799954740892, + "grad_norm": 0.6254815763024072, + "learning_rate": 9.061136029890186e-06, + "loss": 0.4148, + "step": 4913 + }, + { + "epoch": 0.2224032586558045, + "grad_norm": 0.2979564299577659, + "learning_rate": 9.060708443228184e-06, + "loss": 0.512, + "step": 4914 + }, + { + "epoch": 0.22244851776420005, + "grad_norm": 0.7084697271068684, + "learning_rate": 9.060280769314028e-06, + "loss": 0.4223, + "step": 4915 + }, + { + "epoch": 0.2224937768725956, + "grad_norm": 0.6349471347363206, + "learning_rate": 9.05985300815691e-06, + "loss": 0.3844, + "step": 4916 + }, + { + "epoch": 0.22253903598099117, + "grad_norm": 2.6816014194505997, + "learning_rate": 9.05942515976602e-06, + "loss": 0.3938, + "step": 4917 + }, + { + "epoch": 0.22258429508938674, + "grad_norm": 0.6663388143533746, + "learning_rate": 9.05899722415055e-06, + "loss": 0.3803, + "step": 4918 + }, + { + "epoch": 0.2226295541977823, + "grad_norm": 0.720519900506803, + "learning_rate": 9.058569201319696e-06, + "loss": 0.374, + "step": 4919 + }, + { + "epoch": 0.22267481330617786, + "grad_norm": 0.6702356758327594, + "learning_rate": 9.058141091282656e-06, + "loss": 0.3908, + "step": 4920 + }, + { + "epoch": 0.22272007241457342, + "grad_norm": 0.6841496388893396, + "learning_rate": 9.057712894048627e-06, + "loss": 0.3944, + "step": 4921 + }, + { + "epoch": 0.22276533152296898, + "grad_norm": 0.6330740612123831, + "learning_rate": 9.05728460962681e-06, + "loss": 0.3823, + "step": 4922 + }, + { + "epoch": 0.22281059063136457, + "grad_norm": 0.6340769606214909, + "learning_rate": 9.056856238026408e-06, + "loss": 0.4255, + "step": 4923 + }, + { + "epoch": 0.22285584973976014, + "grad_norm": 0.6726079933198121, + "learning_rate": 9.056427779256624e-06, + "loss": 0.3963, + "step": 4924 + }, + { + "epoch": 0.2229011088481557, + "grad_norm": 0.3608419911007155, + "learning_rate": 9.055999233326667e-06, + "loss": 0.5117, + "step": 4925 + }, + { + "epoch": 0.22294636795655126, + "grad_norm": 1.0467144881169608, + "learning_rate": 9.055570600245744e-06, + "loss": 0.3622, + "step": 4926 + }, + { + "epoch": 0.22299162706494682, + "grad_norm": 0.7188640558240326, + "learning_rate": 9.055141880023062e-06, + "loss": 0.3882, + "step": 4927 + }, + { + "epoch": 0.22303688617334239, + "grad_norm": 0.7274003485447285, + "learning_rate": 9.054713072667838e-06, + "loss": 0.4102, + "step": 4928 + }, + { + "epoch": 0.22308214528173795, + "grad_norm": 0.35090527102288355, + "learning_rate": 9.054284178189281e-06, + "loss": 0.488, + "step": 4929 + }, + { + "epoch": 0.2231274043901335, + "grad_norm": 0.643082305355415, + "learning_rate": 9.05385519659661e-06, + "loss": 0.3465, + "step": 4930 + }, + { + "epoch": 0.22317266349852907, + "grad_norm": 0.6488098190014425, + "learning_rate": 9.05342612789904e-06, + "loss": 0.4201, + "step": 4931 + }, + { + "epoch": 0.22321792260692463, + "grad_norm": 0.6559485495898078, + "learning_rate": 9.052996972105794e-06, + "loss": 0.3536, + "step": 4932 + }, + { + "epoch": 0.2232631817153202, + "grad_norm": 0.6305430257580593, + "learning_rate": 9.052567729226089e-06, + "loss": 0.3803, + "step": 4933 + }, + { + "epoch": 0.22330844082371576, + "grad_norm": 0.693319002377718, + "learning_rate": 9.052138399269153e-06, + "loss": 0.404, + "step": 4934 + }, + { + "epoch": 0.22335369993211135, + "grad_norm": 0.7031246161246214, + "learning_rate": 9.051708982244205e-06, + "loss": 0.4099, + "step": 4935 + }, + { + "epoch": 0.2233989590405069, + "grad_norm": 0.3962595002461269, + "learning_rate": 9.051279478160475e-06, + "loss": 0.5132, + "step": 4936 + }, + { + "epoch": 0.22344421814890247, + "grad_norm": 0.6743678198292562, + "learning_rate": 9.050849887027192e-06, + "loss": 0.3888, + "step": 4937 + }, + { + "epoch": 0.22348947725729804, + "grad_norm": 0.7553552357137391, + "learning_rate": 9.050420208853587e-06, + "loss": 0.3712, + "step": 4938 + }, + { + "epoch": 0.2235347363656936, + "grad_norm": 0.6468895793062895, + "learning_rate": 9.04999044364889e-06, + "loss": 0.3895, + "step": 4939 + }, + { + "epoch": 0.22357999547408916, + "grad_norm": 0.6049019690618703, + "learning_rate": 9.049560591422339e-06, + "loss": 0.3454, + "step": 4940 + }, + { + "epoch": 0.22362525458248472, + "grad_norm": 0.6421990664350431, + "learning_rate": 9.049130652183167e-06, + "loss": 0.3908, + "step": 4941 + }, + { + "epoch": 0.22367051369088028, + "grad_norm": 0.6610407945684632, + "learning_rate": 9.048700625940613e-06, + "loss": 0.3668, + "step": 4942 + }, + { + "epoch": 0.22371577279927585, + "grad_norm": 0.3797001342865075, + "learning_rate": 9.048270512703917e-06, + "loss": 0.5241, + "step": 4943 + }, + { + "epoch": 0.2237610319076714, + "grad_norm": 0.3268726665548248, + "learning_rate": 9.04784031248232e-06, + "loss": 0.5158, + "step": 4944 + }, + { + "epoch": 0.22380629101606697, + "grad_norm": 0.7192392891866869, + "learning_rate": 9.04741002528507e-06, + "loss": 0.4637, + "step": 4945 + }, + { + "epoch": 0.22385155012446256, + "grad_norm": 0.6991235003684834, + "learning_rate": 9.046979651121407e-06, + "loss": 0.3877, + "step": 4946 + }, + { + "epoch": 0.22389680923285812, + "grad_norm": 0.39804851670087743, + "learning_rate": 9.04654919000058e-06, + "loss": 0.5109, + "step": 4947 + }, + { + "epoch": 0.22394206834125369, + "grad_norm": 0.37682713032256965, + "learning_rate": 9.046118641931841e-06, + "loss": 0.471, + "step": 4948 + }, + { + "epoch": 0.22398732744964925, + "grad_norm": 0.7482524081049696, + "learning_rate": 9.045688006924438e-06, + "loss": 0.4281, + "step": 4949 + }, + { + "epoch": 0.2240325865580448, + "grad_norm": 0.6714930277958379, + "learning_rate": 9.045257284987625e-06, + "loss": 0.3682, + "step": 4950 + }, + { + "epoch": 0.22407784566644037, + "grad_norm": 0.668180324072035, + "learning_rate": 9.044826476130657e-06, + "loss": 0.3829, + "step": 4951 + }, + { + "epoch": 0.22412310477483594, + "grad_norm": 0.6081873599786615, + "learning_rate": 9.04439558036279e-06, + "loss": 0.3952, + "step": 4952 + }, + { + "epoch": 0.2241683638832315, + "grad_norm": 0.46132916924040257, + "learning_rate": 9.043964597693285e-06, + "loss": 0.5081, + "step": 4953 + }, + { + "epoch": 0.22421362299162706, + "grad_norm": 0.7123279512728615, + "learning_rate": 9.043533528131401e-06, + "loss": 0.3934, + "step": 4954 + }, + { + "epoch": 0.22425888210002262, + "grad_norm": 0.6788394561902364, + "learning_rate": 9.0431023716864e-06, + "loss": 0.412, + "step": 4955 + }, + { + "epoch": 0.22430414120841818, + "grad_norm": 0.6516290357865095, + "learning_rate": 9.042671128367545e-06, + "loss": 0.4584, + "step": 4956 + }, + { + "epoch": 0.22434940031681375, + "grad_norm": 0.694717466352686, + "learning_rate": 9.042239798184104e-06, + "loss": 0.3739, + "step": 4957 + }, + { + "epoch": 0.22439465942520934, + "grad_norm": 0.6566955026673125, + "learning_rate": 9.041808381145345e-06, + "loss": 0.426, + "step": 4958 + }, + { + "epoch": 0.2244399185336049, + "grad_norm": 0.7044272633799166, + "learning_rate": 9.041376877260537e-06, + "loss": 0.3974, + "step": 4959 + }, + { + "epoch": 0.22448517764200046, + "grad_norm": 0.36168351486043804, + "learning_rate": 9.040945286538954e-06, + "loss": 0.483, + "step": 4960 + }, + { + "epoch": 0.22453043675039602, + "grad_norm": 0.7206470073148731, + "learning_rate": 9.040513608989865e-06, + "loss": 0.4604, + "step": 4961 + }, + { + "epoch": 0.22457569585879159, + "grad_norm": 0.6872488577391577, + "learning_rate": 9.040081844622549e-06, + "loss": 0.3785, + "step": 4962 + }, + { + "epoch": 0.22462095496718715, + "grad_norm": 0.6441005996153664, + "learning_rate": 9.039649993446282e-06, + "loss": 0.3796, + "step": 4963 + }, + { + "epoch": 0.2246662140755827, + "grad_norm": 0.6648495871965038, + "learning_rate": 9.039218055470345e-06, + "loss": 0.3969, + "step": 4964 + }, + { + "epoch": 0.22471147318397827, + "grad_norm": 0.6206659315926869, + "learning_rate": 9.038786030704015e-06, + "loss": 0.4097, + "step": 4965 + }, + { + "epoch": 0.22475673229237383, + "grad_norm": 0.572166293045714, + "learning_rate": 9.038353919156579e-06, + "loss": 0.3693, + "step": 4966 + }, + { + "epoch": 0.2248019914007694, + "grad_norm": 0.688238215927942, + "learning_rate": 9.03792172083732e-06, + "loss": 0.393, + "step": 4967 + }, + { + "epoch": 0.22484725050916496, + "grad_norm": 0.6426199823718562, + "learning_rate": 9.037489435755525e-06, + "loss": 0.385, + "step": 4968 + }, + { + "epoch": 0.22489250961756052, + "grad_norm": 0.6863085194919177, + "learning_rate": 9.037057063920482e-06, + "loss": 0.4335, + "step": 4969 + }, + { + "epoch": 0.2249377687259561, + "grad_norm": 0.3480317198597673, + "learning_rate": 9.03662460534148e-06, + "loss": 0.4737, + "step": 4970 + }, + { + "epoch": 0.22498302783435167, + "grad_norm": 0.7441531712838492, + "learning_rate": 9.036192060027815e-06, + "loss": 0.4075, + "step": 4971 + }, + { + "epoch": 0.22502828694274724, + "grad_norm": 0.6849480310304413, + "learning_rate": 9.035759427988779e-06, + "loss": 0.3916, + "step": 4972 + }, + { + "epoch": 0.2250735460511428, + "grad_norm": 0.6509099795779941, + "learning_rate": 9.035326709233666e-06, + "loss": 0.3677, + "step": 4973 + }, + { + "epoch": 0.22511880515953836, + "grad_norm": 0.6816022275651715, + "learning_rate": 9.034893903771776e-06, + "loss": 0.3982, + "step": 4974 + }, + { + "epoch": 0.22516406426793392, + "grad_norm": 0.656449812683951, + "learning_rate": 9.034461011612408e-06, + "loss": 0.3834, + "step": 4975 + }, + { + "epoch": 0.22520932337632948, + "grad_norm": 0.37343104904151614, + "learning_rate": 9.034028032764866e-06, + "loss": 0.5213, + "step": 4976 + }, + { + "epoch": 0.22525458248472505, + "grad_norm": 0.6434372858287496, + "learning_rate": 9.033594967238449e-06, + "loss": 0.3551, + "step": 4977 + }, + { + "epoch": 0.2252998415931206, + "grad_norm": 0.635442460720965, + "learning_rate": 9.033161815042465e-06, + "loss": 0.385, + "step": 4978 + }, + { + "epoch": 0.22534510070151617, + "grad_norm": 0.31351963290651574, + "learning_rate": 9.032728576186221e-06, + "loss": 0.5183, + "step": 4979 + }, + { + "epoch": 0.22539035980991173, + "grad_norm": 0.7047869382640914, + "learning_rate": 9.032295250679024e-06, + "loss": 0.4107, + "step": 4980 + }, + { + "epoch": 0.2254356189183073, + "grad_norm": 0.802688035345281, + "learning_rate": 9.031861838530187e-06, + "loss": 0.3933, + "step": 4981 + }, + { + "epoch": 0.22548087802670289, + "grad_norm": 0.6748141649156711, + "learning_rate": 9.031428339749023e-06, + "loss": 0.3806, + "step": 4982 + }, + { + "epoch": 0.22552613713509845, + "grad_norm": 0.31771599393442523, + "learning_rate": 9.030994754344845e-06, + "loss": 0.4662, + "step": 4983 + }, + { + "epoch": 0.225571396243494, + "grad_norm": 0.6717370322459778, + "learning_rate": 9.03056108232697e-06, + "loss": 0.3726, + "step": 4984 + }, + { + "epoch": 0.22561665535188957, + "grad_norm": 0.3275985126479388, + "learning_rate": 9.030127323704716e-06, + "loss": 0.5291, + "step": 4985 + }, + { + "epoch": 0.22566191446028513, + "grad_norm": 0.6814558422571404, + "learning_rate": 9.029693478487403e-06, + "loss": 0.4068, + "step": 4986 + }, + { + "epoch": 0.2257071735686807, + "grad_norm": 0.7062955257138119, + "learning_rate": 9.029259546684352e-06, + "loss": 0.3794, + "step": 4987 + }, + { + "epoch": 0.22575243267707626, + "grad_norm": 0.29081694014049086, + "learning_rate": 9.028825528304892e-06, + "loss": 0.4816, + "step": 4988 + }, + { + "epoch": 0.22579769178547182, + "grad_norm": 0.7861575746631435, + "learning_rate": 9.028391423358343e-06, + "loss": 0.3616, + "step": 4989 + }, + { + "epoch": 0.22584295089386738, + "grad_norm": 0.644040995539964, + "learning_rate": 9.027957231854034e-06, + "loss": 0.3572, + "step": 4990 + }, + { + "epoch": 0.22588821000226295, + "grad_norm": 0.6394027967714493, + "learning_rate": 9.027522953801296e-06, + "loss": 0.3785, + "step": 4991 + }, + { + "epoch": 0.2259334691106585, + "grad_norm": 0.662027116266261, + "learning_rate": 9.027088589209458e-06, + "loss": 0.3816, + "step": 4992 + }, + { + "epoch": 0.2259787282190541, + "grad_norm": 0.6663392960488509, + "learning_rate": 9.026654138087857e-06, + "loss": 0.4121, + "step": 4993 + }, + { + "epoch": 0.22602398732744966, + "grad_norm": 0.6182570656575896, + "learning_rate": 9.026219600445824e-06, + "loss": 0.3979, + "step": 4994 + }, + { + "epoch": 0.22606924643584522, + "grad_norm": 0.6965154917042926, + "learning_rate": 9.025784976292698e-06, + "loss": 0.3589, + "step": 4995 + }, + { + "epoch": 0.22611450554424078, + "grad_norm": 0.6926703656472599, + "learning_rate": 9.025350265637816e-06, + "loss": 0.379, + "step": 4996 + }, + { + "epoch": 0.22615976465263635, + "grad_norm": 0.6645148067448459, + "learning_rate": 9.02491546849052e-06, + "loss": 0.3825, + "step": 4997 + }, + { + "epoch": 0.2262050237610319, + "grad_norm": 0.6664176952365746, + "learning_rate": 9.024480584860151e-06, + "loss": 0.4086, + "step": 4998 + }, + { + "epoch": 0.22625028286942747, + "grad_norm": 0.6365534795201367, + "learning_rate": 9.024045614756056e-06, + "loss": 0.405, + "step": 4999 + }, + { + "epoch": 0.22629554197782303, + "grad_norm": 0.6339182513959327, + "learning_rate": 9.02361055818758e-06, + "loss": 0.389, + "step": 5000 + }, + { + "epoch": 0.2263408010862186, + "grad_norm": 0.6353193403763745, + "learning_rate": 9.02317541516407e-06, + "loss": 0.3989, + "step": 5001 + }, + { + "epoch": 0.22638606019461416, + "grad_norm": 0.6190937243125736, + "learning_rate": 9.022740185694877e-06, + "loss": 0.4203, + "step": 5002 + }, + { + "epoch": 0.22643131930300972, + "grad_norm": 0.6636631009871302, + "learning_rate": 9.022304869789352e-06, + "loss": 0.3938, + "step": 5003 + }, + { + "epoch": 0.22647657841140528, + "grad_norm": 0.7262606727914647, + "learning_rate": 9.02186946745685e-06, + "loss": 0.3902, + "step": 5004 + }, + { + "epoch": 0.22652183751980087, + "grad_norm": 0.6552484435543318, + "learning_rate": 9.021433978706724e-06, + "loss": 0.3833, + "step": 5005 + }, + { + "epoch": 0.22656709662819643, + "grad_norm": 0.6619672606846077, + "learning_rate": 9.020998403548333e-06, + "loss": 0.38, + "step": 5006 + }, + { + "epoch": 0.226612355736592, + "grad_norm": 0.6452943746949554, + "learning_rate": 9.020562741991035e-06, + "loss": 0.3952, + "step": 5007 + }, + { + "epoch": 0.22665761484498756, + "grad_norm": 0.7229966603330219, + "learning_rate": 9.020126994044194e-06, + "loss": 0.4396, + "step": 5008 + }, + { + "epoch": 0.22670287395338312, + "grad_norm": 0.6631422008112223, + "learning_rate": 9.01969115971717e-06, + "loss": 0.4145, + "step": 5009 + }, + { + "epoch": 0.22674813306177868, + "grad_norm": 0.6025960255581672, + "learning_rate": 9.019255239019327e-06, + "loss": 0.3982, + "step": 5010 + }, + { + "epoch": 0.22679339217017425, + "grad_norm": 0.640514584070971, + "learning_rate": 9.018819231960035e-06, + "loss": 0.4105, + "step": 5011 + }, + { + "epoch": 0.2268386512785698, + "grad_norm": 0.6926676693423613, + "learning_rate": 9.01838313854866e-06, + "loss": 0.4185, + "step": 5012 + }, + { + "epoch": 0.22688391038696537, + "grad_norm": 0.646092239513566, + "learning_rate": 9.017946958794572e-06, + "loss": 0.3833, + "step": 5013 + }, + { + "epoch": 0.22692916949536093, + "grad_norm": 0.43804579728241944, + "learning_rate": 9.017510692707144e-06, + "loss": 0.4814, + "step": 5014 + }, + { + "epoch": 0.2269744286037565, + "grad_norm": 0.6746575132691192, + "learning_rate": 9.01707434029575e-06, + "loss": 0.4079, + "step": 5015 + }, + { + "epoch": 0.22701968771215206, + "grad_norm": 0.6754684713002371, + "learning_rate": 9.016637901569767e-06, + "loss": 0.3984, + "step": 5016 + }, + { + "epoch": 0.22706494682054765, + "grad_norm": 0.33722037950031575, + "learning_rate": 9.01620137653857e-06, + "loss": 0.5116, + "step": 5017 + }, + { + "epoch": 0.2271102059289432, + "grad_norm": 0.6355902009523129, + "learning_rate": 9.015764765211542e-06, + "loss": 0.3737, + "step": 5018 + }, + { + "epoch": 0.22715546503733877, + "grad_norm": 0.7013672992936948, + "learning_rate": 9.015328067598064e-06, + "loss": 0.455, + "step": 5019 + }, + { + "epoch": 0.22720072414573433, + "grad_norm": 0.636650952638715, + "learning_rate": 9.014891283707517e-06, + "loss": 0.3807, + "step": 5020 + }, + { + "epoch": 0.2272459832541299, + "grad_norm": 0.6591969812812456, + "learning_rate": 9.014454413549285e-06, + "loss": 0.4363, + "step": 5021 + }, + { + "epoch": 0.22729124236252546, + "grad_norm": 0.6480320041512702, + "learning_rate": 9.014017457132759e-06, + "loss": 0.3622, + "step": 5022 + }, + { + "epoch": 0.22733650147092102, + "grad_norm": 0.7046158006976412, + "learning_rate": 9.013580414467324e-06, + "loss": 0.395, + "step": 5023 + }, + { + "epoch": 0.22738176057931658, + "grad_norm": 0.7412498466774871, + "learning_rate": 9.013143285562375e-06, + "loss": 0.4135, + "step": 5024 + }, + { + "epoch": 0.22742701968771215, + "grad_norm": 0.6430333473293939, + "learning_rate": 9.012706070427302e-06, + "loss": 0.3934, + "step": 5025 + }, + { + "epoch": 0.2274722787961077, + "grad_norm": 0.9565897525460444, + "learning_rate": 9.012268769071499e-06, + "loss": 0.3622, + "step": 5026 + }, + { + "epoch": 0.22751753790450327, + "grad_norm": 0.7943779811391722, + "learning_rate": 9.011831381504362e-06, + "loss": 0.3784, + "step": 5027 + }, + { + "epoch": 0.22756279701289886, + "grad_norm": 0.6853892780934475, + "learning_rate": 9.011393907735291e-06, + "loss": 0.3891, + "step": 5028 + }, + { + "epoch": 0.22760805612129442, + "grad_norm": 0.5847454644149267, + "learning_rate": 9.010956347773685e-06, + "loss": 0.4946, + "step": 5029 + }, + { + "epoch": 0.22765331522968998, + "grad_norm": 0.43884271089363497, + "learning_rate": 9.010518701628946e-06, + "loss": 0.5247, + "step": 5030 + }, + { + "epoch": 0.22769857433808555, + "grad_norm": 0.7332460232839678, + "learning_rate": 9.010080969310477e-06, + "loss": 0.427, + "step": 5031 + }, + { + "epoch": 0.2277438334464811, + "grad_norm": 0.666964101425838, + "learning_rate": 9.009643150827683e-06, + "loss": 0.3768, + "step": 5032 + }, + { + "epoch": 0.22778909255487667, + "grad_norm": 0.6490034288028559, + "learning_rate": 9.009205246189974e-06, + "loss": 0.4198, + "step": 5033 + }, + { + "epoch": 0.22783435166327223, + "grad_norm": 0.6593400548042422, + "learning_rate": 9.008767255406757e-06, + "loss": 0.3847, + "step": 5034 + }, + { + "epoch": 0.2278796107716678, + "grad_norm": 0.6084761452473841, + "learning_rate": 9.008329178487442e-06, + "loss": 0.5144, + "step": 5035 + }, + { + "epoch": 0.22792486988006336, + "grad_norm": 0.6718615799842731, + "learning_rate": 9.007891015441447e-06, + "loss": 0.3904, + "step": 5036 + }, + { + "epoch": 0.22797012898845892, + "grad_norm": 0.6738115602196058, + "learning_rate": 9.007452766278181e-06, + "loss": 0.3686, + "step": 5037 + }, + { + "epoch": 0.22801538809685448, + "grad_norm": 0.6424073143560038, + "learning_rate": 9.007014431007064e-06, + "loss": 0.4093, + "step": 5038 + }, + { + "epoch": 0.22806064720525004, + "grad_norm": 0.7084602860135315, + "learning_rate": 9.006576009637513e-06, + "loss": 0.4291, + "step": 5039 + }, + { + "epoch": 0.22810590631364563, + "grad_norm": 0.6434498431158302, + "learning_rate": 9.00613750217895e-06, + "loss": 0.3985, + "step": 5040 + }, + { + "epoch": 0.2281511654220412, + "grad_norm": 0.40294794849350446, + "learning_rate": 9.005698908640795e-06, + "loss": 0.5242, + "step": 5041 + }, + { + "epoch": 0.22819642453043676, + "grad_norm": 0.6032961522009523, + "learning_rate": 9.005260229032471e-06, + "loss": 0.3963, + "step": 5042 + }, + { + "epoch": 0.22824168363883232, + "grad_norm": 0.3356321513424042, + "learning_rate": 9.004821463363409e-06, + "loss": 0.475, + "step": 5043 + }, + { + "epoch": 0.22828694274722788, + "grad_norm": 0.5938021240632243, + "learning_rate": 9.004382611643032e-06, + "loss": 0.3356, + "step": 5044 + }, + { + "epoch": 0.22833220185562345, + "grad_norm": 0.7223163154013754, + "learning_rate": 9.003943673880771e-06, + "loss": 0.4112, + "step": 5045 + }, + { + "epoch": 0.228377460964019, + "grad_norm": 0.6531894900267514, + "learning_rate": 9.00350465008606e-06, + "loss": 0.3885, + "step": 5046 + }, + { + "epoch": 0.22842272007241457, + "grad_norm": 0.3875028933102217, + "learning_rate": 9.003065540268328e-06, + "loss": 0.5096, + "step": 5047 + }, + { + "epoch": 0.22846797918081013, + "grad_norm": 0.6800193885461148, + "learning_rate": 9.00262634443701e-06, + "loss": 0.393, + "step": 5048 + }, + { + "epoch": 0.2285132382892057, + "grad_norm": 0.6674094552885022, + "learning_rate": 9.002187062601548e-06, + "loss": 0.3392, + "step": 5049 + }, + { + "epoch": 0.22855849739760126, + "grad_norm": 0.6999580178301209, + "learning_rate": 9.001747694771378e-06, + "loss": 0.3492, + "step": 5050 + }, + { + "epoch": 0.22860375650599682, + "grad_norm": 0.6571573244248741, + "learning_rate": 9.00130824095594e-06, + "loss": 0.3999, + "step": 5051 + }, + { + "epoch": 0.2286490156143924, + "grad_norm": 0.3742644643532557, + "learning_rate": 9.000868701164676e-06, + "loss": 0.4805, + "step": 5052 + }, + { + "epoch": 0.22869427472278797, + "grad_norm": 0.7135463099458239, + "learning_rate": 9.00042907540703e-06, + "loss": 0.4351, + "step": 5053 + }, + { + "epoch": 0.22873953383118353, + "grad_norm": 0.6325496509989086, + "learning_rate": 8.999989363692453e-06, + "loss": 0.3836, + "step": 5054 + }, + { + "epoch": 0.2287847929395791, + "grad_norm": 0.7088140449106423, + "learning_rate": 8.999549566030389e-06, + "loss": 0.4416, + "step": 5055 + }, + { + "epoch": 0.22883005204797466, + "grad_norm": 0.6212518022240358, + "learning_rate": 8.999109682430288e-06, + "loss": 0.3555, + "step": 5056 + }, + { + "epoch": 0.22887531115637022, + "grad_norm": 0.7263540895949513, + "learning_rate": 8.9986697129016e-06, + "loss": 0.3844, + "step": 5057 + }, + { + "epoch": 0.22892057026476578, + "grad_norm": 0.6386208062962684, + "learning_rate": 8.998229657453783e-06, + "loss": 0.359, + "step": 5058 + }, + { + "epoch": 0.22896582937316134, + "grad_norm": 0.7299702917808321, + "learning_rate": 8.99778951609629e-06, + "loss": 0.3813, + "step": 5059 + }, + { + "epoch": 0.2290110884815569, + "grad_norm": 0.6187123146757414, + "learning_rate": 8.997349288838579e-06, + "loss": 0.3686, + "step": 5060 + }, + { + "epoch": 0.22905634758995247, + "grad_norm": 0.6462565768504316, + "learning_rate": 8.996908975690107e-06, + "loss": 0.361, + "step": 5061 + }, + { + "epoch": 0.22910160669834803, + "grad_norm": 0.43439600221257585, + "learning_rate": 8.996468576660337e-06, + "loss": 0.4917, + "step": 5062 + }, + { + "epoch": 0.2291468658067436, + "grad_norm": 0.6981144837914366, + "learning_rate": 8.996028091758733e-06, + "loss": 0.3568, + "step": 5063 + }, + { + "epoch": 0.22919212491513918, + "grad_norm": 0.665355672986008, + "learning_rate": 8.995587520994757e-06, + "loss": 0.3985, + "step": 5064 + }, + { + "epoch": 0.22923738402353475, + "grad_norm": 0.6531402191226752, + "learning_rate": 8.995146864377877e-06, + "loss": 0.3997, + "step": 5065 + }, + { + "epoch": 0.2292826431319303, + "grad_norm": 0.6692184657619775, + "learning_rate": 8.994706121917562e-06, + "loss": 0.3675, + "step": 5066 + }, + { + "epoch": 0.22932790224032587, + "grad_norm": 0.748061770522374, + "learning_rate": 8.99426529362328e-06, + "loss": 0.3545, + "step": 5067 + }, + { + "epoch": 0.22937316134872143, + "grad_norm": 0.6629816145696559, + "learning_rate": 8.993824379504505e-06, + "loss": 0.3949, + "step": 5068 + }, + { + "epoch": 0.229418420457117, + "grad_norm": 0.36058234225704106, + "learning_rate": 8.99338337957071e-06, + "loss": 0.4942, + "step": 5069 + }, + { + "epoch": 0.22946367956551256, + "grad_norm": 0.6403442029161902, + "learning_rate": 8.99294229383137e-06, + "loss": 0.4098, + "step": 5070 + }, + { + "epoch": 0.22950893867390812, + "grad_norm": 0.30217042809740324, + "learning_rate": 8.992501122295964e-06, + "loss": 0.5023, + "step": 5071 + }, + { + "epoch": 0.22955419778230368, + "grad_norm": 0.7060732314966579, + "learning_rate": 8.992059864973972e-06, + "loss": 0.392, + "step": 5072 + }, + { + "epoch": 0.22959945689069924, + "grad_norm": 0.31473324252979823, + "learning_rate": 8.991618521874874e-06, + "loss": 0.5342, + "step": 5073 + }, + { + "epoch": 0.2296447159990948, + "grad_norm": 0.7440077572148602, + "learning_rate": 8.991177093008153e-06, + "loss": 0.4034, + "step": 5074 + }, + { + "epoch": 0.2296899751074904, + "grad_norm": 0.3344787423121618, + "learning_rate": 8.990735578383295e-06, + "loss": 0.5276, + "step": 5075 + }, + { + "epoch": 0.22973523421588596, + "grad_norm": 0.7355397133446866, + "learning_rate": 8.990293978009782e-06, + "loss": 0.3846, + "step": 5076 + }, + { + "epoch": 0.22978049332428152, + "grad_norm": 0.6111272041129948, + "learning_rate": 8.98985229189711e-06, + "loss": 0.3438, + "step": 5077 + }, + { + "epoch": 0.22982575243267708, + "grad_norm": 0.6582935033341474, + "learning_rate": 8.989410520054767e-06, + "loss": 0.3601, + "step": 5078 + }, + { + "epoch": 0.22987101154107265, + "grad_norm": 0.6543794262424822, + "learning_rate": 8.988968662492243e-06, + "loss": 0.4188, + "step": 5079 + }, + { + "epoch": 0.2299162706494682, + "grad_norm": 0.3491993514780067, + "learning_rate": 8.988526719219035e-06, + "loss": 0.5115, + "step": 5080 + }, + { + "epoch": 0.22996152975786377, + "grad_norm": 0.3329227948894098, + "learning_rate": 8.988084690244636e-06, + "loss": 0.5075, + "step": 5081 + }, + { + "epoch": 0.23000678886625933, + "grad_norm": 0.3059273323766298, + "learning_rate": 8.987642575578546e-06, + "loss": 0.4829, + "step": 5082 + }, + { + "epoch": 0.2300520479746549, + "grad_norm": 0.6749932682170735, + "learning_rate": 8.987200375230262e-06, + "loss": 0.4061, + "step": 5083 + }, + { + "epoch": 0.23009730708305046, + "grad_norm": 0.667571209794923, + "learning_rate": 8.986758089209292e-06, + "loss": 0.4166, + "step": 5084 + }, + { + "epoch": 0.23014256619144602, + "grad_norm": 0.6994861621747892, + "learning_rate": 8.986315717525132e-06, + "loss": 0.4102, + "step": 5085 + }, + { + "epoch": 0.23018782529984158, + "grad_norm": 0.6364880522630176, + "learning_rate": 8.98587326018729e-06, + "loss": 0.4032, + "step": 5086 + }, + { + "epoch": 0.23023308440823717, + "grad_norm": 0.40828602820192145, + "learning_rate": 8.985430717205276e-06, + "loss": 0.4834, + "step": 5087 + }, + { + "epoch": 0.23027834351663273, + "grad_norm": 0.6472024336771361, + "learning_rate": 8.984988088588594e-06, + "loss": 0.4167, + "step": 5088 + }, + { + "epoch": 0.2303236026250283, + "grad_norm": 0.6382904284079052, + "learning_rate": 8.984545374346758e-06, + "loss": 0.3806, + "step": 5089 + }, + { + "epoch": 0.23036886173342386, + "grad_norm": 0.7405927409143391, + "learning_rate": 8.98410257448928e-06, + "loss": 0.4345, + "step": 5090 + }, + { + "epoch": 0.23041412084181942, + "grad_norm": 0.6151881033617659, + "learning_rate": 8.983659689025673e-06, + "loss": 0.3811, + "step": 5091 + }, + { + "epoch": 0.23045937995021498, + "grad_norm": 0.6586186202326227, + "learning_rate": 8.983216717965453e-06, + "loss": 0.3562, + "step": 5092 + }, + { + "epoch": 0.23050463905861054, + "grad_norm": 0.6552286587750954, + "learning_rate": 8.98277366131814e-06, + "loss": 0.3732, + "step": 5093 + }, + { + "epoch": 0.2305498981670061, + "grad_norm": 0.6744611940595147, + "learning_rate": 8.982330519093255e-06, + "loss": 0.3634, + "step": 5094 + }, + { + "epoch": 0.23059515727540167, + "grad_norm": 0.6468474579989157, + "learning_rate": 8.981887291300315e-06, + "loss": 0.3916, + "step": 5095 + }, + { + "epoch": 0.23064041638379723, + "grad_norm": 0.6413550963762416, + "learning_rate": 8.981443977948848e-06, + "loss": 0.4041, + "step": 5096 + }, + { + "epoch": 0.2306856754921928, + "grad_norm": 0.3875886722541602, + "learning_rate": 8.98100057904838e-06, + "loss": 0.5196, + "step": 5097 + }, + { + "epoch": 0.23073093460058836, + "grad_norm": 0.3240288028641131, + "learning_rate": 8.980557094608433e-06, + "loss": 0.4977, + "step": 5098 + }, + { + "epoch": 0.23077619370898395, + "grad_norm": 0.6977386054479082, + "learning_rate": 8.980113524638541e-06, + "loss": 0.3847, + "step": 5099 + }, + { + "epoch": 0.2308214528173795, + "grad_norm": 0.65294049746612, + "learning_rate": 8.979669869148234e-06, + "loss": 0.3908, + "step": 5100 + }, + { + "epoch": 0.23086671192577507, + "grad_norm": 0.6586498592271675, + "learning_rate": 8.979226128147043e-06, + "loss": 0.3793, + "step": 5101 + }, + { + "epoch": 0.23091197103417063, + "grad_norm": 0.624180963723518, + "learning_rate": 8.978782301644503e-06, + "loss": 0.4071, + "step": 5102 + }, + { + "epoch": 0.2309572301425662, + "grad_norm": 0.3947402995617219, + "learning_rate": 8.978338389650152e-06, + "loss": 0.4873, + "step": 5103 + }, + { + "epoch": 0.23100248925096176, + "grad_norm": 0.7039306333910293, + "learning_rate": 8.977894392173527e-06, + "loss": 0.4046, + "step": 5104 + }, + { + "epoch": 0.23104774835935732, + "grad_norm": 0.6445887028036205, + "learning_rate": 8.97745030922417e-06, + "loss": 0.4122, + "step": 5105 + }, + { + "epoch": 0.23109300746775288, + "grad_norm": 0.7006501371896489, + "learning_rate": 8.977006140811621e-06, + "loss": 0.3884, + "step": 5106 + }, + { + "epoch": 0.23113826657614844, + "grad_norm": 0.6703032827813812, + "learning_rate": 8.976561886945426e-06, + "loss": 0.3607, + "step": 5107 + }, + { + "epoch": 0.231183525684544, + "grad_norm": 0.7015094869729309, + "learning_rate": 8.976117547635125e-06, + "loss": 0.4294, + "step": 5108 + }, + { + "epoch": 0.23122878479293957, + "grad_norm": 0.6317103668825891, + "learning_rate": 8.975673122890273e-06, + "loss": 0.4084, + "step": 5109 + }, + { + "epoch": 0.23127404390133513, + "grad_norm": 0.6348862463002435, + "learning_rate": 8.975228612720415e-06, + "loss": 0.3692, + "step": 5110 + }, + { + "epoch": 0.23131930300973072, + "grad_norm": 0.7192469572737816, + "learning_rate": 8.974784017135104e-06, + "loss": 0.4034, + "step": 5111 + }, + { + "epoch": 0.23136456211812628, + "grad_norm": 0.70426941566466, + "learning_rate": 8.974339336143892e-06, + "loss": 0.3641, + "step": 5112 + }, + { + "epoch": 0.23140982122652184, + "grad_norm": 0.7329488639684603, + "learning_rate": 8.973894569756333e-06, + "loss": 0.4074, + "step": 5113 + }, + { + "epoch": 0.2314550803349174, + "grad_norm": 0.678743773114706, + "learning_rate": 8.973449717981984e-06, + "loss": 0.3797, + "step": 5114 + }, + { + "epoch": 0.23150033944331297, + "grad_norm": 0.6801450151035214, + "learning_rate": 8.973004780830405e-06, + "loss": 0.4082, + "step": 5115 + }, + { + "epoch": 0.23154559855170853, + "grad_norm": 0.8518430751724988, + "learning_rate": 8.972559758311156e-06, + "loss": 0.3891, + "step": 5116 + }, + { + "epoch": 0.2315908576601041, + "grad_norm": 0.4006010921762384, + "learning_rate": 8.972114650433798e-06, + "loss": 0.5036, + "step": 5117 + }, + { + "epoch": 0.23163611676849966, + "grad_norm": 0.33678025565893854, + "learning_rate": 8.971669457207896e-06, + "loss": 0.5028, + "step": 5118 + }, + { + "epoch": 0.23168137587689522, + "grad_norm": 0.7913353825927536, + "learning_rate": 8.971224178643015e-06, + "loss": 0.3729, + "step": 5119 + }, + { + "epoch": 0.23172663498529078, + "grad_norm": 0.7398032625351002, + "learning_rate": 8.970778814748722e-06, + "loss": 0.3754, + "step": 5120 + }, + { + "epoch": 0.23177189409368634, + "grad_norm": 0.6682354188081827, + "learning_rate": 8.97033336553459e-06, + "loss": 0.396, + "step": 5121 + }, + { + "epoch": 0.23181715320208193, + "grad_norm": 0.8144947277111944, + "learning_rate": 8.969887831010185e-06, + "loss": 0.4829, + "step": 5122 + }, + { + "epoch": 0.2318624123104775, + "grad_norm": 0.7451888612956965, + "learning_rate": 8.969442211185086e-06, + "loss": 0.388, + "step": 5123 + }, + { + "epoch": 0.23190767141887306, + "grad_norm": 0.7696054579132323, + "learning_rate": 8.968996506068863e-06, + "loss": 0.3659, + "step": 5124 + }, + { + "epoch": 0.23195293052726862, + "grad_norm": 0.47226834278168806, + "learning_rate": 8.968550715671096e-06, + "loss": 0.5083, + "step": 5125 + }, + { + "epoch": 0.23199818963566418, + "grad_norm": 0.8043182628259528, + "learning_rate": 8.968104840001362e-06, + "loss": 0.363, + "step": 5126 + }, + { + "epoch": 0.23204344874405974, + "grad_norm": 0.8018927387330322, + "learning_rate": 8.967658879069243e-06, + "loss": 0.3646, + "step": 5127 + }, + { + "epoch": 0.2320887078524553, + "grad_norm": 0.6455595712351934, + "learning_rate": 8.96721283288432e-06, + "loss": 0.3807, + "step": 5128 + }, + { + "epoch": 0.23213396696085087, + "grad_norm": 0.6741490222011743, + "learning_rate": 8.966766701456177e-06, + "loss": 0.4122, + "step": 5129 + }, + { + "epoch": 0.23217922606924643, + "grad_norm": 0.7687792391535636, + "learning_rate": 8.9663204847944e-06, + "loss": 0.4552, + "step": 5130 + }, + { + "epoch": 0.232224485177642, + "grad_norm": 0.6669012351077497, + "learning_rate": 8.965874182908578e-06, + "loss": 0.3792, + "step": 5131 + }, + { + "epoch": 0.23226974428603755, + "grad_norm": 0.3517548575118009, + "learning_rate": 8.9654277958083e-06, + "loss": 0.4974, + "step": 5132 + }, + { + "epoch": 0.23231500339443312, + "grad_norm": 0.6987967216935925, + "learning_rate": 8.96498132350316e-06, + "loss": 0.3722, + "step": 5133 + }, + { + "epoch": 0.2323602625028287, + "grad_norm": 2.2598695936336903, + "learning_rate": 8.964534766002747e-06, + "loss": 0.3949, + "step": 5134 + }, + { + "epoch": 0.23240552161122427, + "grad_norm": 0.6806879214900802, + "learning_rate": 8.964088123316657e-06, + "loss": 0.4195, + "step": 5135 + }, + { + "epoch": 0.23245078071961983, + "grad_norm": 0.6509213495136762, + "learning_rate": 8.96364139545449e-06, + "loss": 0.3989, + "step": 5136 + }, + { + "epoch": 0.2324960398280154, + "grad_norm": 0.6993968892851343, + "learning_rate": 8.96319458242584e-06, + "loss": 0.398, + "step": 5137 + }, + { + "epoch": 0.23254129893641096, + "grad_norm": 0.3881002933457079, + "learning_rate": 8.962747684240313e-06, + "loss": 0.5247, + "step": 5138 + }, + { + "epoch": 0.23258655804480652, + "grad_norm": 0.7525172989755955, + "learning_rate": 8.962300700907508e-06, + "loss": 0.4302, + "step": 5139 + }, + { + "epoch": 0.23263181715320208, + "grad_norm": 0.6624381151083798, + "learning_rate": 8.96185363243703e-06, + "loss": 0.3945, + "step": 5140 + }, + { + "epoch": 0.23267707626159764, + "grad_norm": 0.6056783431125236, + "learning_rate": 8.961406478838486e-06, + "loss": 0.3918, + "step": 5141 + }, + { + "epoch": 0.2327223353699932, + "grad_norm": 0.6902131262621901, + "learning_rate": 8.960959240121483e-06, + "loss": 0.4018, + "step": 5142 + }, + { + "epoch": 0.23276759447838877, + "grad_norm": 0.7433333943110101, + "learning_rate": 8.96051191629563e-06, + "loss": 0.433, + "step": 5143 + }, + { + "epoch": 0.23281285358678433, + "grad_norm": 0.6448248656475403, + "learning_rate": 8.96006450737054e-06, + "loss": 0.3489, + "step": 5144 + }, + { + "epoch": 0.2328581126951799, + "grad_norm": 0.6608729931513597, + "learning_rate": 8.959617013355829e-06, + "loss": 0.4211, + "step": 5145 + }, + { + "epoch": 0.23290337180357548, + "grad_norm": 0.714836914340452, + "learning_rate": 8.959169434261106e-06, + "loss": 0.404, + "step": 5146 + }, + { + "epoch": 0.23294863091197104, + "grad_norm": 0.4241072094567102, + "learning_rate": 8.958721770095993e-06, + "loss": 0.4861, + "step": 5147 + }, + { + "epoch": 0.2329938900203666, + "grad_norm": 0.7033691745901531, + "learning_rate": 8.958274020870107e-06, + "loss": 0.3767, + "step": 5148 + }, + { + "epoch": 0.23303914912876217, + "grad_norm": 0.6697782259656342, + "learning_rate": 8.95782618659307e-06, + "loss": 0.3817, + "step": 5149 + }, + { + "epoch": 0.23308440823715773, + "grad_norm": 0.684941030312248, + "learning_rate": 8.957378267274502e-06, + "loss": 0.3734, + "step": 5150 + }, + { + "epoch": 0.2331296673455533, + "grad_norm": 0.6454346323086292, + "learning_rate": 8.95693026292403e-06, + "loss": 0.3462, + "step": 5151 + }, + { + "epoch": 0.23317492645394886, + "grad_norm": 0.6743028983951925, + "learning_rate": 8.956482173551281e-06, + "loss": 0.3936, + "step": 5152 + }, + { + "epoch": 0.23322018556234442, + "grad_norm": 0.35510312218255524, + "learning_rate": 8.956033999165881e-06, + "loss": 0.5015, + "step": 5153 + }, + { + "epoch": 0.23326544467073998, + "grad_norm": 0.33903672826457865, + "learning_rate": 8.95558573977746e-06, + "loss": 0.4976, + "step": 5154 + }, + { + "epoch": 0.23331070377913554, + "grad_norm": 0.6732348430980337, + "learning_rate": 8.955137395395649e-06, + "loss": 0.4227, + "step": 5155 + }, + { + "epoch": 0.2333559628875311, + "grad_norm": 0.6513263774892051, + "learning_rate": 8.954688966030083e-06, + "loss": 0.3716, + "step": 5156 + }, + { + "epoch": 0.23340122199592667, + "grad_norm": 0.6796153436099222, + "learning_rate": 8.954240451690396e-06, + "loss": 0.3805, + "step": 5157 + }, + { + "epoch": 0.23344648110432226, + "grad_norm": 0.7066157020242287, + "learning_rate": 8.953791852386229e-06, + "loss": 0.4376, + "step": 5158 + }, + { + "epoch": 0.23349174021271782, + "grad_norm": 0.4003704868791629, + "learning_rate": 8.953343168127218e-06, + "loss": 0.5046, + "step": 5159 + }, + { + "epoch": 0.23353699932111338, + "grad_norm": 0.7553986056398494, + "learning_rate": 8.952894398923003e-06, + "loss": 0.3873, + "step": 5160 + }, + { + "epoch": 0.23358225842950894, + "grad_norm": 0.3605697912438177, + "learning_rate": 8.952445544783227e-06, + "loss": 0.4796, + "step": 5161 + }, + { + "epoch": 0.2336275175379045, + "grad_norm": 0.33431313155846065, + "learning_rate": 8.951996605717537e-06, + "loss": 0.5052, + "step": 5162 + }, + { + "epoch": 0.23367277664630007, + "grad_norm": 0.6935848168831603, + "learning_rate": 8.951547581735576e-06, + "loss": 0.3645, + "step": 5163 + }, + { + "epoch": 0.23371803575469563, + "grad_norm": 0.28657580245470116, + "learning_rate": 8.951098472846994e-06, + "loss": 0.5062, + "step": 5164 + }, + { + "epoch": 0.2337632948630912, + "grad_norm": 0.6478693357949638, + "learning_rate": 8.950649279061441e-06, + "loss": 0.3496, + "step": 5165 + }, + { + "epoch": 0.23380855397148675, + "grad_norm": 0.6352113123421139, + "learning_rate": 8.950200000388569e-06, + "loss": 0.4445, + "step": 5166 + }, + { + "epoch": 0.23385381307988232, + "grad_norm": 0.6870906658498175, + "learning_rate": 8.94975063683803e-06, + "loss": 0.4252, + "step": 5167 + }, + { + "epoch": 0.23389907218827788, + "grad_norm": 0.6279771064277256, + "learning_rate": 8.949301188419481e-06, + "loss": 0.353, + "step": 5168 + }, + { + "epoch": 0.23394433129667347, + "grad_norm": 0.6866365440285913, + "learning_rate": 8.948851655142579e-06, + "loss": 0.3868, + "step": 5169 + }, + { + "epoch": 0.23398959040506903, + "grad_norm": 0.634897252379839, + "learning_rate": 8.948402037016984e-06, + "loss": 0.4156, + "step": 5170 + }, + { + "epoch": 0.2340348495134646, + "grad_norm": 0.6840741049902902, + "learning_rate": 8.947952334052354e-06, + "loss": 0.384, + "step": 5171 + }, + { + "epoch": 0.23408010862186016, + "grad_norm": 0.6709720523253192, + "learning_rate": 8.947502546258354e-06, + "loss": 0.3184, + "step": 5172 + }, + { + "epoch": 0.23412536773025572, + "grad_norm": 0.6546340406137499, + "learning_rate": 8.947052673644649e-06, + "loss": 0.3699, + "step": 5173 + }, + { + "epoch": 0.23417062683865128, + "grad_norm": 1.1575843601066382, + "learning_rate": 8.946602716220903e-06, + "loss": 0.3687, + "step": 5174 + }, + { + "epoch": 0.23421588594704684, + "grad_norm": 0.5592146591684121, + "learning_rate": 8.946152673996786e-06, + "loss": 0.5038, + "step": 5175 + }, + { + "epoch": 0.2342611450554424, + "grad_norm": 0.438025934009356, + "learning_rate": 8.94570254698197e-06, + "loss": 0.4751, + "step": 5176 + }, + { + "epoch": 0.23430640416383797, + "grad_norm": 0.3331041402234504, + "learning_rate": 8.94525233518612e-06, + "loss": 0.5063, + "step": 5177 + }, + { + "epoch": 0.23435166327223353, + "grad_norm": 0.7216492408264652, + "learning_rate": 8.944802038618919e-06, + "loss": 0.3478, + "step": 5178 + }, + { + "epoch": 0.2343969223806291, + "grad_norm": 0.6858494288023315, + "learning_rate": 8.944351657290037e-06, + "loss": 0.3644, + "step": 5179 + }, + { + "epoch": 0.23444218148902465, + "grad_norm": 0.6451322581270235, + "learning_rate": 8.94390119120915e-06, + "loss": 0.3867, + "step": 5180 + }, + { + "epoch": 0.23448744059742024, + "grad_norm": 0.7447336454417398, + "learning_rate": 8.94345064038594e-06, + "loss": 0.4206, + "step": 5181 + }, + { + "epoch": 0.2345326997058158, + "grad_norm": 0.7668578669225723, + "learning_rate": 8.943000004830087e-06, + "loss": 0.514, + "step": 5182 + }, + { + "epoch": 0.23457795881421137, + "grad_norm": 0.718206994552857, + "learning_rate": 8.942549284551274e-06, + "loss": 0.4438, + "step": 5183 + }, + { + "epoch": 0.23462321792260693, + "grad_norm": 0.7203731751657185, + "learning_rate": 8.942098479559185e-06, + "loss": 0.4234, + "step": 5184 + }, + { + "epoch": 0.2346684770310025, + "grad_norm": 0.6292088498449315, + "learning_rate": 8.941647589863507e-06, + "loss": 0.4002, + "step": 5185 + }, + { + "epoch": 0.23471373613939805, + "grad_norm": 0.6600783332319208, + "learning_rate": 8.941196615473929e-06, + "loss": 0.4002, + "step": 5186 + }, + { + "epoch": 0.23475899524779362, + "grad_norm": 0.7434646236905292, + "learning_rate": 8.94074555640014e-06, + "loss": 0.4058, + "step": 5187 + }, + { + "epoch": 0.23480425435618918, + "grad_norm": 0.6471649173552345, + "learning_rate": 8.940294412651831e-06, + "loss": 0.3731, + "step": 5188 + }, + { + "epoch": 0.23484951346458474, + "grad_norm": 0.7267012818438411, + "learning_rate": 8.939843184238698e-06, + "loss": 0.4365, + "step": 5189 + }, + { + "epoch": 0.2348947725729803, + "grad_norm": 0.42194579918599756, + "learning_rate": 8.939391871170435e-06, + "loss": 0.4843, + "step": 5190 + }, + { + "epoch": 0.23494003168137587, + "grad_norm": 0.6951856656124707, + "learning_rate": 8.93894047345674e-06, + "loss": 0.4126, + "step": 5191 + }, + { + "epoch": 0.23498529078977143, + "grad_norm": 0.6568313845426516, + "learning_rate": 8.93848899110731e-06, + "loss": 0.391, + "step": 5192 + }, + { + "epoch": 0.23503054989816702, + "grad_norm": 0.7239464873852787, + "learning_rate": 8.93803742413185e-06, + "loss": 0.3812, + "step": 5193 + }, + { + "epoch": 0.23507580900656258, + "grad_norm": 0.3504256078341399, + "learning_rate": 8.937585772540058e-06, + "loss": 0.5347, + "step": 5194 + }, + { + "epoch": 0.23512106811495814, + "grad_norm": 0.6827160249138898, + "learning_rate": 8.937134036341643e-06, + "loss": 0.3973, + "step": 5195 + }, + { + "epoch": 0.2351663272233537, + "grad_norm": 0.6703648183205427, + "learning_rate": 8.93668221554631e-06, + "loss": 0.4111, + "step": 5196 + }, + { + "epoch": 0.23521158633174927, + "grad_norm": 0.31976622494354495, + "learning_rate": 8.936230310163765e-06, + "loss": 0.5168, + "step": 5197 + }, + { + "epoch": 0.23525684544014483, + "grad_norm": 0.6581618322094055, + "learning_rate": 8.935778320203721e-06, + "loss": 0.3728, + "step": 5198 + }, + { + "epoch": 0.2353021045485404, + "grad_norm": 0.6170063595280307, + "learning_rate": 8.935326245675887e-06, + "loss": 0.3508, + "step": 5199 + }, + { + "epoch": 0.23534736365693595, + "grad_norm": 0.33336389010807627, + "learning_rate": 8.934874086589981e-06, + "loss": 0.4836, + "step": 5200 + }, + { + "epoch": 0.23539262276533152, + "grad_norm": 0.9567379336319621, + "learning_rate": 8.934421842955715e-06, + "loss": 0.376, + "step": 5201 + }, + { + "epoch": 0.23543788187372708, + "grad_norm": 0.6638392906292916, + "learning_rate": 8.933969514782808e-06, + "loss": 0.35, + "step": 5202 + }, + { + "epoch": 0.23548314098212264, + "grad_norm": 0.65336301854799, + "learning_rate": 8.933517102080977e-06, + "loss": 0.3838, + "step": 5203 + }, + { + "epoch": 0.2355284000905182, + "grad_norm": 0.3227094570153876, + "learning_rate": 8.933064604859945e-06, + "loss": 0.4925, + "step": 5204 + }, + { + "epoch": 0.2355736591989138, + "grad_norm": 0.31246372138751155, + "learning_rate": 8.932612023129433e-06, + "loss": 0.5056, + "step": 5205 + }, + { + "epoch": 0.23561891830730936, + "grad_norm": 0.7701575727730373, + "learning_rate": 8.932159356899169e-06, + "loss": 0.3625, + "step": 5206 + }, + { + "epoch": 0.23566417741570492, + "grad_norm": 0.2880103831704138, + "learning_rate": 8.931706606178874e-06, + "loss": 0.4982, + "step": 5207 + }, + { + "epoch": 0.23570943652410048, + "grad_norm": 0.7208448019428256, + "learning_rate": 8.931253770978281e-06, + "loss": 0.3792, + "step": 5208 + }, + { + "epoch": 0.23575469563249604, + "grad_norm": 0.6607662590967202, + "learning_rate": 8.93080085130712e-06, + "loss": 0.39, + "step": 5209 + }, + { + "epoch": 0.2357999547408916, + "grad_norm": 0.6524379021573505, + "learning_rate": 8.930347847175118e-06, + "loss": 0.3755, + "step": 5210 + }, + { + "epoch": 0.23584521384928717, + "grad_norm": 0.3524805216490473, + "learning_rate": 8.929894758592016e-06, + "loss": 0.4992, + "step": 5211 + }, + { + "epoch": 0.23589047295768273, + "grad_norm": 0.6890393087617782, + "learning_rate": 8.929441585567543e-06, + "loss": 0.4145, + "step": 5212 + }, + { + "epoch": 0.2359357320660783, + "grad_norm": 0.35273439977535703, + "learning_rate": 8.928988328111437e-06, + "loss": 0.4905, + "step": 5213 + }, + { + "epoch": 0.23598099117447385, + "grad_norm": 0.6956924315211259, + "learning_rate": 8.928534986233441e-06, + "loss": 0.3921, + "step": 5214 + }, + { + "epoch": 0.23602625028286942, + "grad_norm": 0.6630715659033836, + "learning_rate": 8.928081559943293e-06, + "loss": 0.3345, + "step": 5215 + }, + { + "epoch": 0.236071509391265, + "grad_norm": 0.6177416326477722, + "learning_rate": 8.927628049250736e-06, + "loss": 0.36, + "step": 5216 + }, + { + "epoch": 0.23611676849966057, + "grad_norm": 0.4239754000254461, + "learning_rate": 8.927174454165518e-06, + "loss": 0.4947, + "step": 5217 + }, + { + "epoch": 0.23616202760805613, + "grad_norm": 0.7221304351259809, + "learning_rate": 8.926720774697379e-06, + "loss": 0.3808, + "step": 5218 + }, + { + "epoch": 0.2362072867164517, + "grad_norm": 0.7340501642183168, + "learning_rate": 8.926267010856072e-06, + "loss": 0.4445, + "step": 5219 + }, + { + "epoch": 0.23625254582484725, + "grad_norm": 0.6257379899081139, + "learning_rate": 8.925813162651345e-06, + "loss": 0.402, + "step": 5220 + }, + { + "epoch": 0.23629780493324282, + "grad_norm": 0.6189493871758575, + "learning_rate": 8.92535923009295e-06, + "loss": 0.4159, + "step": 5221 + }, + { + "epoch": 0.23634306404163838, + "grad_norm": 0.6375101034399829, + "learning_rate": 8.924905213190641e-06, + "loss": 0.3664, + "step": 5222 + }, + { + "epoch": 0.23638832315003394, + "grad_norm": 0.659813540503722, + "learning_rate": 8.924451111954173e-06, + "loss": 0.4071, + "step": 5223 + }, + { + "epoch": 0.2364335822584295, + "grad_norm": 0.6552180360974372, + "learning_rate": 8.923996926393306e-06, + "loss": 0.3749, + "step": 5224 + }, + { + "epoch": 0.23647884136682507, + "grad_norm": 0.3810805951151939, + "learning_rate": 8.923542656517795e-06, + "loss": 0.5057, + "step": 5225 + }, + { + "epoch": 0.23652410047522063, + "grad_norm": 0.6787402212616696, + "learning_rate": 8.923088302337402e-06, + "loss": 0.3789, + "step": 5226 + }, + { + "epoch": 0.2365693595836162, + "grad_norm": 0.6387130171561558, + "learning_rate": 8.922633863861891e-06, + "loss": 0.4037, + "step": 5227 + }, + { + "epoch": 0.23661461869201178, + "grad_norm": 0.3172792652610559, + "learning_rate": 8.922179341101027e-06, + "loss": 0.4942, + "step": 5228 + }, + { + "epoch": 0.23665987780040734, + "grad_norm": 0.6719534269724785, + "learning_rate": 8.921724734064573e-06, + "loss": 0.3778, + "step": 5229 + }, + { + "epoch": 0.2367051369088029, + "grad_norm": 0.7536957548197958, + "learning_rate": 8.9212700427623e-06, + "loss": 0.4318, + "step": 5230 + }, + { + "epoch": 0.23675039601719847, + "grad_norm": 0.7039894799263119, + "learning_rate": 8.920815267203977e-06, + "loss": 0.3631, + "step": 5231 + }, + { + "epoch": 0.23679565512559403, + "grad_norm": 0.6832250244855517, + "learning_rate": 8.920360407399375e-06, + "loss": 0.4254, + "step": 5232 + }, + { + "epoch": 0.2368409142339896, + "grad_norm": 0.697516368031495, + "learning_rate": 8.919905463358269e-06, + "loss": 0.3406, + "step": 5233 + }, + { + "epoch": 0.23688617334238515, + "grad_norm": 0.6938707188368235, + "learning_rate": 8.919450435090433e-06, + "loss": 0.4, + "step": 5234 + }, + { + "epoch": 0.23693143245078072, + "grad_norm": 0.6964208971518877, + "learning_rate": 8.918995322605646e-06, + "loss": 0.4226, + "step": 5235 + }, + { + "epoch": 0.23697669155917628, + "grad_norm": 0.41458513233775623, + "learning_rate": 8.918540125913686e-06, + "loss": 0.492, + "step": 5236 + }, + { + "epoch": 0.23702195066757184, + "grad_norm": 0.6790128431432206, + "learning_rate": 8.918084845024334e-06, + "loss": 0.3861, + "step": 5237 + }, + { + "epoch": 0.2370672097759674, + "grad_norm": 0.6650948718946313, + "learning_rate": 8.917629479947369e-06, + "loss": 0.3855, + "step": 5238 + }, + { + "epoch": 0.23711246888436296, + "grad_norm": 0.32944873763671007, + "learning_rate": 8.917174030692582e-06, + "loss": 0.5137, + "step": 5239 + }, + { + "epoch": 0.23715772799275855, + "grad_norm": 0.7527930233644158, + "learning_rate": 8.916718497269755e-06, + "loss": 0.4035, + "step": 5240 + }, + { + "epoch": 0.23720298710115412, + "grad_norm": 0.7248777445942971, + "learning_rate": 8.916262879688674e-06, + "loss": 0.3802, + "step": 5241 + }, + { + "epoch": 0.23724824620954968, + "grad_norm": 0.6412358156653725, + "learning_rate": 8.915807177959133e-06, + "loss": 0.3757, + "step": 5242 + }, + { + "epoch": 0.23729350531794524, + "grad_norm": 0.387452448857917, + "learning_rate": 8.915351392090925e-06, + "loss": 0.4974, + "step": 5243 + }, + { + "epoch": 0.2373387644263408, + "grad_norm": 0.7509110448440282, + "learning_rate": 8.914895522093839e-06, + "loss": 0.3416, + "step": 5244 + }, + { + "epoch": 0.23738402353473637, + "grad_norm": 0.6916588684495765, + "learning_rate": 8.91443956797767e-06, + "loss": 0.3728, + "step": 5245 + }, + { + "epoch": 0.23742928264313193, + "grad_norm": 0.6397813659500448, + "learning_rate": 8.91398352975222e-06, + "loss": 0.3884, + "step": 5246 + }, + { + "epoch": 0.2374745417515275, + "grad_norm": 1.3669528665831592, + "learning_rate": 8.913527407427282e-06, + "loss": 0.3725, + "step": 5247 + }, + { + "epoch": 0.23751980085992305, + "grad_norm": 0.714301416019482, + "learning_rate": 8.91307120101266e-06, + "loss": 0.3635, + "step": 5248 + }, + { + "epoch": 0.23756505996831861, + "grad_norm": 0.6345896928698095, + "learning_rate": 8.912614910518158e-06, + "loss": 0.3996, + "step": 5249 + }, + { + "epoch": 0.23761031907671418, + "grad_norm": 0.6514324533755185, + "learning_rate": 8.912158535953576e-06, + "loss": 0.3743, + "step": 5250 + }, + { + "epoch": 0.23765557818510977, + "grad_norm": 0.6343997575674019, + "learning_rate": 8.911702077328723e-06, + "loss": 0.3851, + "step": 5251 + }, + { + "epoch": 0.23770083729350533, + "grad_norm": 0.3922728341327845, + "learning_rate": 8.911245534653409e-06, + "loss": 0.5126, + "step": 5252 + }, + { + "epoch": 0.2377460964019009, + "grad_norm": 1.0385948182811826, + "learning_rate": 8.910788907937437e-06, + "loss": 0.3682, + "step": 5253 + }, + { + "epoch": 0.23779135551029645, + "grad_norm": 0.3969089405014628, + "learning_rate": 8.910332197190623e-06, + "loss": 0.5111, + "step": 5254 + }, + { + "epoch": 0.23783661461869202, + "grad_norm": 0.7374498506816389, + "learning_rate": 8.90987540242278e-06, + "loss": 0.3867, + "step": 5255 + }, + { + "epoch": 0.23788187372708758, + "grad_norm": 0.3126931159539168, + "learning_rate": 8.909418523643724e-06, + "loss": 0.4951, + "step": 5256 + }, + { + "epoch": 0.23792713283548314, + "grad_norm": 0.6573278021852665, + "learning_rate": 8.908961560863271e-06, + "loss": 0.3913, + "step": 5257 + }, + { + "epoch": 0.2379723919438787, + "grad_norm": 0.6754692628404956, + "learning_rate": 8.908504514091239e-06, + "loss": 0.4083, + "step": 5258 + }, + { + "epoch": 0.23801765105227427, + "grad_norm": 0.7009135373593584, + "learning_rate": 8.908047383337447e-06, + "loss": 0.3772, + "step": 5259 + }, + { + "epoch": 0.23806291016066983, + "grad_norm": 0.6307270990180422, + "learning_rate": 8.907590168611724e-06, + "loss": 0.4036, + "step": 5260 + }, + { + "epoch": 0.2381081692690654, + "grad_norm": 0.3787399442226855, + "learning_rate": 8.907132869923886e-06, + "loss": 0.4853, + "step": 5261 + }, + { + "epoch": 0.23815342837746095, + "grad_norm": 0.3359140168820819, + "learning_rate": 8.906675487283764e-06, + "loss": 0.4977, + "step": 5262 + }, + { + "epoch": 0.23819868748585654, + "grad_norm": 0.7146020697874832, + "learning_rate": 8.906218020701182e-06, + "loss": 0.405, + "step": 5263 + }, + { + "epoch": 0.2382439465942521, + "grad_norm": 0.6773098535047575, + "learning_rate": 8.905760470185974e-06, + "loss": 0.3726, + "step": 5264 + }, + { + "epoch": 0.23828920570264767, + "grad_norm": 0.31021528458286485, + "learning_rate": 8.90530283574797e-06, + "loss": 0.4924, + "step": 5265 + }, + { + "epoch": 0.23833446481104323, + "grad_norm": 0.6946956337630187, + "learning_rate": 8.904845117397e-06, + "loss": 0.3884, + "step": 5266 + }, + { + "epoch": 0.2383797239194388, + "grad_norm": 0.6487113983731831, + "learning_rate": 8.904387315142901e-06, + "loss": 0.3681, + "step": 5267 + }, + { + "epoch": 0.23842498302783435, + "grad_norm": 0.7148333414265486, + "learning_rate": 8.903929428995512e-06, + "loss": 0.3657, + "step": 5268 + }, + { + "epoch": 0.23847024213622992, + "grad_norm": 0.6329638363612822, + "learning_rate": 8.903471458964668e-06, + "loss": 0.3237, + "step": 5269 + }, + { + "epoch": 0.23851550124462548, + "grad_norm": 0.6737207547648599, + "learning_rate": 8.903013405060212e-06, + "loss": 0.3561, + "step": 5270 + }, + { + "epoch": 0.23856076035302104, + "grad_norm": 0.7075110234059288, + "learning_rate": 8.902555267291984e-06, + "loss": 0.3589, + "step": 5271 + }, + { + "epoch": 0.2386060194614166, + "grad_norm": 0.6371642429692355, + "learning_rate": 8.90209704566983e-06, + "loss": 0.3779, + "step": 5272 + }, + { + "epoch": 0.23865127856981216, + "grad_norm": 0.6740952463066865, + "learning_rate": 8.901638740203594e-06, + "loss": 0.3782, + "step": 5273 + }, + { + "epoch": 0.23869653767820773, + "grad_norm": 0.6595696256173088, + "learning_rate": 8.901180350903125e-06, + "loss": 0.3964, + "step": 5274 + }, + { + "epoch": 0.23874179678660332, + "grad_norm": 0.6521331171437978, + "learning_rate": 8.900721877778271e-06, + "loss": 0.3956, + "step": 5275 + }, + { + "epoch": 0.23878705589499888, + "grad_norm": 0.6925461727004567, + "learning_rate": 8.900263320838886e-06, + "loss": 0.357, + "step": 5276 + }, + { + "epoch": 0.23883231500339444, + "grad_norm": 0.5489727830840708, + "learning_rate": 8.899804680094818e-06, + "loss": 0.525, + "step": 5277 + }, + { + "epoch": 0.23887757411179, + "grad_norm": 0.6763946770947068, + "learning_rate": 8.899345955555928e-06, + "loss": 0.4357, + "step": 5278 + }, + { + "epoch": 0.23892283322018557, + "grad_norm": 0.7201089468189605, + "learning_rate": 8.898887147232066e-06, + "loss": 0.429, + "step": 5279 + }, + { + "epoch": 0.23896809232858113, + "grad_norm": 0.8181534036902893, + "learning_rate": 8.898428255133098e-06, + "loss": 0.3797, + "step": 5280 + }, + { + "epoch": 0.2390133514369767, + "grad_norm": 0.38237795007697833, + "learning_rate": 8.897969279268877e-06, + "loss": 0.5104, + "step": 5281 + }, + { + "epoch": 0.23905861054537225, + "grad_norm": 0.3744096064923189, + "learning_rate": 8.897510219649268e-06, + "loss": 0.4722, + "step": 5282 + }, + { + "epoch": 0.23910386965376781, + "grad_norm": 0.6605872245547142, + "learning_rate": 8.897051076284135e-06, + "loss": 0.3431, + "step": 5283 + }, + { + "epoch": 0.23914912876216338, + "grad_norm": 0.7269263863684868, + "learning_rate": 8.896591849183343e-06, + "loss": 0.3862, + "step": 5284 + }, + { + "epoch": 0.23919438787055894, + "grad_norm": 0.3298538520006822, + "learning_rate": 8.89613253835676e-06, + "loss": 0.4864, + "step": 5285 + }, + { + "epoch": 0.2392396469789545, + "grad_norm": 0.32610787321322837, + "learning_rate": 8.895673143814254e-06, + "loss": 0.481, + "step": 5286 + }, + { + "epoch": 0.2392849060873501, + "grad_norm": 0.692626113129391, + "learning_rate": 8.895213665565698e-06, + "loss": 0.4213, + "step": 5287 + }, + { + "epoch": 0.23933016519574565, + "grad_norm": 0.6756453499928873, + "learning_rate": 8.894754103620963e-06, + "loss": 0.3849, + "step": 5288 + }, + { + "epoch": 0.23937542430414122, + "grad_norm": 0.6977591454885193, + "learning_rate": 8.894294457989924e-06, + "loss": 0.399, + "step": 5289 + }, + { + "epoch": 0.23942068341253678, + "grad_norm": 0.7247753730009222, + "learning_rate": 8.893834728682459e-06, + "loss": 0.4267, + "step": 5290 + }, + { + "epoch": 0.23946594252093234, + "grad_norm": 0.6429940559942522, + "learning_rate": 8.893374915708443e-06, + "loss": 0.3309, + "step": 5291 + }, + { + "epoch": 0.2395112016293279, + "grad_norm": 0.6512035834490856, + "learning_rate": 8.892915019077757e-06, + "loss": 0.3736, + "step": 5292 + }, + { + "epoch": 0.23955646073772346, + "grad_norm": 0.6223161302422051, + "learning_rate": 8.892455038800286e-06, + "loss": 0.3783, + "step": 5293 + }, + { + "epoch": 0.23960171984611903, + "grad_norm": 0.6208666737007078, + "learning_rate": 8.891994974885909e-06, + "loss": 0.4299, + "step": 5294 + }, + { + "epoch": 0.2396469789545146, + "grad_norm": 0.5913234393655277, + "learning_rate": 8.891534827344514e-06, + "loss": 0.5051, + "step": 5295 + }, + { + "epoch": 0.23969223806291015, + "grad_norm": 0.6841295356040774, + "learning_rate": 8.891074596185987e-06, + "loss": 0.4066, + "step": 5296 + }, + { + "epoch": 0.2397374971713057, + "grad_norm": 0.7106840922605392, + "learning_rate": 8.890614281420218e-06, + "loss": 0.4207, + "step": 5297 + }, + { + "epoch": 0.2397827562797013, + "grad_norm": 0.6294873742101729, + "learning_rate": 8.890153883057097e-06, + "loss": 0.3652, + "step": 5298 + }, + { + "epoch": 0.23982801538809687, + "grad_norm": 0.6989552091439057, + "learning_rate": 8.889693401106516e-06, + "loss": 0.3965, + "step": 5299 + }, + { + "epoch": 0.23987327449649243, + "grad_norm": 0.6747147530686606, + "learning_rate": 8.889232835578372e-06, + "loss": 0.3384, + "step": 5300 + }, + { + "epoch": 0.239918533604888, + "grad_norm": 0.6658941217429191, + "learning_rate": 8.888772186482557e-06, + "loss": 0.3815, + "step": 5301 + }, + { + "epoch": 0.23996379271328355, + "grad_norm": 0.664905784458334, + "learning_rate": 8.888311453828973e-06, + "loss": 0.397, + "step": 5302 + }, + { + "epoch": 0.24000905182167911, + "grad_norm": 0.7081115510513791, + "learning_rate": 8.887850637627517e-06, + "loss": 0.356, + "step": 5303 + }, + { + "epoch": 0.24005431093007468, + "grad_norm": 0.7100845586062496, + "learning_rate": 8.88738973788809e-06, + "loss": 0.3899, + "step": 5304 + }, + { + "epoch": 0.24009957003847024, + "grad_norm": 0.7060600319820818, + "learning_rate": 8.8869287546206e-06, + "loss": 0.4217, + "step": 5305 + }, + { + "epoch": 0.2401448291468658, + "grad_norm": 0.6256192045316736, + "learning_rate": 8.886467687834946e-06, + "loss": 0.3582, + "step": 5306 + }, + { + "epoch": 0.24019008825526136, + "grad_norm": 0.8902326378633588, + "learning_rate": 8.88600653754104e-06, + "loss": 0.3692, + "step": 5307 + }, + { + "epoch": 0.24023534736365693, + "grad_norm": 0.6399337314616411, + "learning_rate": 8.885545303748786e-06, + "loss": 0.4017, + "step": 5308 + }, + { + "epoch": 0.2402806064720525, + "grad_norm": 0.6870566412315947, + "learning_rate": 8.8850839864681e-06, + "loss": 0.3831, + "step": 5309 + }, + { + "epoch": 0.24032586558044808, + "grad_norm": 0.6238546104441927, + "learning_rate": 8.884622585708888e-06, + "loss": 0.3703, + "step": 5310 + }, + { + "epoch": 0.24037112468884364, + "grad_norm": 0.99261545468166, + "learning_rate": 8.88416110148107e-06, + "loss": 0.4248, + "step": 5311 + }, + { + "epoch": 0.2404163837972392, + "grad_norm": 0.6688459208895308, + "learning_rate": 8.883699533794558e-06, + "loss": 0.4143, + "step": 5312 + }, + { + "epoch": 0.24046164290563476, + "grad_norm": 0.649799512025244, + "learning_rate": 8.883237882659271e-06, + "loss": 0.3451, + "step": 5313 + }, + { + "epoch": 0.24050690201403033, + "grad_norm": 0.6311487774639363, + "learning_rate": 8.882776148085129e-06, + "loss": 0.3589, + "step": 5314 + }, + { + "epoch": 0.2405521611224259, + "grad_norm": 0.6287242122377914, + "learning_rate": 8.882314330082051e-06, + "loss": 0.3873, + "step": 5315 + }, + { + "epoch": 0.24059742023082145, + "grad_norm": 0.6422213213204758, + "learning_rate": 8.881852428659963e-06, + "loss": 0.3887, + "step": 5316 + }, + { + "epoch": 0.240642679339217, + "grad_norm": 0.7059140395496692, + "learning_rate": 8.881390443828788e-06, + "loss": 0.3831, + "step": 5317 + }, + { + "epoch": 0.24068793844761258, + "grad_norm": 0.6526053796106218, + "learning_rate": 8.880928375598453e-06, + "loss": 0.4216, + "step": 5318 + }, + { + "epoch": 0.24073319755600814, + "grad_norm": 0.6717680282483763, + "learning_rate": 8.880466223978887e-06, + "loss": 0.4068, + "step": 5319 + }, + { + "epoch": 0.2407784566644037, + "grad_norm": 0.6617612520838584, + "learning_rate": 8.880003988980019e-06, + "loss": 0.3809, + "step": 5320 + }, + { + "epoch": 0.24082371577279926, + "grad_norm": 0.6311219890329217, + "learning_rate": 8.879541670611784e-06, + "loss": 0.3892, + "step": 5321 + }, + { + "epoch": 0.24086897488119485, + "grad_norm": 0.7371992835369472, + "learning_rate": 8.879079268884113e-06, + "loss": 0.369, + "step": 5322 + }, + { + "epoch": 0.24091423398959042, + "grad_norm": 0.8539629154715503, + "learning_rate": 8.878616783806939e-06, + "loss": 0.3762, + "step": 5323 + }, + { + "epoch": 0.24095949309798598, + "grad_norm": 0.6722749149455938, + "learning_rate": 8.878154215390204e-06, + "loss": 0.3884, + "step": 5324 + }, + { + "epoch": 0.24100475220638154, + "grad_norm": 0.6719106851436162, + "learning_rate": 8.877691563643848e-06, + "loss": 0.3862, + "step": 5325 + }, + { + "epoch": 0.2410500113147771, + "grad_norm": 0.6312868275883254, + "learning_rate": 8.877228828577809e-06, + "loss": 0.3606, + "step": 5326 + }, + { + "epoch": 0.24109527042317266, + "grad_norm": 0.47262316317293573, + "learning_rate": 8.876766010202029e-06, + "loss": 0.5013, + "step": 5327 + }, + { + "epoch": 0.24114052953156823, + "grad_norm": 0.7079327142025273, + "learning_rate": 8.876303108526455e-06, + "loss": 0.4296, + "step": 5328 + }, + { + "epoch": 0.2411857886399638, + "grad_norm": 0.658648471499382, + "learning_rate": 8.875840123561033e-06, + "loss": 0.3946, + "step": 5329 + }, + { + "epoch": 0.24123104774835935, + "grad_norm": 0.30867312660550816, + "learning_rate": 8.875377055315709e-06, + "loss": 0.4739, + "step": 5330 + }, + { + "epoch": 0.2412763068567549, + "grad_norm": 0.6468211564058911, + "learning_rate": 8.874913903800436e-06, + "loss": 0.3642, + "step": 5331 + }, + { + "epoch": 0.24132156596515048, + "grad_norm": 0.6775896448599275, + "learning_rate": 8.874450669025161e-06, + "loss": 0.415, + "step": 5332 + }, + { + "epoch": 0.24136682507354604, + "grad_norm": 0.6495529703738707, + "learning_rate": 8.873987350999843e-06, + "loss": 0.3654, + "step": 5333 + }, + { + "epoch": 0.24141208418194163, + "grad_norm": 0.4410663750016654, + "learning_rate": 8.873523949734435e-06, + "loss": 0.5283, + "step": 5334 + }, + { + "epoch": 0.2414573432903372, + "grad_norm": 0.6581032625586402, + "learning_rate": 8.873060465238894e-06, + "loss": 0.3393, + "step": 5335 + }, + { + "epoch": 0.24150260239873275, + "grad_norm": 0.6763820592845609, + "learning_rate": 8.872596897523178e-06, + "loss": 0.3877, + "step": 5336 + }, + { + "epoch": 0.24154786150712831, + "grad_norm": 0.741517536605685, + "learning_rate": 8.872133246597247e-06, + "loss": 0.3722, + "step": 5337 + }, + { + "epoch": 0.24159312061552388, + "grad_norm": 0.6843360209148616, + "learning_rate": 8.871669512471068e-06, + "loss": 0.3671, + "step": 5338 + }, + { + "epoch": 0.24163837972391944, + "grad_norm": 0.6594839446020356, + "learning_rate": 8.871205695154601e-06, + "loss": 0.3787, + "step": 5339 + }, + { + "epoch": 0.241683638832315, + "grad_norm": 0.37323150087019386, + "learning_rate": 8.870741794657814e-06, + "loss": 0.4727, + "step": 5340 + }, + { + "epoch": 0.24172889794071056, + "grad_norm": 0.3654118177626498, + "learning_rate": 8.870277810990671e-06, + "loss": 0.5142, + "step": 5341 + }, + { + "epoch": 0.24177415704910613, + "grad_norm": 0.6965027222944363, + "learning_rate": 8.869813744163147e-06, + "loss": 0.4065, + "step": 5342 + }, + { + "epoch": 0.2418194161575017, + "grad_norm": 0.7002338027510239, + "learning_rate": 8.86934959418521e-06, + "loss": 0.3781, + "step": 5343 + }, + { + "epoch": 0.24186467526589725, + "grad_norm": 0.6496249565713254, + "learning_rate": 8.868885361066835e-06, + "loss": 0.3628, + "step": 5344 + }, + { + "epoch": 0.24190993437429284, + "grad_norm": 0.4174009112938387, + "learning_rate": 8.868421044817994e-06, + "loss": 0.4999, + "step": 5345 + }, + { + "epoch": 0.2419551934826884, + "grad_norm": 0.7319192637518557, + "learning_rate": 8.867956645448667e-06, + "loss": 0.4531, + "step": 5346 + }, + { + "epoch": 0.24200045259108396, + "grad_norm": 0.6793857881569461, + "learning_rate": 8.86749216296883e-06, + "loss": 0.3535, + "step": 5347 + }, + { + "epoch": 0.24204571169947953, + "grad_norm": 0.3200373606682935, + "learning_rate": 8.867027597388467e-06, + "loss": 0.4857, + "step": 5348 + }, + { + "epoch": 0.2420909708078751, + "grad_norm": 0.319293383103753, + "learning_rate": 8.866562948717555e-06, + "loss": 0.4753, + "step": 5349 + }, + { + "epoch": 0.24213622991627065, + "grad_norm": 0.30674679923048015, + "learning_rate": 8.866098216966081e-06, + "loss": 0.5124, + "step": 5350 + }, + { + "epoch": 0.2421814890246662, + "grad_norm": 0.30221657868269836, + "learning_rate": 8.865633402144032e-06, + "loss": 0.5116, + "step": 5351 + }, + { + "epoch": 0.24222674813306178, + "grad_norm": 0.29530341560010703, + "learning_rate": 8.865168504261392e-06, + "loss": 0.4946, + "step": 5352 + }, + { + "epoch": 0.24227200724145734, + "grad_norm": 0.7944404800721792, + "learning_rate": 8.864703523328153e-06, + "loss": 0.396, + "step": 5353 + }, + { + "epoch": 0.2423172663498529, + "grad_norm": 0.33803840764758725, + "learning_rate": 8.864238459354303e-06, + "loss": 0.4723, + "step": 5354 + }, + { + "epoch": 0.24236252545824846, + "grad_norm": 0.6518971223670713, + "learning_rate": 8.863773312349838e-06, + "loss": 0.3695, + "step": 5355 + }, + { + "epoch": 0.24240778456664402, + "grad_norm": 0.3380621018492585, + "learning_rate": 8.86330808232475e-06, + "loss": 0.5039, + "step": 5356 + }, + { + "epoch": 0.24245304367503961, + "grad_norm": 0.7447560157517059, + "learning_rate": 8.862842769289037e-06, + "loss": 0.3771, + "step": 5357 + }, + { + "epoch": 0.24249830278343518, + "grad_norm": 0.7501764157418583, + "learning_rate": 8.862377373252697e-06, + "loss": 0.3699, + "step": 5358 + }, + { + "epoch": 0.24254356189183074, + "grad_norm": 0.6265046919283168, + "learning_rate": 8.86191189422573e-06, + "loss": 0.4108, + "step": 5359 + }, + { + "epoch": 0.2425888210002263, + "grad_norm": 0.6313737707557908, + "learning_rate": 8.861446332218138e-06, + "loss": 0.37, + "step": 5360 + }, + { + "epoch": 0.24263408010862186, + "grad_norm": 0.7014318487147055, + "learning_rate": 8.860980687239922e-06, + "loss": 0.4077, + "step": 5361 + }, + { + "epoch": 0.24267933921701743, + "grad_norm": 0.7094592637276969, + "learning_rate": 8.86051495930109e-06, + "loss": 0.3813, + "step": 5362 + }, + { + "epoch": 0.242724598325413, + "grad_norm": 0.6135837069346777, + "learning_rate": 8.860049148411649e-06, + "loss": 0.3697, + "step": 5363 + }, + { + "epoch": 0.24276985743380855, + "grad_norm": 0.6548772644362388, + "learning_rate": 8.859583254581604e-06, + "loss": 0.4045, + "step": 5364 + }, + { + "epoch": 0.2428151165422041, + "grad_norm": 0.4681752414364971, + "learning_rate": 8.859117277820972e-06, + "loss": 0.5107, + "step": 5365 + }, + { + "epoch": 0.24286037565059967, + "grad_norm": 0.7547141701141868, + "learning_rate": 8.85865121813976e-06, + "loss": 0.4012, + "step": 5366 + }, + { + "epoch": 0.24290563475899524, + "grad_norm": 0.6690380482512771, + "learning_rate": 8.858185075547987e-06, + "loss": 0.4845, + "step": 5367 + }, + { + "epoch": 0.2429508938673908, + "grad_norm": 0.6493813874743187, + "learning_rate": 8.857718850055663e-06, + "loss": 0.3698, + "step": 5368 + }, + { + "epoch": 0.2429961529757864, + "grad_norm": 0.7556859630562037, + "learning_rate": 8.857252541672812e-06, + "loss": 0.429, + "step": 5369 + }, + { + "epoch": 0.24304141208418195, + "grad_norm": 0.6888913998842668, + "learning_rate": 8.856786150409448e-06, + "loss": 0.3613, + "step": 5370 + }, + { + "epoch": 0.2430866711925775, + "grad_norm": 0.648604761289055, + "learning_rate": 8.856319676275595e-06, + "loss": 0.3691, + "step": 5371 + }, + { + "epoch": 0.24313193030097308, + "grad_norm": 0.5213434269140345, + "learning_rate": 8.855853119281278e-06, + "loss": 0.4927, + "step": 5372 + }, + { + "epoch": 0.24317718940936864, + "grad_norm": 0.6643186693105698, + "learning_rate": 8.855386479436518e-06, + "loss": 0.4262, + "step": 5373 + }, + { + "epoch": 0.2432224485177642, + "grad_norm": 0.6483221393724724, + "learning_rate": 8.854919756751343e-06, + "loss": 0.3478, + "step": 5374 + }, + { + "epoch": 0.24326770762615976, + "grad_norm": 0.611895884992864, + "learning_rate": 8.854452951235784e-06, + "loss": 0.351, + "step": 5375 + }, + { + "epoch": 0.24331296673455532, + "grad_norm": 0.6942629669349091, + "learning_rate": 8.853986062899869e-06, + "loss": 0.4008, + "step": 5376 + }, + { + "epoch": 0.2433582258429509, + "grad_norm": 0.6256217162597607, + "learning_rate": 8.853519091753629e-06, + "loss": 0.3447, + "step": 5377 + }, + { + "epoch": 0.24340348495134645, + "grad_norm": 0.6993569125923726, + "learning_rate": 8.853052037807099e-06, + "loss": 0.3695, + "step": 5378 + }, + { + "epoch": 0.243448744059742, + "grad_norm": 0.6163882470276689, + "learning_rate": 8.852584901070314e-06, + "loss": 0.4004, + "step": 5379 + }, + { + "epoch": 0.24349400316813757, + "grad_norm": 0.6280681657569366, + "learning_rate": 8.852117681553312e-06, + "loss": 0.3811, + "step": 5380 + }, + { + "epoch": 0.24353926227653316, + "grad_norm": 0.44492594792209034, + "learning_rate": 8.851650379266133e-06, + "loss": 0.5072, + "step": 5381 + }, + { + "epoch": 0.24358452138492873, + "grad_norm": 0.7206872723939073, + "learning_rate": 8.851182994218815e-06, + "loss": 0.3498, + "step": 5382 + }, + { + "epoch": 0.2436297804933243, + "grad_norm": 0.6696865366923967, + "learning_rate": 8.850715526421404e-06, + "loss": 0.4066, + "step": 5383 + }, + { + "epoch": 0.24367503960171985, + "grad_norm": 0.6626802638484196, + "learning_rate": 8.850247975883942e-06, + "loss": 0.3876, + "step": 5384 + }, + { + "epoch": 0.2437202987101154, + "grad_norm": 0.6755263223808509, + "learning_rate": 8.849780342616477e-06, + "loss": 0.3734, + "step": 5385 + }, + { + "epoch": 0.24376555781851098, + "grad_norm": 0.6319556903000817, + "learning_rate": 8.849312626629055e-06, + "loss": 0.3842, + "step": 5386 + }, + { + "epoch": 0.24381081692690654, + "grad_norm": 0.6609492411166165, + "learning_rate": 8.848844827931727e-06, + "loss": 0.3761, + "step": 5387 + }, + { + "epoch": 0.2438560760353021, + "grad_norm": 0.6241567642176671, + "learning_rate": 8.848376946534545e-06, + "loss": 0.3792, + "step": 5388 + }, + { + "epoch": 0.24390133514369766, + "grad_norm": 0.7183087410306984, + "learning_rate": 8.847908982447561e-06, + "loss": 0.4036, + "step": 5389 + }, + { + "epoch": 0.24394659425209322, + "grad_norm": 0.7218942848451008, + "learning_rate": 8.847440935680833e-06, + "loss": 0.3661, + "step": 5390 + }, + { + "epoch": 0.2439918533604888, + "grad_norm": 0.6694338164281567, + "learning_rate": 8.846972806244415e-06, + "loss": 0.3447, + "step": 5391 + }, + { + "epoch": 0.24403711246888438, + "grad_norm": 0.6908215455706206, + "learning_rate": 8.846504594148366e-06, + "loss": 0.3893, + "step": 5392 + }, + { + "epoch": 0.24408237157727994, + "grad_norm": 0.6561908456944305, + "learning_rate": 8.846036299402747e-06, + "loss": 0.4021, + "step": 5393 + }, + { + "epoch": 0.2441276306856755, + "grad_norm": 0.4597583708167697, + "learning_rate": 8.84556792201762e-06, + "loss": 0.5165, + "step": 5394 + }, + { + "epoch": 0.24417288979407106, + "grad_norm": 0.6821535747642689, + "learning_rate": 8.845099462003049e-06, + "loss": 0.4006, + "step": 5395 + }, + { + "epoch": 0.24421814890246663, + "grad_norm": 0.3192590633854796, + "learning_rate": 8.844630919369099e-06, + "loss": 0.4962, + "step": 5396 + }, + { + "epoch": 0.2442634080108622, + "grad_norm": 0.7205417774604912, + "learning_rate": 8.84416229412584e-06, + "loss": 0.3918, + "step": 5397 + }, + { + "epoch": 0.24430866711925775, + "grad_norm": 0.647692178832509, + "learning_rate": 8.84369358628334e-06, + "loss": 0.3968, + "step": 5398 + }, + { + "epoch": 0.2443539262276533, + "grad_norm": 0.6842297749923265, + "learning_rate": 8.843224795851668e-06, + "loss": 0.4078, + "step": 5399 + }, + { + "epoch": 0.24439918533604887, + "grad_norm": 0.7569759066971131, + "learning_rate": 8.8427559228409e-06, + "loss": 0.3973, + "step": 5400 + }, + { + "epoch": 0.24444444444444444, + "grad_norm": 0.6414284504701626, + "learning_rate": 8.842286967261109e-06, + "loss": 0.3699, + "step": 5401 + }, + { + "epoch": 0.24448970355284, + "grad_norm": 0.44428672016307075, + "learning_rate": 8.841817929122373e-06, + "loss": 0.4885, + "step": 5402 + }, + { + "epoch": 0.24453496266123556, + "grad_norm": 0.7171617724470963, + "learning_rate": 8.841348808434766e-06, + "loss": 0.3672, + "step": 5403 + }, + { + "epoch": 0.24458022176963115, + "grad_norm": 0.37253477989149236, + "learning_rate": 8.840879605208374e-06, + "loss": 0.4672, + "step": 5404 + }, + { + "epoch": 0.2446254808780267, + "grad_norm": 0.7108694071388104, + "learning_rate": 8.840410319453274e-06, + "loss": 0.4388, + "step": 5405 + }, + { + "epoch": 0.24467073998642228, + "grad_norm": 0.6828806879996595, + "learning_rate": 8.839940951179552e-06, + "loss": 0.3855, + "step": 5406 + }, + { + "epoch": 0.24471599909481784, + "grad_norm": 0.3537449598703036, + "learning_rate": 8.839471500397292e-06, + "loss": 0.4777, + "step": 5407 + }, + { + "epoch": 0.2447612582032134, + "grad_norm": 0.7020097644297246, + "learning_rate": 8.83900196711658e-06, + "loss": 0.3823, + "step": 5408 + }, + { + "epoch": 0.24480651731160896, + "grad_norm": 0.6807381310828038, + "learning_rate": 8.838532351347509e-06, + "loss": 0.4012, + "step": 5409 + }, + { + "epoch": 0.24485177642000452, + "grad_norm": 0.7073324268304318, + "learning_rate": 8.838062653100165e-06, + "loss": 0.4034, + "step": 5410 + }, + { + "epoch": 0.2448970355284001, + "grad_norm": 0.7850841150864362, + "learning_rate": 8.837592872384643e-06, + "loss": 0.3713, + "step": 5411 + }, + { + "epoch": 0.24494229463679565, + "grad_norm": 0.6864920531829904, + "learning_rate": 8.837123009211038e-06, + "loss": 0.4047, + "step": 5412 + }, + { + "epoch": 0.2449875537451912, + "grad_norm": 0.48113930173789576, + "learning_rate": 8.836653063589443e-06, + "loss": 0.4991, + "step": 5413 + }, + { + "epoch": 0.24503281285358677, + "grad_norm": 0.4411436163117178, + "learning_rate": 8.836183035529954e-06, + "loss": 0.4979, + "step": 5414 + }, + { + "epoch": 0.24507807196198234, + "grad_norm": 0.7466742119661637, + "learning_rate": 8.835712925042678e-06, + "loss": 0.3974, + "step": 5415 + }, + { + "epoch": 0.24512333107037793, + "grad_norm": 0.7179041559362122, + "learning_rate": 8.83524273213771e-06, + "loss": 0.3743, + "step": 5416 + }, + { + "epoch": 0.2451685901787735, + "grad_norm": 0.6572146908802492, + "learning_rate": 8.834772456825155e-06, + "loss": 0.4023, + "step": 5417 + }, + { + "epoch": 0.24521384928716905, + "grad_norm": 0.5038071350955399, + "learning_rate": 8.834302099115118e-06, + "loss": 0.5026, + "step": 5418 + }, + { + "epoch": 0.2452591083955646, + "grad_norm": 0.8540148243050669, + "learning_rate": 8.833831659017703e-06, + "loss": 0.3645, + "step": 5419 + }, + { + "epoch": 0.24530436750396017, + "grad_norm": 0.6457518436451026, + "learning_rate": 8.833361136543021e-06, + "loss": 0.3766, + "step": 5420 + }, + { + "epoch": 0.24534962661235574, + "grad_norm": 0.7338945386325528, + "learning_rate": 8.832890531701184e-06, + "loss": 0.4062, + "step": 5421 + }, + { + "epoch": 0.2453948857207513, + "grad_norm": 0.7518020196047538, + "learning_rate": 8.832419844502298e-06, + "loss": 0.4127, + "step": 5422 + }, + { + "epoch": 0.24544014482914686, + "grad_norm": 0.6440486004965358, + "learning_rate": 8.831949074956483e-06, + "loss": 0.3488, + "step": 5423 + }, + { + "epoch": 0.24548540393754242, + "grad_norm": 0.3900943962261443, + "learning_rate": 8.831478223073848e-06, + "loss": 0.4871, + "step": 5424 + }, + { + "epoch": 0.24553066304593799, + "grad_norm": 0.36365756981959374, + "learning_rate": 8.831007288864517e-06, + "loss": 0.4751, + "step": 5425 + }, + { + "epoch": 0.24557592215433355, + "grad_norm": 0.9342732459976021, + "learning_rate": 8.830536272338602e-06, + "loss": 0.3965, + "step": 5426 + }, + { + "epoch": 0.24562118126272914, + "grad_norm": 0.7180496226403557, + "learning_rate": 8.830065173506229e-06, + "loss": 0.4072, + "step": 5427 + }, + { + "epoch": 0.2456664403711247, + "grad_norm": 0.6947647126181522, + "learning_rate": 8.829593992377518e-06, + "loss": 0.3607, + "step": 5428 + }, + { + "epoch": 0.24571169947952026, + "grad_norm": 0.4581266564149199, + "learning_rate": 8.829122728962594e-06, + "loss": 0.4792, + "step": 5429 + }, + { + "epoch": 0.24575695858791582, + "grad_norm": 0.837846180542243, + "learning_rate": 8.828651383271582e-06, + "loss": 0.4022, + "step": 5430 + }, + { + "epoch": 0.2458022176963114, + "grad_norm": 0.8475559381295793, + "learning_rate": 8.828179955314612e-06, + "loss": 0.3761, + "step": 5431 + }, + { + "epoch": 0.24584747680470695, + "grad_norm": 0.679435079467592, + "learning_rate": 8.827708445101813e-06, + "loss": 0.3274, + "step": 5432 + }, + { + "epoch": 0.2458927359131025, + "grad_norm": 0.7471578999986938, + "learning_rate": 8.827236852643313e-06, + "loss": 0.3658, + "step": 5433 + }, + { + "epoch": 0.24593799502149807, + "grad_norm": 0.7937943646003452, + "learning_rate": 8.826765177949248e-06, + "loss": 0.3832, + "step": 5434 + }, + { + "epoch": 0.24598325412989364, + "grad_norm": 0.7468722171561877, + "learning_rate": 8.826293421029754e-06, + "loss": 0.3874, + "step": 5435 + }, + { + "epoch": 0.2460285132382892, + "grad_norm": 0.6400369840427028, + "learning_rate": 8.825821581894964e-06, + "loss": 0.4022, + "step": 5436 + }, + { + "epoch": 0.24607377234668476, + "grad_norm": 0.40295831861440534, + "learning_rate": 8.82534966055502e-06, + "loss": 0.4951, + "step": 5437 + }, + { + "epoch": 0.24611903145508032, + "grad_norm": 0.401916187918165, + "learning_rate": 8.824877657020058e-06, + "loss": 0.4945, + "step": 5438 + }, + { + "epoch": 0.2461642905634759, + "grad_norm": 0.7838023003559151, + "learning_rate": 8.824405571300225e-06, + "loss": 0.3804, + "step": 5439 + }, + { + "epoch": 0.24620954967187147, + "grad_norm": 0.9976284832408775, + "learning_rate": 8.82393340340566e-06, + "loss": 0.3876, + "step": 5440 + }, + { + "epoch": 0.24625480878026704, + "grad_norm": 0.6993269738534612, + "learning_rate": 8.823461153346512e-06, + "loss": 0.3865, + "step": 5441 + }, + { + "epoch": 0.2463000678886626, + "grad_norm": 0.6974540450082418, + "learning_rate": 8.822988821132925e-06, + "loss": 0.3761, + "step": 5442 + }, + { + "epoch": 0.24634532699705816, + "grad_norm": 0.6574267966627634, + "learning_rate": 8.822516406775051e-06, + "loss": 0.3712, + "step": 5443 + }, + { + "epoch": 0.24639058610545372, + "grad_norm": 0.7192051932707151, + "learning_rate": 8.822043910283041e-06, + "loss": 0.3694, + "step": 5444 + }, + { + "epoch": 0.24643584521384929, + "grad_norm": 0.6941915904680251, + "learning_rate": 8.821571331667043e-06, + "loss": 0.356, + "step": 5445 + }, + { + "epoch": 0.24648110432224485, + "grad_norm": 0.6784074255903936, + "learning_rate": 8.821098670937215e-06, + "loss": 0.4146, + "step": 5446 + }, + { + "epoch": 0.2465263634306404, + "grad_norm": 0.6372613000327357, + "learning_rate": 8.820625928103712e-06, + "loss": 0.3657, + "step": 5447 + }, + { + "epoch": 0.24657162253903597, + "grad_norm": 0.565621513237359, + "learning_rate": 8.820153103176692e-06, + "loss": 0.4783, + "step": 5448 + }, + { + "epoch": 0.24661688164743154, + "grad_norm": 0.7421946650728314, + "learning_rate": 8.819680196166315e-06, + "loss": 0.405, + "step": 5449 + }, + { + "epoch": 0.2466621407558271, + "grad_norm": 0.7230679706345529, + "learning_rate": 8.819207207082741e-06, + "loss": 0.3889, + "step": 5450 + }, + { + "epoch": 0.2467073998642227, + "grad_norm": 0.6871674592758005, + "learning_rate": 8.818734135936136e-06, + "loss": 0.3559, + "step": 5451 + }, + { + "epoch": 0.24675265897261825, + "grad_norm": 0.704175915763451, + "learning_rate": 8.818260982736662e-06, + "loss": 0.389, + "step": 5452 + }, + { + "epoch": 0.2467979180810138, + "grad_norm": 0.6566117862980755, + "learning_rate": 8.817787747494484e-06, + "loss": 0.4332, + "step": 5453 + }, + { + "epoch": 0.24684317718940937, + "grad_norm": 1.9586366774313648, + "learning_rate": 8.817314430219775e-06, + "loss": 0.3669, + "step": 5454 + }, + { + "epoch": 0.24688843629780494, + "grad_norm": 0.6883090670651413, + "learning_rate": 8.816841030922702e-06, + "loss": 0.3951, + "step": 5455 + }, + { + "epoch": 0.2469336954062005, + "grad_norm": 0.6684753360093686, + "learning_rate": 8.816367549613439e-06, + "loss": 0.3708, + "step": 5456 + }, + { + "epoch": 0.24697895451459606, + "grad_norm": 0.6699780649262084, + "learning_rate": 8.815893986302158e-06, + "loss": 0.36, + "step": 5457 + }, + { + "epoch": 0.24702421362299162, + "grad_norm": 0.6751583780687819, + "learning_rate": 8.815420340999034e-06, + "loss": 0.3692, + "step": 5458 + }, + { + "epoch": 0.24706947273138719, + "grad_norm": 0.8641158746125323, + "learning_rate": 8.814946613714244e-06, + "loss": 0.3599, + "step": 5459 + }, + { + "epoch": 0.24711473183978275, + "grad_norm": 0.6746092328932488, + "learning_rate": 8.81447280445797e-06, + "loss": 0.428, + "step": 5460 + }, + { + "epoch": 0.2471599909481783, + "grad_norm": 0.8619884401255935, + "learning_rate": 8.81399891324039e-06, + "loss": 0.3568, + "step": 5461 + }, + { + "epoch": 0.24720525005657387, + "grad_norm": 0.5537299656020676, + "learning_rate": 8.813524940071687e-06, + "loss": 0.5229, + "step": 5462 + }, + { + "epoch": 0.24725050916496946, + "grad_norm": 0.7466826870291752, + "learning_rate": 8.813050884962046e-06, + "loss": 0.3674, + "step": 5463 + }, + { + "epoch": 0.24729576827336502, + "grad_norm": 0.32719231858791137, + "learning_rate": 8.812576747921653e-06, + "loss": 0.4921, + "step": 5464 + }, + { + "epoch": 0.2473410273817606, + "grad_norm": 0.7554589756926923, + "learning_rate": 8.812102528960693e-06, + "loss": 0.3852, + "step": 5465 + }, + { + "epoch": 0.24738628649015615, + "grad_norm": 0.7777119738955086, + "learning_rate": 8.81162822808936e-06, + "loss": 0.4032, + "step": 5466 + }, + { + "epoch": 0.2474315455985517, + "grad_norm": 0.5932440156262864, + "learning_rate": 8.811153845317842e-06, + "loss": 0.3426, + "step": 5467 + }, + { + "epoch": 0.24747680470694727, + "grad_norm": 0.7747935373734771, + "learning_rate": 8.810679380656331e-06, + "loss": 0.3747, + "step": 5468 + }, + { + "epoch": 0.24752206381534284, + "grad_norm": 0.834579211230089, + "learning_rate": 8.810204834115026e-06, + "loss": 0.3553, + "step": 5469 + }, + { + "epoch": 0.2475673229237384, + "grad_norm": 0.7751947065367353, + "learning_rate": 8.80973020570412e-06, + "loss": 0.3669, + "step": 5470 + }, + { + "epoch": 0.24761258203213396, + "grad_norm": 0.6760212197334325, + "learning_rate": 8.809255495433814e-06, + "loss": 0.3715, + "step": 5471 + }, + { + "epoch": 0.24765784114052952, + "grad_norm": 0.7701164610513445, + "learning_rate": 8.808780703314305e-06, + "loss": 0.4749, + "step": 5472 + }, + { + "epoch": 0.24770310024892508, + "grad_norm": 0.7626850010355772, + "learning_rate": 8.808305829355797e-06, + "loss": 0.3791, + "step": 5473 + }, + { + "epoch": 0.24774835935732067, + "grad_norm": 0.7405751108830647, + "learning_rate": 8.807830873568493e-06, + "loss": 0.4131, + "step": 5474 + }, + { + "epoch": 0.24779361846571624, + "grad_norm": 0.39037150438658097, + "learning_rate": 8.8073558359626e-06, + "loss": 0.4998, + "step": 5475 + }, + { + "epoch": 0.2478388775741118, + "grad_norm": 0.6539788911826464, + "learning_rate": 8.806880716548322e-06, + "loss": 0.4044, + "step": 5476 + }, + { + "epoch": 0.24788413668250736, + "grad_norm": 0.6535225496446154, + "learning_rate": 8.80640551533587e-06, + "loss": 0.3887, + "step": 5477 + }, + { + "epoch": 0.24792939579090292, + "grad_norm": 0.4697297560970744, + "learning_rate": 8.805930232335454e-06, + "loss": 0.5045, + "step": 5478 + }, + { + "epoch": 0.24797465489929849, + "grad_norm": 0.7089333794869764, + "learning_rate": 8.805454867557284e-06, + "loss": 0.3496, + "step": 5479 + }, + { + "epoch": 0.24801991400769405, + "grad_norm": 0.6986426965189364, + "learning_rate": 8.804979421011579e-06, + "loss": 0.4134, + "step": 5480 + }, + { + "epoch": 0.2480651731160896, + "grad_norm": 0.6432027825709875, + "learning_rate": 8.804503892708552e-06, + "loss": 0.3891, + "step": 5481 + }, + { + "epoch": 0.24811043222448517, + "grad_norm": 0.6516825549121646, + "learning_rate": 8.80402828265842e-06, + "loss": 0.3936, + "step": 5482 + }, + { + "epoch": 0.24815569133288073, + "grad_norm": 0.6905744064567494, + "learning_rate": 8.803552590871406e-06, + "loss": 0.3776, + "step": 5483 + }, + { + "epoch": 0.2482009504412763, + "grad_norm": 0.770332771693627, + "learning_rate": 8.803076817357725e-06, + "loss": 0.4089, + "step": 5484 + }, + { + "epoch": 0.24824620954967186, + "grad_norm": 0.7445899391534694, + "learning_rate": 8.802600962127606e-06, + "loss": 0.4047, + "step": 5485 + }, + { + "epoch": 0.24829146865806745, + "grad_norm": 0.610507084099802, + "learning_rate": 8.802125025191268e-06, + "loss": 0.4052, + "step": 5486 + }, + { + "epoch": 0.248336727766463, + "grad_norm": 0.6567358608756095, + "learning_rate": 8.801649006558943e-06, + "loss": 0.4266, + "step": 5487 + }, + { + "epoch": 0.24838198687485857, + "grad_norm": 0.6596038921834942, + "learning_rate": 8.801172906240857e-06, + "loss": 0.4, + "step": 5488 + }, + { + "epoch": 0.24842724598325414, + "grad_norm": 0.590396728859398, + "learning_rate": 8.800696724247239e-06, + "loss": 0.3684, + "step": 5489 + }, + { + "epoch": 0.2484725050916497, + "grad_norm": 0.6386192813750432, + "learning_rate": 8.800220460588321e-06, + "loss": 0.3714, + "step": 5490 + }, + { + "epoch": 0.24851776420004526, + "grad_norm": 0.6997118465528872, + "learning_rate": 8.799744115274339e-06, + "loss": 0.4147, + "step": 5491 + }, + { + "epoch": 0.24856302330844082, + "grad_norm": 0.6565255682296418, + "learning_rate": 8.799267688315523e-06, + "loss": 0.3748, + "step": 5492 + }, + { + "epoch": 0.24860828241683638, + "grad_norm": 0.6225869450633494, + "learning_rate": 8.798791179722114e-06, + "loss": 0.3724, + "step": 5493 + }, + { + "epoch": 0.24865354152523195, + "grad_norm": 0.7199023735140465, + "learning_rate": 8.798314589504348e-06, + "loss": 0.3847, + "step": 5494 + }, + { + "epoch": 0.2486988006336275, + "grad_norm": 0.6212669132572823, + "learning_rate": 8.79783791767247e-06, + "loss": 0.5017, + "step": 5495 + }, + { + "epoch": 0.24874405974202307, + "grad_norm": 0.6840190455449334, + "learning_rate": 8.797361164236717e-06, + "loss": 0.3892, + "step": 5496 + }, + { + "epoch": 0.24878931885041863, + "grad_norm": 0.673935866571196, + "learning_rate": 8.796884329207337e-06, + "loss": 0.3901, + "step": 5497 + }, + { + "epoch": 0.24883457795881422, + "grad_norm": 0.6989693763930492, + "learning_rate": 8.796407412594573e-06, + "loss": 0.3867, + "step": 5498 + }, + { + "epoch": 0.24887983706720979, + "grad_norm": 0.3414127366149348, + "learning_rate": 8.795930414408676e-06, + "loss": 0.4909, + "step": 5499 + }, + { + "epoch": 0.24892509617560535, + "grad_norm": 0.3350419170199567, + "learning_rate": 8.795453334659889e-06, + "loss": 0.4988, + "step": 5500 + }, + { + "epoch": 0.2489703552840009, + "grad_norm": 0.7526309166356293, + "learning_rate": 8.79497617335847e-06, + "loss": 0.3886, + "step": 5501 + }, + { + "epoch": 0.24901561439239647, + "grad_norm": 0.6392150511382405, + "learning_rate": 8.794498930514666e-06, + "loss": 0.3453, + "step": 5502 + }, + { + "epoch": 0.24906087350079203, + "grad_norm": 0.6385867732761876, + "learning_rate": 8.794021606138734e-06, + "loss": 0.3432, + "step": 5503 + }, + { + "epoch": 0.2491061326091876, + "grad_norm": 0.6785794290773549, + "learning_rate": 8.793544200240932e-06, + "loss": 0.3906, + "step": 5504 + }, + { + "epoch": 0.24915139171758316, + "grad_norm": 0.6534436391608166, + "learning_rate": 8.793066712831515e-06, + "loss": 0.3868, + "step": 5505 + }, + { + "epoch": 0.24919665082597872, + "grad_norm": 0.6416942590392011, + "learning_rate": 8.792589143920743e-06, + "loss": 0.3912, + "step": 5506 + }, + { + "epoch": 0.24924190993437428, + "grad_norm": 0.6457455294679602, + "learning_rate": 8.792111493518878e-06, + "loss": 0.3795, + "step": 5507 + }, + { + "epoch": 0.24928716904276985, + "grad_norm": 0.6324241805057175, + "learning_rate": 8.791633761636186e-06, + "loss": 0.3617, + "step": 5508 + }, + { + "epoch": 0.2493324281511654, + "grad_norm": 0.7451581373182293, + "learning_rate": 8.791155948282927e-06, + "loss": 0.4277, + "step": 5509 + }, + { + "epoch": 0.249377687259561, + "grad_norm": 0.6652436681096543, + "learning_rate": 8.790678053469372e-06, + "loss": 0.4655, + "step": 5510 + }, + { + "epoch": 0.24942294636795656, + "grad_norm": 0.6862925638345763, + "learning_rate": 8.790200077205789e-06, + "loss": 0.4131, + "step": 5511 + }, + { + "epoch": 0.24946820547635212, + "grad_norm": 0.6767907349063768, + "learning_rate": 8.789722019502444e-06, + "loss": 0.415, + "step": 5512 + }, + { + "epoch": 0.24951346458474769, + "grad_norm": 0.3808039754062271, + "learning_rate": 8.789243880369613e-06, + "loss": 0.4734, + "step": 5513 + }, + { + "epoch": 0.24955872369314325, + "grad_norm": 0.6477071323204457, + "learning_rate": 8.78876565981757e-06, + "loss": 0.3865, + "step": 5514 + }, + { + "epoch": 0.2496039828015388, + "grad_norm": 0.7227539653597835, + "learning_rate": 8.788287357856588e-06, + "loss": 0.3678, + "step": 5515 + }, + { + "epoch": 0.24964924190993437, + "grad_norm": 0.3728619700056104, + "learning_rate": 8.787808974496946e-06, + "loss": 0.5286, + "step": 5516 + }, + { + "epoch": 0.24969450101832993, + "grad_norm": 0.8103874044499024, + "learning_rate": 8.787330509748924e-06, + "loss": 0.3696, + "step": 5517 + }, + { + "epoch": 0.2497397601267255, + "grad_norm": 0.6919952359581814, + "learning_rate": 8.786851963622799e-06, + "loss": 0.4202, + "step": 5518 + }, + { + "epoch": 0.24978501923512106, + "grad_norm": 0.6528924734294044, + "learning_rate": 8.786373336128858e-06, + "loss": 0.399, + "step": 5519 + }, + { + "epoch": 0.24983027834351662, + "grad_norm": 0.7227289627808617, + "learning_rate": 8.78589462727738e-06, + "loss": 0.3805, + "step": 5520 + }, + { + "epoch": 0.2498755374519122, + "grad_norm": 0.6116874693101342, + "learning_rate": 8.785415837078655e-06, + "loss": 0.3814, + "step": 5521 + }, + { + "epoch": 0.24992079656030777, + "grad_norm": 0.47326084506519833, + "learning_rate": 8.78493696554297e-06, + "loss": 0.5113, + "step": 5522 + }, + { + "epoch": 0.24996605566870334, + "grad_norm": 0.6477654979674229, + "learning_rate": 8.784458012680614e-06, + "loss": 0.3818, + "step": 5523 + }, + { + "epoch": 0.25001131477709887, + "grad_norm": 0.6210811401562978, + "learning_rate": 8.783978978501879e-06, + "loss": 0.379, + "step": 5524 + }, + { + "epoch": 0.25005657388549446, + "grad_norm": 0.6830342901861352, + "learning_rate": 8.783499863017057e-06, + "loss": 0.3561, + "step": 5525 + }, + { + "epoch": 0.25010183299389, + "grad_norm": 0.6731567670022601, + "learning_rate": 8.783020666236443e-06, + "loss": 0.4218, + "step": 5526 + }, + { + "epoch": 0.2501470921022856, + "grad_norm": 0.65807887584648, + "learning_rate": 8.782541388170334e-06, + "loss": 0.3543, + "step": 5527 + }, + { + "epoch": 0.2501923512106812, + "grad_norm": 0.8695506353899523, + "learning_rate": 8.782062028829028e-06, + "loss": 0.3845, + "step": 5528 + }, + { + "epoch": 0.2502376103190767, + "grad_norm": 0.6392487323412227, + "learning_rate": 8.781582588222823e-06, + "loss": 0.3622, + "step": 5529 + }, + { + "epoch": 0.2502828694274723, + "grad_norm": 0.5861804047598004, + "learning_rate": 8.781103066362024e-06, + "loss": 0.3361, + "step": 5530 + }, + { + "epoch": 0.25032812853586783, + "grad_norm": 0.6529690974582265, + "learning_rate": 8.780623463256932e-06, + "loss": 0.353, + "step": 5531 + }, + { + "epoch": 0.2503733876442634, + "grad_norm": 0.6388205223624392, + "learning_rate": 8.780143778917853e-06, + "loss": 0.3629, + "step": 5532 + }, + { + "epoch": 0.25041864675265896, + "grad_norm": 0.5135197990140405, + "learning_rate": 8.779664013355095e-06, + "loss": 0.5079, + "step": 5533 + }, + { + "epoch": 0.25046390586105455, + "grad_norm": 0.6759508366790495, + "learning_rate": 8.779184166578965e-06, + "loss": 0.3658, + "step": 5534 + }, + { + "epoch": 0.2505091649694501, + "grad_norm": 0.67004678301159, + "learning_rate": 8.778704238599775e-06, + "loss": 0.3724, + "step": 5535 + }, + { + "epoch": 0.25055442407784567, + "grad_norm": 0.7186727224626106, + "learning_rate": 8.778224229427836e-06, + "loss": 0.3948, + "step": 5536 + }, + { + "epoch": 0.2505996831862412, + "grad_norm": 0.6462913809859987, + "learning_rate": 8.777744139073461e-06, + "loss": 0.397, + "step": 5537 + }, + { + "epoch": 0.2506449422946368, + "grad_norm": 0.6317619249321907, + "learning_rate": 8.777263967546969e-06, + "loss": 0.4191, + "step": 5538 + }, + { + "epoch": 0.2506902014030324, + "grad_norm": 0.6514663160067772, + "learning_rate": 8.776783714858672e-06, + "loss": 0.3998, + "step": 5539 + }, + { + "epoch": 0.2507354605114279, + "grad_norm": 0.6606566810098499, + "learning_rate": 8.776303381018895e-06, + "loss": 0.3773, + "step": 5540 + }, + { + "epoch": 0.2507807196198235, + "grad_norm": 0.696273850276573, + "learning_rate": 8.775822966037956e-06, + "loss": 0.3711, + "step": 5541 + }, + { + "epoch": 0.25082597872821905, + "grad_norm": 0.6246857391708531, + "learning_rate": 8.775342469926178e-06, + "loss": 0.3843, + "step": 5542 + }, + { + "epoch": 0.25087123783661464, + "grad_norm": 0.6057970609141705, + "learning_rate": 8.774861892693886e-06, + "loss": 0.3556, + "step": 5543 + }, + { + "epoch": 0.25091649694501017, + "grad_norm": 0.6359022529488361, + "learning_rate": 8.774381234351406e-06, + "loss": 0.3758, + "step": 5544 + }, + { + "epoch": 0.25096175605340576, + "grad_norm": 0.6749517080642095, + "learning_rate": 8.773900494909065e-06, + "loss": 0.3607, + "step": 5545 + }, + { + "epoch": 0.2510070151618013, + "grad_norm": 0.45212094632264954, + "learning_rate": 8.77341967437719e-06, + "loss": 0.5107, + "step": 5546 + }, + { + "epoch": 0.2510522742701969, + "grad_norm": 0.6890815621422842, + "learning_rate": 8.77293877276612e-06, + "loss": 0.4256, + "step": 5547 + }, + { + "epoch": 0.2510975333785924, + "grad_norm": 0.6796933014357068, + "learning_rate": 8.77245779008618e-06, + "loss": 0.3561, + "step": 5548 + }, + { + "epoch": 0.251142792486988, + "grad_norm": 0.7136438532760775, + "learning_rate": 8.77197672634771e-06, + "loss": 0.3897, + "step": 5549 + }, + { + "epoch": 0.25118805159538354, + "grad_norm": 0.6531877591429376, + "learning_rate": 8.771495581561043e-06, + "loss": 0.4194, + "step": 5550 + }, + { + "epoch": 0.25123331070377913, + "grad_norm": 0.3472284463790447, + "learning_rate": 8.77101435573652e-06, + "loss": 0.5091, + "step": 5551 + }, + { + "epoch": 0.2512785698121747, + "grad_norm": 0.6351845727041716, + "learning_rate": 8.770533048884483e-06, + "loss": 0.3905, + "step": 5552 + }, + { + "epoch": 0.25132382892057026, + "grad_norm": 0.6696146617160268, + "learning_rate": 8.77005166101527e-06, + "loss": 0.439, + "step": 5553 + }, + { + "epoch": 0.25136908802896585, + "grad_norm": 0.6208498844865883, + "learning_rate": 8.769570192139224e-06, + "loss": 0.3722, + "step": 5554 + }, + { + "epoch": 0.2514143471373614, + "grad_norm": 0.7578644508445018, + "learning_rate": 8.76908864226669e-06, + "loss": 0.3978, + "step": 5555 + }, + { + "epoch": 0.251459606245757, + "grad_norm": 0.6320177265238178, + "learning_rate": 8.768607011408021e-06, + "loss": 0.3843, + "step": 5556 + }, + { + "epoch": 0.2515048653541525, + "grad_norm": 0.5742711543775584, + "learning_rate": 8.76812529957356e-06, + "loss": 0.3584, + "step": 5557 + }, + { + "epoch": 0.2515501244625481, + "grad_norm": 0.6223855477017578, + "learning_rate": 8.76764350677366e-06, + "loss": 0.3568, + "step": 5558 + }, + { + "epoch": 0.25159538357094363, + "grad_norm": 0.5993825762871401, + "learning_rate": 8.76716163301867e-06, + "loss": 0.3652, + "step": 5559 + }, + { + "epoch": 0.2516406426793392, + "grad_norm": 0.8458811737927945, + "learning_rate": 8.76667967831895e-06, + "loss": 0.3735, + "step": 5560 + }, + { + "epoch": 0.25168590178773476, + "grad_norm": 0.40384503794198034, + "learning_rate": 8.76619764268485e-06, + "loss": 0.492, + "step": 5561 + }, + { + "epoch": 0.25173116089613035, + "grad_norm": 0.6648333028328689, + "learning_rate": 8.76571552612673e-06, + "loss": 0.4102, + "step": 5562 + }, + { + "epoch": 0.25177642000452594, + "grad_norm": 0.6475418274382717, + "learning_rate": 8.765233328654949e-06, + "loss": 0.3926, + "step": 5563 + }, + { + "epoch": 0.25182167911292147, + "grad_norm": 0.6015760283334478, + "learning_rate": 8.764751050279868e-06, + "loss": 0.3489, + "step": 5564 + }, + { + "epoch": 0.25186693822131706, + "grad_norm": 0.6921602856411545, + "learning_rate": 8.764268691011851e-06, + "loss": 0.4221, + "step": 5565 + }, + { + "epoch": 0.2519121973297126, + "grad_norm": 0.7213082623671482, + "learning_rate": 8.763786250861258e-06, + "loss": 0.4028, + "step": 5566 + }, + { + "epoch": 0.2519574564381082, + "grad_norm": 0.6666064268052584, + "learning_rate": 8.76330372983846e-06, + "loss": 0.3781, + "step": 5567 + }, + { + "epoch": 0.2520027155465037, + "grad_norm": 0.6081436800667175, + "learning_rate": 8.762821127953821e-06, + "loss": 0.356, + "step": 5568 + }, + { + "epoch": 0.2520479746548993, + "grad_norm": 0.6309678293578441, + "learning_rate": 8.762338445217713e-06, + "loss": 0.3625, + "step": 5569 + }, + { + "epoch": 0.25209323376329484, + "grad_norm": 0.38533445695115726, + "learning_rate": 8.761855681640508e-06, + "loss": 0.4888, + "step": 5570 + }, + { + "epoch": 0.25213849287169043, + "grad_norm": 0.6644202585973386, + "learning_rate": 8.761372837232578e-06, + "loss": 0.3547, + "step": 5571 + }, + { + "epoch": 0.25218375198008597, + "grad_norm": 0.6490414722219586, + "learning_rate": 8.760889912004297e-06, + "loss": 0.3561, + "step": 5572 + }, + { + "epoch": 0.25222901108848156, + "grad_norm": 0.721093897991297, + "learning_rate": 8.760406905966045e-06, + "loss": 0.3941, + "step": 5573 + }, + { + "epoch": 0.25227427019687715, + "grad_norm": 0.7217691975495019, + "learning_rate": 8.759923819128196e-06, + "loss": 0.3836, + "step": 5574 + }, + { + "epoch": 0.2523195293052727, + "grad_norm": 0.3325969229718621, + "learning_rate": 8.759440651501131e-06, + "loss": 0.5095, + "step": 5575 + }, + { + "epoch": 0.2523647884136683, + "grad_norm": 0.7479140074598437, + "learning_rate": 8.758957403095234e-06, + "loss": 0.4595, + "step": 5576 + }, + { + "epoch": 0.2524100475220638, + "grad_norm": 0.7376226797049706, + "learning_rate": 8.758474073920887e-06, + "loss": 0.3628, + "step": 5577 + }, + { + "epoch": 0.2524553066304594, + "grad_norm": 0.32568130987292926, + "learning_rate": 8.757990663988474e-06, + "loss": 0.5044, + "step": 5578 + }, + { + "epoch": 0.25250056573885493, + "grad_norm": 0.7091443937278222, + "learning_rate": 8.757507173308385e-06, + "loss": 0.3442, + "step": 5579 + }, + { + "epoch": 0.2525458248472505, + "grad_norm": 0.6641702917856819, + "learning_rate": 8.757023601891006e-06, + "loss": 0.3829, + "step": 5580 + }, + { + "epoch": 0.25259108395564606, + "grad_norm": 0.7033983142051135, + "learning_rate": 8.756539949746729e-06, + "loss": 0.3979, + "step": 5581 + }, + { + "epoch": 0.25263634306404165, + "grad_norm": 0.32804607849724715, + "learning_rate": 8.756056216885946e-06, + "loss": 0.5033, + "step": 5582 + }, + { + "epoch": 0.2526816021724372, + "grad_norm": 0.7125925445274838, + "learning_rate": 8.755572403319052e-06, + "loss": 0.3892, + "step": 5583 + }, + { + "epoch": 0.25272686128083277, + "grad_norm": 0.6783287676002182, + "learning_rate": 8.75508850905644e-06, + "loss": 0.3807, + "step": 5584 + }, + { + "epoch": 0.2527721203892283, + "grad_norm": 0.6702317252289326, + "learning_rate": 8.754604534108509e-06, + "loss": 0.3759, + "step": 5585 + }, + { + "epoch": 0.2528173794976239, + "grad_norm": 0.6867608478804084, + "learning_rate": 8.754120478485659e-06, + "loss": 0.4367, + "step": 5586 + }, + { + "epoch": 0.2528626386060195, + "grad_norm": 0.6694843817164187, + "learning_rate": 8.753636342198289e-06, + "loss": 0.3638, + "step": 5587 + }, + { + "epoch": 0.252907897714415, + "grad_norm": 0.6358098373776894, + "learning_rate": 8.753152125256801e-06, + "loss": 0.3795, + "step": 5588 + }, + { + "epoch": 0.2529531568228106, + "grad_norm": 0.6631921437034257, + "learning_rate": 8.752667827671602e-06, + "loss": 0.3908, + "step": 5589 + }, + { + "epoch": 0.25299841593120614, + "grad_norm": 0.3710637208753156, + "learning_rate": 8.752183449453098e-06, + "loss": 0.4857, + "step": 5590 + }, + { + "epoch": 0.25304367503960173, + "grad_norm": 0.7082325129752405, + "learning_rate": 8.751698990611694e-06, + "loss": 0.4029, + "step": 5591 + }, + { + "epoch": 0.25308893414799727, + "grad_norm": 0.7012540179855176, + "learning_rate": 8.751214451157802e-06, + "loss": 0.3426, + "step": 5592 + }, + { + "epoch": 0.25313419325639286, + "grad_norm": 0.30426007358344753, + "learning_rate": 8.750729831101831e-06, + "loss": 0.4895, + "step": 5593 + }, + { + "epoch": 0.2531794523647884, + "grad_norm": 0.613238452659256, + "learning_rate": 8.750245130454197e-06, + "loss": 0.3505, + "step": 5594 + }, + { + "epoch": 0.253224711473184, + "grad_norm": 0.7036978272830566, + "learning_rate": 8.749760349225312e-06, + "loss": 0.4148, + "step": 5595 + }, + { + "epoch": 0.2532699705815795, + "grad_norm": 0.34027021169803484, + "learning_rate": 8.749275487425595e-06, + "loss": 0.5372, + "step": 5596 + }, + { + "epoch": 0.2533152296899751, + "grad_norm": 0.6430519959314669, + "learning_rate": 8.748790545065462e-06, + "loss": 0.3648, + "step": 5597 + }, + { + "epoch": 0.2533604887983707, + "grad_norm": 0.6481716182969333, + "learning_rate": 8.748305522155333e-06, + "loss": 0.3924, + "step": 5598 + }, + { + "epoch": 0.25340574790676623, + "grad_norm": 0.7850438631381669, + "learning_rate": 8.747820418705632e-06, + "loss": 0.3477, + "step": 5599 + }, + { + "epoch": 0.2534510070151618, + "grad_norm": 0.6475872459409273, + "learning_rate": 8.74733523472678e-06, + "loss": 0.3393, + "step": 5600 + }, + { + "epoch": 0.25349626612355736, + "grad_norm": 0.6892199701583623, + "learning_rate": 8.746849970229202e-06, + "loss": 0.3912, + "step": 5601 + }, + { + "epoch": 0.25354152523195295, + "grad_norm": 0.652841793709212, + "learning_rate": 8.746364625223326e-06, + "loss": 0.3977, + "step": 5602 + }, + { + "epoch": 0.2535867843403485, + "grad_norm": 0.7133437435431227, + "learning_rate": 8.74587919971958e-06, + "loss": 0.4435, + "step": 5603 + }, + { + "epoch": 0.25363204344874407, + "grad_norm": 0.6371564505576985, + "learning_rate": 8.745393693728395e-06, + "loss": 0.3763, + "step": 5604 + }, + { + "epoch": 0.2536773025571396, + "grad_norm": 0.5867002877282491, + "learning_rate": 8.744908107260204e-06, + "loss": 0.3753, + "step": 5605 + }, + { + "epoch": 0.2537225616655352, + "grad_norm": 0.6591111743754317, + "learning_rate": 8.744422440325437e-06, + "loss": 0.3785, + "step": 5606 + }, + { + "epoch": 0.25376782077393073, + "grad_norm": 0.702145490185904, + "learning_rate": 8.743936692934533e-06, + "loss": 0.4149, + "step": 5607 + }, + { + "epoch": 0.2538130798823263, + "grad_norm": 0.3819564845362773, + "learning_rate": 8.743450865097929e-06, + "loss": 0.5093, + "step": 5608 + }, + { + "epoch": 0.2538583389907219, + "grad_norm": 0.6355224732404193, + "learning_rate": 8.742964956826063e-06, + "loss": 0.3885, + "step": 5609 + }, + { + "epoch": 0.25390359809911744, + "grad_norm": 0.3119610838803885, + "learning_rate": 8.742478968129375e-06, + "loss": 0.4812, + "step": 5610 + }, + { + "epoch": 0.25394885720751303, + "grad_norm": 0.6582709352358617, + "learning_rate": 8.741992899018307e-06, + "loss": 0.3808, + "step": 5611 + }, + { + "epoch": 0.25399411631590857, + "grad_norm": 0.3003522544833689, + "learning_rate": 8.741506749503306e-06, + "loss": 0.4791, + "step": 5612 + }, + { + "epoch": 0.25403937542430416, + "grad_norm": 0.6558557492776759, + "learning_rate": 8.741020519594816e-06, + "loss": 0.4024, + "step": 5613 + }, + { + "epoch": 0.2540846345326997, + "grad_norm": 0.37483177884061825, + "learning_rate": 8.740534209303285e-06, + "loss": 0.5212, + "step": 5614 + }, + { + "epoch": 0.2541298936410953, + "grad_norm": 0.3089870798161669, + "learning_rate": 8.74004781863916e-06, + "loss": 0.5019, + "step": 5615 + }, + { + "epoch": 0.2541751527494908, + "grad_norm": 0.6220929272548339, + "learning_rate": 8.739561347612894e-06, + "loss": 0.3541, + "step": 5616 + }, + { + "epoch": 0.2542204118578864, + "grad_norm": 0.6973460496890179, + "learning_rate": 8.739074796234943e-06, + "loss": 0.3822, + "step": 5617 + }, + { + "epoch": 0.25426567096628194, + "grad_norm": 0.6709010629334192, + "learning_rate": 8.738588164515755e-06, + "loss": 0.3875, + "step": 5618 + }, + { + "epoch": 0.25431093007467753, + "grad_norm": 0.6981970747897914, + "learning_rate": 8.738101452465793e-06, + "loss": 0.3827, + "step": 5619 + }, + { + "epoch": 0.25435618918307307, + "grad_norm": 0.6752351421153933, + "learning_rate": 8.737614660095507e-06, + "loss": 0.3956, + "step": 5620 + }, + { + "epoch": 0.25440144829146866, + "grad_norm": 0.5064682169153885, + "learning_rate": 8.737127787415365e-06, + "loss": 0.4983, + "step": 5621 + }, + { + "epoch": 0.25444670739986425, + "grad_norm": 0.4210484452205898, + "learning_rate": 8.736640834435824e-06, + "loss": 0.5241, + "step": 5622 + }, + { + "epoch": 0.2544919665082598, + "grad_norm": 0.9187963479616968, + "learning_rate": 8.736153801167346e-06, + "loss": 0.374, + "step": 5623 + }, + { + "epoch": 0.25453722561665537, + "grad_norm": 0.6036483418713718, + "learning_rate": 8.735666687620398e-06, + "loss": 0.3771, + "step": 5624 + }, + { + "epoch": 0.2545824847250509, + "grad_norm": 0.6672026035990911, + "learning_rate": 8.735179493805446e-06, + "loss": 0.3879, + "step": 5625 + }, + { + "epoch": 0.2546277438334465, + "grad_norm": 0.6955777937848375, + "learning_rate": 8.73469221973296e-06, + "loss": 0.3721, + "step": 5626 + }, + { + "epoch": 0.25467300294184203, + "grad_norm": 0.6532425045514988, + "learning_rate": 8.734204865413407e-06, + "loss": 0.365, + "step": 5627 + }, + { + "epoch": 0.2547182620502376, + "grad_norm": 0.6804037652907705, + "learning_rate": 8.73371743085726e-06, + "loss": 0.394, + "step": 5628 + }, + { + "epoch": 0.25476352115863315, + "grad_norm": 0.6691062206288424, + "learning_rate": 8.733229916074995e-06, + "loss": 0.4164, + "step": 5629 + }, + { + "epoch": 0.25480878026702874, + "grad_norm": 0.6919494707655087, + "learning_rate": 8.732742321077082e-06, + "loss": 0.3914, + "step": 5630 + }, + { + "epoch": 0.2548540393754243, + "grad_norm": 0.6994508346956777, + "learning_rate": 8.732254645874002e-06, + "loss": 0.5069, + "step": 5631 + }, + { + "epoch": 0.25489929848381987, + "grad_norm": 0.48480809580622497, + "learning_rate": 8.731766890476232e-06, + "loss": 0.4924, + "step": 5632 + }, + { + "epoch": 0.25494455759221546, + "grad_norm": 0.7333716987396993, + "learning_rate": 8.731279054894254e-06, + "loss": 0.3927, + "step": 5633 + }, + { + "epoch": 0.254989816700611, + "grad_norm": 0.6965699173224933, + "learning_rate": 8.730791139138546e-06, + "loss": 0.3644, + "step": 5634 + }, + { + "epoch": 0.2550350758090066, + "grad_norm": 0.8260440813243546, + "learning_rate": 8.730303143219597e-06, + "loss": 0.3773, + "step": 5635 + }, + { + "epoch": 0.2550803349174021, + "grad_norm": 0.6660711629700041, + "learning_rate": 8.729815067147888e-06, + "loss": 0.383, + "step": 5636 + }, + { + "epoch": 0.2551255940257977, + "grad_norm": 0.736406331574392, + "learning_rate": 8.729326910933911e-06, + "loss": 0.3716, + "step": 5637 + }, + { + "epoch": 0.25517085313419324, + "grad_norm": 0.709712953761439, + "learning_rate": 8.728838674588151e-06, + "loss": 0.3965, + "step": 5638 + }, + { + "epoch": 0.25521611224258883, + "grad_norm": 0.6271166899069629, + "learning_rate": 8.728350358121101e-06, + "loss": 0.3537, + "step": 5639 + }, + { + "epoch": 0.25526137135098437, + "grad_norm": 0.6697285911368892, + "learning_rate": 8.727861961543253e-06, + "loss": 0.3755, + "step": 5640 + }, + { + "epoch": 0.25530663045937996, + "grad_norm": 0.6809780054179385, + "learning_rate": 8.7273734848651e-06, + "loss": 0.4126, + "step": 5641 + }, + { + "epoch": 0.2553518895677755, + "grad_norm": 0.7967430252315837, + "learning_rate": 8.726884928097138e-06, + "loss": 0.3632, + "step": 5642 + }, + { + "epoch": 0.2553971486761711, + "grad_norm": 1.381867297092667, + "learning_rate": 8.726396291249866e-06, + "loss": 0.5181, + "step": 5643 + }, + { + "epoch": 0.2554424077845666, + "grad_norm": 0.7415298912300912, + "learning_rate": 8.725907574333783e-06, + "loss": 0.3891, + "step": 5644 + }, + { + "epoch": 0.2554876668929622, + "grad_norm": 0.7454140763091222, + "learning_rate": 8.725418777359389e-06, + "loss": 0.4035, + "step": 5645 + }, + { + "epoch": 0.2555329260013578, + "grad_norm": 0.7945699973069527, + "learning_rate": 8.724929900337186e-06, + "loss": 0.3777, + "step": 5646 + }, + { + "epoch": 0.25557818510975333, + "grad_norm": 0.7909815852970442, + "learning_rate": 8.724440943277681e-06, + "loss": 0.3635, + "step": 5647 + }, + { + "epoch": 0.2556234442181489, + "grad_norm": 0.6718420734648589, + "learning_rate": 8.723951906191377e-06, + "loss": 0.4747, + "step": 5648 + }, + { + "epoch": 0.25566870332654446, + "grad_norm": 0.7038507433457524, + "learning_rate": 8.723462789088785e-06, + "loss": 0.5177, + "step": 5649 + }, + { + "epoch": 0.25571396243494005, + "grad_norm": 0.6407362587147867, + "learning_rate": 8.722973591980414e-06, + "loss": 0.3444, + "step": 5650 + }, + { + "epoch": 0.2557592215433356, + "grad_norm": 0.7535583737045262, + "learning_rate": 8.722484314876776e-06, + "loss": 0.3938, + "step": 5651 + }, + { + "epoch": 0.25580448065173117, + "grad_norm": 0.6430207864370134, + "learning_rate": 8.72199495778838e-06, + "loss": 0.345, + "step": 5652 + }, + { + "epoch": 0.2558497397601267, + "grad_norm": 0.6251897210396002, + "learning_rate": 8.721505520725745e-06, + "loss": 0.3648, + "step": 5653 + }, + { + "epoch": 0.2558949988685223, + "grad_norm": 0.6296987375458352, + "learning_rate": 8.721016003699385e-06, + "loss": 0.3697, + "step": 5654 + }, + { + "epoch": 0.25594025797691783, + "grad_norm": 0.6776477921529509, + "learning_rate": 8.72052640671982e-06, + "loss": 0.3498, + "step": 5655 + }, + { + "epoch": 0.2559855170853134, + "grad_norm": 0.5898537695546245, + "learning_rate": 8.72003672979757e-06, + "loss": 0.3576, + "step": 5656 + }, + { + "epoch": 0.256030776193709, + "grad_norm": 0.6294189740361547, + "learning_rate": 8.719546972943156e-06, + "loss": 0.3831, + "step": 5657 + }, + { + "epoch": 0.25607603530210454, + "grad_norm": 0.6665099049361219, + "learning_rate": 8.719057136167099e-06, + "loss": 0.3972, + "step": 5658 + }, + { + "epoch": 0.25612129441050013, + "grad_norm": 0.660593059607384, + "learning_rate": 8.71856721947993e-06, + "loss": 0.3794, + "step": 5659 + }, + { + "epoch": 0.25616655351889567, + "grad_norm": 0.7154169972792961, + "learning_rate": 8.718077222892169e-06, + "loss": 0.4129, + "step": 5660 + }, + { + "epoch": 0.25621181262729126, + "grad_norm": 0.6645475490795022, + "learning_rate": 8.717587146414348e-06, + "loss": 0.4019, + "step": 5661 + }, + { + "epoch": 0.2562570717356868, + "grad_norm": 0.6548301565215484, + "learning_rate": 8.717096990056999e-06, + "loss": 0.3973, + "step": 5662 + }, + { + "epoch": 0.2563023308440824, + "grad_norm": 0.779726610983311, + "learning_rate": 8.71660675383065e-06, + "loss": 0.4008, + "step": 5663 + }, + { + "epoch": 0.2563475899524779, + "grad_norm": 0.6523134947123276, + "learning_rate": 8.716116437745836e-06, + "loss": 0.4211, + "step": 5664 + }, + { + "epoch": 0.2563928490608735, + "grad_norm": 0.6608811987550522, + "learning_rate": 8.715626041813095e-06, + "loss": 0.4106, + "step": 5665 + }, + { + "epoch": 0.25643810816926904, + "grad_norm": 0.6898017659880898, + "learning_rate": 8.71513556604296e-06, + "loss": 0.382, + "step": 5666 + }, + { + "epoch": 0.25648336727766463, + "grad_norm": 0.723160907659878, + "learning_rate": 8.714645010445974e-06, + "loss": 0.4066, + "step": 5667 + }, + { + "epoch": 0.2565286263860602, + "grad_norm": 0.6429431624225951, + "learning_rate": 8.714154375032675e-06, + "loss": 0.4093, + "step": 5668 + }, + { + "epoch": 0.25657388549445576, + "grad_norm": 1.2113190533946177, + "learning_rate": 8.713663659813605e-06, + "loss": 0.5187, + "step": 5669 + }, + { + "epoch": 0.25661914460285135, + "grad_norm": 0.6354341373225793, + "learning_rate": 8.713172864799309e-06, + "loss": 0.371, + "step": 5670 + }, + { + "epoch": 0.2566644037112469, + "grad_norm": 0.45960048313751684, + "learning_rate": 8.712681990000332e-06, + "loss": 0.5104, + "step": 5671 + }, + { + "epoch": 0.25670966281964247, + "grad_norm": 0.6761932077538576, + "learning_rate": 8.71219103542722e-06, + "loss": 0.3442, + "step": 5672 + }, + { + "epoch": 0.256754921928038, + "grad_norm": 0.5932332779997048, + "learning_rate": 8.711700001090524e-06, + "loss": 0.4999, + "step": 5673 + }, + { + "epoch": 0.2568001810364336, + "grad_norm": 0.6854025592794466, + "learning_rate": 8.711208887000797e-06, + "loss": 0.4969, + "step": 5674 + }, + { + "epoch": 0.25684544014482913, + "grad_norm": 0.7360965312204293, + "learning_rate": 8.710717693168588e-06, + "loss": 0.3926, + "step": 5675 + }, + { + "epoch": 0.2568906992532247, + "grad_norm": 0.7146450211894528, + "learning_rate": 8.710226419604453e-06, + "loss": 0.405, + "step": 5676 + }, + { + "epoch": 0.25693595836162025, + "grad_norm": 0.6804543109515563, + "learning_rate": 8.709735066318946e-06, + "loss": 0.4143, + "step": 5677 + }, + { + "epoch": 0.25698121747001584, + "grad_norm": 0.6413194057286674, + "learning_rate": 8.709243633322627e-06, + "loss": 0.3447, + "step": 5678 + }, + { + "epoch": 0.2570264765784114, + "grad_norm": 0.6783568892967364, + "learning_rate": 8.708752120626054e-06, + "loss": 0.3746, + "step": 5679 + }, + { + "epoch": 0.25707173568680697, + "grad_norm": 0.6505544536816305, + "learning_rate": 8.708260528239788e-06, + "loss": 0.4018, + "step": 5680 + }, + { + "epoch": 0.25711699479520256, + "grad_norm": 0.742783726637506, + "learning_rate": 8.707768856174393e-06, + "loss": 0.3693, + "step": 5681 + }, + { + "epoch": 0.2571622539035981, + "grad_norm": 0.6665288886536155, + "learning_rate": 8.707277104440432e-06, + "loss": 0.3636, + "step": 5682 + }, + { + "epoch": 0.2572075130119937, + "grad_norm": 0.6467823550649642, + "learning_rate": 8.706785273048475e-06, + "loss": 0.3495, + "step": 5683 + }, + { + "epoch": 0.2572527721203892, + "grad_norm": 0.605064740924202, + "learning_rate": 8.706293362009084e-06, + "loss": 0.3749, + "step": 5684 + }, + { + "epoch": 0.2572980312287848, + "grad_norm": 0.7717223661878522, + "learning_rate": 8.705801371332832e-06, + "loss": 0.3755, + "step": 5685 + }, + { + "epoch": 0.25734329033718034, + "grad_norm": 0.6282435336830396, + "learning_rate": 8.70530930103029e-06, + "loss": 0.3562, + "step": 5686 + }, + { + "epoch": 0.25738854944557593, + "grad_norm": 0.7577896145589404, + "learning_rate": 8.704817151112033e-06, + "loss": 0.3891, + "step": 5687 + }, + { + "epoch": 0.25743380855397147, + "grad_norm": 0.7057241116379769, + "learning_rate": 8.704324921588631e-06, + "loss": 0.4212, + "step": 5688 + }, + { + "epoch": 0.25747906766236706, + "grad_norm": 0.7065461234902365, + "learning_rate": 8.703832612470665e-06, + "loss": 0.3825, + "step": 5689 + }, + { + "epoch": 0.2575243267707626, + "grad_norm": 0.7101016138777637, + "learning_rate": 8.703340223768713e-06, + "loss": 0.3642, + "step": 5690 + }, + { + "epoch": 0.2575695858791582, + "grad_norm": 0.6020716234533079, + "learning_rate": 8.70284775549335e-06, + "loss": 0.3474, + "step": 5691 + }, + { + "epoch": 0.25761484498755377, + "grad_norm": 0.7963449065128408, + "learning_rate": 8.702355207655164e-06, + "loss": 0.5115, + "step": 5692 + }, + { + "epoch": 0.2576601040959493, + "grad_norm": 0.6741311830333333, + "learning_rate": 8.701862580264735e-06, + "loss": 0.3854, + "step": 5693 + }, + { + "epoch": 0.2577053632043449, + "grad_norm": 0.45541735171368786, + "learning_rate": 8.701369873332647e-06, + "loss": 0.4829, + "step": 5694 + }, + { + "epoch": 0.25775062231274043, + "grad_norm": 0.6808063988576847, + "learning_rate": 8.70087708686949e-06, + "loss": 0.4208, + "step": 5695 + }, + { + "epoch": 0.257795881421136, + "grad_norm": 1.0002550438489555, + "learning_rate": 8.700384220885852e-06, + "loss": 0.3507, + "step": 5696 + }, + { + "epoch": 0.25784114052953155, + "grad_norm": 0.6578641214454441, + "learning_rate": 8.699891275392319e-06, + "loss": 0.4122, + "step": 5697 + }, + { + "epoch": 0.25788639963792714, + "grad_norm": 0.6321553880200829, + "learning_rate": 8.699398250399486e-06, + "loss": 0.3911, + "step": 5698 + }, + { + "epoch": 0.2579316587463227, + "grad_norm": 0.6306184982690165, + "learning_rate": 8.698905145917948e-06, + "loss": 0.3443, + "step": 5699 + }, + { + "epoch": 0.25797691785471827, + "grad_norm": 0.9831445650283764, + "learning_rate": 8.6984119619583e-06, + "loss": 0.4861, + "step": 5700 + }, + { + "epoch": 0.2580221769631138, + "grad_norm": 0.6193016097001647, + "learning_rate": 8.697918698531135e-06, + "loss": 0.3793, + "step": 5701 + }, + { + "epoch": 0.2580674360715094, + "grad_norm": 0.5713133255025882, + "learning_rate": 8.697425355647055e-06, + "loss": 0.4789, + "step": 5702 + }, + { + "epoch": 0.258112695179905, + "grad_norm": 0.6176027961816791, + "learning_rate": 8.696931933316661e-06, + "loss": 0.3762, + "step": 5703 + }, + { + "epoch": 0.2581579542883005, + "grad_norm": 0.7087957945916381, + "learning_rate": 8.696438431550553e-06, + "loss": 0.3791, + "step": 5704 + }, + { + "epoch": 0.2582032133966961, + "grad_norm": 0.6353374786867481, + "learning_rate": 8.695944850359337e-06, + "loss": 0.3673, + "step": 5705 + }, + { + "epoch": 0.25824847250509164, + "grad_norm": 0.6955272929655746, + "learning_rate": 8.695451189753616e-06, + "loss": 0.4057, + "step": 5706 + }, + { + "epoch": 0.25829373161348723, + "grad_norm": 0.6275498827078999, + "learning_rate": 8.694957449744e-06, + "loss": 0.3476, + "step": 5707 + }, + { + "epoch": 0.25833899072188277, + "grad_norm": 0.6950507771068638, + "learning_rate": 8.694463630341094e-06, + "loss": 0.3953, + "step": 5708 + }, + { + "epoch": 0.25838424983027836, + "grad_norm": 0.7994838251226956, + "learning_rate": 8.693969731555514e-06, + "loss": 0.3723, + "step": 5709 + }, + { + "epoch": 0.2584295089386739, + "grad_norm": 0.6387292929473521, + "learning_rate": 8.693475753397869e-06, + "loss": 0.373, + "step": 5710 + }, + { + "epoch": 0.2584747680470695, + "grad_norm": 0.6301470128141162, + "learning_rate": 8.692981695878772e-06, + "loss": 0.385, + "step": 5711 + }, + { + "epoch": 0.258520027155465, + "grad_norm": 0.5937941248355948, + "learning_rate": 8.692487559008843e-06, + "loss": 0.363, + "step": 5712 + }, + { + "epoch": 0.2585652862638606, + "grad_norm": 1.192506158297031, + "learning_rate": 8.691993342798698e-06, + "loss": 0.5158, + "step": 5713 + }, + { + "epoch": 0.25861054537225614, + "grad_norm": 0.6708971470570072, + "learning_rate": 8.691499047258952e-06, + "loss": 0.3817, + "step": 5714 + }, + { + "epoch": 0.25865580448065173, + "grad_norm": 0.6282540225233022, + "learning_rate": 8.69100467240023e-06, + "loss": 0.3691, + "step": 5715 + }, + { + "epoch": 0.2587010635890473, + "grad_norm": 0.6998716434175581, + "learning_rate": 8.690510218233153e-06, + "loss": 0.4013, + "step": 5716 + }, + { + "epoch": 0.25874632269744285, + "grad_norm": 0.6701690489887676, + "learning_rate": 8.690015684768347e-06, + "loss": 0.3527, + "step": 5717 + }, + { + "epoch": 0.25879158180583844, + "grad_norm": 0.6245332757292525, + "learning_rate": 8.689521072016436e-06, + "loss": 0.4015, + "step": 5718 + }, + { + "epoch": 0.258836840914234, + "grad_norm": 0.6928148401172459, + "learning_rate": 8.68902637998805e-06, + "loss": 0.393, + "step": 5719 + }, + { + "epoch": 0.25888210002262957, + "grad_norm": 0.6114044470861477, + "learning_rate": 8.688531608693817e-06, + "loss": 0.361, + "step": 5720 + }, + { + "epoch": 0.2589273591310251, + "grad_norm": 0.6257657653929006, + "learning_rate": 8.688036758144367e-06, + "loss": 0.3705, + "step": 5721 + }, + { + "epoch": 0.2589726182394207, + "grad_norm": 0.6996865989535014, + "learning_rate": 8.687541828350334e-06, + "loss": 0.5148, + "step": 5722 + }, + { + "epoch": 0.2590178773478162, + "grad_norm": 0.7563925845604034, + "learning_rate": 8.687046819322353e-06, + "loss": 0.4001, + "step": 5723 + }, + { + "epoch": 0.2590631364562118, + "grad_norm": 0.6674974207955788, + "learning_rate": 8.68655173107106e-06, + "loss": 0.412, + "step": 5724 + }, + { + "epoch": 0.25910839556460735, + "grad_norm": 0.6705895639170104, + "learning_rate": 8.686056563607093e-06, + "loss": 0.3571, + "step": 5725 + }, + { + "epoch": 0.25915365467300294, + "grad_norm": 0.7363012843803127, + "learning_rate": 8.685561316941091e-06, + "loss": 0.3844, + "step": 5726 + }, + { + "epoch": 0.25919891378139853, + "grad_norm": 0.6487455970498142, + "learning_rate": 8.685065991083695e-06, + "loss": 0.3841, + "step": 5727 + }, + { + "epoch": 0.25924417288979407, + "grad_norm": 0.6936695497920785, + "learning_rate": 8.68457058604555e-06, + "loss": 0.3753, + "step": 5728 + }, + { + "epoch": 0.25928943199818966, + "grad_norm": 0.4468251185731199, + "learning_rate": 8.684075101837298e-06, + "loss": 0.4773, + "step": 5729 + }, + { + "epoch": 0.2593346911065852, + "grad_norm": 0.6467029106064753, + "learning_rate": 8.683579538469587e-06, + "loss": 0.3659, + "step": 5730 + }, + { + "epoch": 0.2593799502149808, + "grad_norm": 0.38998358102716185, + "learning_rate": 8.683083895953066e-06, + "loss": 0.4814, + "step": 5731 + }, + { + "epoch": 0.2594252093233763, + "grad_norm": 0.7080368874591275, + "learning_rate": 8.682588174298384e-06, + "loss": 0.4224, + "step": 5732 + }, + { + "epoch": 0.2594704684317719, + "grad_norm": 0.6507574628339656, + "learning_rate": 8.68209237351619e-06, + "loss": 0.3714, + "step": 5733 + }, + { + "epoch": 0.25951572754016744, + "grad_norm": 0.6255841463576619, + "learning_rate": 8.681596493617141e-06, + "loss": 0.3706, + "step": 5734 + }, + { + "epoch": 0.25956098664856303, + "grad_norm": 0.7205900485067189, + "learning_rate": 8.681100534611891e-06, + "loss": 0.4071, + "step": 5735 + }, + { + "epoch": 0.25960624575695856, + "grad_norm": 0.6450927790544251, + "learning_rate": 8.680604496511095e-06, + "loss": 0.375, + "step": 5736 + }, + { + "epoch": 0.25965150486535415, + "grad_norm": 0.6590018160806728, + "learning_rate": 8.680108379325413e-06, + "loss": 0.3737, + "step": 5737 + }, + { + "epoch": 0.25969676397374974, + "grad_norm": 0.6418568583398381, + "learning_rate": 8.679612183065506e-06, + "loss": 0.3884, + "step": 5738 + }, + { + "epoch": 0.2597420230821453, + "grad_norm": 0.6454949426628106, + "learning_rate": 8.679115907742032e-06, + "loss": 0.3564, + "step": 5739 + }, + { + "epoch": 0.25978728219054087, + "grad_norm": 0.6739832666361274, + "learning_rate": 8.67861955336566e-06, + "loss": 0.3742, + "step": 5740 + }, + { + "epoch": 0.2598325412989364, + "grad_norm": 0.6683636845230098, + "learning_rate": 8.678123119947049e-06, + "loss": 0.3584, + "step": 5741 + }, + { + "epoch": 0.259877800407332, + "grad_norm": 0.6559644343457217, + "learning_rate": 8.677626607496869e-06, + "loss": 0.3648, + "step": 5742 + }, + { + "epoch": 0.25992305951572753, + "grad_norm": 0.6809757711271472, + "learning_rate": 8.677130016025788e-06, + "loss": 0.3556, + "step": 5743 + }, + { + "epoch": 0.2599683186241231, + "grad_norm": 0.6666856861528969, + "learning_rate": 8.676633345544476e-06, + "loss": 0.3886, + "step": 5744 + }, + { + "epoch": 0.26001357773251865, + "grad_norm": 0.6393280406948814, + "learning_rate": 8.676136596063607e-06, + "loss": 0.3589, + "step": 5745 + }, + { + "epoch": 0.26005883684091424, + "grad_norm": 0.678176855017373, + "learning_rate": 8.675639767593851e-06, + "loss": 0.3631, + "step": 5746 + }, + { + "epoch": 0.2601040959493098, + "grad_norm": 0.6610446597238993, + "learning_rate": 8.675142860145887e-06, + "loss": 0.3838, + "step": 5747 + }, + { + "epoch": 0.26014935505770537, + "grad_norm": 0.6162186768778396, + "learning_rate": 8.67464587373039e-06, + "loss": 0.3889, + "step": 5748 + }, + { + "epoch": 0.2601946141661009, + "grad_norm": 0.6431669043412701, + "learning_rate": 8.674148808358038e-06, + "loss": 0.4023, + "step": 5749 + }, + { + "epoch": 0.2602398732744965, + "grad_norm": 0.6334447577441437, + "learning_rate": 8.673651664039513e-06, + "loss": 0.352, + "step": 5750 + }, + { + "epoch": 0.2602851323828921, + "grad_norm": 0.629109002626069, + "learning_rate": 8.673154440785496e-06, + "loss": 0.4121, + "step": 5751 + }, + { + "epoch": 0.2603303914912876, + "grad_norm": 0.6882025240837115, + "learning_rate": 8.672657138606672e-06, + "loss": 0.403, + "step": 5752 + }, + { + "epoch": 0.2603756505996832, + "grad_norm": 0.6620543132588419, + "learning_rate": 8.672159757513726e-06, + "loss": 0.369, + "step": 5753 + }, + { + "epoch": 0.26042090970807874, + "grad_norm": 0.6516401962062613, + "learning_rate": 8.671662297517344e-06, + "loss": 0.4314, + "step": 5754 + }, + { + "epoch": 0.26046616881647433, + "grad_norm": 0.6667136130475712, + "learning_rate": 8.671164758628216e-06, + "loss": 0.3493, + "step": 5755 + }, + { + "epoch": 0.26051142792486986, + "grad_norm": 0.6643676696578114, + "learning_rate": 8.670667140857034e-06, + "loss": 0.3769, + "step": 5756 + }, + { + "epoch": 0.26055668703326545, + "grad_norm": 0.632758644728174, + "learning_rate": 8.670169444214487e-06, + "loss": 0.3735, + "step": 5757 + }, + { + "epoch": 0.260601946141661, + "grad_norm": 0.630402510364806, + "learning_rate": 8.669671668711272e-06, + "loss": 0.3773, + "step": 5758 + }, + { + "epoch": 0.2606472052500566, + "grad_norm": 0.6562395073335897, + "learning_rate": 8.669173814358082e-06, + "loss": 0.3849, + "step": 5759 + }, + { + "epoch": 0.2606924643584521, + "grad_norm": 0.6379547262212517, + "learning_rate": 8.668675881165616e-06, + "loss": 0.3595, + "step": 5760 + }, + { + "epoch": 0.2607377234668477, + "grad_norm": 0.6794966206353229, + "learning_rate": 8.668177869144574e-06, + "loss": 0.3756, + "step": 5761 + }, + { + "epoch": 0.2607829825752433, + "grad_norm": 0.6470626273490792, + "learning_rate": 8.667679778305654e-06, + "loss": 0.3591, + "step": 5762 + }, + { + "epoch": 0.26082824168363883, + "grad_norm": 0.6400645187021914, + "learning_rate": 8.66718160865956e-06, + "loss": 0.3728, + "step": 5763 + }, + { + "epoch": 0.2608735007920344, + "grad_norm": 0.7782560423349998, + "learning_rate": 8.666683360216998e-06, + "loss": 0.4782, + "step": 5764 + }, + { + "epoch": 0.26091875990042995, + "grad_norm": 0.6331256763361964, + "learning_rate": 8.66618503298867e-06, + "loss": 0.3924, + "step": 5765 + }, + { + "epoch": 0.26096401900882554, + "grad_norm": 0.6983330718539844, + "learning_rate": 8.665686626985286e-06, + "loss": 0.4098, + "step": 5766 + }, + { + "epoch": 0.2610092781172211, + "grad_norm": 0.347891864990072, + "learning_rate": 8.665188142217555e-06, + "loss": 0.4693, + "step": 5767 + }, + { + "epoch": 0.26105453722561667, + "grad_norm": 0.7149931938736016, + "learning_rate": 8.664689578696188e-06, + "loss": 0.3672, + "step": 5768 + }, + { + "epoch": 0.2610997963340122, + "grad_norm": 0.6324010359903846, + "learning_rate": 8.664190936431896e-06, + "loss": 0.3695, + "step": 5769 + }, + { + "epoch": 0.2611450554424078, + "grad_norm": 0.6574088753449782, + "learning_rate": 8.663692215435396e-06, + "loss": 0.3741, + "step": 5770 + }, + { + "epoch": 0.2611903145508033, + "grad_norm": 0.6924097149046233, + "learning_rate": 8.663193415717402e-06, + "loss": 0.3801, + "step": 5771 + }, + { + "epoch": 0.2612355736591989, + "grad_norm": 0.6212884502110326, + "learning_rate": 8.662694537288632e-06, + "loss": 0.5228, + "step": 5772 + }, + { + "epoch": 0.26128083276759445, + "grad_norm": 0.7482647862220366, + "learning_rate": 8.662195580159804e-06, + "loss": 0.3978, + "step": 5773 + }, + { + "epoch": 0.26132609187599004, + "grad_norm": 0.5149044333270514, + "learning_rate": 8.661696544341642e-06, + "loss": 0.4931, + "step": 5774 + }, + { + "epoch": 0.26137135098438563, + "grad_norm": 0.653471749140414, + "learning_rate": 8.661197429844868e-06, + "loss": 0.3388, + "step": 5775 + }, + { + "epoch": 0.26141661009278117, + "grad_norm": 0.6625056719664779, + "learning_rate": 8.660698236680205e-06, + "loss": 0.3696, + "step": 5776 + }, + { + "epoch": 0.26146186920117676, + "grad_norm": 0.6471294672905499, + "learning_rate": 8.66019896485838e-06, + "loss": 0.4087, + "step": 5777 + }, + { + "epoch": 0.2615071283095723, + "grad_norm": 0.7361480408869571, + "learning_rate": 8.65969961439012e-06, + "loss": 0.3858, + "step": 5778 + }, + { + "epoch": 0.2615523874179679, + "grad_norm": 0.649115216250859, + "learning_rate": 8.659200185286157e-06, + "loss": 0.3477, + "step": 5779 + }, + { + "epoch": 0.2615976465263634, + "grad_norm": 0.6561882849580633, + "learning_rate": 8.658700677557217e-06, + "loss": 0.4159, + "step": 5780 + }, + { + "epoch": 0.261642905634759, + "grad_norm": 0.6753162434144271, + "learning_rate": 8.658201091214038e-06, + "loss": 0.4287, + "step": 5781 + }, + { + "epoch": 0.26168816474315454, + "grad_norm": 0.6725502703479815, + "learning_rate": 8.657701426267355e-06, + "loss": 0.4304, + "step": 5782 + }, + { + "epoch": 0.26173342385155013, + "grad_norm": 0.6815944476324566, + "learning_rate": 8.657201682727898e-06, + "loss": 0.3641, + "step": 5783 + }, + { + "epoch": 0.26177868295994566, + "grad_norm": 0.5161456444441221, + "learning_rate": 8.656701860606412e-06, + "loss": 0.491, + "step": 5784 + }, + { + "epoch": 0.26182394206834125, + "grad_norm": 0.6652180348295492, + "learning_rate": 8.656201959913635e-06, + "loss": 0.3502, + "step": 5785 + }, + { + "epoch": 0.26186920117673684, + "grad_norm": 0.612731572351153, + "learning_rate": 8.655701980660305e-06, + "loss": 0.3635, + "step": 5786 + }, + { + "epoch": 0.2619144602851324, + "grad_norm": 0.6252464211687456, + "learning_rate": 8.655201922857166e-06, + "loss": 0.357, + "step": 5787 + }, + { + "epoch": 0.26195971939352797, + "grad_norm": 0.6850715997532323, + "learning_rate": 8.654701786514965e-06, + "loss": 0.3842, + "step": 5788 + }, + { + "epoch": 0.2620049785019235, + "grad_norm": 0.6445923933478774, + "learning_rate": 8.654201571644447e-06, + "loss": 0.3718, + "step": 5789 + }, + { + "epoch": 0.2620502376103191, + "grad_norm": 0.4093785123197519, + "learning_rate": 8.653701278256362e-06, + "loss": 0.4881, + "step": 5790 + }, + { + "epoch": 0.2620954967187146, + "grad_norm": 0.6996517734015768, + "learning_rate": 8.653200906361454e-06, + "loss": 0.3419, + "step": 5791 + }, + { + "epoch": 0.2621407558271102, + "grad_norm": 0.6759619747376816, + "learning_rate": 8.652700455970483e-06, + "loss": 0.3778, + "step": 5792 + }, + { + "epoch": 0.26218601493550575, + "grad_norm": 0.6963571140515151, + "learning_rate": 8.652199927094194e-06, + "loss": 0.3885, + "step": 5793 + }, + { + "epoch": 0.26223127404390134, + "grad_norm": 0.6076035876970179, + "learning_rate": 8.651699319743348e-06, + "loss": 0.3478, + "step": 5794 + }, + { + "epoch": 0.2622765331522969, + "grad_norm": 0.6776206762542706, + "learning_rate": 8.651198633928696e-06, + "loss": 0.3873, + "step": 5795 + }, + { + "epoch": 0.26232179226069247, + "grad_norm": 0.6661315977828, + "learning_rate": 8.650697869661002e-06, + "loss": 0.3991, + "step": 5796 + }, + { + "epoch": 0.26236705136908806, + "grad_norm": 0.46358353209235925, + "learning_rate": 8.650197026951022e-06, + "loss": 0.4936, + "step": 5797 + }, + { + "epoch": 0.2624123104774836, + "grad_norm": 0.6883901449463596, + "learning_rate": 8.649696105809518e-06, + "loss": 0.4183, + "step": 5798 + }, + { + "epoch": 0.2624575695858792, + "grad_norm": 0.5777003885885577, + "learning_rate": 8.649195106247256e-06, + "loss": 0.378, + "step": 5799 + }, + { + "epoch": 0.2625028286942747, + "grad_norm": 0.6298595377891067, + "learning_rate": 8.648694028274998e-06, + "loss": 0.417, + "step": 5800 + }, + { + "epoch": 0.2625480878026703, + "grad_norm": 0.6152758639766999, + "learning_rate": 8.64819287190351e-06, + "loss": 0.4013, + "step": 5801 + }, + { + "epoch": 0.26259334691106584, + "grad_norm": 0.3157038254738025, + "learning_rate": 8.647691637143562e-06, + "loss": 0.4892, + "step": 5802 + }, + { + "epoch": 0.26263860601946143, + "grad_norm": 0.28796182851067176, + "learning_rate": 8.647190324005925e-06, + "loss": 0.488, + "step": 5803 + }, + { + "epoch": 0.26268386512785696, + "grad_norm": 0.6537209460403814, + "learning_rate": 8.646688932501369e-06, + "loss": 0.3802, + "step": 5804 + }, + { + "epoch": 0.26272912423625255, + "grad_norm": 0.6960618753773958, + "learning_rate": 8.646187462640668e-06, + "loss": 0.4316, + "step": 5805 + }, + { + "epoch": 0.2627743833446481, + "grad_norm": 0.6628038199492589, + "learning_rate": 8.645685914434596e-06, + "loss": 0.4048, + "step": 5806 + }, + { + "epoch": 0.2628196424530437, + "grad_norm": 0.701227250896986, + "learning_rate": 8.64518428789393e-06, + "loss": 0.3942, + "step": 5807 + }, + { + "epoch": 0.2628649015614392, + "grad_norm": 0.6371205125596449, + "learning_rate": 8.644682583029452e-06, + "loss": 0.3832, + "step": 5808 + }, + { + "epoch": 0.2629101606698348, + "grad_norm": 0.6289246242511318, + "learning_rate": 8.644180799851936e-06, + "loss": 0.3707, + "step": 5809 + }, + { + "epoch": 0.2629554197782304, + "grad_norm": 0.6210970208658598, + "learning_rate": 8.643678938372167e-06, + "loss": 0.2983, + "step": 5810 + }, + { + "epoch": 0.2630006788866259, + "grad_norm": 0.6778788158675642, + "learning_rate": 8.643176998600931e-06, + "loss": 0.3708, + "step": 5811 + }, + { + "epoch": 0.2630459379950215, + "grad_norm": 0.4103471001964465, + "learning_rate": 8.642674980549008e-06, + "loss": 0.4846, + "step": 5812 + }, + { + "epoch": 0.26309119710341705, + "grad_norm": 0.6314054288667231, + "learning_rate": 8.642172884227187e-06, + "loss": 0.3729, + "step": 5813 + }, + { + "epoch": 0.26313645621181264, + "grad_norm": 0.6871376003891854, + "learning_rate": 8.641670709646258e-06, + "loss": 0.3556, + "step": 5814 + }, + { + "epoch": 0.2631817153202082, + "grad_norm": 0.32402773719708344, + "learning_rate": 8.64116845681701e-06, + "loss": 0.4937, + "step": 5815 + }, + { + "epoch": 0.26322697442860377, + "grad_norm": 0.6534754943829143, + "learning_rate": 8.640666125750234e-06, + "loss": 0.3971, + "step": 5816 + }, + { + "epoch": 0.2632722335369993, + "grad_norm": 0.7017365191775312, + "learning_rate": 8.640163716456726e-06, + "loss": 0.4009, + "step": 5817 + }, + { + "epoch": 0.2633174926453949, + "grad_norm": 0.30537086162876564, + "learning_rate": 8.639661228947278e-06, + "loss": 0.5117, + "step": 5818 + }, + { + "epoch": 0.2633627517537904, + "grad_norm": 0.7499047532831177, + "learning_rate": 8.63915866323269e-06, + "loss": 0.4687, + "step": 5819 + }, + { + "epoch": 0.263408010862186, + "grad_norm": 0.6707874277156979, + "learning_rate": 8.638656019323758e-06, + "loss": 0.3724, + "step": 5820 + }, + { + "epoch": 0.2634532699705816, + "grad_norm": 0.3199935029667527, + "learning_rate": 8.638153297231282e-06, + "loss": 0.4871, + "step": 5821 + }, + { + "epoch": 0.26349852907897714, + "grad_norm": 0.6652319046497731, + "learning_rate": 8.637650496966069e-06, + "loss": 0.35, + "step": 5822 + }, + { + "epoch": 0.26354378818737273, + "grad_norm": 0.6489623223735191, + "learning_rate": 8.637147618538918e-06, + "loss": 0.4248, + "step": 5823 + }, + { + "epoch": 0.26358904729576826, + "grad_norm": 0.6338461835366701, + "learning_rate": 8.636644661960634e-06, + "loss": 0.3737, + "step": 5824 + }, + { + "epoch": 0.26363430640416385, + "grad_norm": 0.2895408176277085, + "learning_rate": 8.636141627242025e-06, + "loss": 0.4618, + "step": 5825 + }, + { + "epoch": 0.2636795655125594, + "grad_norm": 0.6836727451279606, + "learning_rate": 8.6356385143939e-06, + "loss": 0.372, + "step": 5826 + }, + { + "epoch": 0.263724824620955, + "grad_norm": 0.6853694082506848, + "learning_rate": 8.635135323427072e-06, + "loss": 0.4041, + "step": 5827 + }, + { + "epoch": 0.2637700837293505, + "grad_norm": 0.5989493808594828, + "learning_rate": 8.634632054352347e-06, + "loss": 0.3827, + "step": 5828 + }, + { + "epoch": 0.2638153428377461, + "grad_norm": 0.6143907957530291, + "learning_rate": 8.634128707180544e-06, + "loss": 0.3936, + "step": 5829 + }, + { + "epoch": 0.26386060194614164, + "grad_norm": 0.6213073360715494, + "learning_rate": 8.633625281922477e-06, + "loss": 0.3784, + "step": 5830 + }, + { + "epoch": 0.2639058610545372, + "grad_norm": 0.30410771789820507, + "learning_rate": 8.63312177858896e-06, + "loss": 0.4935, + "step": 5831 + }, + { + "epoch": 0.2639511201629328, + "grad_norm": 0.6590420669823309, + "learning_rate": 8.632618197190817e-06, + "loss": 0.4466, + "step": 5832 + }, + { + "epoch": 0.26399637927132835, + "grad_norm": 0.6639909004609018, + "learning_rate": 8.632114537738865e-06, + "loss": 0.3941, + "step": 5833 + }, + { + "epoch": 0.26404163837972394, + "grad_norm": 0.629068927113239, + "learning_rate": 8.631610800243926e-06, + "loss": 0.4041, + "step": 5834 + }, + { + "epoch": 0.2640868974881195, + "grad_norm": 0.6548830543697949, + "learning_rate": 8.631106984716824e-06, + "loss": 0.3643, + "step": 5835 + }, + { + "epoch": 0.26413215659651507, + "grad_norm": 0.6267647987383094, + "learning_rate": 8.630603091168385e-06, + "loss": 0.3975, + "step": 5836 + }, + { + "epoch": 0.2641774157049106, + "grad_norm": 0.3366458561699845, + "learning_rate": 8.630099119609439e-06, + "loss": 0.4905, + "step": 5837 + }, + { + "epoch": 0.2642226748133062, + "grad_norm": 0.6485695922987192, + "learning_rate": 8.62959507005081e-06, + "loss": 0.3584, + "step": 5838 + }, + { + "epoch": 0.2642679339217017, + "grad_norm": 0.29037235688732815, + "learning_rate": 8.62909094250333e-06, + "loss": 0.4906, + "step": 5839 + }, + { + "epoch": 0.2643131930300973, + "grad_norm": 0.6792032775967267, + "learning_rate": 8.62858673697783e-06, + "loss": 0.3736, + "step": 5840 + }, + { + "epoch": 0.26435845213849285, + "grad_norm": 0.3204503110222696, + "learning_rate": 8.628082453485149e-06, + "loss": 0.5212, + "step": 5841 + }, + { + "epoch": 0.26440371124688844, + "grad_norm": 0.7177482170374119, + "learning_rate": 8.627578092036117e-06, + "loss": 0.3985, + "step": 5842 + }, + { + "epoch": 0.264448970355284, + "grad_norm": 0.8012517579137678, + "learning_rate": 8.627073652641573e-06, + "loss": 0.3961, + "step": 5843 + }, + { + "epoch": 0.26449422946367956, + "grad_norm": 0.6032837985123936, + "learning_rate": 8.626569135312354e-06, + "loss": 0.3912, + "step": 5844 + }, + { + "epoch": 0.26453948857207515, + "grad_norm": 0.6511910379174815, + "learning_rate": 8.626064540059305e-06, + "loss": 0.3977, + "step": 5845 + }, + { + "epoch": 0.2645847476804707, + "grad_norm": 0.6740604527846302, + "learning_rate": 8.625559866893265e-06, + "loss": 0.3606, + "step": 5846 + }, + { + "epoch": 0.2646300067888663, + "grad_norm": 0.6388403733226904, + "learning_rate": 8.625055115825078e-06, + "loss": 0.3666, + "step": 5847 + }, + { + "epoch": 0.2646752658972618, + "grad_norm": 0.6245931039340854, + "learning_rate": 8.624550286865592e-06, + "loss": 0.3362, + "step": 5848 + }, + { + "epoch": 0.2647205250056574, + "grad_norm": 0.6749344299546478, + "learning_rate": 8.62404538002565e-06, + "loss": 0.4211, + "step": 5849 + }, + { + "epoch": 0.26476578411405294, + "grad_norm": 0.6031113265169693, + "learning_rate": 8.623540395316105e-06, + "loss": 0.3206, + "step": 5850 + }, + { + "epoch": 0.26481104322244853, + "grad_norm": 0.7118892079516408, + "learning_rate": 8.623035332747804e-06, + "loss": 0.4087, + "step": 5851 + }, + { + "epoch": 0.26485630233084406, + "grad_norm": 0.7077213249212451, + "learning_rate": 8.622530192331602e-06, + "loss": 0.3765, + "step": 5852 + }, + { + "epoch": 0.26490156143923965, + "grad_norm": 0.7198794438308793, + "learning_rate": 8.622024974078354e-06, + "loss": 0.3482, + "step": 5853 + }, + { + "epoch": 0.2649468205476352, + "grad_norm": 0.778162946379442, + "learning_rate": 8.62151967799891e-06, + "loss": 0.3665, + "step": 5854 + }, + { + "epoch": 0.2649920796560308, + "grad_norm": 0.533309837016235, + "learning_rate": 8.621014304104131e-06, + "loss": 0.4961, + "step": 5855 + }, + { + "epoch": 0.26503733876442637, + "grad_norm": 0.7134140281191794, + "learning_rate": 8.620508852404878e-06, + "loss": 0.3803, + "step": 5856 + }, + { + "epoch": 0.2650825978728219, + "grad_norm": 0.6597382720345764, + "learning_rate": 8.620003322912008e-06, + "loss": 0.4084, + "step": 5857 + }, + { + "epoch": 0.2651278569812175, + "grad_norm": 0.6589962606418719, + "learning_rate": 8.619497715636385e-06, + "loss": 0.381, + "step": 5858 + }, + { + "epoch": 0.265173116089613, + "grad_norm": 0.6724882253908735, + "learning_rate": 8.618992030588872e-06, + "loss": 0.4202, + "step": 5859 + }, + { + "epoch": 0.2652183751980086, + "grad_norm": 0.708051466390435, + "learning_rate": 8.618486267780334e-06, + "loss": 0.3905, + "step": 5860 + }, + { + "epoch": 0.26526363430640415, + "grad_norm": 0.6241104554817613, + "learning_rate": 8.617980427221641e-06, + "loss": 0.3767, + "step": 5861 + }, + { + "epoch": 0.26530889341479974, + "grad_norm": 0.6367150687469564, + "learning_rate": 8.617474508923662e-06, + "loss": 0.3898, + "step": 5862 + }, + { + "epoch": 0.2653541525231953, + "grad_norm": 0.6543179077247595, + "learning_rate": 8.616968512897264e-06, + "loss": 0.4204, + "step": 5863 + }, + { + "epoch": 0.26539941163159086, + "grad_norm": 0.6934113699895663, + "learning_rate": 8.61646243915332e-06, + "loss": 0.4221, + "step": 5864 + }, + { + "epoch": 0.2654446707399864, + "grad_norm": 0.4148202704133109, + "learning_rate": 8.615956287702708e-06, + "loss": 0.4663, + "step": 5865 + }, + { + "epoch": 0.265489929848382, + "grad_norm": 0.35037573430565927, + "learning_rate": 8.615450058556301e-06, + "loss": 0.4917, + "step": 5866 + }, + { + "epoch": 0.2655351889567776, + "grad_norm": 0.7073144750498311, + "learning_rate": 8.614943751724973e-06, + "loss": 0.4249, + "step": 5867 + }, + { + "epoch": 0.2655804480651731, + "grad_norm": 0.7428661153855681, + "learning_rate": 8.614437367219609e-06, + "loss": 0.3834, + "step": 5868 + }, + { + "epoch": 0.2656257071735687, + "grad_norm": 0.6768255011074285, + "learning_rate": 8.613930905051087e-06, + "loss": 0.3994, + "step": 5869 + }, + { + "epoch": 0.26567096628196424, + "grad_norm": 0.7575444906896478, + "learning_rate": 8.613424365230287e-06, + "loss": 0.3717, + "step": 5870 + }, + { + "epoch": 0.26571622539035983, + "grad_norm": 0.7548909935508007, + "learning_rate": 8.612917747768097e-06, + "loss": 0.3623, + "step": 5871 + }, + { + "epoch": 0.26576148449875536, + "grad_norm": 0.6016701214142821, + "learning_rate": 8.6124110526754e-06, + "loss": 0.343, + "step": 5872 + }, + { + "epoch": 0.26580674360715095, + "grad_norm": 0.679660680308673, + "learning_rate": 8.611904279963085e-06, + "loss": 0.3906, + "step": 5873 + }, + { + "epoch": 0.2658520027155465, + "grad_norm": 0.6360930168064564, + "learning_rate": 8.61139742964204e-06, + "loss": 0.4084, + "step": 5874 + }, + { + "epoch": 0.2658972618239421, + "grad_norm": 0.6615358533938903, + "learning_rate": 8.610890501723155e-06, + "loss": 0.4094, + "step": 5875 + }, + { + "epoch": 0.2659425209323376, + "grad_norm": 0.7109395219743451, + "learning_rate": 8.610383496217323e-06, + "loss": 0.3736, + "step": 5876 + }, + { + "epoch": 0.2659877800407332, + "grad_norm": 0.6837406970708032, + "learning_rate": 8.609876413135439e-06, + "loss": 0.4766, + "step": 5877 + }, + { + "epoch": 0.26603303914912874, + "grad_norm": 0.7336459536199746, + "learning_rate": 8.609369252488398e-06, + "loss": 0.4239, + "step": 5878 + }, + { + "epoch": 0.2660782982575243, + "grad_norm": 0.6437473058572298, + "learning_rate": 8.608862014287095e-06, + "loss": 0.3776, + "step": 5879 + }, + { + "epoch": 0.2661235573659199, + "grad_norm": 0.32222665867547856, + "learning_rate": 8.608354698542433e-06, + "loss": 0.5048, + "step": 5880 + }, + { + "epoch": 0.26616881647431545, + "grad_norm": 0.6907684265874435, + "learning_rate": 8.607847305265312e-06, + "loss": 0.3886, + "step": 5881 + }, + { + "epoch": 0.26621407558271104, + "grad_norm": 0.648226465706051, + "learning_rate": 8.607339834466632e-06, + "loss": 0.3937, + "step": 5882 + }, + { + "epoch": 0.2662593346911066, + "grad_norm": 0.6804105185275151, + "learning_rate": 8.606832286157296e-06, + "loss": 0.3769, + "step": 5883 + }, + { + "epoch": 0.26630459379950217, + "grad_norm": 0.6633112138977015, + "learning_rate": 8.606324660348214e-06, + "loss": 0.3824, + "step": 5884 + }, + { + "epoch": 0.2663498529078977, + "grad_norm": 0.7789029630758616, + "learning_rate": 8.605816957050291e-06, + "loss": 0.3202, + "step": 5885 + }, + { + "epoch": 0.2663951120162933, + "grad_norm": 0.7116149857547136, + "learning_rate": 8.605309176274434e-06, + "loss": 0.3525, + "step": 5886 + }, + { + "epoch": 0.2664403711246888, + "grad_norm": 0.7987998989658341, + "learning_rate": 8.604801318031556e-06, + "loss": 0.385, + "step": 5887 + }, + { + "epoch": 0.2664856302330844, + "grad_norm": 0.6836694369333408, + "learning_rate": 8.604293382332572e-06, + "loss": 0.3733, + "step": 5888 + }, + { + "epoch": 0.26653088934147995, + "grad_norm": 0.6679344237964411, + "learning_rate": 8.60378536918839e-06, + "loss": 0.3588, + "step": 5889 + }, + { + "epoch": 0.26657614844987554, + "grad_norm": 0.6560760582437417, + "learning_rate": 8.60327727860993e-06, + "loss": 0.3992, + "step": 5890 + }, + { + "epoch": 0.26662140755827113, + "grad_norm": 0.7049765744315053, + "learning_rate": 8.602769110608107e-06, + "loss": 0.3676, + "step": 5891 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 0.6276088466610943, + "learning_rate": 8.602260865193841e-06, + "loss": 0.3812, + "step": 5892 + }, + { + "epoch": 0.26671192577506225, + "grad_norm": 0.6116868774203824, + "learning_rate": 8.601752542378052e-06, + "loss": 0.3714, + "step": 5893 + }, + { + "epoch": 0.2667571848834578, + "grad_norm": 0.6801239626111731, + "learning_rate": 8.601244142171665e-06, + "loss": 0.3809, + "step": 5894 + }, + { + "epoch": 0.2668024439918534, + "grad_norm": 0.821498545744599, + "learning_rate": 8.6007356645856e-06, + "loss": 0.5027, + "step": 5895 + }, + { + "epoch": 0.2668477031002489, + "grad_norm": 0.6657517193075106, + "learning_rate": 8.600227109630785e-06, + "loss": 0.3888, + "step": 5896 + }, + { + "epoch": 0.2668929622086445, + "grad_norm": 0.4859325881457738, + "learning_rate": 8.599718477318146e-06, + "loss": 0.4869, + "step": 5897 + }, + { + "epoch": 0.26693822131704004, + "grad_norm": 0.7389180211567423, + "learning_rate": 8.599209767658613e-06, + "loss": 0.3747, + "step": 5898 + }, + { + "epoch": 0.2669834804254356, + "grad_norm": 0.6422154576545112, + "learning_rate": 8.598700980663116e-06, + "loss": 0.3458, + "step": 5899 + }, + { + "epoch": 0.26702873953383116, + "grad_norm": 0.6237373016006862, + "learning_rate": 8.598192116342587e-06, + "loss": 0.38, + "step": 5900 + }, + { + "epoch": 0.26707399864222675, + "grad_norm": 0.6284487296234135, + "learning_rate": 8.597683174707961e-06, + "loss": 0.3297, + "step": 5901 + }, + { + "epoch": 0.2671192577506223, + "grad_norm": 0.8446235665355496, + "learning_rate": 8.597174155770174e-06, + "loss": 0.5076, + "step": 5902 + }, + { + "epoch": 0.2671645168590179, + "grad_norm": 0.7014310270863416, + "learning_rate": 8.596665059540161e-06, + "loss": 0.4103, + "step": 5903 + }, + { + "epoch": 0.26720977596741347, + "grad_norm": 0.698389379913561, + "learning_rate": 8.596155886028863e-06, + "loss": 0.4106, + "step": 5904 + }, + { + "epoch": 0.267255035075809, + "grad_norm": 0.6508038142607682, + "learning_rate": 8.59564663524722e-06, + "loss": 0.3526, + "step": 5905 + }, + { + "epoch": 0.2673002941842046, + "grad_norm": 0.6590762924057729, + "learning_rate": 8.595137307206171e-06, + "loss": 0.3979, + "step": 5906 + }, + { + "epoch": 0.2673455532926001, + "grad_norm": 0.5143468902112852, + "learning_rate": 8.594627901916667e-06, + "loss": 0.486, + "step": 5907 + }, + { + "epoch": 0.2673908124009957, + "grad_norm": 0.6668621637376194, + "learning_rate": 8.594118419389648e-06, + "loss": 0.3568, + "step": 5908 + }, + { + "epoch": 0.26743607150939125, + "grad_norm": 0.6586502935608585, + "learning_rate": 8.593608859636063e-06, + "loss": 0.3406, + "step": 5909 + }, + { + "epoch": 0.26748133061778684, + "grad_norm": 0.7020550504466091, + "learning_rate": 8.593099222666859e-06, + "loss": 0.4176, + "step": 5910 + }, + { + "epoch": 0.2675265897261824, + "grad_norm": 0.33072277187840926, + "learning_rate": 8.592589508492989e-06, + "loss": 0.4711, + "step": 5911 + }, + { + "epoch": 0.26757184883457796, + "grad_norm": 0.7552124560845879, + "learning_rate": 8.592079717125403e-06, + "loss": 0.3952, + "step": 5912 + }, + { + "epoch": 0.2676171079429735, + "grad_norm": 0.4440559086132741, + "learning_rate": 8.591569848575058e-06, + "loss": 0.4977, + "step": 5913 + }, + { + "epoch": 0.2676623670513691, + "grad_norm": 0.6809038852834273, + "learning_rate": 8.591059902852907e-06, + "loss": 0.4298, + "step": 5914 + }, + { + "epoch": 0.2677076261597647, + "grad_norm": 0.6532468455961231, + "learning_rate": 8.590549879969907e-06, + "loss": 0.3721, + "step": 5915 + }, + { + "epoch": 0.2677528852681602, + "grad_norm": 0.6528557835064469, + "learning_rate": 8.590039779937019e-06, + "loss": 0.3939, + "step": 5916 + }, + { + "epoch": 0.2677981443765558, + "grad_norm": 0.6577332862162955, + "learning_rate": 8.5895296027652e-06, + "loss": 0.3531, + "step": 5917 + }, + { + "epoch": 0.26784340348495134, + "grad_norm": 0.6707199520170976, + "learning_rate": 8.589019348465416e-06, + "loss": 0.377, + "step": 5918 + }, + { + "epoch": 0.2678886625933469, + "grad_norm": 0.6661069685645835, + "learning_rate": 8.588509017048629e-06, + "loss": 0.385, + "step": 5919 + }, + { + "epoch": 0.26793392170174246, + "grad_norm": 0.656977906811802, + "learning_rate": 8.587998608525806e-06, + "loss": 0.3797, + "step": 5920 + }, + { + "epoch": 0.26797918081013805, + "grad_norm": 0.6457510799865884, + "learning_rate": 8.58748812290791e-06, + "loss": 0.3865, + "step": 5921 + }, + { + "epoch": 0.2680244399185336, + "grad_norm": 0.6716714434913296, + "learning_rate": 8.586977560205914e-06, + "loss": 0.3889, + "step": 5922 + }, + { + "epoch": 0.2680696990269292, + "grad_norm": 0.6985273498781074, + "learning_rate": 8.586466920430785e-06, + "loss": 0.4133, + "step": 5923 + }, + { + "epoch": 0.2681149581353247, + "grad_norm": 0.6292758691095204, + "learning_rate": 8.585956203593497e-06, + "loss": 0.3627, + "step": 5924 + }, + { + "epoch": 0.2681602172437203, + "grad_norm": 0.5394491971196063, + "learning_rate": 8.585445409705026e-06, + "loss": 0.5169, + "step": 5925 + }, + { + "epoch": 0.2682054763521159, + "grad_norm": 0.6232604641935396, + "learning_rate": 8.584934538776342e-06, + "loss": 0.3383, + "step": 5926 + }, + { + "epoch": 0.2682507354605114, + "grad_norm": 0.6635149021073501, + "learning_rate": 8.584423590818427e-06, + "loss": 0.3532, + "step": 5927 + }, + { + "epoch": 0.268295994568907, + "grad_norm": 0.9092934282618736, + "learning_rate": 8.583912565842258e-06, + "loss": 0.3869, + "step": 5928 + }, + { + "epoch": 0.26834125367730255, + "grad_norm": 0.6085165446418936, + "learning_rate": 8.583401463858814e-06, + "loss": 0.3775, + "step": 5929 + }, + { + "epoch": 0.26838651278569814, + "grad_norm": 0.6810081434074116, + "learning_rate": 8.582890284879077e-06, + "loss": 0.3621, + "step": 5930 + }, + { + "epoch": 0.2684317718940937, + "grad_norm": 0.6595823794825919, + "learning_rate": 8.582379028914034e-06, + "loss": 0.4089, + "step": 5931 + }, + { + "epoch": 0.26847703100248926, + "grad_norm": 0.7243005246090832, + "learning_rate": 8.581867695974667e-06, + "loss": 0.4088, + "step": 5932 + }, + { + "epoch": 0.2685222901108848, + "grad_norm": 0.6679784436223112, + "learning_rate": 8.581356286071964e-06, + "loss": 0.3949, + "step": 5933 + }, + { + "epoch": 0.2685675492192804, + "grad_norm": 0.623973476384692, + "learning_rate": 8.580844799216914e-06, + "loss": 0.375, + "step": 5934 + }, + { + "epoch": 0.2686128083276759, + "grad_norm": 0.7071319896024497, + "learning_rate": 8.580333235420509e-06, + "loss": 0.3691, + "step": 5935 + }, + { + "epoch": 0.2686580674360715, + "grad_norm": 0.6381944332927595, + "learning_rate": 8.579821594693736e-06, + "loss": 0.345, + "step": 5936 + }, + { + "epoch": 0.26870332654446705, + "grad_norm": 0.5723820412634509, + "learning_rate": 8.579309877047593e-06, + "loss": 0.5103, + "step": 5937 + }, + { + "epoch": 0.26874858565286264, + "grad_norm": 0.6204819925409865, + "learning_rate": 8.578798082493074e-06, + "loss": 0.3309, + "step": 5938 + }, + { + "epoch": 0.2687938447612582, + "grad_norm": 0.7162615258973855, + "learning_rate": 8.578286211041173e-06, + "loss": 0.3869, + "step": 5939 + }, + { + "epoch": 0.26883910386965376, + "grad_norm": 0.6812068319674739, + "learning_rate": 8.577774262702894e-06, + "loss": 0.3934, + "step": 5940 + }, + { + "epoch": 0.26888436297804935, + "grad_norm": 0.8801846873382326, + "learning_rate": 8.577262237489234e-06, + "loss": 0.4178, + "step": 5941 + }, + { + "epoch": 0.2689296220864449, + "grad_norm": 0.7154494503050958, + "learning_rate": 8.576750135411194e-06, + "loss": 0.4163, + "step": 5942 + }, + { + "epoch": 0.2689748811948405, + "grad_norm": 0.41689872782674725, + "learning_rate": 8.57623795647978e-06, + "loss": 0.5022, + "step": 5943 + }, + { + "epoch": 0.269020140303236, + "grad_norm": 0.6760071606654183, + "learning_rate": 8.575725700705995e-06, + "loss": 0.3541, + "step": 5944 + }, + { + "epoch": 0.2690653994116316, + "grad_norm": 0.5954749930016797, + "learning_rate": 8.575213368100847e-06, + "loss": 0.3645, + "step": 5945 + }, + { + "epoch": 0.26911065852002714, + "grad_norm": 0.6367933833228567, + "learning_rate": 8.574700958675345e-06, + "loss": 0.333, + "step": 5946 + }, + { + "epoch": 0.2691559176284227, + "grad_norm": 0.7950089422983774, + "learning_rate": 8.574188472440497e-06, + "loss": 0.3649, + "step": 5947 + }, + { + "epoch": 0.26920117673681826, + "grad_norm": 0.7098041224453934, + "learning_rate": 8.573675909407316e-06, + "loss": 0.3815, + "step": 5948 + }, + { + "epoch": 0.26924643584521385, + "grad_norm": 0.6194752583100062, + "learning_rate": 8.573163269586818e-06, + "loss": 0.3499, + "step": 5949 + }, + { + "epoch": 0.26929169495360944, + "grad_norm": 0.613211866231783, + "learning_rate": 8.572650552990012e-06, + "loss": 0.3677, + "step": 5950 + }, + { + "epoch": 0.269336954062005, + "grad_norm": 0.7044084698902522, + "learning_rate": 8.572137759627919e-06, + "loss": 0.3674, + "step": 5951 + }, + { + "epoch": 0.26938221317040056, + "grad_norm": 0.6962715657146951, + "learning_rate": 8.571624889511558e-06, + "loss": 0.3938, + "step": 5952 + }, + { + "epoch": 0.2694274722787961, + "grad_norm": 0.637278621803258, + "learning_rate": 8.571111942651945e-06, + "loss": 0.4078, + "step": 5953 + }, + { + "epoch": 0.2694727313871917, + "grad_norm": 0.6978075288266782, + "learning_rate": 8.570598919060108e-06, + "loss": 0.3697, + "step": 5954 + }, + { + "epoch": 0.2695179904955872, + "grad_norm": 0.6322435094797664, + "learning_rate": 8.570085818747063e-06, + "loss": 0.3471, + "step": 5955 + }, + { + "epoch": 0.2695632496039828, + "grad_norm": 0.6994153624100978, + "learning_rate": 8.56957264172384e-06, + "loss": 0.3857, + "step": 5956 + }, + { + "epoch": 0.26960850871237835, + "grad_norm": 0.6860845640803551, + "learning_rate": 8.569059388001463e-06, + "loss": 0.3976, + "step": 5957 + }, + { + "epoch": 0.26965376782077394, + "grad_norm": 0.776993539496546, + "learning_rate": 8.568546057590963e-06, + "loss": 0.3963, + "step": 5958 + }, + { + "epoch": 0.26969902692916947, + "grad_norm": 0.41075113977310934, + "learning_rate": 8.568032650503366e-06, + "loss": 0.5327, + "step": 5959 + }, + { + "epoch": 0.26974428603756506, + "grad_norm": 0.6551096579982019, + "learning_rate": 8.567519166749707e-06, + "loss": 0.3683, + "step": 5960 + }, + { + "epoch": 0.26978954514596065, + "grad_norm": 0.9991521072753345, + "learning_rate": 8.567005606341019e-06, + "loss": 0.3517, + "step": 5961 + }, + { + "epoch": 0.2698348042543562, + "grad_norm": 0.6873912875023508, + "learning_rate": 8.566491969288333e-06, + "loss": 0.4392, + "step": 5962 + }, + { + "epoch": 0.2698800633627518, + "grad_norm": 0.6267614200958163, + "learning_rate": 8.565978255602692e-06, + "loss": 0.3718, + "step": 5963 + }, + { + "epoch": 0.2699253224711473, + "grad_norm": 0.755594663681423, + "learning_rate": 8.565464465295128e-06, + "loss": 0.442, + "step": 5964 + }, + { + "epoch": 0.2699705815795429, + "grad_norm": 0.6180654968091859, + "learning_rate": 8.564950598376683e-06, + "loss": 0.3679, + "step": 5965 + }, + { + "epoch": 0.27001584068793844, + "grad_norm": 0.809800571361141, + "learning_rate": 8.5644366548584e-06, + "loss": 0.4397, + "step": 5966 + }, + { + "epoch": 0.270061099796334, + "grad_norm": 0.6092547414226015, + "learning_rate": 8.563922634751318e-06, + "loss": 0.3701, + "step": 5967 + }, + { + "epoch": 0.27010635890472956, + "grad_norm": 0.6731909504435205, + "learning_rate": 8.563408538066486e-06, + "loss": 0.392, + "step": 5968 + }, + { + "epoch": 0.27015161801312515, + "grad_norm": 0.6775432593263921, + "learning_rate": 8.562894364814948e-06, + "loss": 0.3732, + "step": 5969 + }, + { + "epoch": 0.2701968771215207, + "grad_norm": 0.6727938342148715, + "learning_rate": 8.562380115007753e-06, + "loss": 0.376, + "step": 5970 + }, + { + "epoch": 0.2702421362299163, + "grad_norm": 0.6533268203426031, + "learning_rate": 8.561865788655951e-06, + "loss": 0.3576, + "step": 5971 + }, + { + "epoch": 0.2702873953383118, + "grad_norm": 0.6259228660706317, + "learning_rate": 8.561351385770592e-06, + "loss": 0.34, + "step": 5972 + }, + { + "epoch": 0.2703326544467074, + "grad_norm": 0.7074757571539375, + "learning_rate": 8.560836906362731e-06, + "loss": 0.3913, + "step": 5973 + }, + { + "epoch": 0.270377913555103, + "grad_norm": 0.7318314444482122, + "learning_rate": 8.56032235044342e-06, + "loss": 0.3668, + "step": 5974 + }, + { + "epoch": 0.2704231726634985, + "grad_norm": 0.6757028464267878, + "learning_rate": 8.559807718023715e-06, + "loss": 0.3639, + "step": 5975 + }, + { + "epoch": 0.2704684317718941, + "grad_norm": 0.6430596171748171, + "learning_rate": 8.559293009114678e-06, + "loss": 0.4166, + "step": 5976 + }, + { + "epoch": 0.27051369088028965, + "grad_norm": 0.6852484751594174, + "learning_rate": 8.558778223727363e-06, + "loss": 0.372, + "step": 5977 + }, + { + "epoch": 0.27055894998868524, + "grad_norm": 0.6088024575326358, + "learning_rate": 8.558263361872836e-06, + "loss": 0.4165, + "step": 5978 + }, + { + "epoch": 0.2706042090970808, + "grad_norm": 0.6452025606498345, + "learning_rate": 8.557748423562157e-06, + "loss": 0.3354, + "step": 5979 + }, + { + "epoch": 0.27064946820547636, + "grad_norm": 0.4629092068761617, + "learning_rate": 8.55723340880639e-06, + "loss": 0.4937, + "step": 5980 + }, + { + "epoch": 0.2706947273138719, + "grad_norm": 0.650889937361659, + "learning_rate": 8.556718317616603e-06, + "loss": 0.364, + "step": 5981 + }, + { + "epoch": 0.2707399864222675, + "grad_norm": 0.5910922166754542, + "learning_rate": 8.556203150003863e-06, + "loss": 0.3798, + "step": 5982 + }, + { + "epoch": 0.270785245530663, + "grad_norm": 0.6773198923876816, + "learning_rate": 8.55568790597924e-06, + "loss": 0.4274, + "step": 5983 + }, + { + "epoch": 0.2708305046390586, + "grad_norm": 0.6389749204912827, + "learning_rate": 8.555172585553804e-06, + "loss": 0.3604, + "step": 5984 + }, + { + "epoch": 0.2708757637474542, + "grad_norm": 0.6088735184267581, + "learning_rate": 8.55465718873863e-06, + "loss": 0.4338, + "step": 5985 + }, + { + "epoch": 0.27092102285584974, + "grad_norm": 0.6186470148336451, + "learning_rate": 8.554141715544788e-06, + "loss": 0.3711, + "step": 5986 + }, + { + "epoch": 0.2709662819642453, + "grad_norm": 0.6727668532608004, + "learning_rate": 8.553626165983355e-06, + "loss": 0.3897, + "step": 5987 + }, + { + "epoch": 0.27101154107264086, + "grad_norm": 0.616134976164528, + "learning_rate": 8.553110540065412e-06, + "loss": 0.3587, + "step": 5988 + }, + { + "epoch": 0.27105680018103645, + "grad_norm": 0.42615089223040237, + "learning_rate": 8.552594837802035e-06, + "loss": 0.476, + "step": 5989 + }, + { + "epoch": 0.271102059289432, + "grad_norm": 0.6261527251182383, + "learning_rate": 8.552079059204306e-06, + "loss": 0.3496, + "step": 5990 + }, + { + "epoch": 0.2711473183978276, + "grad_norm": 1.1080483705521476, + "learning_rate": 8.551563204283308e-06, + "loss": 0.3852, + "step": 5991 + }, + { + "epoch": 0.2711925775062231, + "grad_norm": 0.33901070493735247, + "learning_rate": 8.551047273050126e-06, + "loss": 0.4988, + "step": 5992 + }, + { + "epoch": 0.2712378366146187, + "grad_norm": 0.6859078191706226, + "learning_rate": 8.550531265515842e-06, + "loss": 0.3912, + "step": 5993 + }, + { + "epoch": 0.27128309572301423, + "grad_norm": 0.673173504588645, + "learning_rate": 8.550015181691546e-06, + "loss": 0.3891, + "step": 5994 + }, + { + "epoch": 0.2713283548314098, + "grad_norm": 0.3069681794950536, + "learning_rate": 8.549499021588328e-06, + "loss": 0.4939, + "step": 5995 + }, + { + "epoch": 0.27137361393980536, + "grad_norm": 0.6517678642102981, + "learning_rate": 8.548982785217277e-06, + "loss": 0.3418, + "step": 5996 + }, + { + "epoch": 0.27141887304820095, + "grad_norm": 0.3253512792732969, + "learning_rate": 8.548466472589485e-06, + "loss": 0.5133, + "step": 5997 + }, + { + "epoch": 0.27146413215659654, + "grad_norm": 0.6915729271927266, + "learning_rate": 8.547950083716047e-06, + "loss": 0.3375, + "step": 5998 + }, + { + "epoch": 0.2715093912649921, + "grad_norm": 0.6576267492188453, + "learning_rate": 8.547433618608059e-06, + "loss": 0.3902, + "step": 5999 + }, + { + "epoch": 0.27155465037338766, + "grad_norm": 0.6279246314213415, + "learning_rate": 8.546917077276618e-06, + "loss": 0.3917, + "step": 6000 + }, + { + "epoch": 0.2715999094817832, + "grad_norm": 0.6221555956642787, + "learning_rate": 8.54640045973282e-06, + "loss": 0.401, + "step": 6001 + }, + { + "epoch": 0.2716451685901788, + "grad_norm": 0.6306321739741833, + "learning_rate": 8.54588376598777e-06, + "loss": 0.3645, + "step": 6002 + }, + { + "epoch": 0.2716904276985743, + "grad_norm": 0.6262000288703357, + "learning_rate": 8.545366996052568e-06, + "loss": 0.3908, + "step": 6003 + }, + { + "epoch": 0.2717356868069699, + "grad_norm": 0.666632379653395, + "learning_rate": 8.54485014993832e-06, + "loss": 0.4311, + "step": 6004 + }, + { + "epoch": 0.27178094591536545, + "grad_norm": 0.6618339991231594, + "learning_rate": 8.544333227656126e-06, + "loss": 0.3649, + "step": 6005 + }, + { + "epoch": 0.27182620502376104, + "grad_norm": 0.6665292193157383, + "learning_rate": 8.543816229217099e-06, + "loss": 0.362, + "step": 6006 + }, + { + "epoch": 0.27187146413215657, + "grad_norm": 0.6743160090461385, + "learning_rate": 8.543299154632343e-06, + "loss": 0.4236, + "step": 6007 + }, + { + "epoch": 0.27191672324055216, + "grad_norm": 0.6499545296448126, + "learning_rate": 8.542782003912973e-06, + "loss": 0.3738, + "step": 6008 + }, + { + "epoch": 0.27196198234894775, + "grad_norm": 0.6244358131955613, + "learning_rate": 8.542264777070097e-06, + "loss": 0.3653, + "step": 6009 + }, + { + "epoch": 0.2720072414573433, + "grad_norm": 0.6584235185675267, + "learning_rate": 8.54174747411483e-06, + "loss": 0.3828, + "step": 6010 + }, + { + "epoch": 0.2720525005657389, + "grad_norm": 0.6079854139604739, + "learning_rate": 8.541230095058289e-06, + "loss": 0.3765, + "step": 6011 + }, + { + "epoch": 0.2720977596741344, + "grad_norm": 0.6601104823424143, + "learning_rate": 8.540712639911588e-06, + "loss": 0.3562, + "step": 6012 + }, + { + "epoch": 0.27214301878253, + "grad_norm": 0.6408304883319524, + "learning_rate": 8.540195108685846e-06, + "loss": 0.3878, + "step": 6013 + }, + { + "epoch": 0.27218827789092553, + "grad_norm": 0.6128296670508544, + "learning_rate": 8.539677501392187e-06, + "loss": 0.3665, + "step": 6014 + }, + { + "epoch": 0.2722335369993211, + "grad_norm": 0.6746159565674552, + "learning_rate": 8.539159818041727e-06, + "loss": 0.4219, + "step": 6015 + }, + { + "epoch": 0.27227879610771666, + "grad_norm": 0.6738103055188247, + "learning_rate": 8.538642058645595e-06, + "loss": 0.3991, + "step": 6016 + }, + { + "epoch": 0.27232405521611225, + "grad_norm": 0.5836860257061833, + "learning_rate": 8.538124223214909e-06, + "loss": 0.3372, + "step": 6017 + }, + { + "epoch": 0.2723693143245078, + "grad_norm": 0.49335442087411124, + "learning_rate": 8.537606311760804e-06, + "loss": 0.4964, + "step": 6018 + }, + { + "epoch": 0.2724145734329034, + "grad_norm": 0.6693697621695167, + "learning_rate": 8.537088324294403e-06, + "loss": 0.3229, + "step": 6019 + }, + { + "epoch": 0.27245983254129896, + "grad_norm": 0.6527074234229424, + "learning_rate": 8.536570260826837e-06, + "loss": 0.371, + "step": 6020 + }, + { + "epoch": 0.2725050916496945, + "grad_norm": 0.6517147449836838, + "learning_rate": 8.536052121369238e-06, + "loss": 0.3695, + "step": 6021 + }, + { + "epoch": 0.2725503507580901, + "grad_norm": 0.7243834845664229, + "learning_rate": 8.535533905932739e-06, + "loss": 0.381, + "step": 6022 + }, + { + "epoch": 0.2725956098664856, + "grad_norm": 0.651690902641303, + "learning_rate": 8.535015614528475e-06, + "loss": 0.3708, + "step": 6023 + }, + { + "epoch": 0.2726408689748812, + "grad_norm": 0.6353993471501393, + "learning_rate": 8.534497247167581e-06, + "loss": 0.3404, + "step": 6024 + }, + { + "epoch": 0.27268612808327675, + "grad_norm": 0.6883967390953978, + "learning_rate": 8.533978803861199e-06, + "loss": 0.4128, + "step": 6025 + }, + { + "epoch": 0.27273138719167234, + "grad_norm": 0.7210181559318217, + "learning_rate": 8.533460284620464e-06, + "loss": 0.3335, + "step": 6026 + }, + { + "epoch": 0.27277664630006787, + "grad_norm": 0.6455031954425401, + "learning_rate": 8.532941689456521e-06, + "loss": 0.3843, + "step": 6027 + }, + { + "epoch": 0.27282190540846346, + "grad_norm": 0.6338284754087283, + "learning_rate": 8.532423018380511e-06, + "loss": 0.3658, + "step": 6028 + }, + { + "epoch": 0.272867164516859, + "grad_norm": 0.5224069999647922, + "learning_rate": 8.53190427140358e-06, + "loss": 0.5052, + "step": 6029 + }, + { + "epoch": 0.2729124236252546, + "grad_norm": 0.6258685222511478, + "learning_rate": 8.531385448536875e-06, + "loss": 0.347, + "step": 6030 + }, + { + "epoch": 0.2729576827336501, + "grad_norm": 0.6682531378852933, + "learning_rate": 8.53086654979154e-06, + "loss": 0.4118, + "step": 6031 + }, + { + "epoch": 0.2730029418420457, + "grad_norm": 0.6144327014064551, + "learning_rate": 8.530347575178728e-06, + "loss": 0.3551, + "step": 6032 + }, + { + "epoch": 0.2730482009504413, + "grad_norm": 0.6289075987124088, + "learning_rate": 8.52982852470959e-06, + "loss": 0.3418, + "step": 6033 + }, + { + "epoch": 0.27309346005883683, + "grad_norm": 0.645673471979479, + "learning_rate": 8.529309398395275e-06, + "loss": 0.3926, + "step": 6034 + }, + { + "epoch": 0.2731387191672324, + "grad_norm": 0.690713840540433, + "learning_rate": 8.528790196246944e-06, + "loss": 0.4096, + "step": 6035 + }, + { + "epoch": 0.27318397827562796, + "grad_norm": 0.48055901693607467, + "learning_rate": 8.528270918275749e-06, + "loss": 0.4982, + "step": 6036 + }, + { + "epoch": 0.27322923738402355, + "grad_norm": 0.6483839043555008, + "learning_rate": 8.527751564492847e-06, + "loss": 0.3755, + "step": 6037 + }, + { + "epoch": 0.2732744964924191, + "grad_norm": 0.33294770241593863, + "learning_rate": 8.527232134909398e-06, + "loss": 0.4978, + "step": 6038 + }, + { + "epoch": 0.2733197556008147, + "grad_norm": 0.6955806026366336, + "learning_rate": 8.526712629536566e-06, + "loss": 0.3873, + "step": 6039 + }, + { + "epoch": 0.2733650147092102, + "grad_norm": 0.7046916383301215, + "learning_rate": 8.52619304838551e-06, + "loss": 0.3398, + "step": 6040 + }, + { + "epoch": 0.2734102738176058, + "grad_norm": 0.6306151655838789, + "learning_rate": 8.525673391467395e-06, + "loss": 0.3614, + "step": 6041 + }, + { + "epoch": 0.27345553292600133, + "grad_norm": 0.6994791751162374, + "learning_rate": 8.525153658793386e-06, + "loss": 0.4099, + "step": 6042 + }, + { + "epoch": 0.2735007920343969, + "grad_norm": 0.6159324117192329, + "learning_rate": 8.524633850374653e-06, + "loss": 0.3621, + "step": 6043 + }, + { + "epoch": 0.2735460511427925, + "grad_norm": 0.6079616116262638, + "learning_rate": 8.524113966222363e-06, + "loss": 0.3456, + "step": 6044 + }, + { + "epoch": 0.27359131025118805, + "grad_norm": 0.6898106765207705, + "learning_rate": 8.523594006347686e-06, + "loss": 0.4221, + "step": 6045 + }, + { + "epoch": 0.27363656935958364, + "grad_norm": 0.5991907495206867, + "learning_rate": 8.523073970761799e-06, + "loss": 0.3603, + "step": 6046 + }, + { + "epoch": 0.27368182846797917, + "grad_norm": 0.6934617136625089, + "learning_rate": 8.52255385947587e-06, + "loss": 0.4052, + "step": 6047 + }, + { + "epoch": 0.27372708757637476, + "grad_norm": 0.6112395249078179, + "learning_rate": 8.52203367250108e-06, + "loss": 0.372, + "step": 6048 + }, + { + "epoch": 0.2737723466847703, + "grad_norm": 0.6502628865263396, + "learning_rate": 8.521513409848601e-06, + "loss": 0.3435, + "step": 6049 + }, + { + "epoch": 0.2738176057931659, + "grad_norm": 0.6236769646363846, + "learning_rate": 8.520993071529614e-06, + "loss": 0.4928, + "step": 6050 + }, + { + "epoch": 0.2738628649015614, + "grad_norm": 0.6919958170062448, + "learning_rate": 8.520472657555301e-06, + "loss": 0.3707, + "step": 6051 + }, + { + "epoch": 0.273908124009957, + "grad_norm": 0.34575276990715864, + "learning_rate": 8.519952167936842e-06, + "loss": 0.4819, + "step": 6052 + }, + { + "epoch": 0.27395338311835254, + "grad_norm": 0.6692288837623679, + "learning_rate": 8.519431602685423e-06, + "loss": 0.3679, + "step": 6053 + }, + { + "epoch": 0.27399864222674813, + "grad_norm": 0.6513651971217073, + "learning_rate": 8.518910961812229e-06, + "loss": 0.3517, + "step": 6054 + }, + { + "epoch": 0.2740439013351437, + "grad_norm": 0.6701945415254389, + "learning_rate": 8.518390245328444e-06, + "loss": 0.3163, + "step": 6055 + }, + { + "epoch": 0.27408916044353926, + "grad_norm": 0.6611267699545874, + "learning_rate": 8.517869453245257e-06, + "loss": 0.3771, + "step": 6056 + }, + { + "epoch": 0.27413441955193485, + "grad_norm": 0.6839169716811373, + "learning_rate": 8.517348585573862e-06, + "loss": 0.3549, + "step": 6057 + }, + { + "epoch": 0.2741796786603304, + "grad_norm": 0.6835259767360924, + "learning_rate": 8.516827642325447e-06, + "loss": 0.3923, + "step": 6058 + }, + { + "epoch": 0.274224937768726, + "grad_norm": 0.597517933827256, + "learning_rate": 8.51630662351121e-06, + "loss": 0.3652, + "step": 6059 + }, + { + "epoch": 0.2742701968771215, + "grad_norm": 0.8775982858679074, + "learning_rate": 8.515785529142339e-06, + "loss": 0.5372, + "step": 6060 + }, + { + "epoch": 0.2743154559855171, + "grad_norm": 0.686589275683136, + "learning_rate": 8.515264359230038e-06, + "loss": 0.3742, + "step": 6061 + }, + { + "epoch": 0.27436071509391263, + "grad_norm": 0.647086052466487, + "learning_rate": 8.514743113785501e-06, + "loss": 0.3295, + "step": 6062 + }, + { + "epoch": 0.2744059742023082, + "grad_norm": 0.640801493246443, + "learning_rate": 8.51422179281993e-06, + "loss": 0.3412, + "step": 6063 + }, + { + "epoch": 0.27445123331070376, + "grad_norm": 0.6425145743473898, + "learning_rate": 8.513700396344527e-06, + "loss": 0.3725, + "step": 6064 + }, + { + "epoch": 0.27449649241909935, + "grad_norm": 0.7084102432303667, + "learning_rate": 8.51317892437049e-06, + "loss": 0.3484, + "step": 6065 + }, + { + "epoch": 0.2745417515274949, + "grad_norm": 0.6634924837539934, + "learning_rate": 8.512657376909031e-06, + "loss": 0.3933, + "step": 6066 + }, + { + "epoch": 0.27458701063589047, + "grad_norm": 0.6744386287510311, + "learning_rate": 8.512135753971353e-06, + "loss": 0.3514, + "step": 6067 + }, + { + "epoch": 0.27463226974428606, + "grad_norm": 0.6249090418967712, + "learning_rate": 8.511614055568665e-06, + "loss": 0.3501, + "step": 6068 + }, + { + "epoch": 0.2746775288526816, + "grad_norm": 0.6350782641412113, + "learning_rate": 8.511092281712174e-06, + "loss": 0.4163, + "step": 6069 + }, + { + "epoch": 0.2747227879610772, + "grad_norm": 0.6298461195689672, + "learning_rate": 8.510570432413095e-06, + "loss": 0.3859, + "step": 6070 + }, + { + "epoch": 0.2747680470694727, + "grad_norm": 0.6447723188705166, + "learning_rate": 8.510048507682637e-06, + "loss": 0.3542, + "step": 6071 + }, + { + "epoch": 0.2748133061778683, + "grad_norm": 0.6436348002464336, + "learning_rate": 8.50952650753202e-06, + "loss": 0.3221, + "step": 6072 + }, + { + "epoch": 0.27485856528626385, + "grad_norm": 0.6204655375985614, + "learning_rate": 8.509004431972455e-06, + "loss": 0.3524, + "step": 6073 + }, + { + "epoch": 0.27490382439465944, + "grad_norm": 0.6687700098555398, + "learning_rate": 8.508482281015163e-06, + "loss": 0.3919, + "step": 6074 + }, + { + "epoch": 0.27494908350305497, + "grad_norm": 0.6433898373052561, + "learning_rate": 8.50796005467136e-06, + "loss": 0.4072, + "step": 6075 + }, + { + "epoch": 0.27499434261145056, + "grad_norm": 0.6452541013286588, + "learning_rate": 8.507437752952271e-06, + "loss": 0.4101, + "step": 6076 + }, + { + "epoch": 0.2750396017198461, + "grad_norm": 0.6893631663895471, + "learning_rate": 8.506915375869118e-06, + "loss": 0.4915, + "step": 6077 + }, + { + "epoch": 0.2750848608282417, + "grad_norm": 0.692999676441218, + "learning_rate": 8.506392923433124e-06, + "loss": 0.3923, + "step": 6078 + }, + { + "epoch": 0.2751301199366373, + "grad_norm": 0.6724569502775599, + "learning_rate": 8.505870395655512e-06, + "loss": 0.4131, + "step": 6079 + }, + { + "epoch": 0.2751753790450328, + "grad_norm": 0.6804157172735723, + "learning_rate": 8.505347792547516e-06, + "loss": 0.3892, + "step": 6080 + }, + { + "epoch": 0.2752206381534284, + "grad_norm": 0.4057416708339337, + "learning_rate": 8.504825114120361e-06, + "loss": 0.5007, + "step": 6081 + }, + { + "epoch": 0.27526589726182393, + "grad_norm": 0.7245855816415157, + "learning_rate": 8.504302360385276e-06, + "loss": 0.3408, + "step": 6082 + }, + { + "epoch": 0.2753111563702195, + "grad_norm": 0.6641071295508348, + "learning_rate": 8.5037795313535e-06, + "loss": 0.3677, + "step": 6083 + }, + { + "epoch": 0.27535641547861506, + "grad_norm": 0.6976861804856807, + "learning_rate": 8.50325662703626e-06, + "loss": 0.399, + "step": 6084 + }, + { + "epoch": 0.27540167458701065, + "grad_norm": 0.662407202013515, + "learning_rate": 8.502733647444796e-06, + "loss": 0.3714, + "step": 6085 + }, + { + "epoch": 0.2754469336954062, + "grad_norm": 0.5031182360900243, + "learning_rate": 8.502210592590344e-06, + "loss": 0.4847, + "step": 6086 + }, + { + "epoch": 0.27549219280380177, + "grad_norm": 0.8223407065276434, + "learning_rate": 8.501687462484141e-06, + "loss": 0.3875, + "step": 6087 + }, + { + "epoch": 0.2755374519121973, + "grad_norm": 0.7308641110498155, + "learning_rate": 8.501164257137431e-06, + "loss": 0.4031, + "step": 6088 + }, + { + "epoch": 0.2755827110205929, + "grad_norm": 0.6205462016558165, + "learning_rate": 8.500640976561453e-06, + "loss": 0.3834, + "step": 6089 + }, + { + "epoch": 0.2756279701289885, + "grad_norm": 0.6532663048504899, + "learning_rate": 8.500117620767452e-06, + "loss": 0.3513, + "step": 6090 + }, + { + "epoch": 0.275673229237384, + "grad_norm": 0.6343766244654432, + "learning_rate": 8.499594189766674e-06, + "loss": 0.4097, + "step": 6091 + }, + { + "epoch": 0.2757184883457796, + "grad_norm": 0.6452984743606108, + "learning_rate": 8.499070683570363e-06, + "loss": 0.4342, + "step": 6092 + }, + { + "epoch": 0.27576374745417515, + "grad_norm": 0.670940591277106, + "learning_rate": 8.49854710218977e-06, + "loss": 0.401, + "step": 6093 + }, + { + "epoch": 0.27580900656257074, + "grad_norm": 0.6861574396517808, + "learning_rate": 8.498023445636145e-06, + "loss": 0.367, + "step": 6094 + }, + { + "epoch": 0.27585426567096627, + "grad_norm": 0.6537017546764119, + "learning_rate": 8.49749971392074e-06, + "loss": 0.3854, + "step": 6095 + }, + { + "epoch": 0.27589952477936186, + "grad_norm": 0.6706788741382547, + "learning_rate": 8.496975907054808e-06, + "loss": 0.3728, + "step": 6096 + }, + { + "epoch": 0.2759447838877574, + "grad_norm": 0.6577647196836957, + "learning_rate": 8.496452025049605e-06, + "loss": 0.386, + "step": 6097 + }, + { + "epoch": 0.275990042996153, + "grad_norm": 0.687328696884683, + "learning_rate": 8.495928067916383e-06, + "loss": 0.3405, + "step": 6098 + }, + { + "epoch": 0.2760353021045485, + "grad_norm": 0.6355701599063429, + "learning_rate": 8.495404035666409e-06, + "loss": 0.4077, + "step": 6099 + }, + { + "epoch": 0.2760805612129441, + "grad_norm": 0.6651890064834014, + "learning_rate": 8.494879928310934e-06, + "loss": 0.3735, + "step": 6100 + }, + { + "epoch": 0.27612582032133964, + "grad_norm": 0.6690293513244945, + "learning_rate": 8.494355745861223e-06, + "loss": 0.4075, + "step": 6101 + }, + { + "epoch": 0.27617107942973523, + "grad_norm": 0.6397491482824402, + "learning_rate": 8.49383148832854e-06, + "loss": 0.3712, + "step": 6102 + }, + { + "epoch": 0.2762163385381308, + "grad_norm": 0.6254108917060476, + "learning_rate": 8.493307155724147e-06, + "loss": 0.3548, + "step": 6103 + }, + { + "epoch": 0.27626159764652636, + "grad_norm": 0.6253800648922403, + "learning_rate": 8.492782748059314e-06, + "loss": 0.3484, + "step": 6104 + }, + { + "epoch": 0.27630685675492195, + "grad_norm": 1.127881443978114, + "learning_rate": 8.492258265345307e-06, + "loss": 0.3745, + "step": 6105 + }, + { + "epoch": 0.2763521158633175, + "grad_norm": 0.645476286737807, + "learning_rate": 8.491733707593395e-06, + "loss": 0.3739, + "step": 6106 + }, + { + "epoch": 0.2763973749717131, + "grad_norm": 0.6708347456649015, + "learning_rate": 8.49120907481485e-06, + "loss": 0.3677, + "step": 6107 + }, + { + "epoch": 0.2764426340801086, + "grad_norm": 0.6620974851891143, + "learning_rate": 8.490684367020944e-06, + "loss": 0.3921, + "step": 6108 + }, + { + "epoch": 0.2764878931885042, + "grad_norm": 0.6977561862853785, + "learning_rate": 8.490159584222952e-06, + "loss": 0.4095, + "step": 6109 + }, + { + "epoch": 0.27653315229689973, + "grad_norm": 0.5846728205583535, + "learning_rate": 8.48963472643215e-06, + "loss": 0.3874, + "step": 6110 + }, + { + "epoch": 0.2765784114052953, + "grad_norm": 0.6304765120857315, + "learning_rate": 8.489109793659815e-06, + "loss": 0.3824, + "step": 6111 + }, + { + "epoch": 0.27662367051369086, + "grad_norm": 0.69646468269515, + "learning_rate": 8.488584785917226e-06, + "loss": 0.4003, + "step": 6112 + }, + { + "epoch": 0.27666892962208645, + "grad_norm": 0.669757027053765, + "learning_rate": 8.488059703215666e-06, + "loss": 0.3358, + "step": 6113 + }, + { + "epoch": 0.27671418873048204, + "grad_norm": 0.6960057329359819, + "learning_rate": 8.487534545566414e-06, + "loss": 0.3456, + "step": 6114 + }, + { + "epoch": 0.27675944783887757, + "grad_norm": 0.6425349594788039, + "learning_rate": 8.487009312980756e-06, + "loss": 0.4085, + "step": 6115 + }, + { + "epoch": 0.27680470694727316, + "grad_norm": 0.7200906547318653, + "learning_rate": 8.486484005469977e-06, + "loss": 0.386, + "step": 6116 + }, + { + "epoch": 0.2768499660556687, + "grad_norm": 0.5778607950964567, + "learning_rate": 8.485958623045365e-06, + "loss": 0.3665, + "step": 6117 + }, + { + "epoch": 0.2768952251640643, + "grad_norm": 0.6721297193179439, + "learning_rate": 8.48543316571821e-06, + "loss": 0.3692, + "step": 6118 + }, + { + "epoch": 0.2769404842724598, + "grad_norm": 0.7550680623075885, + "learning_rate": 8.484907633499798e-06, + "loss": 0.3829, + "step": 6119 + }, + { + "epoch": 0.2769857433808554, + "grad_norm": 0.6393433200351009, + "learning_rate": 8.484382026401428e-06, + "loss": 0.3975, + "step": 6120 + }, + { + "epoch": 0.27703100248925094, + "grad_norm": 0.6772103434747855, + "learning_rate": 8.483856344434388e-06, + "loss": 0.3644, + "step": 6121 + }, + { + "epoch": 0.27707626159764653, + "grad_norm": 0.6192545725413289, + "learning_rate": 8.483330587609975e-06, + "loss": 0.3698, + "step": 6122 + }, + { + "epoch": 0.27712152070604207, + "grad_norm": 0.6638295813572664, + "learning_rate": 8.482804755939484e-06, + "loss": 0.3637, + "step": 6123 + }, + { + "epoch": 0.27716677981443766, + "grad_norm": 0.6374373395694628, + "learning_rate": 8.482278849434218e-06, + "loss": 0.4055, + "step": 6124 + }, + { + "epoch": 0.2772120389228332, + "grad_norm": 0.7399956950117406, + "learning_rate": 8.481752868105473e-06, + "loss": 0.3655, + "step": 6125 + }, + { + "epoch": 0.2772572980312288, + "grad_norm": 0.6252133276431657, + "learning_rate": 8.481226811964552e-06, + "loss": 0.367, + "step": 6126 + }, + { + "epoch": 0.2773025571396244, + "grad_norm": 0.6040986055476337, + "learning_rate": 8.48070068102276e-06, + "loss": 0.399, + "step": 6127 + }, + { + "epoch": 0.2773478162480199, + "grad_norm": 0.6680655356074402, + "learning_rate": 8.480174475291401e-06, + "loss": 0.386, + "step": 6128 + }, + { + "epoch": 0.2773930753564155, + "grad_norm": 0.6679662578379792, + "learning_rate": 8.47964819478178e-06, + "loss": 0.3413, + "step": 6129 + }, + { + "epoch": 0.27743833446481103, + "grad_norm": 0.6402503560072257, + "learning_rate": 8.479121839505205e-06, + "loss": 0.4141, + "step": 6130 + }, + { + "epoch": 0.2774835935732066, + "grad_norm": 0.6748241095133092, + "learning_rate": 8.478595409472988e-06, + "loss": 0.363, + "step": 6131 + }, + { + "epoch": 0.27752885268160216, + "grad_norm": 0.735162342819606, + "learning_rate": 8.47806890469644e-06, + "loss": 0.4045, + "step": 6132 + }, + { + "epoch": 0.27757411178999775, + "grad_norm": 0.6468589030966592, + "learning_rate": 8.477542325186873e-06, + "loss": 0.3892, + "step": 6133 + }, + { + "epoch": 0.2776193708983933, + "grad_norm": 0.6205833487739102, + "learning_rate": 8.4770156709556e-06, + "loss": 0.3388, + "step": 6134 + }, + { + "epoch": 0.27766463000678887, + "grad_norm": 0.6116203070506987, + "learning_rate": 8.476488942013941e-06, + "loss": 0.3953, + "step": 6135 + }, + { + "epoch": 0.2777098891151844, + "grad_norm": 0.41637885767207033, + "learning_rate": 8.475962138373212e-06, + "loss": 0.4961, + "step": 6136 + }, + { + "epoch": 0.27775514822358, + "grad_norm": 0.6635986927809545, + "learning_rate": 8.475435260044732e-06, + "loss": 0.3943, + "step": 6137 + }, + { + "epoch": 0.2778004073319756, + "grad_norm": 0.6525112217425668, + "learning_rate": 8.474908307039822e-06, + "loss": 0.3992, + "step": 6138 + }, + { + "epoch": 0.2778456664403711, + "grad_norm": 0.6005514128270779, + "learning_rate": 8.474381279369804e-06, + "loss": 0.3926, + "step": 6139 + }, + { + "epoch": 0.2778909255487667, + "grad_norm": 0.30909771445739137, + "learning_rate": 8.473854177046004e-06, + "loss": 0.479, + "step": 6140 + }, + { + "epoch": 0.27793618465716224, + "grad_norm": 0.6494619158079863, + "learning_rate": 8.473327000079748e-06, + "loss": 0.3605, + "step": 6141 + }, + { + "epoch": 0.27798144376555783, + "grad_norm": 0.3128065828485614, + "learning_rate": 8.472799748482361e-06, + "loss": 0.4826, + "step": 6142 + }, + { + "epoch": 0.27802670287395337, + "grad_norm": 0.28853261982474304, + "learning_rate": 8.472272422265172e-06, + "loss": 0.4897, + "step": 6143 + }, + { + "epoch": 0.27807196198234896, + "grad_norm": 0.6229091371671683, + "learning_rate": 8.471745021439516e-06, + "loss": 0.3932, + "step": 6144 + }, + { + "epoch": 0.2781172210907445, + "grad_norm": 0.6575726509092661, + "learning_rate": 8.47121754601672e-06, + "loss": 0.4207, + "step": 6145 + }, + { + "epoch": 0.2781624801991401, + "grad_norm": 0.7373976535519031, + "learning_rate": 8.47068999600812e-06, + "loss": 0.3768, + "step": 6146 + }, + { + "epoch": 0.2782077393075356, + "grad_norm": 0.3628628811080446, + "learning_rate": 8.470162371425052e-06, + "loss": 0.4912, + "step": 6147 + }, + { + "epoch": 0.2782529984159312, + "grad_norm": 0.6637909505966786, + "learning_rate": 8.469634672278853e-06, + "loss": 0.3541, + "step": 6148 + }, + { + "epoch": 0.2782982575243268, + "grad_norm": 0.311668781451325, + "learning_rate": 8.46910689858086e-06, + "loss": 0.4806, + "step": 6149 + }, + { + "epoch": 0.27834351663272233, + "grad_norm": 0.683883187993135, + "learning_rate": 8.468579050342414e-06, + "loss": 0.3922, + "step": 6150 + }, + { + "epoch": 0.2783887757411179, + "grad_norm": 0.6586497915069213, + "learning_rate": 8.468051127574858e-06, + "loss": 0.4114, + "step": 6151 + }, + { + "epoch": 0.27843403484951346, + "grad_norm": 0.6524905680374529, + "learning_rate": 8.467523130289535e-06, + "loss": 0.383, + "step": 6152 + }, + { + "epoch": 0.27847929395790905, + "grad_norm": 0.6940460177411515, + "learning_rate": 8.466995058497788e-06, + "loss": 0.345, + "step": 6153 + }, + { + "epoch": 0.2785245530663046, + "grad_norm": 0.4365224565945977, + "learning_rate": 8.466466912210967e-06, + "loss": 0.47, + "step": 6154 + }, + { + "epoch": 0.27856981217470017, + "grad_norm": 0.38480078277144353, + "learning_rate": 8.465938691440417e-06, + "loss": 0.5013, + "step": 6155 + }, + { + "epoch": 0.2786150712830957, + "grad_norm": 0.6727143832648759, + "learning_rate": 8.46541039619749e-06, + "loss": 0.3663, + "step": 6156 + }, + { + "epoch": 0.2786603303914913, + "grad_norm": 0.3096778956946426, + "learning_rate": 8.464882026493537e-06, + "loss": 0.4784, + "step": 6157 + }, + { + "epoch": 0.27870558949988683, + "grad_norm": 0.6339465451087098, + "learning_rate": 8.464353582339911e-06, + "loss": 0.3632, + "step": 6158 + }, + { + "epoch": 0.2787508486082824, + "grad_norm": 0.3841815298069502, + "learning_rate": 8.463825063747966e-06, + "loss": 0.4866, + "step": 6159 + }, + { + "epoch": 0.27879610771667795, + "grad_norm": 0.6775125423465649, + "learning_rate": 8.463296470729058e-06, + "loss": 0.3913, + "step": 6160 + }, + { + "epoch": 0.27884136682507354, + "grad_norm": 0.35137476460460587, + "learning_rate": 8.462767803294547e-06, + "loss": 0.475, + "step": 6161 + }, + { + "epoch": 0.27888662593346913, + "grad_norm": 0.6326437997259854, + "learning_rate": 8.462239061455791e-06, + "loss": 0.3446, + "step": 6162 + }, + { + "epoch": 0.27893188504186467, + "grad_norm": 0.2746024871363939, + "learning_rate": 8.461710245224149e-06, + "loss": 0.4814, + "step": 6163 + }, + { + "epoch": 0.27897714415026026, + "grad_norm": 0.6396593577582315, + "learning_rate": 8.461181354610988e-06, + "loss": 0.3749, + "step": 6164 + }, + { + "epoch": 0.2790224032586558, + "grad_norm": 0.6563268547409656, + "learning_rate": 8.460652389627668e-06, + "loss": 0.4009, + "step": 6165 + }, + { + "epoch": 0.2790676623670514, + "grad_norm": 0.4048207096311486, + "learning_rate": 8.46012335028556e-06, + "loss": 0.5349, + "step": 6166 + }, + { + "epoch": 0.2791129214754469, + "grad_norm": 0.6814293888375414, + "learning_rate": 8.459594236596024e-06, + "loss": 0.3856, + "step": 6167 + }, + { + "epoch": 0.2791581805838425, + "grad_norm": 0.7480971357179941, + "learning_rate": 8.459065048570434e-06, + "loss": 0.4051, + "step": 6168 + }, + { + "epoch": 0.27920343969223804, + "grad_norm": 0.6274966551812439, + "learning_rate": 8.45853578622016e-06, + "loss": 0.3512, + "step": 6169 + }, + { + "epoch": 0.27924869880063363, + "grad_norm": 0.6308842647074984, + "learning_rate": 8.458006449556576e-06, + "loss": 0.3576, + "step": 6170 + }, + { + "epoch": 0.27929395790902917, + "grad_norm": 0.32163095555358034, + "learning_rate": 8.457477038591054e-06, + "loss": 0.4928, + "step": 6171 + }, + { + "epoch": 0.27933921701742476, + "grad_norm": 0.6627694350400074, + "learning_rate": 8.456947553334966e-06, + "loss": 0.3557, + "step": 6172 + }, + { + "epoch": 0.27938447612582035, + "grad_norm": 0.6565527403152172, + "learning_rate": 8.456417993799695e-06, + "loss": 0.4173, + "step": 6173 + }, + { + "epoch": 0.2794297352342159, + "grad_norm": 0.2907819307107183, + "learning_rate": 8.455888359996616e-06, + "loss": 0.5071, + "step": 6174 + }, + { + "epoch": 0.27947499434261147, + "grad_norm": 0.8058453445617328, + "learning_rate": 8.455358651937111e-06, + "loss": 0.3688, + "step": 6175 + }, + { + "epoch": 0.279520253451007, + "grad_norm": 0.6762267533514817, + "learning_rate": 8.45482886963256e-06, + "loss": 0.3641, + "step": 6176 + }, + { + "epoch": 0.2795655125594026, + "grad_norm": 0.6706516377549697, + "learning_rate": 8.454299013094347e-06, + "loss": 0.3531, + "step": 6177 + }, + { + "epoch": 0.27961077166779813, + "grad_norm": 0.6571031966771673, + "learning_rate": 8.453769082333858e-06, + "loss": 0.3657, + "step": 6178 + }, + { + "epoch": 0.2796560307761937, + "grad_norm": 0.682765081302516, + "learning_rate": 8.453239077362478e-06, + "loss": 0.3127, + "step": 6179 + }, + { + "epoch": 0.27970128988458925, + "grad_norm": 0.3404651269271434, + "learning_rate": 8.452708998191597e-06, + "loss": 0.4783, + "step": 6180 + }, + { + "epoch": 0.27974654899298484, + "grad_norm": 0.6757919228891912, + "learning_rate": 8.452178844832603e-06, + "loss": 0.3785, + "step": 6181 + }, + { + "epoch": 0.2797918081013804, + "grad_norm": 0.8022516686986559, + "learning_rate": 8.451648617296889e-06, + "loss": 0.3378, + "step": 6182 + }, + { + "epoch": 0.27983706720977597, + "grad_norm": 0.317920038886496, + "learning_rate": 8.451118315595847e-06, + "loss": 0.489, + "step": 6183 + }, + { + "epoch": 0.27988232631817156, + "grad_norm": 0.6975404891441597, + "learning_rate": 8.45058793974087e-06, + "loss": 0.4417, + "step": 6184 + }, + { + "epoch": 0.2799275854265671, + "grad_norm": 0.69970890978507, + "learning_rate": 8.450057489743359e-06, + "loss": 0.4497, + "step": 6185 + }, + { + "epoch": 0.2799728445349627, + "grad_norm": 0.7103773384893085, + "learning_rate": 8.449526965614708e-06, + "loss": 0.3826, + "step": 6186 + }, + { + "epoch": 0.2800181036433582, + "grad_norm": 0.6669420693822625, + "learning_rate": 8.448996367366313e-06, + "loss": 0.3461, + "step": 6187 + }, + { + "epoch": 0.2800633627517538, + "grad_norm": 0.7245749365890447, + "learning_rate": 8.448465695009583e-06, + "loss": 0.3839, + "step": 6188 + }, + { + "epoch": 0.28010862186014934, + "grad_norm": 0.6974101733284066, + "learning_rate": 8.447934948555915e-06, + "loss": 0.3906, + "step": 6189 + }, + { + "epoch": 0.28015388096854493, + "grad_norm": 0.6700526606181927, + "learning_rate": 8.447404128016715e-06, + "loss": 0.338, + "step": 6190 + }, + { + "epoch": 0.28019914007694047, + "grad_norm": 0.6485693661815121, + "learning_rate": 8.446873233403388e-06, + "loss": 0.4185, + "step": 6191 + }, + { + "epoch": 0.28024439918533606, + "grad_norm": 0.6388113321491804, + "learning_rate": 8.446342264727341e-06, + "loss": 0.3867, + "step": 6192 + }, + { + "epoch": 0.2802896582937316, + "grad_norm": 0.6613139018209925, + "learning_rate": 8.445811221999983e-06, + "loss": 0.4226, + "step": 6193 + }, + { + "epoch": 0.2803349174021272, + "grad_norm": 0.514249841919849, + "learning_rate": 8.445280105232724e-06, + "loss": 0.4739, + "step": 6194 + }, + { + "epoch": 0.2803801765105227, + "grad_norm": 0.6705199570351592, + "learning_rate": 8.44474891443698e-06, + "loss": 0.3916, + "step": 6195 + }, + { + "epoch": 0.2804254356189183, + "grad_norm": 0.6287426122637085, + "learning_rate": 8.44421764962416e-06, + "loss": 0.3719, + "step": 6196 + }, + { + "epoch": 0.2804706947273139, + "grad_norm": 0.6513169442120156, + "learning_rate": 8.443686310805679e-06, + "loss": 0.3873, + "step": 6197 + }, + { + "epoch": 0.28051595383570943, + "grad_norm": 0.6885897435735384, + "learning_rate": 8.443154897992958e-06, + "loss": 0.3384, + "step": 6198 + }, + { + "epoch": 0.280561212944105, + "grad_norm": 0.6483482609359074, + "learning_rate": 8.442623411197412e-06, + "loss": 0.413, + "step": 6199 + }, + { + "epoch": 0.28060647205250056, + "grad_norm": 0.63645573302067, + "learning_rate": 8.442091850430463e-06, + "loss": 0.3639, + "step": 6200 + }, + { + "epoch": 0.28065173116089615, + "grad_norm": 0.48556986376096983, + "learning_rate": 8.441560215703531e-06, + "loss": 0.5057, + "step": 6201 + }, + { + "epoch": 0.2806969902692917, + "grad_norm": 0.6492821333841186, + "learning_rate": 8.441028507028041e-06, + "loss": 0.4118, + "step": 6202 + }, + { + "epoch": 0.28074224937768727, + "grad_norm": 0.6545075018939425, + "learning_rate": 8.440496724415415e-06, + "loss": 0.3803, + "step": 6203 + }, + { + "epoch": 0.2807875084860828, + "grad_norm": 0.6451701396785834, + "learning_rate": 8.439964867877082e-06, + "loss": 0.3693, + "step": 6204 + }, + { + "epoch": 0.2808327675944784, + "grad_norm": 0.7229587230904689, + "learning_rate": 8.439432937424468e-06, + "loss": 0.3748, + "step": 6205 + }, + { + "epoch": 0.28087802670287393, + "grad_norm": 0.6800637781863547, + "learning_rate": 8.438900933069006e-06, + "loss": 0.445, + "step": 6206 + }, + { + "epoch": 0.2809232858112695, + "grad_norm": 0.6669011748349529, + "learning_rate": 8.438368854822123e-06, + "loss": 0.3841, + "step": 6207 + }, + { + "epoch": 0.2809685449196651, + "grad_norm": 0.42745797731326585, + "learning_rate": 8.437836702695253e-06, + "loss": 0.4638, + "step": 6208 + }, + { + "epoch": 0.28101380402806064, + "grad_norm": 0.33492718032317736, + "learning_rate": 8.437304476699833e-06, + "loss": 0.4973, + "step": 6209 + }, + { + "epoch": 0.28105906313645623, + "grad_norm": 0.6915936016022975, + "learning_rate": 8.436772176847295e-06, + "loss": 0.3549, + "step": 6210 + }, + { + "epoch": 0.28110432224485177, + "grad_norm": 0.6375801191470075, + "learning_rate": 8.436239803149077e-06, + "loss": 0.3504, + "step": 6211 + }, + { + "epoch": 0.28114958135324736, + "grad_norm": 0.3721610081077562, + "learning_rate": 8.43570735561662e-06, + "loss": 0.4799, + "step": 6212 + }, + { + "epoch": 0.2811948404616429, + "grad_norm": 0.7265660941567311, + "learning_rate": 8.435174834261365e-06, + "loss": 0.4056, + "step": 6213 + }, + { + "epoch": 0.2812400995700385, + "grad_norm": 0.35685129525632925, + "learning_rate": 8.434642239094752e-06, + "loss": 0.469, + "step": 6214 + }, + { + "epoch": 0.281285358678434, + "grad_norm": 0.7138835831888775, + "learning_rate": 8.434109570128228e-06, + "loss": 0.3291, + "step": 6215 + }, + { + "epoch": 0.2813306177868296, + "grad_norm": 0.7130084286744158, + "learning_rate": 8.433576827373234e-06, + "loss": 0.3764, + "step": 6216 + }, + { + "epoch": 0.28137587689522514, + "grad_norm": 0.3521422973869999, + "learning_rate": 8.433044010841221e-06, + "loss": 0.5127, + "step": 6217 + }, + { + "epoch": 0.28142113600362073, + "grad_norm": 0.8142269849959118, + "learning_rate": 8.432511120543633e-06, + "loss": 0.4221, + "step": 6218 + }, + { + "epoch": 0.2814663951120163, + "grad_norm": 0.652441030531594, + "learning_rate": 8.431978156491927e-06, + "loss": 0.3707, + "step": 6219 + }, + { + "epoch": 0.28151165422041186, + "grad_norm": 0.7365615620283028, + "learning_rate": 8.43144511869755e-06, + "loss": 0.3895, + "step": 6220 + }, + { + "epoch": 0.28155691332880745, + "grad_norm": 0.6056462961860908, + "learning_rate": 8.430912007171957e-06, + "loss": 0.3803, + "step": 6221 + }, + { + "epoch": 0.281602172437203, + "grad_norm": 0.6091945279871832, + "learning_rate": 8.430378821926599e-06, + "loss": 0.4075, + "step": 6222 + }, + { + "epoch": 0.28164743154559857, + "grad_norm": 0.3147920289891761, + "learning_rate": 8.429845562972939e-06, + "loss": 0.5117, + "step": 6223 + }, + { + "epoch": 0.2816926906539941, + "grad_norm": 0.6431671482916711, + "learning_rate": 8.429312230322431e-06, + "loss": 0.3297, + "step": 6224 + }, + { + "epoch": 0.2817379497623897, + "grad_norm": 0.2877762484850333, + "learning_rate": 8.428778823986534e-06, + "loss": 0.4789, + "step": 6225 + }, + { + "epoch": 0.28178320887078523, + "grad_norm": 0.6581219748598858, + "learning_rate": 8.42824534397671e-06, + "loss": 0.3275, + "step": 6226 + }, + { + "epoch": 0.2818284679791808, + "grad_norm": 0.6211291035800235, + "learning_rate": 8.427711790304426e-06, + "loss": 0.367, + "step": 6227 + }, + { + "epoch": 0.28187372708757635, + "grad_norm": 0.7104653182176582, + "learning_rate": 8.427178162981141e-06, + "loss": 0.3692, + "step": 6228 + }, + { + "epoch": 0.28191898619597194, + "grad_norm": 0.6858916103546259, + "learning_rate": 8.426644462018323e-06, + "loss": 0.4087, + "step": 6229 + }, + { + "epoch": 0.2819642453043675, + "grad_norm": 0.630715733986845, + "learning_rate": 8.42611068742744e-06, + "loss": 0.3305, + "step": 6230 + }, + { + "epoch": 0.28200950441276307, + "grad_norm": 0.6855694797106894, + "learning_rate": 8.425576839219962e-06, + "loss": 0.3584, + "step": 6231 + }, + { + "epoch": 0.28205476352115866, + "grad_norm": 0.6616610678604273, + "learning_rate": 8.425042917407358e-06, + "loss": 0.3821, + "step": 6232 + }, + { + "epoch": 0.2821000226295542, + "grad_norm": 0.6621237669737059, + "learning_rate": 8.4245089220011e-06, + "loss": 0.3835, + "step": 6233 + }, + { + "epoch": 0.2821452817379498, + "grad_norm": 0.7034082290073037, + "learning_rate": 8.423974853012663e-06, + "loss": 0.386, + "step": 6234 + }, + { + "epoch": 0.2821905408463453, + "grad_norm": 0.7146277465228836, + "learning_rate": 8.423440710453524e-06, + "loss": 0.3895, + "step": 6235 + }, + { + "epoch": 0.2822357999547409, + "grad_norm": 0.6941430005620083, + "learning_rate": 8.422906494335155e-06, + "loss": 0.3778, + "step": 6236 + }, + { + "epoch": 0.28228105906313644, + "grad_norm": 0.6174910742970482, + "learning_rate": 8.42237220466904e-06, + "loss": 0.349, + "step": 6237 + }, + { + "epoch": 0.28232631817153203, + "grad_norm": 0.6327827603546239, + "learning_rate": 8.421837841466657e-06, + "loss": 0.3166, + "step": 6238 + }, + { + "epoch": 0.28237157727992757, + "grad_norm": 0.7934117873176336, + "learning_rate": 8.42130340473949e-06, + "loss": 0.35, + "step": 6239 + }, + { + "epoch": 0.28241683638832316, + "grad_norm": 0.6753158892052803, + "learning_rate": 8.420768894499018e-06, + "loss": 0.416, + "step": 6240 + }, + { + "epoch": 0.2824620954967187, + "grad_norm": 0.6114585858457859, + "learning_rate": 8.420234310756731e-06, + "loss": 0.3795, + "step": 6241 + }, + { + "epoch": 0.2825073546051143, + "grad_norm": 0.6503678006643415, + "learning_rate": 8.419699653524112e-06, + "loss": 0.3335, + "step": 6242 + }, + { + "epoch": 0.28255261371350987, + "grad_norm": 0.6703813746328721, + "learning_rate": 8.41916492281265e-06, + "loss": 0.3887, + "step": 6243 + }, + { + "epoch": 0.2825978728219054, + "grad_norm": 0.6616322211727921, + "learning_rate": 8.418630118633835e-06, + "loss": 0.3731, + "step": 6244 + }, + { + "epoch": 0.282643131930301, + "grad_norm": 0.679884118274133, + "learning_rate": 8.418095240999157e-06, + "loss": 0.3616, + "step": 6245 + }, + { + "epoch": 0.28268839103869653, + "grad_norm": 0.6420201373911756, + "learning_rate": 8.417560289920112e-06, + "loss": 0.3553, + "step": 6246 + }, + { + "epoch": 0.2827336501470921, + "grad_norm": 0.6892038150466372, + "learning_rate": 8.417025265408192e-06, + "loss": 0.3675, + "step": 6247 + }, + { + "epoch": 0.28277890925548765, + "grad_norm": 0.6629311105686149, + "learning_rate": 8.416490167474894e-06, + "loss": 0.433, + "step": 6248 + }, + { + "epoch": 0.28282416836388324, + "grad_norm": 0.753104654935313, + "learning_rate": 8.415954996131715e-06, + "loss": 0.332, + "step": 6249 + }, + { + "epoch": 0.2828694274722788, + "grad_norm": 0.4750248779522945, + "learning_rate": 8.415419751390155e-06, + "loss": 0.4924, + "step": 6250 + }, + { + "epoch": 0.28291468658067437, + "grad_norm": 0.6649641453862327, + "learning_rate": 8.414884433261712e-06, + "loss": 0.4092, + "step": 6251 + }, + { + "epoch": 0.2829599456890699, + "grad_norm": 0.7043728882105145, + "learning_rate": 8.414349041757895e-06, + "loss": 0.3647, + "step": 6252 + }, + { + "epoch": 0.2830052047974655, + "grad_norm": 0.7906797670385073, + "learning_rate": 8.4138135768902e-06, + "loss": 0.3976, + "step": 6253 + }, + { + "epoch": 0.283050463905861, + "grad_norm": 0.33427530855867776, + "learning_rate": 8.413278038670137e-06, + "loss": 0.4822, + "step": 6254 + }, + { + "epoch": 0.2830957230142566, + "grad_norm": 0.6971907768403794, + "learning_rate": 8.412742427109211e-06, + "loss": 0.3494, + "step": 6255 + }, + { + "epoch": 0.2831409821226522, + "grad_norm": 0.6501227087395217, + "learning_rate": 8.41220674221893e-06, + "loss": 0.3777, + "step": 6256 + }, + { + "epoch": 0.28318624123104774, + "grad_norm": 0.711428274830069, + "learning_rate": 8.41167098401081e-06, + "loss": 0.3844, + "step": 6257 + }, + { + "epoch": 0.28323150033944333, + "grad_norm": 0.6728050850151719, + "learning_rate": 8.411135152496357e-06, + "loss": 0.3792, + "step": 6258 + }, + { + "epoch": 0.28327675944783887, + "grad_norm": 0.6833250711380504, + "learning_rate": 8.410599247687085e-06, + "loss": 0.3719, + "step": 6259 + }, + { + "epoch": 0.28332201855623446, + "grad_norm": 0.6669147987584385, + "learning_rate": 8.41006326959451e-06, + "loss": 0.3496, + "step": 6260 + }, + { + "epoch": 0.28336727766463, + "grad_norm": 0.6839235194108939, + "learning_rate": 8.409527218230152e-06, + "loss": 0.3865, + "step": 6261 + }, + { + "epoch": 0.2834125367730256, + "grad_norm": 0.6959276699222522, + "learning_rate": 8.408991093605524e-06, + "loss": 0.3195, + "step": 6262 + }, + { + "epoch": 0.2834577958814211, + "grad_norm": 0.6382203905796654, + "learning_rate": 8.408454895732146e-06, + "loss": 0.3427, + "step": 6263 + }, + { + "epoch": 0.2835030549898167, + "grad_norm": 0.6391687745761867, + "learning_rate": 8.40791862462154e-06, + "loss": 0.3376, + "step": 6264 + }, + { + "epoch": 0.28354831409821224, + "grad_norm": 0.6564862085170919, + "learning_rate": 8.407382280285231e-06, + "loss": 0.388, + "step": 6265 + }, + { + "epoch": 0.28359357320660783, + "grad_norm": 0.6727667578398724, + "learning_rate": 8.406845862734741e-06, + "loss": 0.3607, + "step": 6266 + }, + { + "epoch": 0.2836388323150034, + "grad_norm": 0.6915184834363046, + "learning_rate": 8.406309371981597e-06, + "loss": 0.3528, + "step": 6267 + }, + { + "epoch": 0.28368409142339895, + "grad_norm": 0.6171206668536895, + "learning_rate": 8.405772808037326e-06, + "loss": 0.4959, + "step": 6268 + }, + { + "epoch": 0.28372935053179454, + "grad_norm": 0.6642918766150323, + "learning_rate": 8.405236170913458e-06, + "loss": 0.3933, + "step": 6269 + }, + { + "epoch": 0.2837746096401901, + "grad_norm": 0.44063677262443796, + "learning_rate": 8.404699460621523e-06, + "loss": 0.5106, + "step": 6270 + }, + { + "epoch": 0.28381986874858567, + "grad_norm": 0.6525737155916631, + "learning_rate": 8.404162677173052e-06, + "loss": 0.3706, + "step": 6271 + }, + { + "epoch": 0.2838651278569812, + "grad_norm": 0.6395490185811372, + "learning_rate": 8.403625820579582e-06, + "loss": 0.3579, + "step": 6272 + }, + { + "epoch": 0.2839103869653768, + "grad_norm": 0.6688331079105485, + "learning_rate": 8.403088890852646e-06, + "loss": 0.3629, + "step": 6273 + }, + { + "epoch": 0.2839556460737723, + "grad_norm": 0.6238772693835692, + "learning_rate": 8.402551888003781e-06, + "loss": 0.3402, + "step": 6274 + }, + { + "epoch": 0.2840009051821679, + "grad_norm": 0.6460567022078382, + "learning_rate": 8.402014812044525e-06, + "loss": 0.4083, + "step": 6275 + }, + { + "epoch": 0.28404616429056345, + "grad_norm": 0.6865228006752404, + "learning_rate": 8.401477662986421e-06, + "loss": 0.4873, + "step": 6276 + }, + { + "epoch": 0.28409142339895904, + "grad_norm": 0.7456566077740172, + "learning_rate": 8.400940440841008e-06, + "loss": 0.3645, + "step": 6277 + }, + { + "epoch": 0.28413668250735463, + "grad_norm": 0.6988846647845834, + "learning_rate": 8.40040314561983e-06, + "loss": 0.411, + "step": 6278 + }, + { + "epoch": 0.28418194161575017, + "grad_norm": 0.9361045391555585, + "learning_rate": 8.399865777334435e-06, + "loss": 0.3513, + "step": 6279 + }, + { + "epoch": 0.28422720072414576, + "grad_norm": 0.751607817915409, + "learning_rate": 8.399328335996362e-06, + "loss": 0.3572, + "step": 6280 + }, + { + "epoch": 0.2842724598325413, + "grad_norm": 0.6749108112927008, + "learning_rate": 8.398790821617166e-06, + "loss": 0.3846, + "step": 6281 + }, + { + "epoch": 0.2843177189409369, + "grad_norm": 0.7128668589052166, + "learning_rate": 8.398253234208391e-06, + "loss": 0.3705, + "step": 6282 + }, + { + "epoch": 0.2843629780493324, + "grad_norm": 0.6235231061493672, + "learning_rate": 8.397715573781596e-06, + "loss": 0.3688, + "step": 6283 + }, + { + "epoch": 0.284408237157728, + "grad_norm": 0.7008898344484392, + "learning_rate": 8.397177840348323e-06, + "loss": 0.3399, + "step": 6284 + }, + { + "epoch": 0.28445349626612354, + "grad_norm": 0.43211295832271546, + "learning_rate": 8.396640033920135e-06, + "loss": 0.4725, + "step": 6285 + }, + { + "epoch": 0.28449875537451913, + "grad_norm": 0.6773046329502455, + "learning_rate": 8.396102154508584e-06, + "loss": 0.3979, + "step": 6286 + }, + { + "epoch": 0.28454401448291466, + "grad_norm": 0.6202231828662841, + "learning_rate": 8.395564202125229e-06, + "loss": 0.4042, + "step": 6287 + }, + { + "epoch": 0.28458927359131025, + "grad_norm": 0.6785238184624034, + "learning_rate": 8.395026176781627e-06, + "loss": 0.3194, + "step": 6288 + }, + { + "epoch": 0.2846345326997058, + "grad_norm": 0.663363406948093, + "learning_rate": 8.394488078489339e-06, + "loss": 0.3632, + "step": 6289 + }, + { + "epoch": 0.2846797918081014, + "grad_norm": 0.656510124986166, + "learning_rate": 8.393949907259927e-06, + "loss": 0.3984, + "step": 6290 + }, + { + "epoch": 0.28472505091649697, + "grad_norm": 0.6625844048352743, + "learning_rate": 8.393411663104957e-06, + "loss": 0.3573, + "step": 6291 + }, + { + "epoch": 0.2847703100248925, + "grad_norm": 0.6682820561545953, + "learning_rate": 8.392873346035992e-06, + "loss": 0.3627, + "step": 6292 + }, + { + "epoch": 0.2848155691332881, + "grad_norm": 0.6039770807957126, + "learning_rate": 8.392334956064598e-06, + "loss": 0.3614, + "step": 6293 + }, + { + "epoch": 0.28486082824168363, + "grad_norm": 0.6601147390295486, + "learning_rate": 8.391796493202346e-06, + "loss": 0.3419, + "step": 6294 + }, + { + "epoch": 0.2849060873500792, + "grad_norm": 0.6409892616716775, + "learning_rate": 8.391257957460803e-06, + "loss": 0.377, + "step": 6295 + }, + { + "epoch": 0.28495134645847475, + "grad_norm": 0.6090673283201767, + "learning_rate": 8.390719348851544e-06, + "loss": 0.3615, + "step": 6296 + }, + { + "epoch": 0.28499660556687034, + "grad_norm": 0.6125879242381763, + "learning_rate": 8.390180667386138e-06, + "loss": 0.3521, + "step": 6297 + }, + { + "epoch": 0.2850418646752659, + "grad_norm": 0.7012367365291511, + "learning_rate": 8.389641913076163e-06, + "loss": 0.3305, + "step": 6298 + }, + { + "epoch": 0.28508712378366147, + "grad_norm": 0.6452780407011524, + "learning_rate": 8.389103085933192e-06, + "loss": 0.3889, + "step": 6299 + }, + { + "epoch": 0.285132382892057, + "grad_norm": 0.4827058990269452, + "learning_rate": 8.388564185968805e-06, + "loss": 0.5188, + "step": 6300 + }, + { + "epoch": 0.2851776420004526, + "grad_norm": 0.6965575404225468, + "learning_rate": 8.388025213194585e-06, + "loss": 0.368, + "step": 6301 + }, + { + "epoch": 0.2852229011088482, + "grad_norm": 0.7127863519099163, + "learning_rate": 8.387486167622103e-06, + "loss": 0.4024, + "step": 6302 + }, + { + "epoch": 0.2852681602172437, + "grad_norm": 0.3250995844318269, + "learning_rate": 8.38694704926295e-06, + "loss": 0.5249, + "step": 6303 + }, + { + "epoch": 0.2853134193256393, + "grad_norm": 0.6735640437482957, + "learning_rate": 8.386407858128707e-06, + "loss": 0.3605, + "step": 6304 + }, + { + "epoch": 0.28535867843403484, + "grad_norm": 0.3165298941877711, + "learning_rate": 8.385868594230958e-06, + "loss": 0.5164, + "step": 6305 + }, + { + "epoch": 0.28540393754243043, + "grad_norm": 0.6564690706710307, + "learning_rate": 8.385329257581295e-06, + "loss": 0.3368, + "step": 6306 + }, + { + "epoch": 0.28544919665082596, + "grad_norm": 0.6489326868070335, + "learning_rate": 8.3847898481913e-06, + "loss": 0.3394, + "step": 6307 + }, + { + "epoch": 0.28549445575922155, + "grad_norm": 0.40721945360601186, + "learning_rate": 8.384250366072568e-06, + "loss": 0.5119, + "step": 6308 + }, + { + "epoch": 0.2855397148676171, + "grad_norm": 0.33191010690961714, + "learning_rate": 8.38371081123669e-06, + "loss": 0.4886, + "step": 6309 + }, + { + "epoch": 0.2855849739760127, + "grad_norm": 0.7339684742337894, + "learning_rate": 8.383171183695258e-06, + "loss": 0.3459, + "step": 6310 + }, + { + "epoch": 0.2856302330844082, + "grad_norm": 0.7711528963712407, + "learning_rate": 8.382631483459869e-06, + "loss": 0.4392, + "step": 6311 + }, + { + "epoch": 0.2856754921928038, + "grad_norm": 0.6589848254857321, + "learning_rate": 8.382091710542118e-06, + "loss": 0.3706, + "step": 6312 + }, + { + "epoch": 0.2857207513011994, + "grad_norm": 0.6757671235326732, + "learning_rate": 8.381551864953603e-06, + "loss": 0.3859, + "step": 6313 + }, + { + "epoch": 0.28576601040959493, + "grad_norm": 0.449617795828226, + "learning_rate": 8.381011946705926e-06, + "loss": 0.5095, + "step": 6314 + }, + { + "epoch": 0.2858112695179905, + "grad_norm": 0.7213903453427956, + "learning_rate": 8.380471955810685e-06, + "loss": 0.3896, + "step": 6315 + }, + { + "epoch": 0.28585652862638605, + "grad_norm": 0.6514003171057873, + "learning_rate": 8.379931892279483e-06, + "loss": 0.3962, + "step": 6316 + }, + { + "epoch": 0.28590178773478164, + "grad_norm": 0.7085896477424691, + "learning_rate": 8.379391756123927e-06, + "loss": 0.366, + "step": 6317 + }, + { + "epoch": 0.2859470468431772, + "grad_norm": 0.6803649542757192, + "learning_rate": 8.37885154735562e-06, + "loss": 0.3723, + "step": 6318 + }, + { + "epoch": 0.28599230595157277, + "grad_norm": 1.2716815933415464, + "learning_rate": 8.37831126598617e-06, + "loss": 0.373, + "step": 6319 + }, + { + "epoch": 0.2860375650599683, + "grad_norm": 0.6554974108933356, + "learning_rate": 8.377770912027187e-06, + "loss": 0.4031, + "step": 6320 + }, + { + "epoch": 0.2860828241683639, + "grad_norm": 0.7453757586179238, + "learning_rate": 8.377230485490282e-06, + "loss": 0.3675, + "step": 6321 + }, + { + "epoch": 0.2861280832767594, + "grad_norm": 0.7501007963538981, + "learning_rate": 8.376689986387066e-06, + "loss": 0.3803, + "step": 6322 + }, + { + "epoch": 0.286173342385155, + "grad_norm": 0.6447615198903361, + "learning_rate": 8.376149414729154e-06, + "loss": 0.3433, + "step": 6323 + }, + { + "epoch": 0.28621860149355055, + "grad_norm": 0.7109482396314323, + "learning_rate": 8.375608770528157e-06, + "loss": 0.3758, + "step": 6324 + }, + { + "epoch": 0.28626386060194614, + "grad_norm": 0.3596396620662271, + "learning_rate": 8.375068053795697e-06, + "loss": 0.4917, + "step": 6325 + }, + { + "epoch": 0.28630911971034173, + "grad_norm": 0.3406801314012656, + "learning_rate": 8.37452726454339e-06, + "loss": 0.4738, + "step": 6326 + }, + { + "epoch": 0.28635437881873727, + "grad_norm": 0.6914537614782104, + "learning_rate": 8.373986402782857e-06, + "loss": 0.3971, + "step": 6327 + }, + { + "epoch": 0.28639963792713286, + "grad_norm": 0.6957523570372517, + "learning_rate": 8.373445468525719e-06, + "loss": 0.3774, + "step": 6328 + }, + { + "epoch": 0.2864448970355284, + "grad_norm": 0.6690897788009853, + "learning_rate": 8.372904461783596e-06, + "loss": 0.3812, + "step": 6329 + }, + { + "epoch": 0.286490156143924, + "grad_norm": 0.3872804952218078, + "learning_rate": 8.372363382568116e-06, + "loss": 0.5283, + "step": 6330 + }, + { + "epoch": 0.2865354152523195, + "grad_norm": 0.7496446064848421, + "learning_rate": 8.371822230890905e-06, + "loss": 0.3606, + "step": 6331 + }, + { + "epoch": 0.2865806743607151, + "grad_norm": 0.754423666023919, + "learning_rate": 8.371281006763589e-06, + "loss": 0.4128, + "step": 6332 + }, + { + "epoch": 0.28662593346911064, + "grad_norm": 0.6736836893255465, + "learning_rate": 8.3707397101978e-06, + "loss": 0.3817, + "step": 6333 + }, + { + "epoch": 0.28667119257750623, + "grad_norm": 0.6353507976696714, + "learning_rate": 8.370198341205167e-06, + "loss": 0.3441, + "step": 6334 + }, + { + "epoch": 0.28671645168590176, + "grad_norm": 0.40956676546522564, + "learning_rate": 8.36965689979732e-06, + "loss": 0.5078, + "step": 6335 + }, + { + "epoch": 0.28676171079429735, + "grad_norm": 0.740988659745703, + "learning_rate": 8.369115385985897e-06, + "loss": 0.3762, + "step": 6336 + }, + { + "epoch": 0.28680696990269294, + "grad_norm": 0.7073140874733087, + "learning_rate": 8.368573799782533e-06, + "loss": 0.4027, + "step": 6337 + }, + { + "epoch": 0.2868522290110885, + "grad_norm": 0.6412217710564962, + "learning_rate": 8.368032141198864e-06, + "loss": 0.3663, + "step": 6338 + }, + { + "epoch": 0.28689748811948407, + "grad_norm": 0.6852482683651501, + "learning_rate": 8.367490410246525e-06, + "loss": 0.3905, + "step": 6339 + }, + { + "epoch": 0.2869427472278796, + "grad_norm": 0.6830362986429956, + "learning_rate": 8.366948606937161e-06, + "loss": 0.3932, + "step": 6340 + }, + { + "epoch": 0.2869880063362752, + "grad_norm": 0.6531315592813901, + "learning_rate": 8.366406731282415e-06, + "loss": 0.3571, + "step": 6341 + }, + { + "epoch": 0.2870332654446707, + "grad_norm": 0.6840745659773954, + "learning_rate": 8.365864783293925e-06, + "loss": 0.3852, + "step": 6342 + }, + { + "epoch": 0.2870785245530663, + "grad_norm": 0.7243044316163573, + "learning_rate": 8.36532276298334e-06, + "loss": 0.3673, + "step": 6343 + }, + { + "epoch": 0.28712378366146185, + "grad_norm": 0.3596143601853733, + "learning_rate": 8.364780670362302e-06, + "loss": 0.4866, + "step": 6344 + }, + { + "epoch": 0.28716904276985744, + "grad_norm": 0.3369326536107606, + "learning_rate": 8.364238505442462e-06, + "loss": 0.4754, + "step": 6345 + }, + { + "epoch": 0.287214301878253, + "grad_norm": 0.7430363453770691, + "learning_rate": 8.36369626823547e-06, + "loss": 0.4005, + "step": 6346 + }, + { + "epoch": 0.28725956098664857, + "grad_norm": 0.6433857629537567, + "learning_rate": 8.363153958752976e-06, + "loss": 0.382, + "step": 6347 + }, + { + "epoch": 0.2873048200950441, + "grad_norm": 0.6350133465496026, + "learning_rate": 8.362611577006632e-06, + "loss": 0.3565, + "step": 6348 + }, + { + "epoch": 0.2873500792034397, + "grad_norm": 0.6169981326762816, + "learning_rate": 8.362069123008092e-06, + "loss": 0.3779, + "step": 6349 + }, + { + "epoch": 0.2873953383118353, + "grad_norm": 0.45227784655374936, + "learning_rate": 8.361526596769013e-06, + "loss": 0.4995, + "step": 6350 + }, + { + "epoch": 0.2874405974202308, + "grad_norm": 0.9147592151032556, + "learning_rate": 8.360983998301053e-06, + "loss": 0.3873, + "step": 6351 + }, + { + "epoch": 0.2874858565286264, + "grad_norm": 0.6547204163805698, + "learning_rate": 8.360441327615868e-06, + "loss": 0.3806, + "step": 6352 + }, + { + "epoch": 0.28753111563702194, + "grad_norm": 0.7098358675605005, + "learning_rate": 8.35989858472512e-06, + "loss": 0.3651, + "step": 6353 + }, + { + "epoch": 0.28757637474541753, + "grad_norm": 0.7335097713307398, + "learning_rate": 8.359355769640472e-06, + "loss": 0.3561, + "step": 6354 + }, + { + "epoch": 0.28762163385381306, + "grad_norm": 0.7783977190478998, + "learning_rate": 8.358812882373584e-06, + "loss": 0.3531, + "step": 6355 + }, + { + "epoch": 0.28766689296220865, + "grad_norm": 0.6820434180148108, + "learning_rate": 8.358269922936121e-06, + "loss": 0.3537, + "step": 6356 + }, + { + "epoch": 0.2877121520706042, + "grad_norm": 0.6367159751994629, + "learning_rate": 8.357726891339756e-06, + "loss": 0.3764, + "step": 6357 + }, + { + "epoch": 0.2877574111789998, + "grad_norm": 0.7413979855286208, + "learning_rate": 8.357183787596151e-06, + "loss": 0.3354, + "step": 6358 + }, + { + "epoch": 0.2878026702873953, + "grad_norm": 0.7893411212943653, + "learning_rate": 8.356640611716976e-06, + "loss": 0.3576, + "step": 6359 + }, + { + "epoch": 0.2878479293957909, + "grad_norm": 0.6973211159466143, + "learning_rate": 8.356097363713904e-06, + "loss": 0.3546, + "step": 6360 + }, + { + "epoch": 0.2878931885041865, + "grad_norm": 0.6283123119027827, + "learning_rate": 8.355554043598608e-06, + "loss": 0.3581, + "step": 6361 + }, + { + "epoch": 0.287938447612582, + "grad_norm": 0.3994029621696445, + "learning_rate": 8.35501065138276e-06, + "loss": 0.4921, + "step": 6362 + }, + { + "epoch": 0.2879837067209776, + "grad_norm": 0.35484264290434336, + "learning_rate": 8.354467187078037e-06, + "loss": 0.5089, + "step": 6363 + }, + { + "epoch": 0.28802896582937315, + "grad_norm": 0.8314561846376063, + "learning_rate": 8.353923650696119e-06, + "loss": 0.3948, + "step": 6364 + }, + { + "epoch": 0.28807422493776874, + "grad_norm": 0.33738649343966226, + "learning_rate": 8.35338004224868e-06, + "loss": 0.5093, + "step": 6365 + }, + { + "epoch": 0.2881194840461643, + "grad_norm": 0.6726203777236593, + "learning_rate": 8.352836361747403e-06, + "loss": 0.4174, + "step": 6366 + }, + { + "epoch": 0.28816474315455987, + "grad_norm": 0.6518077292485198, + "learning_rate": 8.352292609203973e-06, + "loss": 0.3607, + "step": 6367 + }, + { + "epoch": 0.2882100022629554, + "grad_norm": 0.6762160616346438, + "learning_rate": 8.351748784630068e-06, + "loss": 0.3921, + "step": 6368 + }, + { + "epoch": 0.288255261371351, + "grad_norm": 0.6930960439817764, + "learning_rate": 8.351204888037377e-06, + "loss": 0.4079, + "step": 6369 + }, + { + "epoch": 0.2883005204797465, + "grad_norm": 0.6448827108519407, + "learning_rate": 8.350660919437585e-06, + "loss": 0.32, + "step": 6370 + }, + { + "epoch": 0.2883457795881421, + "grad_norm": 0.6114651259864918, + "learning_rate": 8.350116878842379e-06, + "loss": 0.3314, + "step": 6371 + }, + { + "epoch": 0.2883910386965377, + "grad_norm": 0.6361346844899176, + "learning_rate": 8.349572766263452e-06, + "loss": 0.3434, + "step": 6372 + }, + { + "epoch": 0.28843629780493324, + "grad_norm": 0.6258771935514704, + "learning_rate": 8.349028581712493e-06, + "loss": 0.3732, + "step": 6373 + }, + { + "epoch": 0.28848155691332883, + "grad_norm": 0.6344175862930747, + "learning_rate": 8.348484325201196e-06, + "loss": 0.3555, + "step": 6374 + }, + { + "epoch": 0.28852681602172436, + "grad_norm": 0.6512433095639879, + "learning_rate": 8.347939996741255e-06, + "loss": 0.3597, + "step": 6375 + }, + { + "epoch": 0.28857207513011995, + "grad_norm": 0.6347448345003477, + "learning_rate": 8.347395596344365e-06, + "loss": 0.3672, + "step": 6376 + }, + { + "epoch": 0.2886173342385155, + "grad_norm": 0.7352586053277483, + "learning_rate": 8.346851124022226e-06, + "loss": 0.3458, + "step": 6377 + }, + { + "epoch": 0.2886625933469111, + "grad_norm": 0.6173551112383586, + "learning_rate": 8.346306579786536e-06, + "loss": 0.3825, + "step": 6378 + }, + { + "epoch": 0.2887078524553066, + "grad_norm": 0.7178480662247867, + "learning_rate": 8.345761963648993e-06, + "loss": 0.3867, + "step": 6379 + }, + { + "epoch": 0.2887531115637022, + "grad_norm": 0.6351496353698858, + "learning_rate": 8.345217275621303e-06, + "loss": 0.3606, + "step": 6380 + }, + { + "epoch": 0.28879837067209774, + "grad_norm": 0.6624551672993362, + "learning_rate": 8.344672515715165e-06, + "loss": 0.3461, + "step": 6381 + }, + { + "epoch": 0.2888436297804933, + "grad_norm": 0.650563289492609, + "learning_rate": 8.344127683942289e-06, + "loss": 0.4002, + "step": 6382 + }, + { + "epoch": 0.28888888888888886, + "grad_norm": 0.6078861028555593, + "learning_rate": 8.34358278031438e-06, + "loss": 0.3572, + "step": 6383 + }, + { + "epoch": 0.28893414799728445, + "grad_norm": 0.6120498255653158, + "learning_rate": 8.343037804843143e-06, + "loss": 0.3589, + "step": 6384 + }, + { + "epoch": 0.28897940710568004, + "grad_norm": 0.6481854290974546, + "learning_rate": 8.342492757540294e-06, + "loss": 0.3579, + "step": 6385 + }, + { + "epoch": 0.2890246662140756, + "grad_norm": 0.5468373204491022, + "learning_rate": 8.34194763841754e-06, + "loss": 0.4952, + "step": 6386 + }, + { + "epoch": 0.28906992532247117, + "grad_norm": 0.7190267617607462, + "learning_rate": 8.341402447486598e-06, + "loss": 0.4117, + "step": 6387 + }, + { + "epoch": 0.2891151844308667, + "grad_norm": 0.6336260112629661, + "learning_rate": 8.340857184759178e-06, + "loss": 0.3373, + "step": 6388 + }, + { + "epoch": 0.2891604435392623, + "grad_norm": 0.6630374397436601, + "learning_rate": 8.340311850246996e-06, + "loss": 0.3619, + "step": 6389 + }, + { + "epoch": 0.2892057026476578, + "grad_norm": 0.7371034281394628, + "learning_rate": 8.339766443961772e-06, + "loss": 0.3494, + "step": 6390 + }, + { + "epoch": 0.2892509617560534, + "grad_norm": 0.6538041235259925, + "learning_rate": 8.339220965915227e-06, + "loss": 0.3795, + "step": 6391 + }, + { + "epoch": 0.28929622086444895, + "grad_norm": 0.38119731417128494, + "learning_rate": 8.338675416119076e-06, + "loss": 0.4796, + "step": 6392 + }, + { + "epoch": 0.28934147997284454, + "grad_norm": 0.726785165671723, + "learning_rate": 8.338129794585047e-06, + "loss": 0.3564, + "step": 6393 + }, + { + "epoch": 0.2893867390812401, + "grad_norm": 0.6533301961414332, + "learning_rate": 8.337584101324859e-06, + "loss": 0.3779, + "step": 6394 + }, + { + "epoch": 0.28943199818963566, + "grad_norm": 0.6282052343937943, + "learning_rate": 8.337038336350238e-06, + "loss": 0.3447, + "step": 6395 + }, + { + "epoch": 0.28947725729803125, + "grad_norm": 0.6293806472535389, + "learning_rate": 8.336492499672915e-06, + "loss": 0.3495, + "step": 6396 + }, + { + "epoch": 0.2895225164064268, + "grad_norm": 0.5981108898546699, + "learning_rate": 8.335946591304614e-06, + "loss": 0.3557, + "step": 6397 + }, + { + "epoch": 0.2895677755148224, + "grad_norm": 0.6777571348756583, + "learning_rate": 8.335400611257067e-06, + "loss": 0.4098, + "step": 6398 + }, + { + "epoch": 0.2896130346232179, + "grad_norm": 0.3942290080698544, + "learning_rate": 8.334854559542004e-06, + "loss": 0.5415, + "step": 6399 + }, + { + "epoch": 0.2896582937316135, + "grad_norm": 0.6961096511585597, + "learning_rate": 8.334308436171159e-06, + "loss": 0.4219, + "step": 6400 + }, + { + "epoch": 0.28970355284000904, + "grad_norm": 0.6754681406841953, + "learning_rate": 8.333762241156268e-06, + "loss": 0.3602, + "step": 6401 + }, + { + "epoch": 0.2897488119484046, + "grad_norm": 0.7790640942022196, + "learning_rate": 8.33321597450906e-06, + "loss": 0.3734, + "step": 6402 + }, + { + "epoch": 0.28979407105680016, + "grad_norm": 0.31179722449465863, + "learning_rate": 8.332669636241284e-06, + "loss": 0.4947, + "step": 6403 + }, + { + "epoch": 0.28983933016519575, + "grad_norm": 0.32547128381188095, + "learning_rate": 8.33212322636467e-06, + "loss": 0.5166, + "step": 6404 + }, + { + "epoch": 0.2898845892735913, + "grad_norm": 0.6859148528664963, + "learning_rate": 8.331576744890963e-06, + "loss": 0.4074, + "step": 6405 + }, + { + "epoch": 0.2899298483819869, + "grad_norm": 0.7084680498510506, + "learning_rate": 8.331030191831904e-06, + "loss": 0.3552, + "step": 6406 + }, + { + "epoch": 0.28997510749038247, + "grad_norm": 0.29201909746466553, + "learning_rate": 8.330483567199234e-06, + "loss": 0.4949, + "step": 6407 + }, + { + "epoch": 0.290020366598778, + "grad_norm": 0.30213810850562034, + "learning_rate": 8.329936871004703e-06, + "loss": 0.4922, + "step": 6408 + }, + { + "epoch": 0.2900656257071736, + "grad_norm": 0.6708127394690835, + "learning_rate": 8.329390103260057e-06, + "loss": 0.3748, + "step": 6409 + }, + { + "epoch": 0.2901108848155691, + "grad_norm": 0.7416775574171741, + "learning_rate": 8.32884326397704e-06, + "loss": 0.3585, + "step": 6410 + }, + { + "epoch": 0.2901561439239647, + "grad_norm": 0.5993879488501103, + "learning_rate": 8.328296353167408e-06, + "loss": 0.3609, + "step": 6411 + }, + { + "epoch": 0.29020140303236025, + "grad_norm": 0.6371880564391219, + "learning_rate": 8.327749370842909e-06, + "loss": 0.3863, + "step": 6412 + }, + { + "epoch": 0.29024666214075584, + "grad_norm": 0.618451111883719, + "learning_rate": 8.327202317015295e-06, + "loss": 0.353, + "step": 6413 + }, + { + "epoch": 0.2902919212491514, + "grad_norm": 0.6249599044660648, + "learning_rate": 8.326655191696322e-06, + "loss": 0.4184, + "step": 6414 + }, + { + "epoch": 0.29033718035754696, + "grad_norm": 0.34375589339921275, + "learning_rate": 8.326107994897748e-06, + "loss": 0.4931, + "step": 6415 + }, + { + "epoch": 0.2903824394659425, + "grad_norm": 0.6602548060548115, + "learning_rate": 8.325560726631325e-06, + "loss": 0.3652, + "step": 6416 + }, + { + "epoch": 0.2904276985743381, + "grad_norm": 0.33527913002483645, + "learning_rate": 8.325013386908817e-06, + "loss": 0.4795, + "step": 6417 + }, + { + "epoch": 0.2904729576827336, + "grad_norm": 0.6547073505960216, + "learning_rate": 8.324465975741986e-06, + "loss": 0.3442, + "step": 6418 + }, + { + "epoch": 0.2905182167911292, + "grad_norm": 0.6652996596938084, + "learning_rate": 8.323918493142588e-06, + "loss": 0.3898, + "step": 6419 + }, + { + "epoch": 0.2905634758995248, + "grad_norm": 0.64028964625868, + "learning_rate": 8.323370939122393e-06, + "loss": 0.3706, + "step": 6420 + }, + { + "epoch": 0.29060873500792034, + "grad_norm": 0.680854532340238, + "learning_rate": 8.322823313693162e-06, + "loss": 0.3617, + "step": 6421 + }, + { + "epoch": 0.29065399411631593, + "grad_norm": 0.6684202593248905, + "learning_rate": 8.322275616866663e-06, + "loss": 0.4334, + "step": 6422 + }, + { + "epoch": 0.29069925322471146, + "grad_norm": 0.5962055859753047, + "learning_rate": 8.321727848654666e-06, + "loss": 0.3495, + "step": 6423 + }, + { + "epoch": 0.29074451233310705, + "grad_norm": 0.6619552340244278, + "learning_rate": 8.321180009068937e-06, + "loss": 0.4093, + "step": 6424 + }, + { + "epoch": 0.2907897714415026, + "grad_norm": 0.7488039229952417, + "learning_rate": 8.320632098121253e-06, + "loss": 0.3796, + "step": 6425 + }, + { + "epoch": 0.2908350305498982, + "grad_norm": 0.6722660190015518, + "learning_rate": 8.320084115823382e-06, + "loss": 0.3798, + "step": 6426 + }, + { + "epoch": 0.2908802896582937, + "grad_norm": 0.6643333815944632, + "learning_rate": 8.3195360621871e-06, + "loss": 0.3934, + "step": 6427 + }, + { + "epoch": 0.2909255487666893, + "grad_norm": 0.5847662005979445, + "learning_rate": 8.318987937224183e-06, + "loss": 0.3645, + "step": 6428 + }, + { + "epoch": 0.29097080787508484, + "grad_norm": 0.7669998563625238, + "learning_rate": 8.318439740946409e-06, + "loss": 0.4105, + "step": 6429 + }, + { + "epoch": 0.2910160669834804, + "grad_norm": 0.5310405439597425, + "learning_rate": 8.317891473365558e-06, + "loss": 0.4751, + "step": 6430 + }, + { + "epoch": 0.291061326091876, + "grad_norm": 0.6261449752730862, + "learning_rate": 8.317343134493408e-06, + "loss": 0.332, + "step": 6431 + }, + { + "epoch": 0.29110658520027155, + "grad_norm": 0.6987428389371922, + "learning_rate": 8.316794724341743e-06, + "loss": 0.3862, + "step": 6432 + }, + { + "epoch": 0.29115184430866714, + "grad_norm": 0.6655812586635537, + "learning_rate": 8.316246242922345e-06, + "loss": 0.3647, + "step": 6433 + }, + { + "epoch": 0.2911971034170627, + "grad_norm": 0.6048388635137149, + "learning_rate": 8.315697690247002e-06, + "loss": 0.3789, + "step": 6434 + }, + { + "epoch": 0.29124236252545826, + "grad_norm": 0.41803567170471573, + "learning_rate": 8.315149066327498e-06, + "loss": 0.5103, + "step": 6435 + }, + { + "epoch": 0.2912876216338538, + "grad_norm": 0.40434775094018255, + "learning_rate": 8.314600371175623e-06, + "loss": 0.5231, + "step": 6436 + }, + { + "epoch": 0.2913328807422494, + "grad_norm": 0.6869531963848231, + "learning_rate": 8.314051604803164e-06, + "loss": 0.3729, + "step": 6437 + }, + { + "epoch": 0.2913781398506449, + "grad_norm": 0.29570610897397354, + "learning_rate": 8.313502767221916e-06, + "loss": 0.4862, + "step": 6438 + }, + { + "epoch": 0.2914233989590405, + "grad_norm": 0.31598816349430026, + "learning_rate": 8.312953858443672e-06, + "loss": 0.4805, + "step": 6439 + }, + { + "epoch": 0.29146865806743605, + "grad_norm": 0.80407284756795, + "learning_rate": 8.312404878480222e-06, + "loss": 0.3466, + "step": 6440 + }, + { + "epoch": 0.29151391717583164, + "grad_norm": 0.6493079758709581, + "learning_rate": 8.311855827343364e-06, + "loss": 0.4, + "step": 6441 + }, + { + "epoch": 0.29155917628422723, + "grad_norm": 0.6575077859486658, + "learning_rate": 8.311306705044898e-06, + "loss": 0.4026, + "step": 6442 + }, + { + "epoch": 0.29160443539262276, + "grad_norm": 0.6641962626839024, + "learning_rate": 8.31075751159662e-06, + "loss": 0.3636, + "step": 6443 + }, + { + "epoch": 0.29164969450101835, + "grad_norm": 0.66221788099013, + "learning_rate": 8.310208247010331e-06, + "loss": 0.3959, + "step": 6444 + }, + { + "epoch": 0.2916949536094139, + "grad_norm": 0.6939365180392555, + "learning_rate": 8.309658911297833e-06, + "loss": 0.3597, + "step": 6445 + }, + { + "epoch": 0.2917402127178095, + "grad_norm": 0.4297312805988011, + "learning_rate": 8.309109504470932e-06, + "loss": 0.4985, + "step": 6446 + }, + { + "epoch": 0.291785471826205, + "grad_norm": 0.6415743882962217, + "learning_rate": 8.308560026541428e-06, + "loss": 0.3852, + "step": 6447 + }, + { + "epoch": 0.2918307309346006, + "grad_norm": 0.6544798763209472, + "learning_rate": 8.30801047752113e-06, + "loss": 0.3674, + "step": 6448 + }, + { + "epoch": 0.29187599004299614, + "grad_norm": 0.6687801130362593, + "learning_rate": 8.307460857421849e-06, + "loss": 0.3697, + "step": 6449 + }, + { + "epoch": 0.2919212491513917, + "grad_norm": 0.6429403857604996, + "learning_rate": 8.306911166255392e-06, + "loss": 0.3401, + "step": 6450 + }, + { + "epoch": 0.29196650825978726, + "grad_norm": 0.5829187355129206, + "learning_rate": 8.306361404033571e-06, + "loss": 0.3139, + "step": 6451 + }, + { + "epoch": 0.29201176736818285, + "grad_norm": 0.6256295801055626, + "learning_rate": 8.305811570768196e-06, + "loss": 0.3806, + "step": 6452 + }, + { + "epoch": 0.2920570264765784, + "grad_norm": 0.656436203896976, + "learning_rate": 8.305261666471085e-06, + "loss": 0.3926, + "step": 6453 + }, + { + "epoch": 0.292102285584974, + "grad_norm": 0.6134172488872648, + "learning_rate": 8.304711691154052e-06, + "loss": 0.3953, + "step": 6454 + }, + { + "epoch": 0.29214754469336957, + "grad_norm": 0.38117255138407263, + "learning_rate": 8.304161644828913e-06, + "loss": 0.5043, + "step": 6455 + }, + { + "epoch": 0.2921928038017651, + "grad_norm": 0.6713297462223095, + "learning_rate": 8.30361152750749e-06, + "loss": 0.4098, + "step": 6456 + }, + { + "epoch": 0.2922380629101607, + "grad_norm": 0.6994613343471084, + "learning_rate": 8.303061339201601e-06, + "loss": 0.3802, + "step": 6457 + }, + { + "epoch": 0.2922833220185562, + "grad_norm": 0.6373226817333996, + "learning_rate": 8.302511079923068e-06, + "loss": 0.3764, + "step": 6458 + }, + { + "epoch": 0.2923285811269518, + "grad_norm": 0.31101672311753653, + "learning_rate": 8.301960749683715e-06, + "loss": 0.5137, + "step": 6459 + }, + { + "epoch": 0.29237384023534735, + "grad_norm": 0.6661226652935572, + "learning_rate": 8.301410348495366e-06, + "loss": 0.3826, + "step": 6460 + }, + { + "epoch": 0.29241909934374294, + "grad_norm": 0.681465527925491, + "learning_rate": 8.300859876369849e-06, + "loss": 0.3283, + "step": 6461 + }, + { + "epoch": 0.2924643584521385, + "grad_norm": 0.6503583509986406, + "learning_rate": 8.300309333318992e-06, + "loss": 0.3807, + "step": 6462 + }, + { + "epoch": 0.29250961756053406, + "grad_norm": 0.35637099222950214, + "learning_rate": 8.299758719354621e-06, + "loss": 0.5103, + "step": 6463 + }, + { + "epoch": 0.2925548766689296, + "grad_norm": 0.9007426007639495, + "learning_rate": 8.299208034488571e-06, + "loss": 0.4173, + "step": 6464 + }, + { + "epoch": 0.2926001357773252, + "grad_norm": 0.8978949257967975, + "learning_rate": 8.298657278732673e-06, + "loss": 0.4247, + "step": 6465 + }, + { + "epoch": 0.2926453948857208, + "grad_norm": 0.7437470268845902, + "learning_rate": 8.298106452098761e-06, + "loss": 0.3561, + "step": 6466 + }, + { + "epoch": 0.2926906539941163, + "grad_norm": 0.674750975286095, + "learning_rate": 8.297555554598671e-06, + "loss": 0.4265, + "step": 6467 + }, + { + "epoch": 0.2927359131025119, + "grad_norm": 0.6389898615422173, + "learning_rate": 8.29700458624424e-06, + "loss": 0.3432, + "step": 6468 + }, + { + "epoch": 0.29278117221090744, + "grad_norm": 0.6119289376751083, + "learning_rate": 8.296453547047305e-06, + "loss": 0.3565, + "step": 6469 + }, + { + "epoch": 0.292826431319303, + "grad_norm": 0.6467003422732659, + "learning_rate": 8.295902437019709e-06, + "loss": 0.3678, + "step": 6470 + }, + { + "epoch": 0.29287169042769856, + "grad_norm": 0.6275114622388187, + "learning_rate": 8.295351256173292e-06, + "loss": 0.3772, + "step": 6471 + }, + { + "epoch": 0.29291694953609415, + "grad_norm": 0.6070339394501282, + "learning_rate": 8.294800004519895e-06, + "loss": 0.3339, + "step": 6472 + }, + { + "epoch": 0.2929622086444897, + "grad_norm": 0.6790702159687559, + "learning_rate": 8.294248682071369e-06, + "loss": 0.3849, + "step": 6473 + }, + { + "epoch": 0.2930074677528853, + "grad_norm": 0.6806848558424293, + "learning_rate": 8.293697288839555e-06, + "loss": 0.3665, + "step": 6474 + }, + { + "epoch": 0.2930527268612808, + "grad_norm": 0.6489635136154929, + "learning_rate": 8.293145824836302e-06, + "loss": 0.392, + "step": 6475 + }, + { + "epoch": 0.2930979859696764, + "grad_norm": 0.37044885389524723, + "learning_rate": 8.29259429007346e-06, + "loss": 0.5182, + "step": 6476 + }, + { + "epoch": 0.29314324507807193, + "grad_norm": 0.655501620363715, + "learning_rate": 8.292042684562878e-06, + "loss": 0.357, + "step": 6477 + }, + { + "epoch": 0.2931885041864675, + "grad_norm": 0.9507463591648303, + "learning_rate": 8.291491008316409e-06, + "loss": 0.3688, + "step": 6478 + }, + { + "epoch": 0.2932337632948631, + "grad_norm": 0.32278269268452087, + "learning_rate": 8.290939261345908e-06, + "loss": 0.4761, + "step": 6479 + }, + { + "epoch": 0.29327902240325865, + "grad_norm": 0.6977751345184396, + "learning_rate": 8.29038744366323e-06, + "loss": 0.3683, + "step": 6480 + }, + { + "epoch": 0.29332428151165424, + "grad_norm": 0.30990544000558706, + "learning_rate": 8.28983555528023e-06, + "loss": 0.4853, + "step": 6481 + }, + { + "epoch": 0.2933695406200498, + "grad_norm": 0.704930034295891, + "learning_rate": 8.289283596208769e-06, + "loss": 0.3804, + "step": 6482 + }, + { + "epoch": 0.29341479972844536, + "grad_norm": 0.3095176020113561, + "learning_rate": 8.288731566460706e-06, + "loss": 0.4689, + "step": 6483 + }, + { + "epoch": 0.2934600588368409, + "grad_norm": 0.6767804618120248, + "learning_rate": 8.288179466047903e-06, + "loss": 0.3578, + "step": 6484 + }, + { + "epoch": 0.2935053179452365, + "grad_norm": 0.6605536627738009, + "learning_rate": 8.28762729498222e-06, + "loss": 0.385, + "step": 6485 + }, + { + "epoch": 0.293550577053632, + "grad_norm": 0.6499909633330283, + "learning_rate": 8.287075053275527e-06, + "loss": 0.3542, + "step": 6486 + }, + { + "epoch": 0.2935958361620276, + "grad_norm": 0.6419155300510001, + "learning_rate": 8.286522740939682e-06, + "loss": 0.3944, + "step": 6487 + }, + { + "epoch": 0.29364109527042315, + "grad_norm": 0.41197650198755686, + "learning_rate": 8.285970357986559e-06, + "loss": 0.4885, + "step": 6488 + }, + { + "epoch": 0.29368635437881874, + "grad_norm": 0.6783231634374803, + "learning_rate": 8.285417904428025e-06, + "loss": 0.3763, + "step": 6489 + }, + { + "epoch": 0.2937316134872143, + "grad_norm": 0.3380209730570563, + "learning_rate": 8.284865380275953e-06, + "loss": 0.5039, + "step": 6490 + }, + { + "epoch": 0.29377687259560986, + "grad_norm": 0.6434836225109783, + "learning_rate": 8.28431278554221e-06, + "loss": 0.3606, + "step": 6491 + }, + { + "epoch": 0.29382213170400545, + "grad_norm": 0.6712871013442894, + "learning_rate": 8.283760120238672e-06, + "loss": 0.3692, + "step": 6492 + }, + { + "epoch": 0.293867390812401, + "grad_norm": 0.7502628512683776, + "learning_rate": 8.283207384377217e-06, + "loss": 0.3614, + "step": 6493 + }, + { + "epoch": 0.2939126499207966, + "grad_norm": 0.6574921802281626, + "learning_rate": 8.282654577969715e-06, + "loss": 0.3889, + "step": 6494 + }, + { + "epoch": 0.2939579090291921, + "grad_norm": 0.6436678017455617, + "learning_rate": 8.282101701028051e-06, + "loss": 0.3759, + "step": 6495 + }, + { + "epoch": 0.2940031681375877, + "grad_norm": 0.6331727763394492, + "learning_rate": 8.281548753564101e-06, + "loss": 0.3794, + "step": 6496 + }, + { + "epoch": 0.29404842724598323, + "grad_norm": 3.064791132428098, + "learning_rate": 8.280995735589748e-06, + "loss": 0.3326, + "step": 6497 + }, + { + "epoch": 0.2940936863543788, + "grad_norm": 0.7158885307535165, + "learning_rate": 8.28044264711687e-06, + "loss": 0.3618, + "step": 6498 + }, + { + "epoch": 0.29413894546277436, + "grad_norm": 0.633887155405091, + "learning_rate": 8.279889488157358e-06, + "loss": 0.3625, + "step": 6499 + }, + { + "epoch": 0.29418420457116995, + "grad_norm": 0.44873262101556294, + "learning_rate": 8.279336258723092e-06, + "loss": 0.4881, + "step": 6500 + }, + { + "epoch": 0.29422946367956554, + "grad_norm": 0.42868033624798035, + "learning_rate": 8.278782958825963e-06, + "loss": 0.4763, + "step": 6501 + }, + { + "epoch": 0.2942747227879611, + "grad_norm": 0.6926362327266504, + "learning_rate": 8.278229588477857e-06, + "loss": 0.3633, + "step": 6502 + }, + { + "epoch": 0.29431998189635666, + "grad_norm": 0.7159833505501458, + "learning_rate": 8.277676147690667e-06, + "loss": 0.3704, + "step": 6503 + }, + { + "epoch": 0.2943652410047522, + "grad_norm": 0.6048515450661027, + "learning_rate": 8.277122636476284e-06, + "loss": 0.361, + "step": 6504 + }, + { + "epoch": 0.2944105001131478, + "grad_norm": 0.6926580589479077, + "learning_rate": 8.276569054846598e-06, + "loss": 0.4029, + "step": 6505 + }, + { + "epoch": 0.2944557592215433, + "grad_norm": 0.7087743982571297, + "learning_rate": 8.276015402813507e-06, + "loss": 0.3709, + "step": 6506 + }, + { + "epoch": 0.2945010183299389, + "grad_norm": 0.6699221368445163, + "learning_rate": 8.275461680388907e-06, + "loss": 0.3808, + "step": 6507 + }, + { + "epoch": 0.29454627743833445, + "grad_norm": 0.6510226287700173, + "learning_rate": 8.274907887584695e-06, + "loss": 0.3394, + "step": 6508 + }, + { + "epoch": 0.29459153654673004, + "grad_norm": 0.7599440272738529, + "learning_rate": 8.274354024412771e-06, + "loss": 0.506, + "step": 6509 + }, + { + "epoch": 0.29463679565512557, + "grad_norm": 0.6533955878761483, + "learning_rate": 8.273800090885033e-06, + "loss": 0.3876, + "step": 6510 + }, + { + "epoch": 0.29468205476352116, + "grad_norm": 0.6330339667687448, + "learning_rate": 8.273246087013389e-06, + "loss": 0.3375, + "step": 6511 + }, + { + "epoch": 0.2947273138719167, + "grad_norm": 0.6825683608101373, + "learning_rate": 8.27269201280974e-06, + "loss": 0.3953, + "step": 6512 + }, + { + "epoch": 0.2947725729803123, + "grad_norm": 0.6742266479272396, + "learning_rate": 8.272137868285988e-06, + "loss": 0.3741, + "step": 6513 + }, + { + "epoch": 0.2948178320887079, + "grad_norm": 0.3509569724005049, + "learning_rate": 8.271583653454046e-06, + "loss": 0.4807, + "step": 6514 + }, + { + "epoch": 0.2948630911971034, + "grad_norm": 0.6964791974984916, + "learning_rate": 8.271029368325816e-06, + "loss": 0.4001, + "step": 6515 + }, + { + "epoch": 0.294908350305499, + "grad_norm": 0.7441171021360583, + "learning_rate": 8.270475012913212e-06, + "loss": 0.3423, + "step": 6516 + }, + { + "epoch": 0.29495360941389454, + "grad_norm": 0.6282383938135476, + "learning_rate": 8.269920587228145e-06, + "loss": 0.3606, + "step": 6517 + }, + { + "epoch": 0.2949988685222901, + "grad_norm": 0.6679807888966616, + "learning_rate": 8.269366091282526e-06, + "loss": 0.4197, + "step": 6518 + }, + { + "epoch": 0.29504412763068566, + "grad_norm": 0.6600489477472857, + "learning_rate": 8.268811525088273e-06, + "loss": 0.3808, + "step": 6519 + }, + { + "epoch": 0.29508938673908125, + "grad_norm": 0.5760869889668241, + "learning_rate": 8.2682568886573e-06, + "loss": 0.3092, + "step": 6520 + }, + { + "epoch": 0.2951346458474768, + "grad_norm": 0.660281851940936, + "learning_rate": 8.267702182001521e-06, + "loss": 0.3618, + "step": 6521 + }, + { + "epoch": 0.2951799049558724, + "grad_norm": 0.668073722385169, + "learning_rate": 8.26714740513286e-06, + "loss": 0.3764, + "step": 6522 + }, + { + "epoch": 0.2952251640642679, + "grad_norm": 0.6880108887912482, + "learning_rate": 8.266592558063235e-06, + "loss": 0.3695, + "step": 6523 + }, + { + "epoch": 0.2952704231726635, + "grad_norm": 0.3869646595533447, + "learning_rate": 8.26603764080457e-06, + "loss": 0.4912, + "step": 6524 + }, + { + "epoch": 0.2953156822810591, + "grad_norm": 0.6738971146084021, + "learning_rate": 8.265482653368786e-06, + "loss": 0.3755, + "step": 6525 + }, + { + "epoch": 0.2953609413894546, + "grad_norm": 0.9514622783496854, + "learning_rate": 8.264927595767808e-06, + "loss": 0.3406, + "step": 6526 + }, + { + "epoch": 0.2954062004978502, + "grad_norm": 0.6555285194634614, + "learning_rate": 8.264372468013566e-06, + "loss": 0.3592, + "step": 6527 + }, + { + "epoch": 0.29545145960624575, + "grad_norm": 0.6941521013634866, + "learning_rate": 8.263817270117984e-06, + "loss": 0.4001, + "step": 6528 + }, + { + "epoch": 0.29549671871464134, + "grad_norm": 0.6522125610450555, + "learning_rate": 8.263262002092992e-06, + "loss": 0.3549, + "step": 6529 + }, + { + "epoch": 0.29554197782303687, + "grad_norm": 0.7423122524992386, + "learning_rate": 8.262706663950522e-06, + "loss": 0.387, + "step": 6530 + }, + { + "epoch": 0.29558723693143246, + "grad_norm": 0.6941356725705105, + "learning_rate": 8.262151255702506e-06, + "loss": 0.3873, + "step": 6531 + }, + { + "epoch": 0.295632496039828, + "grad_norm": 0.40196026244367045, + "learning_rate": 8.261595777360881e-06, + "loss": 0.4844, + "step": 6532 + }, + { + "epoch": 0.2956777551482236, + "grad_norm": 0.6558786177137559, + "learning_rate": 8.261040228937578e-06, + "loss": 0.3722, + "step": 6533 + }, + { + "epoch": 0.2957230142566191, + "grad_norm": 0.6546132783831952, + "learning_rate": 8.260484610444537e-06, + "loss": 0.3947, + "step": 6534 + }, + { + "epoch": 0.2957682733650147, + "grad_norm": 0.638845136439574, + "learning_rate": 8.259928921893694e-06, + "loss": 0.3635, + "step": 6535 + }, + { + "epoch": 0.2958135324734103, + "grad_norm": 0.3215929274096865, + "learning_rate": 8.259373163296992e-06, + "loss": 0.4919, + "step": 6536 + }, + { + "epoch": 0.29585879158180584, + "grad_norm": 0.6560801022010452, + "learning_rate": 8.258817334666371e-06, + "loss": 0.3585, + "step": 6537 + }, + { + "epoch": 0.2959040506902014, + "grad_norm": 0.6445716599426061, + "learning_rate": 8.258261436013774e-06, + "loss": 0.3529, + "step": 6538 + }, + { + "epoch": 0.29594930979859696, + "grad_norm": 0.617007318890825, + "learning_rate": 8.257705467351144e-06, + "loss": 0.3152, + "step": 6539 + }, + { + "epoch": 0.29599456890699255, + "grad_norm": 0.6083874381432658, + "learning_rate": 8.257149428690432e-06, + "loss": 0.3922, + "step": 6540 + }, + { + "epoch": 0.2960398280153881, + "grad_norm": 0.3776880342089537, + "learning_rate": 8.256593320043582e-06, + "loss": 0.487, + "step": 6541 + }, + { + "epoch": 0.2960850871237837, + "grad_norm": 0.704918022131064, + "learning_rate": 8.25603714142254e-06, + "loss": 0.3876, + "step": 6542 + }, + { + "epoch": 0.2961303462321792, + "grad_norm": 0.8063942549240285, + "learning_rate": 8.255480892839262e-06, + "loss": 0.3627, + "step": 6543 + }, + { + "epoch": 0.2961756053405748, + "grad_norm": 0.5742802128032614, + "learning_rate": 8.254924574305698e-06, + "loss": 0.3473, + "step": 6544 + }, + { + "epoch": 0.29622086444897033, + "grad_norm": 0.6964485588806103, + "learning_rate": 8.254368185833803e-06, + "loss": 0.3822, + "step": 6545 + }, + { + "epoch": 0.2962661235573659, + "grad_norm": 0.6321601650487939, + "learning_rate": 8.25381172743553e-06, + "loss": 0.356, + "step": 6546 + }, + { + "epoch": 0.29631138266576146, + "grad_norm": 1.0443208662838888, + "learning_rate": 8.253255199122834e-06, + "loss": 0.4008, + "step": 6547 + }, + { + "epoch": 0.29635664177415705, + "grad_norm": 0.6582380851157981, + "learning_rate": 8.252698600907678e-06, + "loss": 0.3482, + "step": 6548 + }, + { + "epoch": 0.29640190088255264, + "grad_norm": 0.6970926427572819, + "learning_rate": 8.252141932802018e-06, + "loss": 0.3846, + "step": 6549 + }, + { + "epoch": 0.2964471599909482, + "grad_norm": 0.4938941833933624, + "learning_rate": 8.251585194817816e-06, + "loss": 0.5017, + "step": 6550 + }, + { + "epoch": 0.29649241909934376, + "grad_norm": 0.6856811251346372, + "learning_rate": 8.251028386967035e-06, + "loss": 0.3657, + "step": 6551 + }, + { + "epoch": 0.2965376782077393, + "grad_norm": 0.31996299943306283, + "learning_rate": 8.25047150926164e-06, + "loss": 0.4859, + "step": 6552 + }, + { + "epoch": 0.2965829373161349, + "grad_norm": 0.6998424919733743, + "learning_rate": 8.249914561713592e-06, + "loss": 0.3839, + "step": 6553 + }, + { + "epoch": 0.2966281964245304, + "grad_norm": 0.773242550385443, + "learning_rate": 8.249357544334865e-06, + "loss": 0.3736, + "step": 6554 + }, + { + "epoch": 0.296673455532926, + "grad_norm": 0.6725820236276115, + "learning_rate": 8.248800457137422e-06, + "loss": 0.388, + "step": 6555 + }, + { + "epoch": 0.29671871464132155, + "grad_norm": 0.6893640866870768, + "learning_rate": 8.248243300133236e-06, + "loss": 0.4145, + "step": 6556 + }, + { + "epoch": 0.29676397374971714, + "grad_norm": 0.7584177846682806, + "learning_rate": 8.247686073334277e-06, + "loss": 0.3928, + "step": 6557 + }, + { + "epoch": 0.29680923285811267, + "grad_norm": 0.6678868748025847, + "learning_rate": 8.247128776752517e-06, + "loss": 0.4747, + "step": 6558 + }, + { + "epoch": 0.29685449196650826, + "grad_norm": 0.6610574223754907, + "learning_rate": 8.246571410399935e-06, + "loss": 0.3582, + "step": 6559 + }, + { + "epoch": 0.29689975107490385, + "grad_norm": 0.6115168177738242, + "learning_rate": 8.246013974288505e-06, + "loss": 0.359, + "step": 6560 + }, + { + "epoch": 0.2969450101832994, + "grad_norm": 0.33510638899390455, + "learning_rate": 8.245456468430201e-06, + "loss": 0.4911, + "step": 6561 + }, + { + "epoch": 0.296990269291695, + "grad_norm": 0.6297514724178221, + "learning_rate": 8.244898892837009e-06, + "loss": 0.3633, + "step": 6562 + }, + { + "epoch": 0.2970355284000905, + "grad_norm": 0.6217919692620721, + "learning_rate": 8.244341247520903e-06, + "loss": 0.3601, + "step": 6563 + }, + { + "epoch": 0.2970807875084861, + "grad_norm": 0.4667666793389384, + "learning_rate": 8.243783532493868e-06, + "loss": 0.4649, + "step": 6564 + }, + { + "epoch": 0.29712604661688163, + "grad_norm": 0.6434234825245986, + "learning_rate": 8.243225747767888e-06, + "loss": 0.3581, + "step": 6565 + }, + { + "epoch": 0.2971713057252772, + "grad_norm": 0.6626520810564629, + "learning_rate": 8.242667893354948e-06, + "loss": 0.3694, + "step": 6566 + }, + { + "epoch": 0.29721656483367276, + "grad_norm": 0.6603571749083783, + "learning_rate": 8.242109969267033e-06, + "loss": 0.3276, + "step": 6567 + }, + { + "epoch": 0.29726182394206835, + "grad_norm": 0.6109779360936114, + "learning_rate": 8.241551975516133e-06, + "loss": 0.396, + "step": 6568 + }, + { + "epoch": 0.2973070830504639, + "grad_norm": 0.651519606462089, + "learning_rate": 8.240993912114236e-06, + "loss": 0.3578, + "step": 6569 + }, + { + "epoch": 0.2973523421588595, + "grad_norm": 0.6371769305932496, + "learning_rate": 8.240435779073336e-06, + "loss": 0.36, + "step": 6570 + }, + { + "epoch": 0.297397601267255, + "grad_norm": 0.42362648372322165, + "learning_rate": 8.23987757640542e-06, + "loss": 0.4808, + "step": 6571 + }, + { + "epoch": 0.2974428603756506, + "grad_norm": 0.6340758942769098, + "learning_rate": 8.239319304122488e-06, + "loss": 0.3618, + "step": 6572 + }, + { + "epoch": 0.2974881194840462, + "grad_norm": 0.7264695251353803, + "learning_rate": 8.238760962236532e-06, + "loss": 0.3885, + "step": 6573 + }, + { + "epoch": 0.2975333785924417, + "grad_norm": 0.6927123830509735, + "learning_rate": 8.23820255075955e-06, + "loss": 0.4151, + "step": 6574 + }, + { + "epoch": 0.2975786377008373, + "grad_norm": 0.7849148012762388, + "learning_rate": 8.23764406970354e-06, + "loss": 0.3743, + "step": 6575 + }, + { + "epoch": 0.29762389680923285, + "grad_norm": 0.654854279717437, + "learning_rate": 8.237085519080503e-06, + "loss": 0.375, + "step": 6576 + }, + { + "epoch": 0.29766915591762844, + "grad_norm": 0.7159203992116845, + "learning_rate": 8.236526898902439e-06, + "loss": 0.388, + "step": 6577 + }, + { + "epoch": 0.29771441502602397, + "grad_norm": 0.656283121300721, + "learning_rate": 8.235968209181355e-06, + "loss": 0.3411, + "step": 6578 + }, + { + "epoch": 0.29775967413441956, + "grad_norm": 0.4616172562386764, + "learning_rate": 8.23540944992925e-06, + "loss": 0.4888, + "step": 6579 + }, + { + "epoch": 0.2978049332428151, + "grad_norm": 0.664265125141684, + "learning_rate": 8.234850621158135e-06, + "loss": 0.3647, + "step": 6580 + }, + { + "epoch": 0.2978501923512107, + "grad_norm": 0.6515710215820063, + "learning_rate": 8.234291722880015e-06, + "loss": 0.3735, + "step": 6581 + }, + { + "epoch": 0.2978954514596062, + "grad_norm": 0.670688081563422, + "learning_rate": 8.233732755106897e-06, + "loss": 0.3846, + "step": 6582 + }, + { + "epoch": 0.2979407105680018, + "grad_norm": 0.3096613537636291, + "learning_rate": 8.233173717850796e-06, + "loss": 0.4626, + "step": 6583 + }, + { + "epoch": 0.2979859696763974, + "grad_norm": 0.670924612725373, + "learning_rate": 8.232614611123719e-06, + "loss": 0.3603, + "step": 6584 + }, + { + "epoch": 0.29803122878479293, + "grad_norm": 0.7092134926955636, + "learning_rate": 8.232055434937685e-06, + "loss": 0.3543, + "step": 6585 + }, + { + "epoch": 0.2980764878931885, + "grad_norm": 0.32778289159729157, + "learning_rate": 8.231496189304704e-06, + "loss": 0.4722, + "step": 6586 + }, + { + "epoch": 0.29812174700158406, + "grad_norm": 0.701945960013496, + "learning_rate": 8.230936874236797e-06, + "loss": 0.4089, + "step": 6587 + }, + { + "epoch": 0.29816700610997965, + "grad_norm": 0.6969133591371268, + "learning_rate": 8.230377489745979e-06, + "loss": 0.3806, + "step": 6588 + }, + { + "epoch": 0.2982122652183752, + "grad_norm": 0.6535133332299422, + "learning_rate": 8.229818035844269e-06, + "loss": 0.4209, + "step": 6589 + }, + { + "epoch": 0.2982575243267708, + "grad_norm": 0.6780849897711396, + "learning_rate": 8.22925851254369e-06, + "loss": 0.3675, + "step": 6590 + }, + { + "epoch": 0.2983027834351663, + "grad_norm": 0.7267884367024268, + "learning_rate": 8.228698919856264e-06, + "loss": 0.3453, + "step": 6591 + }, + { + "epoch": 0.2983480425435619, + "grad_norm": 0.39652816926380524, + "learning_rate": 8.228139257794012e-06, + "loss": 0.4841, + "step": 6592 + }, + { + "epoch": 0.29839330165195743, + "grad_norm": 0.30304362311260974, + "learning_rate": 8.227579526368965e-06, + "loss": 0.4818, + "step": 6593 + }, + { + "epoch": 0.298438560760353, + "grad_norm": 0.6851067942591184, + "learning_rate": 8.227019725593144e-06, + "loss": 0.3391, + "step": 6594 + }, + { + "epoch": 0.2984838198687486, + "grad_norm": 0.34227352067508154, + "learning_rate": 8.226459855478582e-06, + "loss": 0.4676, + "step": 6595 + }, + { + "epoch": 0.29852907897714415, + "grad_norm": 0.6581569083787847, + "learning_rate": 8.225899916037305e-06, + "loss": 0.403, + "step": 6596 + }, + { + "epoch": 0.29857433808553974, + "grad_norm": 0.6284583434239028, + "learning_rate": 8.22533990728135e-06, + "loss": 0.401, + "step": 6597 + }, + { + "epoch": 0.29861959719393527, + "grad_norm": 0.7456078983710108, + "learning_rate": 8.224779829222742e-06, + "loss": 0.3938, + "step": 6598 + }, + { + "epoch": 0.29866485630233086, + "grad_norm": 0.6615636059791379, + "learning_rate": 8.224219681873522e-06, + "loss": 0.3549, + "step": 6599 + }, + { + "epoch": 0.2987101154107264, + "grad_norm": 0.6578472104200105, + "learning_rate": 8.223659465245723e-06, + "loss": 0.3297, + "step": 6600 + }, + { + "epoch": 0.298755374519122, + "grad_norm": 0.327107354885252, + "learning_rate": 8.223099179351383e-06, + "loss": 0.4747, + "step": 6601 + }, + { + "epoch": 0.2988006336275175, + "grad_norm": 0.31622473719570926, + "learning_rate": 8.22253882420254e-06, + "loss": 0.4718, + "step": 6602 + }, + { + "epoch": 0.2988458927359131, + "grad_norm": 0.6994810720919846, + "learning_rate": 8.221978399811237e-06, + "loss": 0.3992, + "step": 6603 + }, + { + "epoch": 0.29889115184430864, + "grad_norm": 0.6706589677639165, + "learning_rate": 8.22141790618951e-06, + "loss": 0.3884, + "step": 6604 + }, + { + "epoch": 0.29893641095270423, + "grad_norm": 0.7094897895214411, + "learning_rate": 8.220857343349408e-06, + "loss": 0.3376, + "step": 6605 + }, + { + "epoch": 0.29898167006109977, + "grad_norm": 0.32556023080515356, + "learning_rate": 8.220296711302976e-06, + "loss": 0.5088, + "step": 6606 + }, + { + "epoch": 0.29902692916949536, + "grad_norm": 0.7184867337191, + "learning_rate": 8.219736010062255e-06, + "loss": 0.3605, + "step": 6607 + }, + { + "epoch": 0.29907218827789095, + "grad_norm": 0.3098575866940877, + "learning_rate": 8.219175239639296e-06, + "loss": 0.4804, + "step": 6608 + }, + { + "epoch": 0.2991174473862865, + "grad_norm": 0.6400206664582679, + "learning_rate": 8.21861440004615e-06, + "loss": 0.3574, + "step": 6609 + }, + { + "epoch": 0.2991627064946821, + "grad_norm": 0.3010626374463441, + "learning_rate": 8.218053491294864e-06, + "loss": 0.5005, + "step": 6610 + }, + { + "epoch": 0.2992079656030776, + "grad_norm": 0.6855028942826946, + "learning_rate": 8.217492513397493e-06, + "loss": 0.3662, + "step": 6611 + }, + { + "epoch": 0.2992532247114732, + "grad_norm": 0.7101719022351363, + "learning_rate": 8.216931466366089e-06, + "loss": 0.3976, + "step": 6612 + }, + { + "epoch": 0.29929848381986873, + "grad_norm": 0.5527238979079008, + "learning_rate": 8.216370350212709e-06, + "loss": 0.3337, + "step": 6613 + }, + { + "epoch": 0.2993437429282643, + "grad_norm": 0.6503365969415894, + "learning_rate": 8.215809164949407e-06, + "loss": 0.352, + "step": 6614 + }, + { + "epoch": 0.29938900203665986, + "grad_norm": 0.6456987454478041, + "learning_rate": 8.215247910588242e-06, + "loss": 0.4055, + "step": 6615 + }, + { + "epoch": 0.29943426114505545, + "grad_norm": 0.765375799768525, + "learning_rate": 8.214686587141277e-06, + "loss": 0.4042, + "step": 6616 + }, + { + "epoch": 0.299479520253451, + "grad_norm": 0.3333814206779106, + "learning_rate": 8.21412519462057e-06, + "loss": 0.5028, + "step": 6617 + }, + { + "epoch": 0.29952477936184657, + "grad_norm": 0.33084472661619535, + "learning_rate": 8.213563733038182e-06, + "loss": 0.4787, + "step": 6618 + }, + { + "epoch": 0.29957003847024216, + "grad_norm": 0.6637174994749759, + "learning_rate": 8.21300220240618e-06, + "loss": 0.3875, + "step": 6619 + }, + { + "epoch": 0.2996152975786377, + "grad_norm": 0.6912141263850212, + "learning_rate": 8.212440602736628e-06, + "loss": 0.3809, + "step": 6620 + }, + { + "epoch": 0.2996605566870333, + "grad_norm": 0.6936592964069401, + "learning_rate": 8.211878934041595e-06, + "loss": 0.3658, + "step": 6621 + }, + { + "epoch": 0.2997058157954288, + "grad_norm": 0.678807367422059, + "learning_rate": 8.211317196333149e-06, + "loss": 0.3622, + "step": 6622 + }, + { + "epoch": 0.2997510749038244, + "grad_norm": 0.38560937624830255, + "learning_rate": 8.210755389623356e-06, + "loss": 0.46, + "step": 6623 + }, + { + "epoch": 0.29979633401221994, + "grad_norm": 0.779311131617998, + "learning_rate": 8.210193513924294e-06, + "loss": 0.3718, + "step": 6624 + }, + { + "epoch": 0.29984159312061553, + "grad_norm": 0.6420801291643528, + "learning_rate": 8.209631569248031e-06, + "loss": 0.3685, + "step": 6625 + }, + { + "epoch": 0.29988685222901107, + "grad_norm": 0.6773260279806149, + "learning_rate": 8.209069555606643e-06, + "loss": 0.3472, + "step": 6626 + }, + { + "epoch": 0.29993211133740666, + "grad_norm": 0.7706893310245987, + "learning_rate": 8.208507473012207e-06, + "loss": 0.4212, + "step": 6627 + }, + { + "epoch": 0.2999773704458022, + "grad_norm": 0.638140601624294, + "learning_rate": 8.2079453214768e-06, + "loss": 0.3402, + "step": 6628 + }, + { + "epoch": 0.3000226295541978, + "grad_norm": 0.5940795490485952, + "learning_rate": 8.2073831010125e-06, + "loss": 0.3185, + "step": 6629 + }, + { + "epoch": 0.3000678886625934, + "grad_norm": 0.6635770683032601, + "learning_rate": 8.206820811631387e-06, + "loss": 0.3228, + "step": 6630 + }, + { + "epoch": 0.3001131477709889, + "grad_norm": 0.6899477337576465, + "learning_rate": 8.206258453345543e-06, + "loss": 0.384, + "step": 6631 + }, + { + "epoch": 0.3001584068793845, + "grad_norm": 0.6668924568154347, + "learning_rate": 8.205696026167054e-06, + "loss": 0.3475, + "step": 6632 + }, + { + "epoch": 0.30020366598778003, + "grad_norm": 0.6413029895755245, + "learning_rate": 8.205133530108003e-06, + "loss": 0.393, + "step": 6633 + }, + { + "epoch": 0.3002489250961756, + "grad_norm": 0.6730984902221498, + "learning_rate": 8.204570965180476e-06, + "loss": 0.3834, + "step": 6634 + }, + { + "epoch": 0.30029418420457116, + "grad_norm": 0.371876236667934, + "learning_rate": 8.204008331396562e-06, + "loss": 0.5068, + "step": 6635 + }, + { + "epoch": 0.30033944331296675, + "grad_norm": 0.6556362419726928, + "learning_rate": 8.203445628768347e-06, + "loss": 0.3631, + "step": 6636 + }, + { + "epoch": 0.3003847024213623, + "grad_norm": 0.6359940940564239, + "learning_rate": 8.202882857307926e-06, + "loss": 0.3779, + "step": 6637 + }, + { + "epoch": 0.30042996152975787, + "grad_norm": 0.6386188623759247, + "learning_rate": 8.202320017027387e-06, + "loss": 0.3984, + "step": 6638 + }, + { + "epoch": 0.3004752206381534, + "grad_norm": 0.32621834677771844, + "learning_rate": 8.201757107938829e-06, + "loss": 0.5145, + "step": 6639 + }, + { + "epoch": 0.300520479746549, + "grad_norm": 0.5928303322514519, + "learning_rate": 8.201194130054342e-06, + "loss": 0.3325, + "step": 6640 + }, + { + "epoch": 0.30056573885494453, + "grad_norm": 0.3094692390285682, + "learning_rate": 8.200631083386025e-06, + "loss": 0.4907, + "step": 6641 + }, + { + "epoch": 0.3006109979633401, + "grad_norm": 0.6144956649085465, + "learning_rate": 8.200067967945977e-06, + "loss": 0.3741, + "step": 6642 + }, + { + "epoch": 0.3006562570717357, + "grad_norm": 0.695521701070394, + "learning_rate": 8.199504783746297e-06, + "loss": 0.3798, + "step": 6643 + }, + { + "epoch": 0.30070151618013125, + "grad_norm": 0.7236023029614718, + "learning_rate": 8.198941530799084e-06, + "loss": 0.3339, + "step": 6644 + }, + { + "epoch": 0.30074677528852684, + "grad_norm": 0.2931514319730529, + "learning_rate": 8.198378209116444e-06, + "loss": 0.4678, + "step": 6645 + }, + { + "epoch": 0.30079203439692237, + "grad_norm": 0.3172098987084482, + "learning_rate": 8.19781481871048e-06, + "loss": 0.4783, + "step": 6646 + }, + { + "epoch": 0.30083729350531796, + "grad_norm": 0.6300258938616354, + "learning_rate": 8.197251359593294e-06, + "loss": 0.3924, + "step": 6647 + }, + { + "epoch": 0.3008825526137135, + "grad_norm": 0.6117147132966283, + "learning_rate": 8.196687831776998e-06, + "loss": 0.3308, + "step": 6648 + }, + { + "epoch": 0.3009278117221091, + "grad_norm": 0.6846165713761543, + "learning_rate": 8.196124235273698e-06, + "loss": 0.4117, + "step": 6649 + }, + { + "epoch": 0.3009730708305046, + "grad_norm": 0.6475285881169686, + "learning_rate": 8.195560570095504e-06, + "loss": 0.3732, + "step": 6650 + }, + { + "epoch": 0.3010183299389002, + "grad_norm": 0.6562117371282999, + "learning_rate": 8.194996836254527e-06, + "loss": 0.3692, + "step": 6651 + }, + { + "epoch": 0.30106358904729574, + "grad_norm": 0.3879940700042671, + "learning_rate": 8.194433033762882e-06, + "loss": 0.4973, + "step": 6652 + }, + { + "epoch": 0.30110884815569133, + "grad_norm": 0.3532760568138731, + "learning_rate": 8.193869162632682e-06, + "loss": 0.4715, + "step": 6653 + }, + { + "epoch": 0.3011541072640869, + "grad_norm": 0.33387439482702236, + "learning_rate": 8.193305222876043e-06, + "loss": 0.5229, + "step": 6654 + }, + { + "epoch": 0.30119936637248246, + "grad_norm": 0.6660220599660495, + "learning_rate": 8.19274121450508e-06, + "loss": 0.3942, + "step": 6655 + }, + { + "epoch": 0.30124462548087805, + "grad_norm": 0.8598578165148109, + "learning_rate": 8.192177137531916e-06, + "loss": 0.3359, + "step": 6656 + }, + { + "epoch": 0.3012898845892736, + "grad_norm": 0.4416717118687196, + "learning_rate": 8.19161299196867e-06, + "loss": 0.4614, + "step": 6657 + }, + { + "epoch": 0.30133514369766917, + "grad_norm": 0.8191984846210192, + "learning_rate": 8.191048777827462e-06, + "loss": 0.3596, + "step": 6658 + }, + { + "epoch": 0.3013804028060647, + "grad_norm": 0.7090058012522736, + "learning_rate": 8.190484495120416e-06, + "loss": 0.3474, + "step": 6659 + }, + { + "epoch": 0.3014256619144603, + "grad_norm": 0.6600972546014798, + "learning_rate": 8.189920143859658e-06, + "loss": 0.356, + "step": 6660 + }, + { + "epoch": 0.30147092102285583, + "grad_norm": 0.758035548634254, + "learning_rate": 8.189355724057313e-06, + "loss": 0.3822, + "step": 6661 + }, + { + "epoch": 0.3015161801312514, + "grad_norm": 0.6585617124518213, + "learning_rate": 8.188791235725509e-06, + "loss": 0.39, + "step": 6662 + }, + { + "epoch": 0.30156143923964696, + "grad_norm": 0.6584263455818427, + "learning_rate": 8.188226678876374e-06, + "loss": 0.3525, + "step": 6663 + }, + { + "epoch": 0.30160669834804255, + "grad_norm": 0.6284333579476523, + "learning_rate": 8.187662053522039e-06, + "loss": 0.364, + "step": 6664 + }, + { + "epoch": 0.30165195745643814, + "grad_norm": 0.6941954463209911, + "learning_rate": 8.187097359674638e-06, + "loss": 0.3824, + "step": 6665 + }, + { + "epoch": 0.30169721656483367, + "grad_norm": 0.6193892417113361, + "learning_rate": 8.186532597346304e-06, + "loss": 0.3797, + "step": 6666 + }, + { + "epoch": 0.30174247567322926, + "grad_norm": 0.661626608730419, + "learning_rate": 8.18596776654917e-06, + "loss": 0.3267, + "step": 6667 + }, + { + "epoch": 0.3017877347816248, + "grad_norm": 0.6513566152865766, + "learning_rate": 8.185402867295373e-06, + "loss": 0.3986, + "step": 6668 + }, + { + "epoch": 0.3018329938900204, + "grad_norm": 0.46167306904485933, + "learning_rate": 8.184837899597054e-06, + "loss": 0.4989, + "step": 6669 + }, + { + "epoch": 0.3018782529984159, + "grad_norm": 0.6812849953088046, + "learning_rate": 8.184272863466348e-06, + "loss": 0.3587, + "step": 6670 + }, + { + "epoch": 0.3019235121068115, + "grad_norm": 0.6372181036074359, + "learning_rate": 8.183707758915398e-06, + "loss": 0.3866, + "step": 6671 + }, + { + "epoch": 0.30196877121520704, + "grad_norm": 0.6663877518321638, + "learning_rate": 8.183142585956347e-06, + "loss": 0.382, + "step": 6672 + }, + { + "epoch": 0.30201403032360263, + "grad_norm": 0.6383630283033406, + "learning_rate": 8.182577344601337e-06, + "loss": 0.3833, + "step": 6673 + }, + { + "epoch": 0.30205928943199817, + "grad_norm": 0.36237644474402525, + "learning_rate": 8.182012034862514e-06, + "loss": 0.4855, + "step": 6674 + }, + { + "epoch": 0.30210454854039376, + "grad_norm": 0.7009030759694248, + "learning_rate": 8.181446656752027e-06, + "loss": 0.4167, + "step": 6675 + }, + { + "epoch": 0.3021498076487893, + "grad_norm": 0.3188822744200764, + "learning_rate": 8.18088121028202e-06, + "loss": 0.4902, + "step": 6676 + }, + { + "epoch": 0.3021950667571849, + "grad_norm": 0.7033049015038353, + "learning_rate": 8.18031569546465e-06, + "loss": 0.395, + "step": 6677 + }, + { + "epoch": 0.3022403258655805, + "grad_norm": 0.2996947562744208, + "learning_rate": 8.179750112312058e-06, + "loss": 0.4924, + "step": 6678 + }, + { + "epoch": 0.302285584973976, + "grad_norm": 0.7107215020779526, + "learning_rate": 8.179184460836404e-06, + "loss": 0.4296, + "step": 6679 + }, + { + "epoch": 0.3023308440823716, + "grad_norm": 0.36882862048673637, + "learning_rate": 8.178618741049841e-06, + "loss": 0.4754, + "step": 6680 + }, + { + "epoch": 0.30237610319076713, + "grad_norm": 0.35130336150032226, + "learning_rate": 8.178052952964523e-06, + "loss": 0.4703, + "step": 6681 + }, + { + "epoch": 0.3024213622991627, + "grad_norm": 0.7141293551304086, + "learning_rate": 8.177487096592607e-06, + "loss": 0.3988, + "step": 6682 + }, + { + "epoch": 0.30246662140755826, + "grad_norm": 0.7030862967298622, + "learning_rate": 8.176921171946252e-06, + "loss": 0.3606, + "step": 6683 + }, + { + "epoch": 0.30251188051595385, + "grad_norm": 0.6769924190293563, + "learning_rate": 8.176355179037619e-06, + "loss": 0.4181, + "step": 6684 + }, + { + "epoch": 0.3025571396243494, + "grad_norm": 0.6831775720861758, + "learning_rate": 8.17578911787887e-06, + "loss": 0.4089, + "step": 6685 + }, + { + "epoch": 0.30260239873274497, + "grad_norm": 0.6606531283871524, + "learning_rate": 8.175222988482163e-06, + "loss": 0.3295, + "step": 6686 + }, + { + "epoch": 0.3026476578411405, + "grad_norm": 0.6762522492476349, + "learning_rate": 8.174656790859668e-06, + "loss": 0.3567, + "step": 6687 + }, + { + "epoch": 0.3026929169495361, + "grad_norm": 0.6795353803369522, + "learning_rate": 8.17409052502355e-06, + "loss": 0.4107, + "step": 6688 + }, + { + "epoch": 0.3027381760579317, + "grad_norm": 0.538399507273518, + "learning_rate": 8.173524190985973e-06, + "loss": 0.4827, + "step": 6689 + }, + { + "epoch": 0.3027834351663272, + "grad_norm": 0.40540498512979073, + "learning_rate": 8.172957788759109e-06, + "loss": 0.4884, + "step": 6690 + }, + { + "epoch": 0.3028286942747228, + "grad_norm": 0.7645143001189432, + "learning_rate": 8.172391318355126e-06, + "loss": 0.3521, + "step": 6691 + }, + { + "epoch": 0.30287395338311834, + "grad_norm": 0.7116150900076627, + "learning_rate": 8.171824779786198e-06, + "loss": 0.383, + "step": 6692 + }, + { + "epoch": 0.30291921249151393, + "grad_norm": 0.6885266895160488, + "learning_rate": 8.171258173064497e-06, + "loss": 0.3792, + "step": 6693 + }, + { + "epoch": 0.30296447159990947, + "grad_norm": 0.6154377255141567, + "learning_rate": 8.170691498202196e-06, + "loss": 0.3836, + "step": 6694 + }, + { + "epoch": 0.30300973070830506, + "grad_norm": 0.6892015186835359, + "learning_rate": 8.170124755211475e-06, + "loss": 0.4875, + "step": 6695 + }, + { + "epoch": 0.3030549898167006, + "grad_norm": 0.5078678332810831, + "learning_rate": 8.16955794410451e-06, + "loss": 0.518, + "step": 6696 + }, + { + "epoch": 0.3031002489250962, + "grad_norm": 0.6996318079313077, + "learning_rate": 8.168991064893476e-06, + "loss": 0.4067, + "step": 6697 + }, + { + "epoch": 0.3031455080334917, + "grad_norm": 0.6701047881657107, + "learning_rate": 8.168424117590559e-06, + "loss": 0.364, + "step": 6698 + }, + { + "epoch": 0.3031907671418873, + "grad_norm": 0.6267231223583077, + "learning_rate": 8.167857102207936e-06, + "loss": 0.3914, + "step": 6699 + }, + { + "epoch": 0.30323602625028284, + "grad_norm": 0.6488152116391062, + "learning_rate": 8.167290018757797e-06, + "loss": 0.3677, + "step": 6700 + }, + { + "epoch": 0.30328128535867843, + "grad_norm": 0.657644421376092, + "learning_rate": 8.166722867252321e-06, + "loss": 0.3588, + "step": 6701 + }, + { + "epoch": 0.303326544467074, + "grad_norm": 0.6977592438122392, + "learning_rate": 8.166155647703698e-06, + "loss": 0.3919, + "step": 6702 + }, + { + "epoch": 0.30337180357546956, + "grad_norm": 1.0770726070383594, + "learning_rate": 8.165588360124112e-06, + "loss": 0.5246, + "step": 6703 + }, + { + "epoch": 0.30341706268386515, + "grad_norm": 0.6771285868253755, + "learning_rate": 8.165021004525758e-06, + "loss": 0.3728, + "step": 6704 + }, + { + "epoch": 0.3034623217922607, + "grad_norm": 0.6544659774166018, + "learning_rate": 8.164453580920819e-06, + "loss": 0.3463, + "step": 6705 + }, + { + "epoch": 0.30350758090065627, + "grad_norm": 0.6545512816480353, + "learning_rate": 8.163886089321493e-06, + "loss": 0.4144, + "step": 6706 + }, + { + "epoch": 0.3035528400090518, + "grad_norm": 0.7171692882504428, + "learning_rate": 8.163318529739971e-06, + "loss": 0.349, + "step": 6707 + }, + { + "epoch": 0.3035980991174474, + "grad_norm": 0.5006054067208444, + "learning_rate": 8.162750902188452e-06, + "loss": 0.4737, + "step": 6708 + }, + { + "epoch": 0.30364335822584293, + "grad_norm": 0.6516365427753947, + "learning_rate": 8.162183206679129e-06, + "loss": 0.3905, + "step": 6709 + }, + { + "epoch": 0.3036886173342385, + "grad_norm": 0.6767068672930011, + "learning_rate": 8.1616154432242e-06, + "loss": 0.346, + "step": 6710 + }, + { + "epoch": 0.30373387644263405, + "grad_norm": 0.6397724806662128, + "learning_rate": 8.161047611835866e-06, + "loss": 0.3751, + "step": 6711 + }, + { + "epoch": 0.30377913555102964, + "grad_norm": 0.8168005161205404, + "learning_rate": 8.160479712526326e-06, + "loss": 0.3977, + "step": 6712 + }, + { + "epoch": 0.30382439465942523, + "grad_norm": 0.7369886205778866, + "learning_rate": 8.159911745307785e-06, + "loss": 0.4237, + "step": 6713 + }, + { + "epoch": 0.30386965376782077, + "grad_norm": 0.6740165118906998, + "learning_rate": 8.159343710192445e-06, + "loss": 0.389, + "step": 6714 + }, + { + "epoch": 0.30391491287621636, + "grad_norm": 0.5087143101780832, + "learning_rate": 8.158775607192511e-06, + "loss": 0.4844, + "step": 6715 + }, + { + "epoch": 0.3039601719846119, + "grad_norm": 0.6533814867752841, + "learning_rate": 8.158207436320192e-06, + "loss": 0.3281, + "step": 6716 + }, + { + "epoch": 0.3040054310930075, + "grad_norm": 0.6203679406879485, + "learning_rate": 8.157639197587694e-06, + "loss": 0.3611, + "step": 6717 + }, + { + "epoch": 0.304050690201403, + "grad_norm": 0.6193842426608959, + "learning_rate": 8.157070891007227e-06, + "loss": 0.357, + "step": 6718 + }, + { + "epoch": 0.3040959493097986, + "grad_norm": 0.6689713449805593, + "learning_rate": 8.156502516591005e-06, + "loss": 0.4038, + "step": 6719 + }, + { + "epoch": 0.30414120841819414, + "grad_norm": 0.6649185774901039, + "learning_rate": 8.155934074351236e-06, + "loss": 0.3695, + "step": 6720 + }, + { + "epoch": 0.30418646752658973, + "grad_norm": 0.7085354849748461, + "learning_rate": 8.155365564300137e-06, + "loss": 0.3875, + "step": 6721 + }, + { + "epoch": 0.30423172663498527, + "grad_norm": 0.7011208137126376, + "learning_rate": 8.154796986449925e-06, + "loss": 0.3678, + "step": 6722 + }, + { + "epoch": 0.30427698574338086, + "grad_norm": 0.6183098806515898, + "learning_rate": 8.154228340812812e-06, + "loss": 0.3564, + "step": 6723 + }, + { + "epoch": 0.30432224485177645, + "grad_norm": 0.6849206059636519, + "learning_rate": 8.15365962740102e-06, + "loss": 0.4572, + "step": 6724 + }, + { + "epoch": 0.304367503960172, + "grad_norm": 0.667232617976701, + "learning_rate": 8.15309084622677e-06, + "loss": 0.3435, + "step": 6725 + }, + { + "epoch": 0.30441276306856757, + "grad_norm": 0.5710042272132126, + "learning_rate": 8.15252199730228e-06, + "loss": 0.486, + "step": 6726 + }, + { + "epoch": 0.3044580221769631, + "grad_norm": 0.6301191405750226, + "learning_rate": 8.151953080639777e-06, + "loss": 0.4118, + "step": 6727 + }, + { + "epoch": 0.3045032812853587, + "grad_norm": 0.6708578885522815, + "learning_rate": 8.15138409625148e-06, + "loss": 0.3569, + "step": 6728 + }, + { + "epoch": 0.30454854039375423, + "grad_norm": 0.6846127030532159, + "learning_rate": 8.15081504414962e-06, + "loss": 0.3496, + "step": 6729 + }, + { + "epoch": 0.3045937995021498, + "grad_norm": 0.6200505350083688, + "learning_rate": 8.15024592434642e-06, + "loss": 0.3675, + "step": 6730 + }, + { + "epoch": 0.30463905861054535, + "grad_norm": 0.5283697356772571, + "learning_rate": 8.14967673685411e-06, + "loss": 0.5014, + "step": 6731 + }, + { + "epoch": 0.30468431771894094, + "grad_norm": 0.9863932733707426, + "learning_rate": 8.149107481684922e-06, + "loss": 0.3767, + "step": 6732 + }, + { + "epoch": 0.3047295768273365, + "grad_norm": 0.6713443061113822, + "learning_rate": 8.148538158851084e-06, + "loss": 0.3789, + "step": 6733 + }, + { + "epoch": 0.30477483593573207, + "grad_norm": 0.5988285870373714, + "learning_rate": 8.147968768364833e-06, + "loss": 0.3227, + "step": 6734 + }, + { + "epoch": 0.3048200950441276, + "grad_norm": 0.6679839451442557, + "learning_rate": 8.1473993102384e-06, + "loss": 0.3709, + "step": 6735 + }, + { + "epoch": 0.3048653541525232, + "grad_norm": 0.719002401863315, + "learning_rate": 8.146829784484024e-06, + "loss": 0.3692, + "step": 6736 + }, + { + "epoch": 0.3049106132609188, + "grad_norm": 0.7384804649111679, + "learning_rate": 8.146260191113937e-06, + "loss": 0.408, + "step": 6737 + }, + { + "epoch": 0.3049558723693143, + "grad_norm": 0.5806880149958714, + "learning_rate": 8.145690530140385e-06, + "loss": 0.3467, + "step": 6738 + }, + { + "epoch": 0.3050011314777099, + "grad_norm": 0.7988457184655746, + "learning_rate": 8.145120801575603e-06, + "loss": 0.3329, + "step": 6739 + }, + { + "epoch": 0.30504639058610544, + "grad_norm": 0.45441104152888795, + "learning_rate": 8.144551005431835e-06, + "loss": 0.5055, + "step": 6740 + }, + { + "epoch": 0.30509164969450103, + "grad_norm": 0.3480895237820813, + "learning_rate": 8.143981141721324e-06, + "loss": 0.5051, + "step": 6741 + }, + { + "epoch": 0.30513690880289657, + "grad_norm": 0.6786100980441568, + "learning_rate": 8.143411210456314e-06, + "loss": 0.3572, + "step": 6742 + }, + { + "epoch": 0.30518216791129216, + "grad_norm": 0.3247065808236292, + "learning_rate": 8.142841211649052e-06, + "loss": 0.4946, + "step": 6743 + }, + { + "epoch": 0.3052274270196877, + "grad_norm": 0.7602586822007469, + "learning_rate": 8.142271145311784e-06, + "loss": 0.3885, + "step": 6744 + }, + { + "epoch": 0.3052726861280833, + "grad_norm": 0.6827903832763981, + "learning_rate": 8.141701011456759e-06, + "loss": 0.4101, + "step": 6745 + }, + { + "epoch": 0.3053179452364788, + "grad_norm": 0.6811187790476072, + "learning_rate": 8.14113081009623e-06, + "loss": 0.3494, + "step": 6746 + }, + { + "epoch": 0.3053632043448744, + "grad_norm": 0.7048085893836293, + "learning_rate": 8.140560541242446e-06, + "loss": 0.3774, + "step": 6747 + }, + { + "epoch": 0.30540846345327, + "grad_norm": 0.6491314330558875, + "learning_rate": 8.139990204907662e-06, + "loss": 0.3666, + "step": 6748 + }, + { + "epoch": 0.30545372256166553, + "grad_norm": 0.4712696305154235, + "learning_rate": 8.139419801104133e-06, + "loss": 0.5054, + "step": 6749 + }, + { + "epoch": 0.3054989816700611, + "grad_norm": 0.6124385047064537, + "learning_rate": 8.138849329844115e-06, + "loss": 0.3466, + "step": 6750 + }, + { + "epoch": 0.30554424077845665, + "grad_norm": 0.6026356203624581, + "learning_rate": 8.138278791139863e-06, + "loss": 0.3485, + "step": 6751 + }, + { + "epoch": 0.30558949988685224, + "grad_norm": 0.6271212950544153, + "learning_rate": 8.13770818500364e-06, + "loss": 0.3676, + "step": 6752 + }, + { + "epoch": 0.3056347589952478, + "grad_norm": 0.6357535646778157, + "learning_rate": 8.137137511447702e-06, + "loss": 0.3909, + "step": 6753 + }, + { + "epoch": 0.30568001810364337, + "grad_norm": 0.37425033364294313, + "learning_rate": 8.136566770484316e-06, + "loss": 0.489, + "step": 6754 + }, + { + "epoch": 0.3057252772120389, + "grad_norm": 0.3013803252222591, + "learning_rate": 8.135995962125744e-06, + "loss": 0.4781, + "step": 6755 + }, + { + "epoch": 0.3057705363204345, + "grad_norm": 0.6793897256341992, + "learning_rate": 8.135425086384249e-06, + "loss": 0.3599, + "step": 6756 + }, + { + "epoch": 0.30581579542883003, + "grad_norm": 0.5991893062045853, + "learning_rate": 8.1348541432721e-06, + "loss": 0.3353, + "step": 6757 + }, + { + "epoch": 0.3058610545372256, + "grad_norm": 0.6641871745828792, + "learning_rate": 8.134283132801562e-06, + "loss": 0.3843, + "step": 6758 + }, + { + "epoch": 0.3059063136456212, + "grad_norm": 0.615570207173398, + "learning_rate": 8.133712054984906e-06, + "loss": 0.3622, + "step": 6759 + }, + { + "epoch": 0.30595157275401674, + "grad_norm": 0.44958620298115815, + "learning_rate": 8.133140909834402e-06, + "loss": 0.5002, + "step": 6760 + }, + { + "epoch": 0.30599683186241233, + "grad_norm": 0.738740954931246, + "learning_rate": 8.132569697362323e-06, + "loss": 0.3919, + "step": 6761 + }, + { + "epoch": 0.30604209097080787, + "grad_norm": 0.3629080785737851, + "learning_rate": 8.131998417580942e-06, + "loss": 0.5022, + "step": 6762 + }, + { + "epoch": 0.30608735007920346, + "grad_norm": 0.6430334797680526, + "learning_rate": 8.131427070502535e-06, + "loss": 0.3578, + "step": 6763 + }, + { + "epoch": 0.306132609187599, + "grad_norm": 0.6998391775286003, + "learning_rate": 8.130855656139375e-06, + "loss": 0.3389, + "step": 6764 + }, + { + "epoch": 0.3061778682959946, + "grad_norm": 0.7018326564652131, + "learning_rate": 8.130284174503746e-06, + "loss": 0.3888, + "step": 6765 + }, + { + "epoch": 0.3062231274043901, + "grad_norm": 0.6413065879688464, + "learning_rate": 8.129712625607924e-06, + "loss": 0.351, + "step": 6766 + }, + { + "epoch": 0.3062683865127857, + "grad_norm": 0.6484535268441277, + "learning_rate": 8.129141009464187e-06, + "loss": 0.3305, + "step": 6767 + }, + { + "epoch": 0.30631364562118124, + "grad_norm": 0.6539147847237137, + "learning_rate": 8.128569326084824e-06, + "loss": 0.3459, + "step": 6768 + }, + { + "epoch": 0.30635890472957683, + "grad_norm": 0.6790155149435549, + "learning_rate": 8.127997575482112e-06, + "loss": 0.4109, + "step": 6769 + }, + { + "epoch": 0.30640416383797237, + "grad_norm": 0.6114908532644882, + "learning_rate": 8.127425757668338e-06, + "loss": 0.3752, + "step": 6770 + }, + { + "epoch": 0.30644942294636796, + "grad_norm": 0.6098158272610537, + "learning_rate": 8.12685387265579e-06, + "loss": 0.4123, + "step": 6771 + }, + { + "epoch": 0.30649468205476355, + "grad_norm": 0.6409207978303622, + "learning_rate": 8.126281920456758e-06, + "loss": 0.4011, + "step": 6772 + }, + { + "epoch": 0.3065399411631591, + "grad_norm": 0.7401398533726881, + "learning_rate": 8.12570990108353e-06, + "loss": 0.4185, + "step": 6773 + }, + { + "epoch": 0.30658520027155467, + "grad_norm": 0.6356749159664686, + "learning_rate": 8.125137814548394e-06, + "loss": 0.356, + "step": 6774 + }, + { + "epoch": 0.3066304593799502, + "grad_norm": 0.6529145959466799, + "learning_rate": 8.124565660863643e-06, + "loss": 0.3638, + "step": 6775 + }, + { + "epoch": 0.3066757184883458, + "grad_norm": 0.6608892458641886, + "learning_rate": 8.123993440041576e-06, + "loss": 0.3563, + "step": 6776 + }, + { + "epoch": 0.30672097759674133, + "grad_norm": 0.5521789874530064, + "learning_rate": 8.123421152094481e-06, + "loss": 0.5073, + "step": 6777 + }, + { + "epoch": 0.3067662367051369, + "grad_norm": 0.6905730622680517, + "learning_rate": 8.12284879703466e-06, + "loss": 0.3584, + "step": 6778 + }, + { + "epoch": 0.30681149581353245, + "grad_norm": 0.6080164401091869, + "learning_rate": 8.12227637487441e-06, + "loss": 0.3673, + "step": 6779 + }, + { + "epoch": 0.30685675492192804, + "grad_norm": 0.6349719454277032, + "learning_rate": 8.121703885626029e-06, + "loss": 0.3932, + "step": 6780 + }, + { + "epoch": 0.3069020140303236, + "grad_norm": 0.6416433540100211, + "learning_rate": 8.12113132930182e-06, + "loss": 0.3911, + "step": 6781 + }, + { + "epoch": 0.30694727313871917, + "grad_norm": 0.3495737986624057, + "learning_rate": 8.120558705914083e-06, + "loss": 0.4932, + "step": 6782 + }, + { + "epoch": 0.30699253224711476, + "grad_norm": 0.5923057827572341, + "learning_rate": 8.119986015475126e-06, + "loss": 0.3361, + "step": 6783 + }, + { + "epoch": 0.3070377913555103, + "grad_norm": 0.707659390930438, + "learning_rate": 8.11941325799725e-06, + "loss": 0.3557, + "step": 6784 + }, + { + "epoch": 0.3070830504639059, + "grad_norm": 0.7240388985619817, + "learning_rate": 8.118840433492764e-06, + "loss": 0.3898, + "step": 6785 + }, + { + "epoch": 0.3071283095723014, + "grad_norm": 0.6417285146209729, + "learning_rate": 8.118267541973975e-06, + "loss": 0.3738, + "step": 6786 + }, + { + "epoch": 0.307173568680697, + "grad_norm": 0.3553061010073323, + "learning_rate": 8.117694583453195e-06, + "loss": 0.4988, + "step": 6787 + }, + { + "epoch": 0.30721882778909254, + "grad_norm": 0.6635298379528045, + "learning_rate": 8.117121557942733e-06, + "loss": 0.3736, + "step": 6788 + }, + { + "epoch": 0.30726408689748813, + "grad_norm": 0.312431761561904, + "learning_rate": 8.116548465454902e-06, + "loss": 0.4785, + "step": 6789 + }, + { + "epoch": 0.30730934600588367, + "grad_norm": 0.741654489937296, + "learning_rate": 8.115975306002018e-06, + "loss": 0.3714, + "step": 6790 + }, + { + "epoch": 0.30735460511427926, + "grad_norm": 0.6059806753467073, + "learning_rate": 8.115402079596392e-06, + "loss": 0.3504, + "step": 6791 + }, + { + "epoch": 0.3073998642226748, + "grad_norm": 0.6754900935493422, + "learning_rate": 8.114828786250345e-06, + "loss": 0.3692, + "step": 6792 + }, + { + "epoch": 0.3074451233310704, + "grad_norm": 0.379407826248663, + "learning_rate": 8.114255425976193e-06, + "loss": 0.4703, + "step": 6793 + }, + { + "epoch": 0.30749038243946597, + "grad_norm": 0.6066107353561802, + "learning_rate": 8.113681998786257e-06, + "loss": 0.3734, + "step": 6794 + }, + { + "epoch": 0.3075356415478615, + "grad_norm": 0.6140946118613567, + "learning_rate": 8.113108504692858e-06, + "loss": 0.3494, + "step": 6795 + }, + { + "epoch": 0.3075809006562571, + "grad_norm": 0.6478485310329225, + "learning_rate": 8.11253494370832e-06, + "loss": 0.4195, + "step": 6796 + }, + { + "epoch": 0.30762615976465263, + "grad_norm": 0.6222099995071896, + "learning_rate": 8.111961315844964e-06, + "loss": 0.3788, + "step": 6797 + }, + { + "epoch": 0.3076714188730482, + "grad_norm": 0.32387049358651526, + "learning_rate": 8.111387621115116e-06, + "loss": 0.4689, + "step": 6798 + }, + { + "epoch": 0.30771667798144375, + "grad_norm": 0.7076263821704921, + "learning_rate": 8.110813859531104e-06, + "loss": 0.3445, + "step": 6799 + }, + { + "epoch": 0.30776193708983934, + "grad_norm": 0.6373148724099768, + "learning_rate": 8.110240031105257e-06, + "loss": 0.3686, + "step": 6800 + }, + { + "epoch": 0.3078071961982349, + "grad_norm": 0.7596219343419965, + "learning_rate": 8.109666135849905e-06, + "loss": 0.3462, + "step": 6801 + }, + { + "epoch": 0.30785245530663047, + "grad_norm": 0.6406151701818559, + "learning_rate": 8.109092173777376e-06, + "loss": 0.3602, + "step": 6802 + }, + { + "epoch": 0.307897714415026, + "grad_norm": 0.6168811765740875, + "learning_rate": 8.108518144900007e-06, + "loss": 0.3321, + "step": 6803 + }, + { + "epoch": 0.3079429735234216, + "grad_norm": 0.6369882372627966, + "learning_rate": 8.10794404923013e-06, + "loss": 0.3178, + "step": 6804 + }, + { + "epoch": 0.3079882326318171, + "grad_norm": 0.3820706526356615, + "learning_rate": 8.107369886780082e-06, + "loss": 0.4625, + "step": 6805 + }, + { + "epoch": 0.3080334917402127, + "grad_norm": 0.6049111052281383, + "learning_rate": 8.106795657562197e-06, + "loss": 0.331, + "step": 6806 + }, + { + "epoch": 0.3080787508486083, + "grad_norm": 0.3170775102658955, + "learning_rate": 8.106221361588814e-06, + "loss": 0.5085, + "step": 6807 + }, + { + "epoch": 0.30812400995700384, + "grad_norm": 0.6396101195957737, + "learning_rate": 8.105646998872275e-06, + "loss": 0.3939, + "step": 6808 + }, + { + "epoch": 0.30816926906539943, + "grad_norm": 0.6137977068626062, + "learning_rate": 8.10507256942492e-06, + "loss": 0.3392, + "step": 6809 + }, + { + "epoch": 0.30821452817379497, + "grad_norm": 0.6733315999273725, + "learning_rate": 8.104498073259093e-06, + "loss": 0.338, + "step": 6810 + }, + { + "epoch": 0.30825978728219056, + "grad_norm": 0.6554373555685616, + "learning_rate": 8.103923510387138e-06, + "loss": 0.3999, + "step": 6811 + }, + { + "epoch": 0.3083050463905861, + "grad_norm": 0.6453238347422188, + "learning_rate": 8.103348880821399e-06, + "loss": 0.3468, + "step": 6812 + }, + { + "epoch": 0.3083503054989817, + "grad_norm": 0.4037572749428178, + "learning_rate": 8.10277418457422e-06, + "loss": 0.5147, + "step": 6813 + }, + { + "epoch": 0.3083955646073772, + "grad_norm": 0.3979089005469083, + "learning_rate": 8.102199421657957e-06, + "loss": 0.509, + "step": 6814 + }, + { + "epoch": 0.3084408237157728, + "grad_norm": 0.6515789874392688, + "learning_rate": 8.101624592084956e-06, + "loss": 0.3963, + "step": 6815 + }, + { + "epoch": 0.30848608282416834, + "grad_norm": 0.7141706623863724, + "learning_rate": 8.101049695867566e-06, + "loss": 0.3619, + "step": 6816 + }, + { + "epoch": 0.30853134193256393, + "grad_norm": 0.6568144635043696, + "learning_rate": 8.100474733018145e-06, + "loss": 0.3569, + "step": 6817 + }, + { + "epoch": 0.3085766010409595, + "grad_norm": 0.687926850848241, + "learning_rate": 8.099899703549043e-06, + "loss": 0.3557, + "step": 6818 + }, + { + "epoch": 0.30862186014935505, + "grad_norm": 0.7585055645699293, + "learning_rate": 8.099324607472619e-06, + "loss": 0.3915, + "step": 6819 + }, + { + "epoch": 0.30866711925775064, + "grad_norm": 0.6493471634676304, + "learning_rate": 8.098749444801226e-06, + "loss": 0.3558, + "step": 6820 + }, + { + "epoch": 0.3087123783661462, + "grad_norm": 0.5066541089205197, + "learning_rate": 8.098174215547224e-06, + "loss": 0.4819, + "step": 6821 + }, + { + "epoch": 0.30875763747454177, + "grad_norm": 0.6462727041437436, + "learning_rate": 8.097598919722975e-06, + "loss": 0.3227, + "step": 6822 + }, + { + "epoch": 0.3088028965829373, + "grad_norm": 0.636904943838202, + "learning_rate": 8.097023557340837e-06, + "loss": 0.3285, + "step": 6823 + }, + { + "epoch": 0.3088481556913329, + "grad_norm": 0.3604508863174562, + "learning_rate": 8.096448128413177e-06, + "loss": 0.494, + "step": 6824 + }, + { + "epoch": 0.3088934147997284, + "grad_norm": 0.3033044941525956, + "learning_rate": 8.095872632952354e-06, + "loss": 0.5074, + "step": 6825 + }, + { + "epoch": 0.308938673908124, + "grad_norm": 0.6798141188672809, + "learning_rate": 8.095297070970738e-06, + "loss": 0.4, + "step": 6826 + }, + { + "epoch": 0.30898393301651955, + "grad_norm": 0.6561679938459225, + "learning_rate": 8.094721442480696e-06, + "loss": 0.3725, + "step": 6827 + }, + { + "epoch": 0.30902919212491514, + "grad_norm": 0.4073413244614808, + "learning_rate": 8.094145747494591e-06, + "loss": 0.4766, + "step": 6828 + }, + { + "epoch": 0.3090744512333107, + "grad_norm": 0.7145571037258346, + "learning_rate": 8.093569986024798e-06, + "loss": 0.3615, + "step": 6829 + }, + { + "epoch": 0.30911971034170627, + "grad_norm": 0.42268248652396634, + "learning_rate": 8.092994158083689e-06, + "loss": 0.5162, + "step": 6830 + }, + { + "epoch": 0.30916496945010186, + "grad_norm": 0.6628200350098077, + "learning_rate": 8.092418263683635e-06, + "loss": 0.4114, + "step": 6831 + }, + { + "epoch": 0.3092102285584974, + "grad_norm": 0.6047039574344523, + "learning_rate": 8.091842302837009e-06, + "loss": 0.3592, + "step": 6832 + }, + { + "epoch": 0.309255487666893, + "grad_norm": 0.6438819114591449, + "learning_rate": 8.091266275556188e-06, + "loss": 0.4155, + "step": 6833 + }, + { + "epoch": 0.3093007467752885, + "grad_norm": 0.6437391796856079, + "learning_rate": 8.090690181853548e-06, + "loss": 0.3785, + "step": 6834 + }, + { + "epoch": 0.3093460058836841, + "grad_norm": 0.6030900916072861, + "learning_rate": 8.09011402174147e-06, + "loss": 0.3633, + "step": 6835 + }, + { + "epoch": 0.30939126499207964, + "grad_norm": 0.7355170778499127, + "learning_rate": 8.089537795232331e-06, + "loss": 0.3587, + "step": 6836 + }, + { + "epoch": 0.30943652410047523, + "grad_norm": 0.6698188931352116, + "learning_rate": 8.088961502338514e-06, + "loss": 0.3725, + "step": 6837 + }, + { + "epoch": 0.30948178320887076, + "grad_norm": 0.7400131968976658, + "learning_rate": 8.088385143072402e-06, + "loss": 0.3586, + "step": 6838 + }, + { + "epoch": 0.30952704231726635, + "grad_norm": 0.6718189379267882, + "learning_rate": 8.087808717446377e-06, + "loss": 0.3694, + "step": 6839 + }, + { + "epoch": 0.3095723014256619, + "grad_norm": 0.6473017686964793, + "learning_rate": 8.087232225472827e-06, + "loss": 0.3561, + "step": 6840 + }, + { + "epoch": 0.3096175605340575, + "grad_norm": 1.0674858541583925, + "learning_rate": 8.086655667164137e-06, + "loss": 0.3661, + "step": 6841 + }, + { + "epoch": 0.30966281964245307, + "grad_norm": 0.5969628411282236, + "learning_rate": 8.086079042532699e-06, + "loss": 0.3864, + "step": 6842 + }, + { + "epoch": 0.3097080787508486, + "grad_norm": 0.5847828086110796, + "learning_rate": 8.0855023515909e-06, + "loss": 0.3402, + "step": 6843 + }, + { + "epoch": 0.3097533378592442, + "grad_norm": 0.6408510947564642, + "learning_rate": 8.08492559435113e-06, + "loss": 0.3885, + "step": 6844 + }, + { + "epoch": 0.30979859696763973, + "grad_norm": 0.6446133618228684, + "learning_rate": 8.084348770825785e-06, + "loss": 0.4956, + "step": 6845 + }, + { + "epoch": 0.3098438560760353, + "grad_norm": 0.6549341012350445, + "learning_rate": 8.083771881027259e-06, + "loss": 0.4082, + "step": 6846 + }, + { + "epoch": 0.30988911518443085, + "grad_norm": 0.7068487841425369, + "learning_rate": 8.083194924967943e-06, + "loss": 0.4099, + "step": 6847 + }, + { + "epoch": 0.30993437429282644, + "grad_norm": 0.7007918864113356, + "learning_rate": 8.08261790266024e-06, + "loss": 0.3243, + "step": 6848 + }, + { + "epoch": 0.309979633401222, + "grad_norm": 0.629604268051431, + "learning_rate": 8.082040814116545e-06, + "loss": 0.3626, + "step": 6849 + }, + { + "epoch": 0.31002489250961757, + "grad_norm": 0.6278545282083574, + "learning_rate": 8.081463659349258e-06, + "loss": 0.3375, + "step": 6850 + }, + { + "epoch": 0.3100701516180131, + "grad_norm": 0.4734338413743656, + "learning_rate": 8.080886438370781e-06, + "loss": 0.5128, + "step": 6851 + }, + { + "epoch": 0.3101154107264087, + "grad_norm": 0.4099808770615001, + "learning_rate": 8.080309151193517e-06, + "loss": 0.4826, + "step": 6852 + }, + { + "epoch": 0.3101606698348043, + "grad_norm": 0.6989913052154422, + "learning_rate": 8.07973179782987e-06, + "loss": 0.3824, + "step": 6853 + }, + { + "epoch": 0.3102059289431998, + "grad_norm": 0.6248720257681796, + "learning_rate": 8.079154378292246e-06, + "loss": 0.3345, + "step": 6854 + }, + { + "epoch": 0.3102511880515954, + "grad_norm": 0.6139729843377981, + "learning_rate": 8.07857689259305e-06, + "loss": 0.363, + "step": 6855 + }, + { + "epoch": 0.31029644715999094, + "grad_norm": 0.6465785975930611, + "learning_rate": 8.077999340744694e-06, + "loss": 0.3383, + "step": 6856 + }, + { + "epoch": 0.31034170626838653, + "grad_norm": 0.654275213477745, + "learning_rate": 8.077421722759584e-06, + "loss": 0.3197, + "step": 6857 + }, + { + "epoch": 0.31038696537678206, + "grad_norm": 0.7020318044378301, + "learning_rate": 8.076844038650133e-06, + "loss": 0.3761, + "step": 6858 + }, + { + "epoch": 0.31043222448517765, + "grad_norm": 0.6495770931717897, + "learning_rate": 8.076266288428753e-06, + "loss": 0.3691, + "step": 6859 + }, + { + "epoch": 0.3104774835935732, + "grad_norm": 0.8143884827910683, + "learning_rate": 8.075688472107859e-06, + "loss": 0.3725, + "step": 6860 + }, + { + "epoch": 0.3105227427019688, + "grad_norm": 0.8094018848341005, + "learning_rate": 8.075110589699866e-06, + "loss": 0.5127, + "step": 6861 + }, + { + "epoch": 0.3105680018103643, + "grad_norm": 0.6774066803171471, + "learning_rate": 8.07453264121719e-06, + "loss": 0.4156, + "step": 6862 + }, + { + "epoch": 0.3106132609187599, + "grad_norm": 0.6939323935945736, + "learning_rate": 8.07395462667225e-06, + "loss": 0.3314, + "step": 6863 + }, + { + "epoch": 0.31065852002715544, + "grad_norm": 0.6902658938875413, + "learning_rate": 8.073376546077468e-06, + "loss": 0.3835, + "step": 6864 + }, + { + "epoch": 0.31070377913555103, + "grad_norm": 0.34823630616883156, + "learning_rate": 8.07279839944526e-06, + "loss": 0.4922, + "step": 6865 + }, + { + "epoch": 0.3107490382439466, + "grad_norm": 0.7711677796759714, + "learning_rate": 8.072220186788056e-06, + "loss": 0.3656, + "step": 6866 + }, + { + "epoch": 0.31079429735234215, + "grad_norm": 0.6111540013077865, + "learning_rate": 8.071641908118273e-06, + "loss": 0.3609, + "step": 6867 + }, + { + "epoch": 0.31083955646073774, + "grad_norm": 0.4677566680030358, + "learning_rate": 8.071063563448341e-06, + "loss": 0.512, + "step": 6868 + }, + { + "epoch": 0.3108848155691333, + "grad_norm": 0.6404382698128002, + "learning_rate": 8.070485152790684e-06, + "loss": 0.3427, + "step": 6869 + }, + { + "epoch": 0.31093007467752887, + "grad_norm": 0.3686604494675354, + "learning_rate": 8.06990667615773e-06, + "loss": 0.4937, + "step": 6870 + }, + { + "epoch": 0.3109753337859244, + "grad_norm": 0.6986737941715713, + "learning_rate": 8.069328133561911e-06, + "loss": 0.3958, + "step": 6871 + }, + { + "epoch": 0.31102059289432, + "grad_norm": 0.3162165790029801, + "learning_rate": 8.068749525015658e-06, + "loss": 0.4915, + "step": 6872 + }, + { + "epoch": 0.3110658520027155, + "grad_norm": 0.8038232490379009, + "learning_rate": 8.068170850531401e-06, + "loss": 0.3809, + "step": 6873 + }, + { + "epoch": 0.3111111111111111, + "grad_norm": 0.6650645932764687, + "learning_rate": 8.067592110121576e-06, + "loss": 0.3954, + "step": 6874 + }, + { + "epoch": 0.31115637021950665, + "grad_norm": 0.6527969870447125, + "learning_rate": 8.06701330379862e-06, + "loss": 0.3501, + "step": 6875 + }, + { + "epoch": 0.31120162932790224, + "grad_norm": 0.5320663380971439, + "learning_rate": 8.066434431574965e-06, + "loss": 0.4975, + "step": 6876 + }, + { + "epoch": 0.31124688843629783, + "grad_norm": 0.7166134358312011, + "learning_rate": 8.065855493463055e-06, + "loss": 0.3703, + "step": 6877 + }, + { + "epoch": 0.31129214754469337, + "grad_norm": 0.6843795237430064, + "learning_rate": 8.065276489475324e-06, + "loss": 0.3682, + "step": 6878 + }, + { + "epoch": 0.31133740665308896, + "grad_norm": 0.628539631506995, + "learning_rate": 8.064697419624216e-06, + "loss": 0.3842, + "step": 6879 + }, + { + "epoch": 0.3113826657614845, + "grad_norm": 0.63461679915827, + "learning_rate": 8.064118283922173e-06, + "loss": 0.3472, + "step": 6880 + }, + { + "epoch": 0.3114279248698801, + "grad_norm": 0.5858346718837931, + "learning_rate": 8.06353908238164e-06, + "loss": 0.3982, + "step": 6881 + }, + { + "epoch": 0.3114731839782756, + "grad_norm": 0.3290559586116341, + "learning_rate": 8.06295981501506e-06, + "loss": 0.4678, + "step": 6882 + }, + { + "epoch": 0.3115184430866712, + "grad_norm": 0.6397350366827538, + "learning_rate": 8.062380481834881e-06, + "loss": 0.3743, + "step": 6883 + }, + { + "epoch": 0.31156370219506674, + "grad_norm": 0.6291797265410776, + "learning_rate": 8.061801082853548e-06, + "loss": 0.3446, + "step": 6884 + }, + { + "epoch": 0.31160896130346233, + "grad_norm": 0.6852533920605484, + "learning_rate": 8.061221618083519e-06, + "loss": 0.3873, + "step": 6885 + }, + { + "epoch": 0.31165422041185786, + "grad_norm": 0.7471568005288752, + "learning_rate": 8.060642087537233e-06, + "loss": 0.3438, + "step": 6886 + }, + { + "epoch": 0.31169947952025345, + "grad_norm": 0.5769928531316513, + "learning_rate": 8.060062491227154e-06, + "loss": 0.3268, + "step": 6887 + }, + { + "epoch": 0.31174473862864904, + "grad_norm": 0.39457544398209493, + "learning_rate": 8.059482829165728e-06, + "loss": 0.4775, + "step": 6888 + }, + { + "epoch": 0.3117899977370446, + "grad_norm": 0.6477673888723783, + "learning_rate": 8.058903101365412e-06, + "loss": 0.3274, + "step": 6889 + }, + { + "epoch": 0.31183525684544017, + "grad_norm": 0.6592795719191157, + "learning_rate": 8.058323307838665e-06, + "loss": 0.3386, + "step": 6890 + }, + { + "epoch": 0.3118805159538357, + "grad_norm": 0.7579655825665975, + "learning_rate": 8.05774344859794e-06, + "loss": 0.3585, + "step": 6891 + }, + { + "epoch": 0.3119257750622313, + "grad_norm": 0.6553535146501523, + "learning_rate": 8.057163523655702e-06, + "loss": 0.3319, + "step": 6892 + }, + { + "epoch": 0.3119710341706268, + "grad_norm": 0.722228702460221, + "learning_rate": 8.056583533024408e-06, + "loss": 0.4117, + "step": 6893 + }, + { + "epoch": 0.3120162932790224, + "grad_norm": 0.62822723167227, + "learning_rate": 8.056003476716521e-06, + "loss": 0.3519, + "step": 6894 + }, + { + "epoch": 0.31206155238741795, + "grad_norm": 0.34302529356673794, + "learning_rate": 8.055423354744507e-06, + "loss": 0.4896, + "step": 6895 + }, + { + "epoch": 0.31210681149581354, + "grad_norm": 0.8038315586479057, + "learning_rate": 8.054843167120827e-06, + "loss": 0.3516, + "step": 6896 + }, + { + "epoch": 0.3121520706042091, + "grad_norm": 0.7075627538296707, + "learning_rate": 8.054262913857951e-06, + "loss": 0.3913, + "step": 6897 + }, + { + "epoch": 0.31219732971260467, + "grad_norm": 0.6365784262746258, + "learning_rate": 8.053682594968346e-06, + "loss": 0.3721, + "step": 6898 + }, + { + "epoch": 0.3122425888210002, + "grad_norm": 0.6020970936461443, + "learning_rate": 8.053102210464478e-06, + "loss": 0.3466, + "step": 6899 + }, + { + "epoch": 0.3122878479293958, + "grad_norm": 0.6240839936563485, + "learning_rate": 8.052521760358822e-06, + "loss": 0.3847, + "step": 6900 + }, + { + "epoch": 0.3123331070377914, + "grad_norm": 0.30425922617942247, + "learning_rate": 8.05194124466385e-06, + "loss": 0.4701, + "step": 6901 + }, + { + "epoch": 0.3123783661461869, + "grad_norm": 0.6497985170793312, + "learning_rate": 8.051360663392031e-06, + "loss": 0.3752, + "step": 6902 + }, + { + "epoch": 0.3124236252545825, + "grad_norm": 0.6580678715176217, + "learning_rate": 8.050780016555846e-06, + "loss": 0.4101, + "step": 6903 + }, + { + "epoch": 0.31246888436297804, + "grad_norm": 0.2985922174224097, + "learning_rate": 8.050199304167766e-06, + "loss": 0.4843, + "step": 6904 + }, + { + "epoch": 0.31251414347137363, + "grad_norm": 0.6295805144586434, + "learning_rate": 8.04961852624027e-06, + "loss": 0.3404, + "step": 6905 + }, + { + "epoch": 0.31255940257976916, + "grad_norm": 0.6372763963072887, + "learning_rate": 8.04903768278584e-06, + "loss": 0.352, + "step": 6906 + }, + { + "epoch": 0.31260466168816475, + "grad_norm": 0.6481452415926576, + "learning_rate": 8.048456773816955e-06, + "loss": 0.3757, + "step": 6907 + }, + { + "epoch": 0.3126499207965603, + "grad_norm": 0.7300271667682606, + "learning_rate": 8.047875799346096e-06, + "loss": 0.3824, + "step": 6908 + }, + { + "epoch": 0.3126951799049559, + "grad_norm": 0.6156677324812093, + "learning_rate": 8.047294759385746e-06, + "loss": 0.3255, + "step": 6909 + }, + { + "epoch": 0.3127404390133514, + "grad_norm": 0.67457907378465, + "learning_rate": 8.046713653948393e-06, + "loss": 0.3683, + "step": 6910 + }, + { + "epoch": 0.312785698121747, + "grad_norm": 0.6480105249444751, + "learning_rate": 8.046132483046518e-06, + "loss": 0.3724, + "step": 6911 + }, + { + "epoch": 0.3128309572301426, + "grad_norm": 0.6655706607509766, + "learning_rate": 8.045551246692612e-06, + "loss": 0.3749, + "step": 6912 + }, + { + "epoch": 0.3128762163385381, + "grad_norm": 0.696785351305484, + "learning_rate": 8.044969944899165e-06, + "loss": 0.4009, + "step": 6913 + }, + { + "epoch": 0.3129214754469337, + "grad_norm": 0.7094320509766985, + "learning_rate": 8.044388577678666e-06, + "loss": 0.4151, + "step": 6914 + }, + { + "epoch": 0.31296673455532925, + "grad_norm": 0.8859580873747761, + "learning_rate": 8.043807145043604e-06, + "loss": 0.332, + "step": 6915 + }, + { + "epoch": 0.31301199366372484, + "grad_norm": 0.7092949835152653, + "learning_rate": 8.043225647006475e-06, + "loss": 0.3363, + "step": 6916 + }, + { + "epoch": 0.3130572527721204, + "grad_norm": 0.4288553491654917, + "learning_rate": 8.042644083579775e-06, + "loss": 0.5173, + "step": 6917 + }, + { + "epoch": 0.31310251188051597, + "grad_norm": 0.6636966437293738, + "learning_rate": 8.042062454775999e-06, + "loss": 0.3481, + "step": 6918 + }, + { + "epoch": 0.3131477709889115, + "grad_norm": 0.700343789222212, + "learning_rate": 8.041480760607642e-06, + "loss": 0.3481, + "step": 6919 + }, + { + "epoch": 0.3131930300973071, + "grad_norm": 0.6413586661482176, + "learning_rate": 8.040899001087206e-06, + "loss": 0.4055, + "step": 6920 + }, + { + "epoch": 0.3132382892057026, + "grad_norm": 0.7054811245614201, + "learning_rate": 8.04031717622719e-06, + "loss": 0.3947, + "step": 6921 + }, + { + "epoch": 0.3132835483140982, + "grad_norm": 0.6748577078036545, + "learning_rate": 8.039735286040095e-06, + "loss": 0.3392, + "step": 6922 + }, + { + "epoch": 0.31332880742249375, + "grad_norm": 0.6208383147855693, + "learning_rate": 8.039153330538423e-06, + "loss": 0.3515, + "step": 6923 + }, + { + "epoch": 0.31337406653088934, + "grad_norm": 0.6376714796607698, + "learning_rate": 8.038571309734682e-06, + "loss": 0.3985, + "step": 6924 + }, + { + "epoch": 0.31341932563928493, + "grad_norm": 0.6995289970919804, + "learning_rate": 8.037989223641375e-06, + "loss": 0.3348, + "step": 6925 + }, + { + "epoch": 0.31346458474768046, + "grad_norm": 0.6844964516030871, + "learning_rate": 8.03740707227101e-06, + "loss": 0.3474, + "step": 6926 + }, + { + "epoch": 0.31350984385607605, + "grad_norm": 0.6413951552444123, + "learning_rate": 8.036824855636096e-06, + "loss": 0.3519, + "step": 6927 + }, + { + "epoch": 0.3135551029644716, + "grad_norm": 0.4902546346685851, + "learning_rate": 8.036242573749142e-06, + "loss": 0.4797, + "step": 6928 + }, + { + "epoch": 0.3136003620728672, + "grad_norm": 0.6548546213508429, + "learning_rate": 8.035660226622661e-06, + "loss": 0.3783, + "step": 6929 + }, + { + "epoch": 0.3136456211812627, + "grad_norm": 0.6675440569763577, + "learning_rate": 8.035077814269165e-06, + "loss": 0.3656, + "step": 6930 + }, + { + "epoch": 0.3136908802896583, + "grad_norm": 0.6934360166198267, + "learning_rate": 8.034495336701169e-06, + "loss": 0.3737, + "step": 6931 + }, + { + "epoch": 0.31373613939805384, + "grad_norm": 0.6674105315224877, + "learning_rate": 8.033912793931187e-06, + "loss": 0.3313, + "step": 6932 + }, + { + "epoch": 0.3137813985064494, + "grad_norm": 0.6779911169606194, + "learning_rate": 8.033330185971737e-06, + "loss": 0.3777, + "step": 6933 + }, + { + "epoch": 0.31382665761484496, + "grad_norm": 0.3547796842907606, + "learning_rate": 8.032747512835338e-06, + "loss": 0.5, + "step": 6934 + }, + { + "epoch": 0.31387191672324055, + "grad_norm": 0.616740468431483, + "learning_rate": 8.03216477453451e-06, + "loss": 0.3504, + "step": 6935 + }, + { + "epoch": 0.31391717583163614, + "grad_norm": 0.636731133727304, + "learning_rate": 8.03158197108177e-06, + "loss": 0.4083, + "step": 6936 + }, + { + "epoch": 0.3139624349400317, + "grad_norm": 0.6420405558207202, + "learning_rate": 8.030999102489649e-06, + "loss": 0.376, + "step": 6937 + }, + { + "epoch": 0.31400769404842727, + "grad_norm": 0.6260731233892689, + "learning_rate": 8.030416168770663e-06, + "loss": 0.3452, + "step": 6938 + }, + { + "epoch": 0.3140529531568228, + "grad_norm": 0.6135576943507863, + "learning_rate": 8.029833169937343e-06, + "loss": 0.359, + "step": 6939 + }, + { + "epoch": 0.3140982122652184, + "grad_norm": 0.7184287456717344, + "learning_rate": 8.029250106002212e-06, + "loss": 0.3575, + "step": 6940 + }, + { + "epoch": 0.3141434713736139, + "grad_norm": 0.6092310549635934, + "learning_rate": 8.0286669769778e-06, + "loss": 0.3642, + "step": 6941 + }, + { + "epoch": 0.3141887304820095, + "grad_norm": 0.6550276525946266, + "learning_rate": 8.028083782876636e-06, + "loss": 0.3899, + "step": 6942 + }, + { + "epoch": 0.31423398959040505, + "grad_norm": 0.6539315006997505, + "learning_rate": 8.027500523711253e-06, + "loss": 0.3467, + "step": 6943 + }, + { + "epoch": 0.31427924869880064, + "grad_norm": 0.6699656463779345, + "learning_rate": 8.026917199494181e-06, + "loss": 0.3618, + "step": 6944 + }, + { + "epoch": 0.3143245078071962, + "grad_norm": 0.36221957851989256, + "learning_rate": 8.026333810237956e-06, + "loss": 0.4938, + "step": 6945 + }, + { + "epoch": 0.31436976691559176, + "grad_norm": 0.7061374105862012, + "learning_rate": 8.025750355955112e-06, + "loss": 0.3669, + "step": 6946 + }, + { + "epoch": 0.31441502602398735, + "grad_norm": 0.6008363417200151, + "learning_rate": 8.025166836658185e-06, + "loss": 0.3171, + "step": 6947 + }, + { + "epoch": 0.3144602851323829, + "grad_norm": 0.6153450850043569, + "learning_rate": 8.024583252359714e-06, + "loss": 0.3622, + "step": 6948 + }, + { + "epoch": 0.3145055442407785, + "grad_norm": 0.6484699123239448, + "learning_rate": 8.023999603072236e-06, + "loss": 0.3492, + "step": 6949 + }, + { + "epoch": 0.314550803349174, + "grad_norm": 0.6947514250003248, + "learning_rate": 8.023415888808297e-06, + "loss": 0.3531, + "step": 6950 + }, + { + "epoch": 0.3145960624575696, + "grad_norm": 0.38000138347421075, + "learning_rate": 8.022832109580437e-06, + "loss": 0.4838, + "step": 6951 + }, + { + "epoch": 0.31464132156596514, + "grad_norm": 0.6763144529594775, + "learning_rate": 8.022248265401196e-06, + "loss": 0.3564, + "step": 6952 + }, + { + "epoch": 0.3146865806743607, + "grad_norm": 0.6601351368993845, + "learning_rate": 8.021664356283123e-06, + "loss": 0.3684, + "step": 6953 + }, + { + "epoch": 0.31473183978275626, + "grad_norm": 0.7040570624830684, + "learning_rate": 8.021080382238763e-06, + "loss": 0.3753, + "step": 6954 + }, + { + "epoch": 0.31477709889115185, + "grad_norm": 0.2880395263967866, + "learning_rate": 8.020496343280664e-06, + "loss": 0.4805, + "step": 6955 + }, + { + "epoch": 0.3148223579995474, + "grad_norm": 0.3021069983303145, + "learning_rate": 8.019912239421376e-06, + "loss": 0.4913, + "step": 6956 + }, + { + "epoch": 0.314867617107943, + "grad_norm": 0.8756622334383337, + "learning_rate": 8.019328070673449e-06, + "loss": 0.3898, + "step": 6957 + }, + { + "epoch": 0.3149128762163385, + "grad_norm": 0.6654109828266398, + "learning_rate": 8.018743837049433e-06, + "loss": 0.3788, + "step": 6958 + }, + { + "epoch": 0.3149581353247341, + "grad_norm": 0.7690824808772482, + "learning_rate": 8.018159538561888e-06, + "loss": 0.3458, + "step": 6959 + }, + { + "epoch": 0.3150033944331297, + "grad_norm": 0.790969564695417, + "learning_rate": 8.01757517522336e-06, + "loss": 0.3943, + "step": 6960 + }, + { + "epoch": 0.3150486535415252, + "grad_norm": 0.6888825820126708, + "learning_rate": 8.01699074704641e-06, + "loss": 0.4222, + "step": 6961 + }, + { + "epoch": 0.3150939126499208, + "grad_norm": 0.9953360923545216, + "learning_rate": 8.016406254043595e-06, + "loss": 0.3557, + "step": 6962 + }, + { + "epoch": 0.31513917175831635, + "grad_norm": 0.7163182122643703, + "learning_rate": 8.015821696227475e-06, + "loss": 0.3606, + "step": 6963 + }, + { + "epoch": 0.31518443086671194, + "grad_norm": 0.6413564293083378, + "learning_rate": 8.015237073610607e-06, + "loss": 0.3787, + "step": 6964 + }, + { + "epoch": 0.3152296899751075, + "grad_norm": 0.6576486928547289, + "learning_rate": 8.014652386205557e-06, + "loss": 0.4123, + "step": 6965 + }, + { + "epoch": 0.31527494908350306, + "grad_norm": 0.6047092559852032, + "learning_rate": 8.014067634024884e-06, + "loss": 0.3701, + "step": 6966 + }, + { + "epoch": 0.3153202081918986, + "grad_norm": 0.6571742648341652, + "learning_rate": 8.013482817081157e-06, + "loss": 0.3856, + "step": 6967 + }, + { + "epoch": 0.3153654673002942, + "grad_norm": 0.6707972533911652, + "learning_rate": 8.012897935386938e-06, + "loss": 0.3592, + "step": 6968 + }, + { + "epoch": 0.3154107264086897, + "grad_norm": 0.6202903980423257, + "learning_rate": 8.012312988954795e-06, + "loss": 0.3869, + "step": 6969 + }, + { + "epoch": 0.3154559855170853, + "grad_norm": 0.4156628400616371, + "learning_rate": 8.0117279777973e-06, + "loss": 0.4932, + "step": 6970 + }, + { + "epoch": 0.3155012446254809, + "grad_norm": 0.660814934434108, + "learning_rate": 8.011142901927018e-06, + "loss": 0.3479, + "step": 6971 + }, + { + "epoch": 0.31554650373387644, + "grad_norm": 0.6830413012597334, + "learning_rate": 8.010557761356523e-06, + "loss": 0.4147, + "step": 6972 + }, + { + "epoch": 0.31559176284227203, + "grad_norm": 0.30869750524274253, + "learning_rate": 8.009972556098388e-06, + "loss": 0.4674, + "step": 6973 + }, + { + "epoch": 0.31563702195066756, + "grad_norm": 0.3013701412214667, + "learning_rate": 8.009387286165188e-06, + "loss": 0.501, + "step": 6974 + }, + { + "epoch": 0.31568228105906315, + "grad_norm": 0.7170094699174588, + "learning_rate": 8.008801951569501e-06, + "loss": 0.3552, + "step": 6975 + }, + { + "epoch": 0.3157275401674587, + "grad_norm": 0.7032893337212256, + "learning_rate": 8.008216552323896e-06, + "loss": 0.3914, + "step": 6976 + }, + { + "epoch": 0.3157727992758543, + "grad_norm": 0.6187411787630647, + "learning_rate": 8.007631088440959e-06, + "loss": 0.3883, + "step": 6977 + }, + { + "epoch": 0.3158180583842498, + "grad_norm": 0.6929869219292941, + "learning_rate": 8.007045559933265e-06, + "loss": 0.3521, + "step": 6978 + }, + { + "epoch": 0.3158633174926454, + "grad_norm": 0.41659022905236215, + "learning_rate": 8.006459966813399e-06, + "loss": 0.518, + "step": 6979 + }, + { + "epoch": 0.31590857660104094, + "grad_norm": 0.6864769287896622, + "learning_rate": 8.005874309093942e-06, + "loss": 0.3382, + "step": 6980 + }, + { + "epoch": 0.3159538357094365, + "grad_norm": 0.8409165007357672, + "learning_rate": 8.005288586787477e-06, + "loss": 0.3954, + "step": 6981 + }, + { + "epoch": 0.3159990948178321, + "grad_norm": 0.6951795654243249, + "learning_rate": 8.00470279990659e-06, + "loss": 0.4228, + "step": 6982 + }, + { + "epoch": 0.31604435392622765, + "grad_norm": 0.3213139159543221, + "learning_rate": 8.00411694846387e-06, + "loss": 0.4808, + "step": 6983 + }, + { + "epoch": 0.31608961303462324, + "grad_norm": 0.7266194383038589, + "learning_rate": 8.003531032471901e-06, + "loss": 0.3913, + "step": 6984 + }, + { + "epoch": 0.3161348721430188, + "grad_norm": 0.9611211426506843, + "learning_rate": 8.002945051943276e-06, + "loss": 0.3398, + "step": 6985 + }, + { + "epoch": 0.31618013125141436, + "grad_norm": 0.6052295188548436, + "learning_rate": 8.002359006890585e-06, + "loss": 0.3788, + "step": 6986 + }, + { + "epoch": 0.3162253903598099, + "grad_norm": 0.7664924172248415, + "learning_rate": 8.001772897326418e-06, + "loss": 0.3707, + "step": 6987 + }, + { + "epoch": 0.3162706494682055, + "grad_norm": 0.8040540740269324, + "learning_rate": 8.001186723263374e-06, + "loss": 0.3593, + "step": 6988 + }, + { + "epoch": 0.316315908576601, + "grad_norm": 0.6847053006400641, + "learning_rate": 8.000600484714043e-06, + "loss": 0.4082, + "step": 6989 + }, + { + "epoch": 0.3163611676849966, + "grad_norm": 0.692378851693665, + "learning_rate": 8.000014181691023e-06, + "loss": 0.3716, + "step": 6990 + }, + { + "epoch": 0.31640642679339215, + "grad_norm": 0.6925287113795098, + "learning_rate": 7.999427814206911e-06, + "loss": 0.3648, + "step": 6991 + }, + { + "epoch": 0.31645168590178774, + "grad_norm": 0.6704389383276854, + "learning_rate": 7.99884138227431e-06, + "loss": 0.3365, + "step": 6992 + }, + { + "epoch": 0.3164969450101833, + "grad_norm": 0.6874645585931815, + "learning_rate": 7.998254885905817e-06, + "loss": 0.3452, + "step": 6993 + }, + { + "epoch": 0.31654220411857886, + "grad_norm": 0.6286062996410474, + "learning_rate": 7.997668325114033e-06, + "loss": 0.3476, + "step": 6994 + }, + { + "epoch": 0.31658746322697445, + "grad_norm": 0.6251766923475587, + "learning_rate": 7.997081699911566e-06, + "loss": 0.36, + "step": 6995 + }, + { + "epoch": 0.31663272233537, + "grad_norm": 0.4019324945117982, + "learning_rate": 7.996495010311017e-06, + "loss": 0.4625, + "step": 6996 + }, + { + "epoch": 0.3166779814437656, + "grad_norm": 0.7655651498493894, + "learning_rate": 7.995908256324992e-06, + "loss": 0.3416, + "step": 6997 + }, + { + "epoch": 0.3167232405521611, + "grad_norm": 0.6785538399318121, + "learning_rate": 7.995321437966102e-06, + "loss": 0.4238, + "step": 6998 + }, + { + "epoch": 0.3167684996605567, + "grad_norm": 0.6357356159438607, + "learning_rate": 7.99473455524695e-06, + "loss": 0.39, + "step": 6999 + }, + { + "epoch": 0.31681375876895224, + "grad_norm": 0.33904987132475095, + "learning_rate": 7.994147608180153e-06, + "loss": 0.4999, + "step": 7000 + }, + { + "epoch": 0.3168590178773478, + "grad_norm": 0.7621136973532631, + "learning_rate": 7.993560596778321e-06, + "loss": 0.3879, + "step": 7001 + }, + { + "epoch": 0.31690427698574336, + "grad_norm": 0.7132686937912542, + "learning_rate": 7.992973521054063e-06, + "loss": 0.3871, + "step": 7002 + }, + { + "epoch": 0.31694953609413895, + "grad_norm": 0.6690670498675246, + "learning_rate": 7.992386381019999e-06, + "loss": 0.3945, + "step": 7003 + }, + { + "epoch": 0.3169947952025345, + "grad_norm": 0.7285347897571842, + "learning_rate": 7.99179917668874e-06, + "loss": 0.3553, + "step": 7004 + }, + { + "epoch": 0.3170400543109301, + "grad_norm": 0.6793097106943258, + "learning_rate": 7.991211908072905e-06, + "loss": 0.3883, + "step": 7005 + }, + { + "epoch": 0.31708531341932567, + "grad_norm": 0.6018551606250381, + "learning_rate": 7.990624575185116e-06, + "loss": 0.3684, + "step": 7006 + }, + { + "epoch": 0.3171305725277212, + "grad_norm": 0.6756261068762829, + "learning_rate": 7.990037178037987e-06, + "loss": 0.4062, + "step": 7007 + }, + { + "epoch": 0.3171758316361168, + "grad_norm": 0.6133815435263604, + "learning_rate": 7.989449716644142e-06, + "loss": 0.3658, + "step": 7008 + }, + { + "epoch": 0.3172210907445123, + "grad_norm": 0.619225307675842, + "learning_rate": 7.988862191016204e-06, + "loss": 0.362, + "step": 7009 + }, + { + "epoch": 0.3172663498529079, + "grad_norm": 0.6478285255297425, + "learning_rate": 7.9882746011668e-06, + "loss": 0.3724, + "step": 7010 + }, + { + "epoch": 0.31731160896130345, + "grad_norm": 0.6368209266259487, + "learning_rate": 7.98768694710855e-06, + "loss": 0.3836, + "step": 7011 + }, + { + "epoch": 0.31735686806969904, + "grad_norm": 0.4413932798745625, + "learning_rate": 7.987099228854083e-06, + "loss": 0.4922, + "step": 7012 + }, + { + "epoch": 0.3174021271780946, + "grad_norm": 0.6229862211243823, + "learning_rate": 7.986511446416029e-06, + "loss": 0.3324, + "step": 7013 + }, + { + "epoch": 0.31744738628649016, + "grad_norm": 0.6977332581462345, + "learning_rate": 7.985923599807017e-06, + "loss": 0.4385, + "step": 7014 + }, + { + "epoch": 0.3174926453948857, + "grad_norm": 0.7046571732350977, + "learning_rate": 7.985335689039675e-06, + "loss": 0.3629, + "step": 7015 + }, + { + "epoch": 0.3175379045032813, + "grad_norm": 0.6477763537029326, + "learning_rate": 7.984747714126639e-06, + "loss": 0.3785, + "step": 7016 + }, + { + "epoch": 0.3175831636116769, + "grad_norm": 0.7144266335689072, + "learning_rate": 7.984159675080543e-06, + "loss": 0.3906, + "step": 7017 + }, + { + "epoch": 0.3176284227200724, + "grad_norm": 0.6258472946470179, + "learning_rate": 7.98357157191402e-06, + "loss": 0.3575, + "step": 7018 + }, + { + "epoch": 0.317673681828468, + "grad_norm": 0.6200884932537759, + "learning_rate": 7.982983404639707e-06, + "loss": 0.3471, + "step": 7019 + }, + { + "epoch": 0.31771894093686354, + "grad_norm": 0.6650517614200443, + "learning_rate": 7.98239517327024e-06, + "loss": 0.3554, + "step": 7020 + }, + { + "epoch": 0.3177642000452591, + "grad_norm": 0.6530860017670242, + "learning_rate": 7.981806877818265e-06, + "loss": 0.3711, + "step": 7021 + }, + { + "epoch": 0.31780945915365466, + "grad_norm": 0.8740355444512873, + "learning_rate": 7.981218518296414e-06, + "loss": 0.3659, + "step": 7022 + }, + { + "epoch": 0.31785471826205025, + "grad_norm": 0.6671952504181827, + "learning_rate": 7.980630094717337e-06, + "loss": 0.3792, + "step": 7023 + }, + { + "epoch": 0.3178999773704458, + "grad_norm": 0.6477284106255007, + "learning_rate": 7.98004160709367e-06, + "loss": 0.3446, + "step": 7024 + }, + { + "epoch": 0.3179452364788414, + "grad_norm": 0.6192963333013082, + "learning_rate": 7.979453055438063e-06, + "loss": 0.3551, + "step": 7025 + }, + { + "epoch": 0.3179904955872369, + "grad_norm": 0.6196762931669262, + "learning_rate": 7.97886443976316e-06, + "loss": 0.328, + "step": 7026 + }, + { + "epoch": 0.3180357546956325, + "grad_norm": 0.48428443466322474, + "learning_rate": 7.978275760081611e-06, + "loss": 0.5026, + "step": 7027 + }, + { + "epoch": 0.31808101380402803, + "grad_norm": 0.661978297863087, + "learning_rate": 7.97768701640606e-06, + "loss": 0.4117, + "step": 7028 + }, + { + "epoch": 0.3181262729124236, + "grad_norm": 1.7101319009447666, + "learning_rate": 7.977098208749162e-06, + "loss": 0.3473, + "step": 7029 + }, + { + "epoch": 0.3181715320208192, + "grad_norm": 0.6697938911628487, + "learning_rate": 7.976509337123567e-06, + "loss": 0.4033, + "step": 7030 + }, + { + "epoch": 0.31821679112921475, + "grad_norm": 0.6372431024914199, + "learning_rate": 7.975920401541927e-06, + "loss": 0.3733, + "step": 7031 + }, + { + "epoch": 0.31826205023761034, + "grad_norm": 0.6087178277991327, + "learning_rate": 7.975331402016898e-06, + "loss": 0.363, + "step": 7032 + }, + { + "epoch": 0.3183073093460059, + "grad_norm": 0.6942216985962842, + "learning_rate": 7.974742338561134e-06, + "loss": 0.3627, + "step": 7033 + }, + { + "epoch": 0.31835256845440146, + "grad_norm": 0.391526321631339, + "learning_rate": 7.974153211187296e-06, + "loss": 0.5146, + "step": 7034 + }, + { + "epoch": 0.318397827562797, + "grad_norm": 0.3479777571847785, + "learning_rate": 7.973564019908038e-06, + "loss": 0.4979, + "step": 7035 + }, + { + "epoch": 0.3184430866711926, + "grad_norm": 1.4253779934524895, + "learning_rate": 7.972974764736023e-06, + "loss": 0.3807, + "step": 7036 + }, + { + "epoch": 0.3184883457795881, + "grad_norm": 0.6227052935548067, + "learning_rate": 7.97238544568391e-06, + "loss": 0.3789, + "step": 7037 + }, + { + "epoch": 0.3185336048879837, + "grad_norm": 0.6147570914994451, + "learning_rate": 7.971796062764363e-06, + "loss": 0.3501, + "step": 7038 + }, + { + "epoch": 0.31857886399637925, + "grad_norm": 0.6806938315694051, + "learning_rate": 7.971206615990046e-06, + "loss": 0.3667, + "step": 7039 + }, + { + "epoch": 0.31862412310477484, + "grad_norm": 0.6507251787693218, + "learning_rate": 7.970617105373624e-06, + "loss": 0.441, + "step": 7040 + }, + { + "epoch": 0.3186693822131704, + "grad_norm": 0.6400026468720813, + "learning_rate": 7.970027530927765e-06, + "loss": 0.3509, + "step": 7041 + }, + { + "epoch": 0.31871464132156596, + "grad_norm": 0.9682135791562172, + "learning_rate": 7.969437892665134e-06, + "loss": 0.335, + "step": 7042 + }, + { + "epoch": 0.31875990042996155, + "grad_norm": 0.6946780013933663, + "learning_rate": 7.968848190598404e-06, + "loss": 0.4637, + "step": 7043 + }, + { + "epoch": 0.3188051595383571, + "grad_norm": 0.6146416446661337, + "learning_rate": 7.968258424740245e-06, + "loss": 0.3855, + "step": 7044 + }, + { + "epoch": 0.3188504186467527, + "grad_norm": 0.6630762453867052, + "learning_rate": 7.967668595103328e-06, + "loss": 0.3648, + "step": 7045 + }, + { + "epoch": 0.3188956777551482, + "grad_norm": 0.6409902766059519, + "learning_rate": 7.967078701700329e-06, + "loss": 0.3675, + "step": 7046 + }, + { + "epoch": 0.3189409368635438, + "grad_norm": 0.3593074982929306, + "learning_rate": 7.966488744543919e-06, + "loss": 0.4827, + "step": 7047 + }, + { + "epoch": 0.31898619597193933, + "grad_norm": 0.40176551814356487, + "learning_rate": 7.965898723646777e-06, + "loss": 0.5214, + "step": 7048 + }, + { + "epoch": 0.3190314550803349, + "grad_norm": 0.45032727794835453, + "learning_rate": 7.965308639021581e-06, + "loss": 0.4759, + "step": 7049 + }, + { + "epoch": 0.31907671418873046, + "grad_norm": 0.7198669844930446, + "learning_rate": 7.964718490681009e-06, + "loss": 0.3569, + "step": 7050 + }, + { + "epoch": 0.31912197329712605, + "grad_norm": 0.6546251899527719, + "learning_rate": 7.964128278637745e-06, + "loss": 0.3607, + "step": 7051 + }, + { + "epoch": 0.3191672324055216, + "grad_norm": 0.8092815635678249, + "learning_rate": 7.963538002904464e-06, + "loss": 0.3728, + "step": 7052 + }, + { + "epoch": 0.3192124915139172, + "grad_norm": 1.5350541693416326, + "learning_rate": 7.962947663493855e-06, + "loss": 0.3441, + "step": 7053 + }, + { + "epoch": 0.31925775062231276, + "grad_norm": 0.6106851124656958, + "learning_rate": 7.9623572604186e-06, + "loss": 0.3581, + "step": 7054 + }, + { + "epoch": 0.3193030097307083, + "grad_norm": 0.6489555394182779, + "learning_rate": 7.961766793691387e-06, + "loss": 0.3446, + "step": 7055 + }, + { + "epoch": 0.3193482688391039, + "grad_norm": 0.6293509170922542, + "learning_rate": 7.961176263324902e-06, + "loss": 0.3544, + "step": 7056 + }, + { + "epoch": 0.3193935279474994, + "grad_norm": 0.6794823179154822, + "learning_rate": 7.960585669331832e-06, + "loss": 0.376, + "step": 7057 + }, + { + "epoch": 0.319438787055895, + "grad_norm": 0.628733216417228, + "learning_rate": 7.959995011724869e-06, + "loss": 0.3947, + "step": 7058 + }, + { + "epoch": 0.31948404616429055, + "grad_norm": 0.7145812322342616, + "learning_rate": 7.959404290516705e-06, + "loss": 0.4822, + "step": 7059 + }, + { + "epoch": 0.31952930527268614, + "grad_norm": 0.6517928178132028, + "learning_rate": 7.958813505720031e-06, + "loss": 0.408, + "step": 7060 + }, + { + "epoch": 0.31957456438108167, + "grad_norm": 0.643493560485578, + "learning_rate": 7.958222657347543e-06, + "loss": 0.3909, + "step": 7061 + }, + { + "epoch": 0.31961982348947726, + "grad_norm": 0.6968346039393635, + "learning_rate": 7.957631745411936e-06, + "loss": 0.3946, + "step": 7062 + }, + { + "epoch": 0.3196650825978728, + "grad_norm": 0.6186660710362943, + "learning_rate": 7.957040769925906e-06, + "loss": 0.3484, + "step": 7063 + }, + { + "epoch": 0.3197103417062684, + "grad_norm": 0.6076213582496474, + "learning_rate": 7.95644973090215e-06, + "loss": 0.3651, + "step": 7064 + }, + { + "epoch": 0.319755600814664, + "grad_norm": 0.5507873432743083, + "learning_rate": 7.955858628353372e-06, + "loss": 0.4934, + "step": 7065 + }, + { + "epoch": 0.3198008599230595, + "grad_norm": 0.6447783016497618, + "learning_rate": 7.95526746229227e-06, + "loss": 0.3774, + "step": 7066 + }, + { + "epoch": 0.3198461190314551, + "grad_norm": 0.6033695502218002, + "learning_rate": 7.954676232731545e-06, + "loss": 0.3423, + "step": 7067 + }, + { + "epoch": 0.31989137813985064, + "grad_norm": 0.7129234065266957, + "learning_rate": 7.954084939683901e-06, + "loss": 0.3626, + "step": 7068 + }, + { + "epoch": 0.3199366372482462, + "grad_norm": 0.6305288052748258, + "learning_rate": 7.953493583162047e-06, + "loss": 0.346, + "step": 7069 + }, + { + "epoch": 0.31998189635664176, + "grad_norm": 0.3498155729821522, + "learning_rate": 7.952902163178687e-06, + "loss": 0.4731, + "step": 7070 + }, + { + "epoch": 0.32002715546503735, + "grad_norm": 0.6789249555155012, + "learning_rate": 7.952310679746528e-06, + "loss": 0.3761, + "step": 7071 + }, + { + "epoch": 0.3200724145734329, + "grad_norm": 0.6980391317468145, + "learning_rate": 7.951719132878279e-06, + "loss": 0.3881, + "step": 7072 + }, + { + "epoch": 0.3201176736818285, + "grad_norm": 0.9088700610711827, + "learning_rate": 7.951127522586653e-06, + "loss": 0.3882, + "step": 7073 + }, + { + "epoch": 0.320162932790224, + "grad_norm": 0.6659807627108673, + "learning_rate": 7.95053584888436e-06, + "loss": 0.3502, + "step": 7074 + }, + { + "epoch": 0.3202081918986196, + "grad_norm": 0.755708696082257, + "learning_rate": 7.94994411178411e-06, + "loss": 0.4077, + "step": 7075 + }, + { + "epoch": 0.3202534510070152, + "grad_norm": 0.6560835029799718, + "learning_rate": 7.949352311298626e-06, + "loss": 0.3779, + "step": 7076 + }, + { + "epoch": 0.3202987101154107, + "grad_norm": 0.6869943892018556, + "learning_rate": 7.948760447440617e-06, + "loss": 0.362, + "step": 7077 + }, + { + "epoch": 0.3203439692238063, + "grad_norm": 0.3807530113056217, + "learning_rate": 7.948168520222802e-06, + "loss": 0.4803, + "step": 7078 + }, + { + "epoch": 0.32038922833220185, + "grad_norm": 0.6263532501824917, + "learning_rate": 7.9475765296579e-06, + "loss": 0.3371, + "step": 7079 + }, + { + "epoch": 0.32043448744059744, + "grad_norm": 0.6798061248166258, + "learning_rate": 7.946984475758633e-06, + "loss": 0.3876, + "step": 7080 + }, + { + "epoch": 0.32047974654899297, + "grad_norm": 0.6812449344143824, + "learning_rate": 7.946392358537719e-06, + "loss": 0.372, + "step": 7081 + }, + { + "epoch": 0.32052500565738856, + "grad_norm": 0.634619025478144, + "learning_rate": 7.945800178007883e-06, + "loss": 0.3567, + "step": 7082 + }, + { + "epoch": 0.3205702647657841, + "grad_norm": 0.6872688162762708, + "learning_rate": 7.945207934181849e-06, + "loss": 0.3509, + "step": 7083 + }, + { + "epoch": 0.3206155238741797, + "grad_norm": 0.6278473270352956, + "learning_rate": 7.944615627072341e-06, + "loss": 0.3606, + "step": 7084 + }, + { + "epoch": 0.3206607829825752, + "grad_norm": 0.6898920436870292, + "learning_rate": 7.944023256692086e-06, + "loss": 0.3734, + "step": 7085 + }, + { + "epoch": 0.3207060420909708, + "grad_norm": 0.6505918738584352, + "learning_rate": 7.943430823053815e-06, + "loss": 0.3845, + "step": 7086 + }, + { + "epoch": 0.32075130119936635, + "grad_norm": 0.7038738512820012, + "learning_rate": 7.942838326170255e-06, + "loss": 0.3345, + "step": 7087 + }, + { + "epoch": 0.32079656030776194, + "grad_norm": 0.6051699178090763, + "learning_rate": 7.942245766054137e-06, + "loss": 0.3571, + "step": 7088 + }, + { + "epoch": 0.3208418194161575, + "grad_norm": 0.4150689109819772, + "learning_rate": 7.941653142718194e-06, + "loss": 0.4855, + "step": 7089 + }, + { + "epoch": 0.32088707852455306, + "grad_norm": 0.6268038080046793, + "learning_rate": 7.94106045617516e-06, + "loss": 0.3285, + "step": 7090 + }, + { + "epoch": 0.32093233763294865, + "grad_norm": 0.6397684099671085, + "learning_rate": 7.94046770643777e-06, + "loss": 0.3662, + "step": 7091 + }, + { + "epoch": 0.3209775967413442, + "grad_norm": 0.6908477440660391, + "learning_rate": 7.93987489351876e-06, + "loss": 0.3667, + "step": 7092 + }, + { + "epoch": 0.3210228558497398, + "grad_norm": 0.6745483725451942, + "learning_rate": 7.939282017430867e-06, + "loss": 0.3978, + "step": 7093 + }, + { + "epoch": 0.3210681149581353, + "grad_norm": 0.6529001360028059, + "learning_rate": 7.93868907818683e-06, + "loss": 0.3489, + "step": 7094 + }, + { + "epoch": 0.3211133740665309, + "grad_norm": 0.6145650846844853, + "learning_rate": 7.938096075799391e-06, + "loss": 0.4018, + "step": 7095 + }, + { + "epoch": 0.32115863317492643, + "grad_norm": 0.4122410958350045, + "learning_rate": 7.93750301028129e-06, + "loss": 0.487, + "step": 7096 + }, + { + "epoch": 0.321203892283322, + "grad_norm": 0.6595309437944411, + "learning_rate": 7.936909881645275e-06, + "loss": 0.3653, + "step": 7097 + }, + { + "epoch": 0.32124915139171756, + "grad_norm": 0.6300548129151529, + "learning_rate": 7.936316689904083e-06, + "loss": 0.3788, + "step": 7098 + }, + { + "epoch": 0.32129441050011315, + "grad_norm": 0.6692355502644558, + "learning_rate": 7.935723435070464e-06, + "loss": 0.3666, + "step": 7099 + }, + { + "epoch": 0.32133966960850874, + "grad_norm": 0.6494119925557855, + "learning_rate": 7.935130117157166e-06, + "loss": 0.3704, + "step": 7100 + }, + { + "epoch": 0.3213849287169043, + "grad_norm": 0.34216565367593454, + "learning_rate": 7.934536736176934e-06, + "loss": 0.5311, + "step": 7101 + }, + { + "epoch": 0.32143018782529986, + "grad_norm": 1.0052287214302138, + "learning_rate": 7.933943292142524e-06, + "loss": 0.3802, + "step": 7102 + }, + { + "epoch": 0.3214754469336954, + "grad_norm": 0.6499179876977065, + "learning_rate": 7.93334978506668e-06, + "loss": 0.344, + "step": 7103 + }, + { + "epoch": 0.321520706042091, + "grad_norm": 0.6181493818110998, + "learning_rate": 7.93275621496216e-06, + "loss": 0.3724, + "step": 7104 + }, + { + "epoch": 0.3215659651504865, + "grad_norm": 0.6791885332119639, + "learning_rate": 7.932162581841715e-06, + "loss": 0.348, + "step": 7105 + }, + { + "epoch": 0.3216112242588821, + "grad_norm": 0.4324667087116202, + "learning_rate": 7.931568885718104e-06, + "loss": 0.5062, + "step": 7106 + }, + { + "epoch": 0.32165648336727765, + "grad_norm": 0.5369739435044163, + "learning_rate": 7.930975126604079e-06, + "loss": 0.512, + "step": 7107 + }, + { + "epoch": 0.32170174247567324, + "grad_norm": 0.30855384882874476, + "learning_rate": 7.930381304512401e-06, + "loss": 0.5037, + "step": 7108 + }, + { + "epoch": 0.32174700158406877, + "grad_norm": 0.6766398168097438, + "learning_rate": 7.92978741945583e-06, + "loss": 0.442, + "step": 7109 + }, + { + "epoch": 0.32179226069246436, + "grad_norm": 0.6718820626808718, + "learning_rate": 7.929193471447123e-06, + "loss": 0.3785, + "step": 7110 + }, + { + "epoch": 0.32183751980085995, + "grad_norm": 0.62498935257029, + "learning_rate": 7.928599460499046e-06, + "loss": 0.3497, + "step": 7111 + }, + { + "epoch": 0.3218827789092555, + "grad_norm": 0.7246586779569112, + "learning_rate": 7.92800538662436e-06, + "loss": 0.3715, + "step": 7112 + }, + { + "epoch": 0.3219280380176511, + "grad_norm": 0.6591067393074789, + "learning_rate": 7.927411249835832e-06, + "loss": 0.3734, + "step": 7113 + }, + { + "epoch": 0.3219732971260466, + "grad_norm": 0.7603779807342957, + "learning_rate": 7.926817050146227e-06, + "loss": 0.3286, + "step": 7114 + }, + { + "epoch": 0.3220185562344422, + "grad_norm": 0.657032741453226, + "learning_rate": 7.926222787568314e-06, + "loss": 0.3599, + "step": 7115 + }, + { + "epoch": 0.32206381534283773, + "grad_norm": 0.8247053846685484, + "learning_rate": 7.925628462114858e-06, + "loss": 0.5117, + "step": 7116 + }, + { + "epoch": 0.3221090744512333, + "grad_norm": 0.8964379263543338, + "learning_rate": 7.925034073798632e-06, + "loss": 0.3377, + "step": 7117 + }, + { + "epoch": 0.32215433355962886, + "grad_norm": 0.6875665098075191, + "learning_rate": 7.92443962263241e-06, + "loss": 0.3311, + "step": 7118 + }, + { + "epoch": 0.32219959266802445, + "grad_norm": 0.9490803870507291, + "learning_rate": 7.92384510862896e-06, + "loss": 0.3867, + "step": 7119 + }, + { + "epoch": 0.32224485177642, + "grad_norm": 0.7349779813006002, + "learning_rate": 7.92325053180106e-06, + "loss": 0.3784, + "step": 7120 + }, + { + "epoch": 0.3222901108848156, + "grad_norm": 0.7006969086062932, + "learning_rate": 7.922655892161482e-06, + "loss": 0.3666, + "step": 7121 + }, + { + "epoch": 0.3223353699932111, + "grad_norm": 0.6555049359630561, + "learning_rate": 7.922061189723007e-06, + "loss": 0.3581, + "step": 7122 + }, + { + "epoch": 0.3223806291016067, + "grad_norm": 0.6740198833872045, + "learning_rate": 7.921466424498409e-06, + "loss": 0.3809, + "step": 7123 + }, + { + "epoch": 0.3224258882100023, + "grad_norm": 0.6650014243978319, + "learning_rate": 7.920871596500473e-06, + "loss": 0.3679, + "step": 7124 + }, + { + "epoch": 0.3224711473183978, + "grad_norm": 0.6504397000732004, + "learning_rate": 7.920276705741975e-06, + "loss": 0.3366, + "step": 7125 + }, + { + "epoch": 0.3225164064267934, + "grad_norm": 0.49837067139854274, + "learning_rate": 7.919681752235701e-06, + "loss": 0.4496, + "step": 7126 + }, + { + "epoch": 0.32256166553518895, + "grad_norm": 0.6165258872006424, + "learning_rate": 7.919086735994433e-06, + "loss": 0.3166, + "step": 7127 + }, + { + "epoch": 0.32260692464358454, + "grad_norm": 0.6527582636116545, + "learning_rate": 7.918491657030956e-06, + "loss": 0.3648, + "step": 7128 + }, + { + "epoch": 0.32265218375198007, + "grad_norm": 0.6597269628107116, + "learning_rate": 7.917896515358057e-06, + "loss": 0.3332, + "step": 7129 + }, + { + "epoch": 0.32269744286037566, + "grad_norm": 0.6542637026119181, + "learning_rate": 7.917301310988525e-06, + "loss": 0.3536, + "step": 7130 + }, + { + "epoch": 0.3227427019687712, + "grad_norm": 0.6347125789718905, + "learning_rate": 7.916706043935145e-06, + "loss": 0.365, + "step": 7131 + }, + { + "epoch": 0.3227879610771668, + "grad_norm": 0.7115640181010907, + "learning_rate": 7.916110714210711e-06, + "loss": 0.3735, + "step": 7132 + }, + { + "epoch": 0.3228332201855623, + "grad_norm": 0.3234316538967396, + "learning_rate": 7.915515321828014e-06, + "loss": 0.4915, + "step": 7133 + }, + { + "epoch": 0.3228784792939579, + "grad_norm": 0.687035292169822, + "learning_rate": 7.914919866799847e-06, + "loss": 0.3634, + "step": 7134 + }, + { + "epoch": 0.3229237384023535, + "grad_norm": 0.3137342255904164, + "learning_rate": 7.914324349139006e-06, + "loss": 0.4805, + "step": 7135 + }, + { + "epoch": 0.32296899751074903, + "grad_norm": 0.27487283487033065, + "learning_rate": 7.913728768858283e-06, + "loss": 0.4696, + "step": 7136 + }, + { + "epoch": 0.3230142566191446, + "grad_norm": 0.6152486208784101, + "learning_rate": 7.91313312597048e-06, + "loss": 0.3231, + "step": 7137 + }, + { + "epoch": 0.32305951572754016, + "grad_norm": 0.6482665421392981, + "learning_rate": 7.91253742048839e-06, + "loss": 0.3707, + "step": 7138 + }, + { + "epoch": 0.32310477483593575, + "grad_norm": 0.6755287950541693, + "learning_rate": 7.911941652424819e-06, + "loss": 0.3887, + "step": 7139 + }, + { + "epoch": 0.3231500339443313, + "grad_norm": 0.7476984596163645, + "learning_rate": 7.911345821792565e-06, + "loss": 0.3947, + "step": 7140 + }, + { + "epoch": 0.3231952930527269, + "grad_norm": 0.7050035119659278, + "learning_rate": 7.910749928604429e-06, + "loss": 0.398, + "step": 7141 + }, + { + "epoch": 0.3232405521611224, + "grad_norm": 0.6985845003611664, + "learning_rate": 7.910153972873218e-06, + "loss": 0.3849, + "step": 7142 + }, + { + "epoch": 0.323285811269518, + "grad_norm": 0.7188017184074661, + "learning_rate": 7.909557954611736e-06, + "loss": 0.3831, + "step": 7143 + }, + { + "epoch": 0.32333107037791353, + "grad_norm": 0.6187212364272202, + "learning_rate": 7.908961873832788e-06, + "loss": 0.3489, + "step": 7144 + }, + { + "epoch": 0.3233763294863091, + "grad_norm": 0.5689497713714161, + "learning_rate": 7.908365730549183e-06, + "loss": 0.4918, + "step": 7145 + }, + { + "epoch": 0.3234215885947047, + "grad_norm": 0.6522720842697802, + "learning_rate": 7.907769524773734e-06, + "loss": 0.352, + "step": 7146 + }, + { + "epoch": 0.32346684770310025, + "grad_norm": 0.68249189867944, + "learning_rate": 7.907173256519246e-06, + "loss": 0.3962, + "step": 7147 + }, + { + "epoch": 0.32351210681149584, + "grad_norm": 0.6918661181189343, + "learning_rate": 7.906576925798535e-06, + "loss": 0.3562, + "step": 7148 + }, + { + "epoch": 0.32355736591989137, + "grad_norm": 0.6470440870797444, + "learning_rate": 7.905980532624411e-06, + "loss": 0.3726, + "step": 7149 + }, + { + "epoch": 0.32360262502828696, + "grad_norm": 0.6204544691347952, + "learning_rate": 7.905384077009693e-06, + "loss": 0.3307, + "step": 7150 + }, + { + "epoch": 0.3236478841366825, + "grad_norm": 0.654141371005639, + "learning_rate": 7.904787558967193e-06, + "loss": 0.3837, + "step": 7151 + }, + { + "epoch": 0.3236931432450781, + "grad_norm": 0.6214250147231662, + "learning_rate": 7.904190978509729e-06, + "loss": 0.364, + "step": 7152 + }, + { + "epoch": 0.3237384023534736, + "grad_norm": 0.6605746453137314, + "learning_rate": 7.90359433565012e-06, + "loss": 0.3793, + "step": 7153 + }, + { + "epoch": 0.3237836614618692, + "grad_norm": 0.6374246098103137, + "learning_rate": 7.902997630401188e-06, + "loss": 0.3635, + "step": 7154 + }, + { + "epoch": 0.32382892057026474, + "grad_norm": 0.7547998705704276, + "learning_rate": 7.902400862775752e-06, + "loss": 0.384, + "step": 7155 + }, + { + "epoch": 0.32387417967866033, + "grad_norm": 0.6713274567187839, + "learning_rate": 7.901804032786637e-06, + "loss": 0.3638, + "step": 7156 + }, + { + "epoch": 0.32391943878705587, + "grad_norm": 0.6410889902728407, + "learning_rate": 7.901207140446662e-06, + "loss": 0.3773, + "step": 7157 + }, + { + "epoch": 0.32396469789545146, + "grad_norm": 0.6597367749925079, + "learning_rate": 7.90061018576866e-06, + "loss": 0.3783, + "step": 7158 + }, + { + "epoch": 0.32400995700384705, + "grad_norm": 0.7166227954783297, + "learning_rate": 7.900013168765453e-06, + "loss": 0.3571, + "step": 7159 + }, + { + "epoch": 0.3240552161122426, + "grad_norm": 0.8334546986425982, + "learning_rate": 7.899416089449867e-06, + "loss": 0.3568, + "step": 7160 + }, + { + "epoch": 0.3241004752206382, + "grad_norm": 0.6549313556490581, + "learning_rate": 7.898818947834737e-06, + "loss": 0.3913, + "step": 7161 + }, + { + "epoch": 0.3241457343290337, + "grad_norm": 0.6616731785929209, + "learning_rate": 7.898221743932887e-06, + "loss": 0.3639, + "step": 7162 + }, + { + "epoch": 0.3241909934374293, + "grad_norm": 0.6307360694773526, + "learning_rate": 7.897624477757156e-06, + "loss": 0.3985, + "step": 7163 + }, + { + "epoch": 0.32423625254582483, + "grad_norm": 0.6211038754157479, + "learning_rate": 7.897027149320375e-06, + "loss": 0.3666, + "step": 7164 + }, + { + "epoch": 0.3242815116542204, + "grad_norm": 0.6638297280567542, + "learning_rate": 7.896429758635375e-06, + "loss": 0.365, + "step": 7165 + }, + { + "epoch": 0.32432677076261596, + "grad_norm": 0.6532817268016007, + "learning_rate": 7.895832305715e-06, + "loss": 0.3813, + "step": 7166 + }, + { + "epoch": 0.32437202987101155, + "grad_norm": 0.6701885349272433, + "learning_rate": 7.895234790572077e-06, + "loss": 0.3704, + "step": 7167 + }, + { + "epoch": 0.3244172889794071, + "grad_norm": 0.6442400135179758, + "learning_rate": 7.894637213219454e-06, + "loss": 0.378, + "step": 7168 + }, + { + "epoch": 0.32446254808780267, + "grad_norm": 0.667231431672473, + "learning_rate": 7.894039573669968e-06, + "loss": 0.3926, + "step": 7169 + }, + { + "epoch": 0.32450780719619826, + "grad_norm": 0.6344096908061077, + "learning_rate": 7.893441871936456e-06, + "loss": 0.3954, + "step": 7170 + }, + { + "epoch": 0.3245530663045938, + "grad_norm": 0.6725657035402189, + "learning_rate": 7.892844108031768e-06, + "loss": 0.3807, + "step": 7171 + }, + { + "epoch": 0.3245983254129894, + "grad_norm": 0.7062785122126237, + "learning_rate": 7.892246281968745e-06, + "loss": 0.4932, + "step": 7172 + }, + { + "epoch": 0.3246435845213849, + "grad_norm": 0.6774231556142725, + "learning_rate": 7.891648393760232e-06, + "loss": 0.3447, + "step": 7173 + }, + { + "epoch": 0.3246888436297805, + "grad_norm": 0.8538387827323394, + "learning_rate": 7.891050443419074e-06, + "loss": 0.3446, + "step": 7174 + }, + { + "epoch": 0.32473410273817604, + "grad_norm": 0.6583879453482827, + "learning_rate": 7.890452430958123e-06, + "loss": 0.4065, + "step": 7175 + }, + { + "epoch": 0.32477936184657163, + "grad_norm": 0.681043978728226, + "learning_rate": 7.889854356390225e-06, + "loss": 0.4438, + "step": 7176 + }, + { + "epoch": 0.32482462095496717, + "grad_norm": 0.8220002508654024, + "learning_rate": 7.889256219728235e-06, + "loss": 0.3427, + "step": 7177 + }, + { + "epoch": 0.32486988006336276, + "grad_norm": 0.6731314383680075, + "learning_rate": 7.888658020985e-06, + "loss": 0.3257, + "step": 7178 + }, + { + "epoch": 0.3249151391717583, + "grad_norm": 0.6885006191925244, + "learning_rate": 7.888059760173377e-06, + "loss": 0.3643, + "step": 7179 + }, + { + "epoch": 0.3249603982801539, + "grad_norm": 0.6801934899357927, + "learning_rate": 7.887461437306221e-06, + "loss": 0.3772, + "step": 7180 + }, + { + "epoch": 0.3250056573885494, + "grad_norm": 0.7150719356238756, + "learning_rate": 7.886863052396384e-06, + "loss": 0.4863, + "step": 7181 + }, + { + "epoch": 0.325050916496945, + "grad_norm": 0.626612080010428, + "learning_rate": 7.886264605456727e-06, + "loss": 0.3638, + "step": 7182 + }, + { + "epoch": 0.3250961756053406, + "grad_norm": 0.6263977091282102, + "learning_rate": 7.88566609650011e-06, + "loss": 0.3396, + "step": 7183 + }, + { + "epoch": 0.32514143471373613, + "grad_norm": 0.6505262702471224, + "learning_rate": 7.88506752553939e-06, + "loss": 0.3751, + "step": 7184 + }, + { + "epoch": 0.3251866938221317, + "grad_norm": 0.35956124288248265, + "learning_rate": 7.88446889258743e-06, + "loss": 0.4744, + "step": 7185 + }, + { + "epoch": 0.32523195293052726, + "grad_norm": 0.6126826293591199, + "learning_rate": 7.883870197657094e-06, + "loss": 0.3577, + "step": 7186 + }, + { + "epoch": 0.32527721203892285, + "grad_norm": 0.6808837015295135, + "learning_rate": 7.883271440761241e-06, + "loss": 0.3959, + "step": 7187 + }, + { + "epoch": 0.3253224711473184, + "grad_norm": 0.726997546831792, + "learning_rate": 7.882672621912742e-06, + "loss": 0.3182, + "step": 7188 + }, + { + "epoch": 0.32536773025571397, + "grad_norm": 0.5954746959339828, + "learning_rate": 7.882073741124464e-06, + "loss": 0.3575, + "step": 7189 + }, + { + "epoch": 0.3254129893641095, + "grad_norm": 0.6277763190542311, + "learning_rate": 7.88147479840927e-06, + "loss": 0.3882, + "step": 7190 + }, + { + "epoch": 0.3254582484725051, + "grad_norm": 0.6181567794663823, + "learning_rate": 7.880875793780031e-06, + "loss": 0.3792, + "step": 7191 + }, + { + "epoch": 0.32550350758090063, + "grad_norm": 0.6307714979444174, + "learning_rate": 7.880276727249623e-06, + "loss": 0.4817, + "step": 7192 + }, + { + "epoch": 0.3255487666892962, + "grad_norm": 0.747922164182171, + "learning_rate": 7.879677598830913e-06, + "loss": 0.3931, + "step": 7193 + }, + { + "epoch": 0.3255940257976918, + "grad_norm": 0.6431937919879824, + "learning_rate": 7.879078408536774e-06, + "loss": 0.3458, + "step": 7194 + }, + { + "epoch": 0.32563928490608735, + "grad_norm": 0.3977026758300841, + "learning_rate": 7.878479156380085e-06, + "loss": 0.4914, + "step": 7195 + }, + { + "epoch": 0.32568454401448294, + "grad_norm": 0.682964630211364, + "learning_rate": 7.877879842373718e-06, + "loss": 0.3708, + "step": 7196 + }, + { + "epoch": 0.32572980312287847, + "grad_norm": 0.6231524576095671, + "learning_rate": 7.877280466530552e-06, + "loss": 0.3418, + "step": 7197 + }, + { + "epoch": 0.32577506223127406, + "grad_norm": 0.6748437842992963, + "learning_rate": 7.876681028863464e-06, + "loss": 0.3746, + "step": 7198 + }, + { + "epoch": 0.3258203213396696, + "grad_norm": 0.6550830959993491, + "learning_rate": 7.876081529385338e-06, + "loss": 0.3588, + "step": 7199 + }, + { + "epoch": 0.3258655804480652, + "grad_norm": 0.6477789521912153, + "learning_rate": 7.875481968109052e-06, + "loss": 0.3705, + "step": 7200 + }, + { + "epoch": 0.3259108395564607, + "grad_norm": 0.6372087242778248, + "learning_rate": 7.874882345047491e-06, + "loss": 0.3785, + "step": 7201 + }, + { + "epoch": 0.3259560986648563, + "grad_norm": 0.655494204670402, + "learning_rate": 7.874282660213537e-06, + "loss": 0.3595, + "step": 7202 + }, + { + "epoch": 0.32600135777325184, + "grad_norm": 0.6214300206617079, + "learning_rate": 7.873682913620077e-06, + "loss": 0.3709, + "step": 7203 + }, + { + "epoch": 0.32604661688164743, + "grad_norm": 0.7395486618414999, + "learning_rate": 7.873083105279996e-06, + "loss": 0.4838, + "step": 7204 + }, + { + "epoch": 0.326091875990043, + "grad_norm": 0.6643510155786785, + "learning_rate": 7.872483235206184e-06, + "loss": 0.34, + "step": 7205 + }, + { + "epoch": 0.32613713509843856, + "grad_norm": 0.7851872919090189, + "learning_rate": 7.87188330341153e-06, + "loss": 0.3432, + "step": 7206 + }, + { + "epoch": 0.32618239420683415, + "grad_norm": 0.6122471370986199, + "learning_rate": 7.871283309908922e-06, + "loss": 0.3881, + "step": 7207 + }, + { + "epoch": 0.3262276533152297, + "grad_norm": 0.654839829130589, + "learning_rate": 7.870683254711255e-06, + "loss": 0.4115, + "step": 7208 + }, + { + "epoch": 0.32627291242362527, + "grad_norm": 0.632937065166646, + "learning_rate": 7.870083137831423e-06, + "loss": 0.3534, + "step": 7209 + }, + { + "epoch": 0.3263181715320208, + "grad_norm": 0.3925737232393038, + "learning_rate": 7.869482959282318e-06, + "loss": 0.4862, + "step": 7210 + }, + { + "epoch": 0.3263634306404164, + "grad_norm": 0.6745670011729434, + "learning_rate": 7.868882719076838e-06, + "loss": 0.3442, + "step": 7211 + }, + { + "epoch": 0.32640868974881193, + "grad_norm": 0.6684057044920896, + "learning_rate": 7.868282417227877e-06, + "loss": 0.3888, + "step": 7212 + }, + { + "epoch": 0.3264539488572075, + "grad_norm": 0.6623891104338104, + "learning_rate": 7.867682053748338e-06, + "loss": 0.4021, + "step": 7213 + }, + { + "epoch": 0.32649920796560306, + "grad_norm": 0.5684645700583332, + "learning_rate": 7.86708162865112e-06, + "loss": 0.3948, + "step": 7214 + }, + { + "epoch": 0.32654446707399865, + "grad_norm": 0.38853354414543634, + "learning_rate": 7.866481141949123e-06, + "loss": 0.4923, + "step": 7215 + }, + { + "epoch": 0.3265897261823942, + "grad_norm": 0.6899635765060905, + "learning_rate": 7.86588059365525e-06, + "loss": 0.4039, + "step": 7216 + }, + { + "epoch": 0.32663498529078977, + "grad_norm": 0.637953863348367, + "learning_rate": 7.865279983782402e-06, + "loss": 0.403, + "step": 7217 + }, + { + "epoch": 0.32668024439918536, + "grad_norm": 0.6447741093066474, + "learning_rate": 7.864679312343491e-06, + "loss": 0.419, + "step": 7218 + }, + { + "epoch": 0.3267255035075809, + "grad_norm": 0.3132402057909027, + "learning_rate": 7.864078579351418e-06, + "loss": 0.4769, + "step": 7219 + }, + { + "epoch": 0.3267707626159765, + "grad_norm": 0.6410307435099741, + "learning_rate": 7.863477784819091e-06, + "loss": 0.3385, + "step": 7220 + }, + { + "epoch": 0.326816021724372, + "grad_norm": 0.8084080593940757, + "learning_rate": 7.862876928759424e-06, + "loss": 0.3662, + "step": 7221 + }, + { + "epoch": 0.3268612808327676, + "grad_norm": 0.6242179182343427, + "learning_rate": 7.862276011185323e-06, + "loss": 0.3368, + "step": 7222 + }, + { + "epoch": 0.32690653994116314, + "grad_norm": 0.6506736147066804, + "learning_rate": 7.8616750321097e-06, + "loss": 0.3591, + "step": 7223 + }, + { + "epoch": 0.32695179904955873, + "grad_norm": 0.3743456101413249, + "learning_rate": 7.861073991545472e-06, + "loss": 0.5008, + "step": 7224 + }, + { + "epoch": 0.32699705815795427, + "grad_norm": 0.8322622971337075, + "learning_rate": 7.86047288950555e-06, + "loss": 0.3881, + "step": 7225 + }, + { + "epoch": 0.32704231726634986, + "grad_norm": 0.42578988571435983, + "learning_rate": 7.859871726002852e-06, + "loss": 0.493, + "step": 7226 + }, + { + "epoch": 0.3270875763747454, + "grad_norm": 0.6491635693186513, + "learning_rate": 7.859270501050292e-06, + "loss": 0.3765, + "step": 7227 + }, + { + "epoch": 0.327132835483141, + "grad_norm": 0.2764549467599382, + "learning_rate": 7.858669214660792e-06, + "loss": 0.4823, + "step": 7228 + }, + { + "epoch": 0.3271780945915366, + "grad_norm": 0.7046311928792779, + "learning_rate": 7.85806786684727e-06, + "loss": 0.351, + "step": 7229 + }, + { + "epoch": 0.3272233536999321, + "grad_norm": 0.3321521003907507, + "learning_rate": 7.857466457622647e-06, + "loss": 0.4904, + "step": 7230 + }, + { + "epoch": 0.3272686128083277, + "grad_norm": 0.6566847794699036, + "learning_rate": 7.856864986999845e-06, + "loss": 0.3917, + "step": 7231 + }, + { + "epoch": 0.32731387191672323, + "grad_norm": 0.629014744960846, + "learning_rate": 7.856263454991791e-06, + "loss": 0.3318, + "step": 7232 + }, + { + "epoch": 0.3273591310251188, + "grad_norm": 0.6085911810930542, + "learning_rate": 7.855661861611406e-06, + "loss": 0.3438, + "step": 7233 + }, + { + "epoch": 0.32740439013351436, + "grad_norm": 0.6578764127823246, + "learning_rate": 7.855060206871618e-06, + "loss": 0.419, + "step": 7234 + }, + { + "epoch": 0.32744964924190995, + "grad_norm": 0.6536238269200988, + "learning_rate": 7.854458490785354e-06, + "loss": 0.3811, + "step": 7235 + }, + { + "epoch": 0.3274949083503055, + "grad_norm": 0.381547503313166, + "learning_rate": 7.853856713365547e-06, + "loss": 0.4837, + "step": 7236 + }, + { + "epoch": 0.32754016745870107, + "grad_norm": 0.7208941435848779, + "learning_rate": 7.853254874625122e-06, + "loss": 0.3384, + "step": 7237 + }, + { + "epoch": 0.3275854265670966, + "grad_norm": 0.6808342916905882, + "learning_rate": 7.852652974577012e-06, + "loss": 0.3551, + "step": 7238 + }, + { + "epoch": 0.3276306856754922, + "grad_norm": 0.7001374804992242, + "learning_rate": 7.852051013234153e-06, + "loss": 0.375, + "step": 7239 + }, + { + "epoch": 0.3276759447838878, + "grad_norm": 0.650543073012321, + "learning_rate": 7.851448990609476e-06, + "loss": 0.3435, + "step": 7240 + }, + { + "epoch": 0.3277212038922833, + "grad_norm": 0.34818232252070125, + "learning_rate": 7.850846906715917e-06, + "loss": 0.4859, + "step": 7241 + }, + { + "epoch": 0.3277664630006789, + "grad_norm": 0.6569965955754113, + "learning_rate": 7.850244761566415e-06, + "loss": 0.35, + "step": 7242 + }, + { + "epoch": 0.32781172210907444, + "grad_norm": 0.6249546782859293, + "learning_rate": 7.849642555173907e-06, + "loss": 0.3492, + "step": 7243 + }, + { + "epoch": 0.32785698121747003, + "grad_norm": 0.7960260771660157, + "learning_rate": 7.849040287551331e-06, + "loss": 0.3906, + "step": 7244 + }, + { + "epoch": 0.32790224032586557, + "grad_norm": 0.6504881285244849, + "learning_rate": 7.848437958711631e-06, + "loss": 0.3752, + "step": 7245 + }, + { + "epoch": 0.32794749943426116, + "grad_norm": 0.6562907636624832, + "learning_rate": 7.847835568667746e-06, + "loss": 0.341, + "step": 7246 + }, + { + "epoch": 0.3279927585426567, + "grad_norm": 0.663970148382978, + "learning_rate": 7.847233117432623e-06, + "loss": 0.4125, + "step": 7247 + }, + { + "epoch": 0.3280380176510523, + "grad_norm": 0.611816963228363, + "learning_rate": 7.846630605019204e-06, + "loss": 0.3623, + "step": 7248 + }, + { + "epoch": 0.3280832767594478, + "grad_norm": 0.8418055709857473, + "learning_rate": 7.846028031440436e-06, + "loss": 0.3735, + "step": 7249 + }, + { + "epoch": 0.3281285358678434, + "grad_norm": 0.4046248697144449, + "learning_rate": 7.845425396709266e-06, + "loss": 0.4738, + "step": 7250 + }, + { + "epoch": 0.32817379497623894, + "grad_norm": 0.6705514103940412, + "learning_rate": 7.844822700838644e-06, + "loss": 0.3467, + "step": 7251 + }, + { + "epoch": 0.32821905408463453, + "grad_norm": 0.663416672149569, + "learning_rate": 7.84421994384152e-06, + "loss": 0.388, + "step": 7252 + }, + { + "epoch": 0.3282643131930301, + "grad_norm": 0.651365532004636, + "learning_rate": 7.843617125730842e-06, + "loss": 0.3884, + "step": 7253 + }, + { + "epoch": 0.32830957230142566, + "grad_norm": 0.6076682619566085, + "learning_rate": 7.843014246519569e-06, + "loss": 0.3548, + "step": 7254 + }, + { + "epoch": 0.32835483140982125, + "grad_norm": 0.6729372693949679, + "learning_rate": 7.84241130622065e-06, + "loss": 0.3634, + "step": 7255 + }, + { + "epoch": 0.3284000905182168, + "grad_norm": 0.6264003233718723, + "learning_rate": 7.841808304847041e-06, + "loss": 0.3481, + "step": 7256 + }, + { + "epoch": 0.32844534962661237, + "grad_norm": 0.8129037846541436, + "learning_rate": 7.841205242411701e-06, + "loss": 0.3632, + "step": 7257 + }, + { + "epoch": 0.3284906087350079, + "grad_norm": 0.37059103009216027, + "learning_rate": 7.840602118927584e-06, + "loss": 0.4537, + "step": 7258 + }, + { + "epoch": 0.3285358678434035, + "grad_norm": 0.6352263729186042, + "learning_rate": 7.839998934407652e-06, + "loss": 0.3527, + "step": 7259 + }, + { + "epoch": 0.32858112695179903, + "grad_norm": 0.6370304748624506, + "learning_rate": 7.839395688864868e-06, + "loss": 0.3522, + "step": 7260 + }, + { + "epoch": 0.3286263860601946, + "grad_norm": 0.33410480912428436, + "learning_rate": 7.83879238231219e-06, + "loss": 0.4857, + "step": 7261 + }, + { + "epoch": 0.32867164516859015, + "grad_norm": 0.39715243089542934, + "learning_rate": 7.838189014762582e-06, + "loss": 0.5234, + "step": 7262 + }, + { + "epoch": 0.32871690427698574, + "grad_norm": 0.6725589605603078, + "learning_rate": 7.83758558622901e-06, + "loss": 0.3386, + "step": 7263 + }, + { + "epoch": 0.32876216338538133, + "grad_norm": 0.7081106837258482, + "learning_rate": 7.836982096724438e-06, + "loss": 0.3806, + "step": 7264 + }, + { + "epoch": 0.32880742249377687, + "grad_norm": 0.6134248938641275, + "learning_rate": 7.836378546261834e-06, + "loss": 0.3394, + "step": 7265 + }, + { + "epoch": 0.32885268160217246, + "grad_norm": 0.6459265887108824, + "learning_rate": 7.835774934854166e-06, + "loss": 0.3418, + "step": 7266 + }, + { + "epoch": 0.328897940710568, + "grad_norm": 0.6242275569284673, + "learning_rate": 7.835171262514402e-06, + "loss": 0.3263, + "step": 7267 + }, + { + "epoch": 0.3289431998189636, + "grad_norm": 0.6192643139391333, + "learning_rate": 7.834567529255519e-06, + "loss": 0.3335, + "step": 7268 + }, + { + "epoch": 0.3289884589273591, + "grad_norm": 0.7266708113236864, + "learning_rate": 7.833963735090484e-06, + "loss": 0.3673, + "step": 7269 + }, + { + "epoch": 0.3290337180357547, + "grad_norm": 0.42644557495266, + "learning_rate": 7.833359880032272e-06, + "loss": 0.4821, + "step": 7270 + }, + { + "epoch": 0.32907897714415024, + "grad_norm": 0.37821061706041686, + "learning_rate": 7.832755964093859e-06, + "loss": 0.51, + "step": 7271 + }, + { + "epoch": 0.32912423625254583, + "grad_norm": 0.3157916385785731, + "learning_rate": 7.832151987288219e-06, + "loss": 0.5135, + "step": 7272 + }, + { + "epoch": 0.32916949536094137, + "grad_norm": 0.7308948326064201, + "learning_rate": 7.83154794962833e-06, + "loss": 0.3712, + "step": 7273 + }, + { + "epoch": 0.32921475446933696, + "grad_norm": 0.6851166754739852, + "learning_rate": 7.830943851127175e-06, + "loss": 0.3813, + "step": 7274 + }, + { + "epoch": 0.3292600135777325, + "grad_norm": 0.6385881894664626, + "learning_rate": 7.830339691797727e-06, + "loss": 0.3423, + "step": 7275 + }, + { + "epoch": 0.3293052726861281, + "grad_norm": 0.9217747573027575, + "learning_rate": 7.829735471652978e-06, + "loss": 0.3602, + "step": 7276 + }, + { + "epoch": 0.32935053179452367, + "grad_norm": 0.6708483049494212, + "learning_rate": 7.8291311907059e-06, + "loss": 0.371, + "step": 7277 + }, + { + "epoch": 0.3293957909029192, + "grad_norm": 0.7331380798086038, + "learning_rate": 7.828526848969482e-06, + "loss": 0.4031, + "step": 7278 + }, + { + "epoch": 0.3294410500113148, + "grad_norm": 0.7548536600065007, + "learning_rate": 7.827922446456711e-06, + "loss": 0.3531, + "step": 7279 + }, + { + "epoch": 0.32948630911971033, + "grad_norm": 0.6395017247716274, + "learning_rate": 7.827317983180571e-06, + "loss": 0.376, + "step": 7280 + }, + { + "epoch": 0.3295315682281059, + "grad_norm": 0.688170077140715, + "learning_rate": 7.826713459154051e-06, + "loss": 0.3621, + "step": 7281 + }, + { + "epoch": 0.32957682733650145, + "grad_norm": 0.6798231401394088, + "learning_rate": 7.826108874390141e-06, + "loss": 0.3872, + "step": 7282 + }, + { + "epoch": 0.32962208644489704, + "grad_norm": 0.7378493099029014, + "learning_rate": 7.82550422890183e-06, + "loss": 0.358, + "step": 7283 + }, + { + "epoch": 0.3296673455532926, + "grad_norm": 0.6859895137902425, + "learning_rate": 7.824899522702112e-06, + "loss": 0.3741, + "step": 7284 + }, + { + "epoch": 0.32971260466168817, + "grad_norm": 0.658983030067839, + "learning_rate": 7.824294755803978e-06, + "loss": 0.3697, + "step": 7285 + }, + { + "epoch": 0.3297578637700837, + "grad_norm": 0.8659072243183273, + "learning_rate": 7.823689928220424e-06, + "loss": 0.3796, + "step": 7286 + }, + { + "epoch": 0.3298031228784793, + "grad_norm": 0.8688001597626949, + "learning_rate": 7.823085039964446e-06, + "loss": 0.509, + "step": 7287 + }, + { + "epoch": 0.3298483819868749, + "grad_norm": 0.6597518755134283, + "learning_rate": 7.82248009104904e-06, + "loss": 0.4867, + "step": 7288 + }, + { + "epoch": 0.3298936410952704, + "grad_norm": 0.3427715840543005, + "learning_rate": 7.821875081487208e-06, + "loss": 0.496, + "step": 7289 + }, + { + "epoch": 0.329938900203666, + "grad_norm": 0.6649470951982919, + "learning_rate": 7.821270011291946e-06, + "loss": 0.3074, + "step": 7290 + }, + { + "epoch": 0.32998415931206154, + "grad_norm": 0.7713963361882481, + "learning_rate": 7.820664880476257e-06, + "loss": 0.3383, + "step": 7291 + }, + { + "epoch": 0.33002941842045713, + "grad_norm": 0.6840185544238141, + "learning_rate": 7.820059689053142e-06, + "loss": 0.3775, + "step": 7292 + }, + { + "epoch": 0.33007467752885267, + "grad_norm": 0.7067990947071694, + "learning_rate": 7.819454437035605e-06, + "loss": 0.3748, + "step": 7293 + }, + { + "epoch": 0.33011993663724826, + "grad_norm": 0.6848570477143894, + "learning_rate": 7.818849124436651e-06, + "loss": 0.3773, + "step": 7294 + }, + { + "epoch": 0.3301651957456438, + "grad_norm": 0.6522391481043741, + "learning_rate": 7.818243751269288e-06, + "loss": 0.4016, + "step": 7295 + }, + { + "epoch": 0.3302104548540394, + "grad_norm": 0.6509650533075849, + "learning_rate": 7.817638317546521e-06, + "loss": 0.3688, + "step": 7296 + }, + { + "epoch": 0.3302557139624349, + "grad_norm": 0.785981076357508, + "learning_rate": 7.817032823281362e-06, + "loss": 0.3385, + "step": 7297 + }, + { + "epoch": 0.3303009730708305, + "grad_norm": 0.7189844060702856, + "learning_rate": 7.816427268486819e-06, + "loss": 0.3952, + "step": 7298 + }, + { + "epoch": 0.3303462321792261, + "grad_norm": 0.6982142130746813, + "learning_rate": 7.815821653175903e-06, + "loss": 0.3857, + "step": 7299 + }, + { + "epoch": 0.33039149128762163, + "grad_norm": 0.6773609322645711, + "learning_rate": 7.815215977361628e-06, + "loss": 0.3254, + "step": 7300 + }, + { + "epoch": 0.3304367503960172, + "grad_norm": 0.7337843174717779, + "learning_rate": 7.814610241057009e-06, + "loss": 0.3923, + "step": 7301 + }, + { + "epoch": 0.33048200950441275, + "grad_norm": 0.617776076989984, + "learning_rate": 7.814004444275058e-06, + "loss": 0.3394, + "step": 7302 + }, + { + "epoch": 0.33052726861280834, + "grad_norm": 0.6361374966992775, + "learning_rate": 7.813398587028798e-06, + "loss": 0.3687, + "step": 7303 + }, + { + "epoch": 0.3305725277212039, + "grad_norm": 0.6348279118574923, + "learning_rate": 7.81279266933124e-06, + "loss": 0.3844, + "step": 7304 + }, + { + "epoch": 0.33061778682959947, + "grad_norm": 2.8535305256216423, + "learning_rate": 7.812186691195407e-06, + "loss": 0.5487, + "step": 7305 + }, + { + "epoch": 0.330663045937995, + "grad_norm": 0.7068173549230775, + "learning_rate": 7.811580652634319e-06, + "loss": 0.379, + "step": 7306 + }, + { + "epoch": 0.3307083050463906, + "grad_norm": 0.690644774893463, + "learning_rate": 7.810974553660998e-06, + "loss": 0.375, + "step": 7307 + }, + { + "epoch": 0.33075356415478613, + "grad_norm": 0.663678126303351, + "learning_rate": 7.810368394288468e-06, + "loss": 0.3685, + "step": 7308 + }, + { + "epoch": 0.3307988232631817, + "grad_norm": 0.740229603627771, + "learning_rate": 7.809762174529752e-06, + "loss": 0.3729, + "step": 7309 + }, + { + "epoch": 0.33084408237157725, + "grad_norm": 0.7410494585371324, + "learning_rate": 7.809155894397876e-06, + "loss": 0.3873, + "step": 7310 + }, + { + "epoch": 0.33088934147997284, + "grad_norm": 0.6234515829771916, + "learning_rate": 7.808549553905867e-06, + "loss": 0.348, + "step": 7311 + }, + { + "epoch": 0.33093460058836843, + "grad_norm": 0.6860869376405904, + "learning_rate": 7.807943153066754e-06, + "loss": 0.3357, + "step": 7312 + }, + { + "epoch": 0.33097985969676397, + "grad_norm": 1.2962262711218264, + "learning_rate": 7.807336691893568e-06, + "loss": 0.5247, + "step": 7313 + }, + { + "epoch": 0.33102511880515956, + "grad_norm": 0.6934296670610857, + "learning_rate": 7.806730170399337e-06, + "loss": 0.3651, + "step": 7314 + }, + { + "epoch": 0.3310703779135551, + "grad_norm": 0.7025851521873115, + "learning_rate": 7.806123588597094e-06, + "loss": 0.3592, + "step": 7315 + }, + { + "epoch": 0.3311156370219507, + "grad_norm": 0.768101167716597, + "learning_rate": 7.805516946499876e-06, + "loss": 0.3298, + "step": 7316 + }, + { + "epoch": 0.3311608961303462, + "grad_norm": 0.6351060711723828, + "learning_rate": 7.804910244120714e-06, + "loss": 0.3296, + "step": 7317 + }, + { + "epoch": 0.3312061552387418, + "grad_norm": 0.6978681798720107, + "learning_rate": 7.804303481472645e-06, + "loss": 0.3914, + "step": 7318 + }, + { + "epoch": 0.33125141434713734, + "grad_norm": 0.6061560182864592, + "learning_rate": 7.80369665856871e-06, + "loss": 0.3256, + "step": 7319 + }, + { + "epoch": 0.33129667345553293, + "grad_norm": 0.6335131386121605, + "learning_rate": 7.80308977542194e-06, + "loss": 0.3703, + "step": 7320 + }, + { + "epoch": 0.33134193256392847, + "grad_norm": 1.3965455827885276, + "learning_rate": 7.802482832045383e-06, + "loss": 0.3467, + "step": 7321 + }, + { + "epoch": 0.33138719167232406, + "grad_norm": 0.6540973659645941, + "learning_rate": 7.801875828452077e-06, + "loss": 0.4098, + "step": 7322 + }, + { + "epoch": 0.33143245078071965, + "grad_norm": 0.5751699292719178, + "learning_rate": 7.801268764655063e-06, + "loss": 0.3413, + "step": 7323 + }, + { + "epoch": 0.3314777098891152, + "grad_norm": 0.6726602987568291, + "learning_rate": 7.800661640667388e-06, + "loss": 0.3027, + "step": 7324 + }, + { + "epoch": 0.33152296899751077, + "grad_norm": 0.6510373564214473, + "learning_rate": 7.800054456502096e-06, + "loss": 0.3328, + "step": 7325 + }, + { + "epoch": 0.3315682281059063, + "grad_norm": 0.6380091514174852, + "learning_rate": 7.799447212172233e-06, + "loss": 0.3574, + "step": 7326 + }, + { + "epoch": 0.3316134872143019, + "grad_norm": 0.6531228281965532, + "learning_rate": 7.798839907690847e-06, + "loss": 0.3581, + "step": 7327 + }, + { + "epoch": 0.33165874632269743, + "grad_norm": 0.9721212229654579, + "learning_rate": 7.798232543070987e-06, + "loss": 0.4774, + "step": 7328 + }, + { + "epoch": 0.331704005431093, + "grad_norm": 0.6227626915657207, + "learning_rate": 7.797625118325705e-06, + "loss": 0.3739, + "step": 7329 + }, + { + "epoch": 0.33174926453948855, + "grad_norm": 0.7719234444001082, + "learning_rate": 7.797017633468052e-06, + "loss": 0.4823, + "step": 7330 + }, + { + "epoch": 0.33179452364788414, + "grad_norm": 0.627263679418447, + "learning_rate": 7.796410088511078e-06, + "loss": 0.3712, + "step": 7331 + }, + { + "epoch": 0.3318397827562797, + "grad_norm": 0.3508190839711614, + "learning_rate": 7.79580248346784e-06, + "loss": 0.4973, + "step": 7332 + }, + { + "epoch": 0.33188504186467527, + "grad_norm": 0.6231007923758607, + "learning_rate": 7.795194818351395e-06, + "loss": 0.363, + "step": 7333 + }, + { + "epoch": 0.33193030097307086, + "grad_norm": 0.6697970573735087, + "learning_rate": 7.794587093174797e-06, + "loss": 0.4091, + "step": 7334 + }, + { + "epoch": 0.3319755600814664, + "grad_norm": 0.6362010756444667, + "learning_rate": 7.793979307951108e-06, + "loss": 0.3471, + "step": 7335 + }, + { + "epoch": 0.332020819189862, + "grad_norm": 0.6596318056401064, + "learning_rate": 7.79337146269338e-06, + "loss": 0.3645, + "step": 7336 + }, + { + "epoch": 0.3320660782982575, + "grad_norm": 0.9210714304746968, + "learning_rate": 7.792763557414683e-06, + "loss": 0.3606, + "step": 7337 + }, + { + "epoch": 0.3321113374066531, + "grad_norm": 0.5987292893054342, + "learning_rate": 7.792155592128072e-06, + "loss": 0.3364, + "step": 7338 + }, + { + "epoch": 0.33215659651504864, + "grad_norm": 0.6215979190882727, + "learning_rate": 7.791547566846612e-06, + "loss": 0.3604, + "step": 7339 + }, + { + "epoch": 0.33220185562344423, + "grad_norm": 0.646836740101282, + "learning_rate": 7.79093948158337e-06, + "loss": 0.3662, + "step": 7340 + }, + { + "epoch": 0.33224711473183977, + "grad_norm": 0.6432445331083202, + "learning_rate": 7.790331336351408e-06, + "loss": 0.3607, + "step": 7341 + }, + { + "epoch": 0.33229237384023536, + "grad_norm": 0.8022329252854027, + "learning_rate": 7.7897231311638e-06, + "loss": 0.4262, + "step": 7342 + }, + { + "epoch": 0.3323376329486309, + "grad_norm": 0.6805560295429158, + "learning_rate": 7.789114866033607e-06, + "loss": 0.3422, + "step": 7343 + }, + { + "epoch": 0.3323828920570265, + "grad_norm": 0.7509513883759419, + "learning_rate": 7.788506540973902e-06, + "loss": 0.4154, + "step": 7344 + }, + { + "epoch": 0.332428151165422, + "grad_norm": 1.1071314317950283, + "learning_rate": 7.787898155997755e-06, + "loss": 0.5069, + "step": 7345 + }, + { + "epoch": 0.3324734102738176, + "grad_norm": 0.992944270441858, + "learning_rate": 7.787289711118238e-06, + "loss": 0.5124, + "step": 7346 + }, + { + "epoch": 0.3325186693822132, + "grad_norm": 0.7760811321913402, + "learning_rate": 7.786681206348428e-06, + "loss": 0.3206, + "step": 7347 + }, + { + "epoch": 0.33256392849060873, + "grad_norm": 0.5570280244602357, + "learning_rate": 7.786072641701397e-06, + "loss": 0.502, + "step": 7348 + }, + { + "epoch": 0.3326091875990043, + "grad_norm": 0.43038194913860595, + "learning_rate": 7.78546401719022e-06, + "loss": 0.5162, + "step": 7349 + }, + { + "epoch": 0.33265444670739985, + "grad_norm": 0.7139064690910768, + "learning_rate": 7.784855332827979e-06, + "loss": 0.3599, + "step": 7350 + }, + { + "epoch": 0.33269970581579544, + "grad_norm": 0.6840052243027095, + "learning_rate": 7.784246588627747e-06, + "loss": 0.346, + "step": 7351 + }, + { + "epoch": 0.332744964924191, + "grad_norm": 0.7906548268620531, + "learning_rate": 7.783637784602608e-06, + "loss": 0.3728, + "step": 7352 + }, + { + "epoch": 0.33279022403258657, + "grad_norm": 0.9588818268057699, + "learning_rate": 7.783028920765644e-06, + "loss": 0.5166, + "step": 7353 + }, + { + "epoch": 0.3328354831409821, + "grad_norm": 0.628264054881028, + "learning_rate": 7.782419997129934e-06, + "loss": 0.3538, + "step": 7354 + }, + { + "epoch": 0.3328807422493777, + "grad_norm": 0.7094367104820966, + "learning_rate": 7.781811013708565e-06, + "loss": 0.3691, + "step": 7355 + }, + { + "epoch": 0.3329260013577732, + "grad_norm": 0.9712871371707572, + "learning_rate": 7.78120197051462e-06, + "loss": 0.5178, + "step": 7356 + }, + { + "epoch": 0.3329712604661688, + "grad_norm": 0.7065811609441587, + "learning_rate": 7.780592867561187e-06, + "loss": 0.3436, + "step": 7357 + }, + { + "epoch": 0.3330165195745644, + "grad_norm": 0.8080745881239523, + "learning_rate": 7.779983704861354e-06, + "loss": 0.5165, + "step": 7358 + }, + { + "epoch": 0.33306177868295994, + "grad_norm": 0.5864163976829028, + "learning_rate": 7.779374482428206e-06, + "loss": 0.4997, + "step": 7359 + }, + { + "epoch": 0.33310703779135553, + "grad_norm": 0.6685332325376612, + "learning_rate": 7.77876520027484e-06, + "loss": 0.3599, + "step": 7360 + }, + { + "epoch": 0.33315229689975107, + "grad_norm": 0.7441894827625023, + "learning_rate": 7.778155858414342e-06, + "loss": 0.3582, + "step": 7361 + }, + { + "epoch": 0.33319755600814666, + "grad_norm": 0.733170379006083, + "learning_rate": 7.777546456859808e-06, + "loss": 0.3976, + "step": 7362 + }, + { + "epoch": 0.3332428151165422, + "grad_norm": 0.621295126537377, + "learning_rate": 7.77693699562433e-06, + "loss": 0.3647, + "step": 7363 + }, + { + "epoch": 0.3332880742249378, + "grad_norm": 0.6711556936745743, + "learning_rate": 7.776327474721009e-06, + "loss": 0.5287, + "step": 7364 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 0.722177606091501, + "learning_rate": 7.775717894162933e-06, + "loss": 0.3633, + "step": 7365 + }, + { + "epoch": 0.3333785924417289, + "grad_norm": 0.7038042521755242, + "learning_rate": 7.775108253963207e-06, + "loss": 0.365, + "step": 7366 + }, + { + "epoch": 0.33342385155012444, + "grad_norm": 0.7542576892215929, + "learning_rate": 7.774498554134925e-06, + "loss": 0.4828, + "step": 7367 + }, + { + "epoch": 0.33346911065852003, + "grad_norm": 0.622741190715478, + "learning_rate": 7.773888794691192e-06, + "loss": 0.4903, + "step": 7368 + }, + { + "epoch": 0.3335143697669156, + "grad_norm": 0.6829381274063849, + "learning_rate": 7.773278975645109e-06, + "loss": 0.3455, + "step": 7369 + }, + { + "epoch": 0.33355962887531115, + "grad_norm": 0.7782974536058487, + "learning_rate": 7.772669097009777e-06, + "loss": 0.36, + "step": 7370 + }, + { + "epoch": 0.33360488798370674, + "grad_norm": 0.5081855472190925, + "learning_rate": 7.772059158798302e-06, + "loss": 0.4576, + "step": 7371 + }, + { + "epoch": 0.3336501470921023, + "grad_norm": 0.6705304178050254, + "learning_rate": 7.77144916102379e-06, + "loss": 0.3765, + "step": 7372 + }, + { + "epoch": 0.33369540620049787, + "grad_norm": 0.38144921018083416, + "learning_rate": 7.770839103699345e-06, + "loss": 0.4927, + "step": 7373 + }, + { + "epoch": 0.3337406653088934, + "grad_norm": 0.7062153479980101, + "learning_rate": 7.77022898683808e-06, + "loss": 0.3313, + "step": 7374 + }, + { + "epoch": 0.333785924417289, + "grad_norm": 0.4372021191401388, + "learning_rate": 7.769618810453101e-06, + "loss": 0.4999, + "step": 7375 + }, + { + "epoch": 0.3338311835256845, + "grad_norm": 0.6507493359356833, + "learning_rate": 7.769008574557522e-06, + "loss": 0.3894, + "step": 7376 + }, + { + "epoch": 0.3338764426340801, + "grad_norm": 0.7817304510978234, + "learning_rate": 7.76839827916445e-06, + "loss": 0.3949, + "step": 7377 + }, + { + "epoch": 0.33392170174247565, + "grad_norm": 0.6754684107822001, + "learning_rate": 7.767787924287005e-06, + "loss": 0.3897, + "step": 7378 + }, + { + "epoch": 0.33396696085087124, + "grad_norm": 0.68264505263259, + "learning_rate": 7.767177509938294e-06, + "loss": 0.4152, + "step": 7379 + }, + { + "epoch": 0.3340122199592668, + "grad_norm": 0.607110426394516, + "learning_rate": 7.76656703613144e-06, + "loss": 0.3981, + "step": 7380 + }, + { + "epoch": 0.33405747906766237, + "grad_norm": 0.6315941771087339, + "learning_rate": 7.765956502879557e-06, + "loss": 0.351, + "step": 7381 + }, + { + "epoch": 0.33410273817605796, + "grad_norm": 0.622596703964894, + "learning_rate": 7.765345910195764e-06, + "loss": 0.3744, + "step": 7382 + }, + { + "epoch": 0.3341479972844535, + "grad_norm": 0.6443042347847058, + "learning_rate": 7.76473525809318e-06, + "loss": 0.3394, + "step": 7383 + }, + { + "epoch": 0.3341932563928491, + "grad_norm": 0.6177848452709123, + "learning_rate": 7.764124546584926e-06, + "loss": 0.3718, + "step": 7384 + }, + { + "epoch": 0.3342385155012446, + "grad_norm": 0.6317024650528028, + "learning_rate": 7.763513775684125e-06, + "loss": 0.3415, + "step": 7385 + }, + { + "epoch": 0.3342837746096402, + "grad_norm": 0.6268294522319873, + "learning_rate": 7.7629029454039e-06, + "loss": 0.3432, + "step": 7386 + }, + { + "epoch": 0.33432903371803574, + "grad_norm": 0.6961497243147552, + "learning_rate": 7.762292055757379e-06, + "loss": 0.4405, + "step": 7387 + }, + { + "epoch": 0.33437429282643133, + "grad_norm": 0.6416482361622061, + "learning_rate": 7.761681106757682e-06, + "loss": 0.3533, + "step": 7388 + }, + { + "epoch": 0.33441955193482686, + "grad_norm": 0.6484749481198567, + "learning_rate": 7.761070098417943e-06, + "loss": 0.3225, + "step": 7389 + }, + { + "epoch": 0.33446481104322245, + "grad_norm": 0.7573052274699215, + "learning_rate": 7.760459030751285e-06, + "loss": 0.4899, + "step": 7390 + }, + { + "epoch": 0.334510070151618, + "grad_norm": 0.6484070916262837, + "learning_rate": 7.759847903770841e-06, + "loss": 0.3776, + "step": 7391 + }, + { + "epoch": 0.3345553292600136, + "grad_norm": 0.6403106131889722, + "learning_rate": 7.759236717489743e-06, + "loss": 0.3534, + "step": 7392 + }, + { + "epoch": 0.33460058836840917, + "grad_norm": 0.7201737675332623, + "learning_rate": 7.75862547192112e-06, + "loss": 0.4111, + "step": 7393 + }, + { + "epoch": 0.3346458474768047, + "grad_norm": 0.3878614133578035, + "learning_rate": 7.75801416707811e-06, + "loss": 0.4685, + "step": 7394 + }, + { + "epoch": 0.3346911065852003, + "grad_norm": 0.5983085649938688, + "learning_rate": 7.757402802973846e-06, + "loss": 0.3261, + "step": 7395 + }, + { + "epoch": 0.3347363656935958, + "grad_norm": 0.7292220648834236, + "learning_rate": 7.756791379621461e-06, + "loss": 0.3354, + "step": 7396 + }, + { + "epoch": 0.3347816248019914, + "grad_norm": 0.6933013859235289, + "learning_rate": 7.756179897034101e-06, + "loss": 0.3502, + "step": 7397 + }, + { + "epoch": 0.33482688391038695, + "grad_norm": 0.7088971477959854, + "learning_rate": 7.7555683552249e-06, + "loss": 0.3665, + "step": 7398 + }, + { + "epoch": 0.33487214301878254, + "grad_norm": 0.6655689664230773, + "learning_rate": 7.754956754206995e-06, + "loss": 0.4009, + "step": 7399 + }, + { + "epoch": 0.3349174021271781, + "grad_norm": 0.6449599873272122, + "learning_rate": 7.754345093993531e-06, + "loss": 0.4111, + "step": 7400 + }, + { + "epoch": 0.33496266123557367, + "grad_norm": 0.642456407875221, + "learning_rate": 7.753733374597651e-06, + "loss": 0.2901, + "step": 7401 + }, + { + "epoch": 0.3350079203439692, + "grad_norm": 0.47876243681554803, + "learning_rate": 7.7531215960325e-06, + "loss": 0.4831, + "step": 7402 + }, + { + "epoch": 0.3350531794523648, + "grad_norm": 0.6578598929289875, + "learning_rate": 7.75250975831122e-06, + "loss": 0.395, + "step": 7403 + }, + { + "epoch": 0.3350984385607603, + "grad_norm": 0.43312235044318054, + "learning_rate": 7.751897861446957e-06, + "loss": 0.4638, + "step": 7404 + }, + { + "epoch": 0.3351436976691559, + "grad_norm": 0.5912177865190626, + "learning_rate": 7.751285905452863e-06, + "loss": 0.3715, + "step": 7405 + }, + { + "epoch": 0.3351889567775515, + "grad_norm": 0.6617835213947177, + "learning_rate": 7.750673890342087e-06, + "loss": 0.3882, + "step": 7406 + }, + { + "epoch": 0.33523421588594704, + "grad_norm": 0.33686268188490154, + "learning_rate": 7.750061816127773e-06, + "loss": 0.4945, + "step": 7407 + }, + { + "epoch": 0.33527947499434263, + "grad_norm": 0.6242490984195372, + "learning_rate": 7.749449682823077e-06, + "loss": 0.3567, + "step": 7408 + }, + { + "epoch": 0.33532473410273816, + "grad_norm": 0.3702368617253743, + "learning_rate": 7.748837490441154e-06, + "loss": 0.4814, + "step": 7409 + }, + { + "epoch": 0.33536999321113375, + "grad_norm": 0.31983652680776037, + "learning_rate": 7.748225238995155e-06, + "loss": 0.4851, + "step": 7410 + }, + { + "epoch": 0.3354152523195293, + "grad_norm": 0.6678022578758226, + "learning_rate": 7.747612928498236e-06, + "loss": 0.3703, + "step": 7411 + }, + { + "epoch": 0.3354605114279249, + "grad_norm": 0.6487321452560142, + "learning_rate": 7.747000558963553e-06, + "loss": 0.3133, + "step": 7412 + }, + { + "epoch": 0.3355057705363204, + "grad_norm": 0.37027873488301116, + "learning_rate": 7.746388130404266e-06, + "loss": 0.5095, + "step": 7413 + }, + { + "epoch": 0.335551029644716, + "grad_norm": 0.5749342746992915, + "learning_rate": 7.745775642833532e-06, + "loss": 0.3537, + "step": 7414 + }, + { + "epoch": 0.33559628875311154, + "grad_norm": 0.3615744542074436, + "learning_rate": 7.745163096264512e-06, + "loss": 0.5016, + "step": 7415 + }, + { + "epoch": 0.33564154786150713, + "grad_norm": 0.6853723855206779, + "learning_rate": 7.74455049071037e-06, + "loss": 0.3701, + "step": 7416 + }, + { + "epoch": 0.3356868069699027, + "grad_norm": 0.6218699873168575, + "learning_rate": 7.743937826184266e-06, + "loss": 0.329, + "step": 7417 + }, + { + "epoch": 0.33573206607829825, + "grad_norm": 0.6405323808791121, + "learning_rate": 7.743325102699366e-06, + "loss": 0.3976, + "step": 7418 + }, + { + "epoch": 0.33577732518669384, + "grad_norm": 0.7927461115027449, + "learning_rate": 7.742712320268835e-06, + "loss": 0.3702, + "step": 7419 + }, + { + "epoch": 0.3358225842950894, + "grad_norm": 0.37805600475702256, + "learning_rate": 7.742099478905837e-06, + "loss": 0.5159, + "step": 7420 + }, + { + "epoch": 0.33586784340348497, + "grad_norm": 0.6019854929627273, + "learning_rate": 7.741486578623546e-06, + "loss": 0.3676, + "step": 7421 + }, + { + "epoch": 0.3359131025118805, + "grad_norm": 0.5992008219954912, + "learning_rate": 7.740873619435127e-06, + "loss": 0.3441, + "step": 7422 + }, + { + "epoch": 0.3359583616202761, + "grad_norm": 0.6385439615181806, + "learning_rate": 7.740260601353755e-06, + "loss": 0.3698, + "step": 7423 + }, + { + "epoch": 0.3360036207286716, + "grad_norm": 0.8383219944781505, + "learning_rate": 7.739647524392595e-06, + "loss": 0.337, + "step": 7424 + }, + { + "epoch": 0.3360488798370672, + "grad_norm": 0.7062114631525629, + "learning_rate": 7.739034388564826e-06, + "loss": 0.3804, + "step": 7425 + }, + { + "epoch": 0.33609413894546275, + "grad_norm": 1.0240473141234805, + "learning_rate": 7.738421193883618e-06, + "loss": 0.385, + "step": 7426 + }, + { + "epoch": 0.33613939805385834, + "grad_norm": 0.4036271018286075, + "learning_rate": 7.737807940362153e-06, + "loss": 0.4713, + "step": 7427 + }, + { + "epoch": 0.33618465716225393, + "grad_norm": 0.6935362243615151, + "learning_rate": 7.7371946280136e-06, + "loss": 0.3415, + "step": 7428 + }, + { + "epoch": 0.33622991627064946, + "grad_norm": 0.6765723463844222, + "learning_rate": 7.736581256851143e-06, + "loss": 0.3547, + "step": 7429 + }, + { + "epoch": 0.33627517537904505, + "grad_norm": 0.29491475836160913, + "learning_rate": 7.735967826887957e-06, + "loss": 0.507, + "step": 7430 + }, + { + "epoch": 0.3363204344874406, + "grad_norm": 0.6213753045889875, + "learning_rate": 7.73535433813723e-06, + "loss": 0.3598, + "step": 7431 + }, + { + "epoch": 0.3363656935958362, + "grad_norm": 0.6048305024715104, + "learning_rate": 7.734740790612137e-06, + "loss": 0.3531, + "step": 7432 + }, + { + "epoch": 0.3364109527042317, + "grad_norm": 0.6111012611969582, + "learning_rate": 7.734127184325862e-06, + "loss": 0.3652, + "step": 7433 + }, + { + "epoch": 0.3364562118126273, + "grad_norm": 0.7314719236909824, + "learning_rate": 7.73351351929159e-06, + "loss": 0.3873, + "step": 7434 + }, + { + "epoch": 0.33650147092102284, + "grad_norm": 0.6751215172117923, + "learning_rate": 7.732899795522511e-06, + "loss": 0.3584, + "step": 7435 + }, + { + "epoch": 0.33654673002941843, + "grad_norm": 0.6526724286809542, + "learning_rate": 7.732286013031807e-06, + "loss": 0.3316, + "step": 7436 + }, + { + "epoch": 0.33659198913781396, + "grad_norm": 0.8181862564068937, + "learning_rate": 7.73167217183267e-06, + "loss": 0.3682, + "step": 7437 + }, + { + "epoch": 0.33663724824620955, + "grad_norm": 0.6340294789997006, + "learning_rate": 7.731058271938286e-06, + "loss": 0.3887, + "step": 7438 + }, + { + "epoch": 0.3366825073546051, + "grad_norm": 0.6380738026004611, + "learning_rate": 7.73044431336185e-06, + "loss": 0.3757, + "step": 7439 + }, + { + "epoch": 0.3367277664630007, + "grad_norm": 0.7063121664201003, + "learning_rate": 7.729830296116549e-06, + "loss": 0.3932, + "step": 7440 + }, + { + "epoch": 0.33677302557139627, + "grad_norm": 0.6163861356537637, + "learning_rate": 7.729216220215579e-06, + "loss": 0.3733, + "step": 7441 + }, + { + "epoch": 0.3368182846797918, + "grad_norm": 0.6808044512307527, + "learning_rate": 7.728602085672136e-06, + "loss": 0.3617, + "step": 7442 + }, + { + "epoch": 0.3368635437881874, + "grad_norm": 0.6416138091184128, + "learning_rate": 7.727987892499413e-06, + "loss": 0.301, + "step": 7443 + }, + { + "epoch": 0.3369088028965829, + "grad_norm": 0.6412030801412819, + "learning_rate": 7.72737364071061e-06, + "loss": 0.3737, + "step": 7444 + }, + { + "epoch": 0.3369540620049785, + "grad_norm": 0.6282639242310271, + "learning_rate": 7.726759330318922e-06, + "loss": 0.3606, + "step": 7445 + }, + { + "epoch": 0.33699932111337405, + "grad_norm": 0.7902263070049047, + "learning_rate": 7.726144961337552e-06, + "loss": 0.3427, + "step": 7446 + }, + { + "epoch": 0.33704458022176964, + "grad_norm": 1.2046021596760628, + "learning_rate": 7.7255305337797e-06, + "loss": 0.3538, + "step": 7447 + }, + { + "epoch": 0.3370898393301652, + "grad_norm": 0.67347299103925, + "learning_rate": 7.724916047658568e-06, + "loss": 0.3803, + "step": 7448 + }, + { + "epoch": 0.33713509843856077, + "grad_norm": 0.6249721615380065, + "learning_rate": 7.724301502987357e-06, + "loss": 0.3623, + "step": 7449 + }, + { + "epoch": 0.3371803575469563, + "grad_norm": 0.6553766994577328, + "learning_rate": 7.723686899779277e-06, + "loss": 0.3445, + "step": 7450 + }, + { + "epoch": 0.3372256166553519, + "grad_norm": 0.6312510750549889, + "learning_rate": 7.723072238047526e-06, + "loss": 0.3678, + "step": 7451 + }, + { + "epoch": 0.3372708757637475, + "grad_norm": 0.40615804589458515, + "learning_rate": 7.72245751780532e-06, + "loss": 0.4807, + "step": 7452 + }, + { + "epoch": 0.337316134872143, + "grad_norm": 0.6384877707589975, + "learning_rate": 7.721842739065862e-06, + "loss": 0.3547, + "step": 7453 + }, + { + "epoch": 0.3373613939805386, + "grad_norm": 0.6979226985911737, + "learning_rate": 7.721227901842363e-06, + "loss": 0.3621, + "step": 7454 + }, + { + "epoch": 0.33740665308893414, + "grad_norm": 0.6487294010425022, + "learning_rate": 7.720613006148034e-06, + "loss": 0.3578, + "step": 7455 + }, + { + "epoch": 0.33745191219732973, + "grad_norm": 0.8479401579050782, + "learning_rate": 7.719998051996087e-06, + "loss": 0.3972, + "step": 7456 + }, + { + "epoch": 0.33749717130572526, + "grad_norm": 0.3283701852873334, + "learning_rate": 7.719383039399735e-06, + "loss": 0.4766, + "step": 7457 + }, + { + "epoch": 0.33754243041412085, + "grad_norm": 0.6778409513847219, + "learning_rate": 7.718767968372193e-06, + "loss": 0.3381, + "step": 7458 + }, + { + "epoch": 0.3375876895225164, + "grad_norm": 0.6111463890412212, + "learning_rate": 7.71815283892668e-06, + "loss": 0.3235, + "step": 7459 + }, + { + "epoch": 0.337632948630912, + "grad_norm": 0.3121534940673971, + "learning_rate": 7.71753765107641e-06, + "loss": 0.4745, + "step": 7460 + }, + { + "epoch": 0.3376782077393075, + "grad_norm": 0.6458181090619489, + "learning_rate": 7.716922404834602e-06, + "loss": 0.3494, + "step": 7461 + }, + { + "epoch": 0.3377234668477031, + "grad_norm": 0.6411197840593588, + "learning_rate": 7.716307100214472e-06, + "loss": 0.416, + "step": 7462 + }, + { + "epoch": 0.3377687259560987, + "grad_norm": 0.6632890569738612, + "learning_rate": 7.715691737229249e-06, + "loss": 0.3312, + "step": 7463 + }, + { + "epoch": 0.3378139850644942, + "grad_norm": 0.2935415701154594, + "learning_rate": 7.715076315892152e-06, + "loss": 0.4926, + "step": 7464 + }, + { + "epoch": 0.3378592441728898, + "grad_norm": 0.6186396883713703, + "learning_rate": 7.714460836216402e-06, + "loss": 0.3823, + "step": 7465 + }, + { + "epoch": 0.33790450328128535, + "grad_norm": 0.6027909574603085, + "learning_rate": 7.713845298215226e-06, + "loss": 0.3537, + "step": 7466 + }, + { + "epoch": 0.33794976238968094, + "grad_norm": 0.6197701183196876, + "learning_rate": 7.713229701901848e-06, + "loss": 0.3794, + "step": 7467 + }, + { + "epoch": 0.3379950214980765, + "grad_norm": 0.2862160805249473, + "learning_rate": 7.712614047289498e-06, + "loss": 0.4657, + "step": 7468 + }, + { + "epoch": 0.33804028060647207, + "grad_norm": 0.6806505904540876, + "learning_rate": 7.711998334391404e-06, + "loss": 0.3869, + "step": 7469 + }, + { + "epoch": 0.3380855397148676, + "grad_norm": 0.7033828216041671, + "learning_rate": 7.711382563220793e-06, + "loss": 0.3814, + "step": 7470 + }, + { + "epoch": 0.3381307988232632, + "grad_norm": 0.6488784757057378, + "learning_rate": 7.7107667337909e-06, + "loss": 0.3832, + "step": 7471 + }, + { + "epoch": 0.3381760579316587, + "grad_norm": 0.3317792728759328, + "learning_rate": 7.710150846114954e-06, + "loss": 0.4736, + "step": 7472 + }, + { + "epoch": 0.3382213170400543, + "grad_norm": 0.6621196774829878, + "learning_rate": 7.70953490020619e-06, + "loss": 0.3642, + "step": 7473 + }, + { + "epoch": 0.33826657614844985, + "grad_norm": 0.6740884911039556, + "learning_rate": 7.708918896077843e-06, + "loss": 0.4162, + "step": 7474 + }, + { + "epoch": 0.33831183525684544, + "grad_norm": 0.6174594802456844, + "learning_rate": 7.708302833743149e-06, + "loss": 0.3285, + "step": 7475 + }, + { + "epoch": 0.33835709436524103, + "grad_norm": 0.6464458466179519, + "learning_rate": 7.707686713215346e-06, + "loss": 0.3641, + "step": 7476 + }, + { + "epoch": 0.33840235347363656, + "grad_norm": 0.6499025032731706, + "learning_rate": 7.70707053450767e-06, + "loss": 0.3353, + "step": 7477 + }, + { + "epoch": 0.33844761258203215, + "grad_norm": 0.5949856358113641, + "learning_rate": 7.706454297633363e-06, + "loss": 0.38, + "step": 7478 + }, + { + "epoch": 0.3384928716904277, + "grad_norm": 0.6759432774690013, + "learning_rate": 7.705838002605665e-06, + "loss": 0.3213, + "step": 7479 + }, + { + "epoch": 0.3385381307988233, + "grad_norm": 0.29385555492762644, + "learning_rate": 7.705221649437819e-06, + "loss": 0.4835, + "step": 7480 + }, + { + "epoch": 0.3385833899072188, + "grad_norm": 0.3342604546667831, + "learning_rate": 7.704605238143069e-06, + "loss": 0.5036, + "step": 7481 + }, + { + "epoch": 0.3386286490156144, + "grad_norm": 0.647801597038109, + "learning_rate": 7.703988768734658e-06, + "loss": 0.41, + "step": 7482 + }, + { + "epoch": 0.33867390812400994, + "grad_norm": 0.63915331270805, + "learning_rate": 7.703372241225832e-06, + "loss": 0.2949, + "step": 7483 + }, + { + "epoch": 0.3387191672324055, + "grad_norm": 0.6251380338112288, + "learning_rate": 7.702755655629841e-06, + "loss": 0.3707, + "step": 7484 + }, + { + "epoch": 0.33876442634080106, + "grad_norm": 0.7472768475392628, + "learning_rate": 7.702139011959933e-06, + "loss": 0.3788, + "step": 7485 + }, + { + "epoch": 0.33880968544919665, + "grad_norm": 0.6220251227286063, + "learning_rate": 7.701522310229353e-06, + "loss": 0.3689, + "step": 7486 + }, + { + "epoch": 0.33885494455759224, + "grad_norm": 0.31252483447081636, + "learning_rate": 7.700905550451359e-06, + "loss": 0.5129, + "step": 7487 + }, + { + "epoch": 0.3389002036659878, + "grad_norm": 0.2934067222093203, + "learning_rate": 7.700288732639198e-06, + "loss": 0.4867, + "step": 7488 + }, + { + "epoch": 0.33894546277438337, + "grad_norm": 0.6546002810607425, + "learning_rate": 7.699671856806126e-06, + "loss": 0.3321, + "step": 7489 + }, + { + "epoch": 0.3389907218827789, + "grad_norm": 0.6325439862884268, + "learning_rate": 7.699054922965398e-06, + "loss": 0.3346, + "step": 7490 + }, + { + "epoch": 0.3390359809911745, + "grad_norm": 0.740205050820994, + "learning_rate": 7.698437931130266e-06, + "loss": 0.3741, + "step": 7491 + }, + { + "epoch": 0.33908124009957, + "grad_norm": 0.6601280735750225, + "learning_rate": 7.697820881313994e-06, + "loss": 0.3616, + "step": 7492 + }, + { + "epoch": 0.3391264992079656, + "grad_norm": 0.6613533939553481, + "learning_rate": 7.697203773529835e-06, + "loss": 0.3569, + "step": 7493 + }, + { + "epoch": 0.33917175831636115, + "grad_norm": 0.6112239789987094, + "learning_rate": 7.696586607791053e-06, + "loss": 0.3738, + "step": 7494 + }, + { + "epoch": 0.33921701742475674, + "grad_norm": 0.7287828073782087, + "learning_rate": 7.695969384110906e-06, + "loss": 0.3303, + "step": 7495 + }, + { + "epoch": 0.3392622765331523, + "grad_norm": 0.6877461486725416, + "learning_rate": 7.695352102502655e-06, + "loss": 0.3679, + "step": 7496 + }, + { + "epoch": 0.33930753564154786, + "grad_norm": 0.7528212080107087, + "learning_rate": 7.694734762979566e-06, + "loss": 0.3722, + "step": 7497 + }, + { + "epoch": 0.33935279474994345, + "grad_norm": 0.6911967207059251, + "learning_rate": 7.694117365554905e-06, + "loss": 0.3523, + "step": 7498 + }, + { + "epoch": 0.339398053858339, + "grad_norm": 0.6781742116766306, + "learning_rate": 7.693499910241935e-06, + "loss": 0.3753, + "step": 7499 + }, + { + "epoch": 0.3394433129667346, + "grad_norm": 0.6668863342842705, + "learning_rate": 7.692882397053924e-06, + "loss": 0.3711, + "step": 7500 + }, + { + "epoch": 0.3394885720751301, + "grad_norm": 0.6889227130686282, + "learning_rate": 7.69226482600414e-06, + "loss": 0.3786, + "step": 7501 + }, + { + "epoch": 0.3395338311835257, + "grad_norm": 0.6258982774738167, + "learning_rate": 7.691647197105857e-06, + "loss": 0.3838, + "step": 7502 + }, + { + "epoch": 0.33957909029192124, + "grad_norm": 0.60363916132125, + "learning_rate": 7.69102951037234e-06, + "loss": 0.3668, + "step": 7503 + }, + { + "epoch": 0.3396243494003168, + "grad_norm": 0.45105570128430217, + "learning_rate": 7.690411765816864e-06, + "loss": 0.4735, + "step": 7504 + }, + { + "epoch": 0.33966960850871236, + "grad_norm": 0.6294237191403765, + "learning_rate": 7.689793963452703e-06, + "loss": 0.3415, + "step": 7505 + }, + { + "epoch": 0.33971486761710795, + "grad_norm": 0.6526197354715212, + "learning_rate": 7.68917610329313e-06, + "loss": 0.3648, + "step": 7506 + }, + { + "epoch": 0.3397601267255035, + "grad_norm": 0.6589135998127787, + "learning_rate": 7.68855818535142e-06, + "loss": 0.3441, + "step": 7507 + }, + { + "epoch": 0.3398053858338991, + "grad_norm": 0.6712897142276448, + "learning_rate": 7.687940209640853e-06, + "loss": 0.3835, + "step": 7508 + }, + { + "epoch": 0.3398506449422946, + "grad_norm": 0.3313425590280107, + "learning_rate": 7.687322176174708e-06, + "loss": 0.5078, + "step": 7509 + }, + { + "epoch": 0.3398959040506902, + "grad_norm": 0.6378473326228518, + "learning_rate": 7.686704084966263e-06, + "loss": 0.3913, + "step": 7510 + }, + { + "epoch": 0.3399411631590858, + "grad_norm": 0.6571514913747152, + "learning_rate": 7.686085936028798e-06, + "loss": 0.3348, + "step": 7511 + }, + { + "epoch": 0.3399864222674813, + "grad_norm": 0.6324628293351644, + "learning_rate": 7.685467729375596e-06, + "loss": 0.3366, + "step": 7512 + }, + { + "epoch": 0.3400316813758769, + "grad_norm": 0.6064251790572599, + "learning_rate": 7.684849465019938e-06, + "loss": 0.3518, + "step": 7513 + }, + { + "epoch": 0.34007694048427245, + "grad_norm": 0.6791002915888196, + "learning_rate": 7.684231142975113e-06, + "loss": 0.3573, + "step": 7514 + }, + { + "epoch": 0.34012219959266804, + "grad_norm": 0.6485858044842474, + "learning_rate": 7.683612763254404e-06, + "loss": 0.3393, + "step": 7515 + }, + { + "epoch": 0.3401674587010636, + "grad_norm": 0.34229835613429566, + "learning_rate": 7.682994325871098e-06, + "loss": 0.5176, + "step": 7516 + }, + { + "epoch": 0.34021271780945916, + "grad_norm": 0.5985761286493197, + "learning_rate": 7.682375830838487e-06, + "loss": 0.3647, + "step": 7517 + }, + { + "epoch": 0.3402579769178547, + "grad_norm": 0.6356574267140701, + "learning_rate": 7.681757278169854e-06, + "loss": 0.3234, + "step": 7518 + }, + { + "epoch": 0.3403032360262503, + "grad_norm": 0.685838642936561, + "learning_rate": 7.681138667878497e-06, + "loss": 0.3603, + "step": 7519 + }, + { + "epoch": 0.3403484951346458, + "grad_norm": 0.6511830826513854, + "learning_rate": 7.680519999977703e-06, + "loss": 0.3723, + "step": 7520 + }, + { + "epoch": 0.3403937542430414, + "grad_norm": 0.31154658968417265, + "learning_rate": 7.679901274480766e-06, + "loss": 0.4982, + "step": 7521 + }, + { + "epoch": 0.340439013351437, + "grad_norm": 0.6537163331357495, + "learning_rate": 7.67928249140098e-06, + "loss": 0.4237, + "step": 7522 + }, + { + "epoch": 0.34048427245983254, + "grad_norm": 0.6386803915716467, + "learning_rate": 7.678663650751648e-06, + "loss": 0.3303, + "step": 7523 + }, + { + "epoch": 0.3405295315682281, + "grad_norm": 0.6519793736472048, + "learning_rate": 7.678044752546056e-06, + "loss": 0.366, + "step": 7524 + }, + { + "epoch": 0.34057479067662366, + "grad_norm": 0.590842503948439, + "learning_rate": 7.677425796797509e-06, + "loss": 0.3097, + "step": 7525 + }, + { + "epoch": 0.34062004978501925, + "grad_norm": 0.6407967582943332, + "learning_rate": 7.676806783519304e-06, + "loss": 0.3608, + "step": 7526 + }, + { + "epoch": 0.3406653088934148, + "grad_norm": 0.7371405089237638, + "learning_rate": 7.676187712724742e-06, + "loss": 0.3703, + "step": 7527 + }, + { + "epoch": 0.3407105680018104, + "grad_norm": 0.6499977417250453, + "learning_rate": 7.675568584427125e-06, + "loss": 0.3762, + "step": 7528 + }, + { + "epoch": 0.3407558271102059, + "grad_norm": 0.6536935173708011, + "learning_rate": 7.674949398639759e-06, + "loss": 0.3582, + "step": 7529 + }, + { + "epoch": 0.3408010862186015, + "grad_norm": 0.6722326617995539, + "learning_rate": 7.674330155375942e-06, + "loss": 0.3582, + "step": 7530 + }, + { + "epoch": 0.34084634532699704, + "grad_norm": 0.6492041265689165, + "learning_rate": 7.673710854648988e-06, + "loss": 0.3397, + "step": 7531 + }, + { + "epoch": 0.3408916044353926, + "grad_norm": 0.6033954319869382, + "learning_rate": 7.673091496472195e-06, + "loss": 0.3484, + "step": 7532 + }, + { + "epoch": 0.34093686354378816, + "grad_norm": 0.38637999773221254, + "learning_rate": 7.67247208085888e-06, + "loss": 0.4913, + "step": 7533 + }, + { + "epoch": 0.34098212265218375, + "grad_norm": 0.7052793212540459, + "learning_rate": 7.671852607822346e-06, + "loss": 0.3513, + "step": 7534 + }, + { + "epoch": 0.34102738176057934, + "grad_norm": 0.6174511378249521, + "learning_rate": 7.671233077375903e-06, + "loss": 0.3736, + "step": 7535 + }, + { + "epoch": 0.3410726408689749, + "grad_norm": 0.6239886045962482, + "learning_rate": 7.670613489532868e-06, + "loss": 0.3414, + "step": 7536 + }, + { + "epoch": 0.34111789997737046, + "grad_norm": 0.8548263136871884, + "learning_rate": 7.66999384430655e-06, + "loss": 0.3477, + "step": 7537 + }, + { + "epoch": 0.341163159085766, + "grad_norm": 0.6002015590991432, + "learning_rate": 7.669374141710266e-06, + "loss": 0.3479, + "step": 7538 + }, + { + "epoch": 0.3412084181941616, + "grad_norm": 0.6336840470348314, + "learning_rate": 7.668754381757329e-06, + "loss": 0.3686, + "step": 7539 + }, + { + "epoch": 0.3412536773025571, + "grad_norm": 0.6427120609580672, + "learning_rate": 7.668134564461057e-06, + "loss": 0.3474, + "step": 7540 + }, + { + "epoch": 0.3412989364109527, + "grad_norm": 0.8789691593560188, + "learning_rate": 7.667514689834766e-06, + "loss": 0.3708, + "step": 7541 + }, + { + "epoch": 0.34134419551934825, + "grad_norm": 0.6235062003928623, + "learning_rate": 7.666894757891779e-06, + "loss": 0.3479, + "step": 7542 + }, + { + "epoch": 0.34138945462774384, + "grad_norm": 0.6322898144122554, + "learning_rate": 7.666274768645413e-06, + "loss": 0.3542, + "step": 7543 + }, + { + "epoch": 0.3414347137361394, + "grad_norm": 0.6630673959545894, + "learning_rate": 7.665654722108994e-06, + "loss": 0.3685, + "step": 7544 + }, + { + "epoch": 0.34147997284453496, + "grad_norm": 0.6387799928039584, + "learning_rate": 7.665034618295838e-06, + "loss": 0.3474, + "step": 7545 + }, + { + "epoch": 0.34152523195293055, + "grad_norm": 0.5943943062030385, + "learning_rate": 7.664414457219277e-06, + "loss": 0.3239, + "step": 7546 + }, + { + "epoch": 0.3415704910613261, + "grad_norm": 0.6699691702156413, + "learning_rate": 7.66379423889263e-06, + "loss": 0.3928, + "step": 7547 + }, + { + "epoch": 0.3416157501697217, + "grad_norm": 0.7179907829644857, + "learning_rate": 7.663173963329227e-06, + "loss": 0.3197, + "step": 7548 + }, + { + "epoch": 0.3416610092781172, + "grad_norm": 0.6417787563246365, + "learning_rate": 7.662553630542393e-06, + "loss": 0.3439, + "step": 7549 + }, + { + "epoch": 0.3417062683865128, + "grad_norm": 0.6565039262127776, + "learning_rate": 7.661933240545464e-06, + "loss": 0.3279, + "step": 7550 + }, + { + "epoch": 0.34175152749490834, + "grad_norm": 0.6660501472957977, + "learning_rate": 7.661312793351758e-06, + "loss": 0.388, + "step": 7551 + }, + { + "epoch": 0.3417967866033039, + "grad_norm": 0.6796749549491682, + "learning_rate": 7.660692288974618e-06, + "loss": 0.3604, + "step": 7552 + }, + { + "epoch": 0.34184204571169946, + "grad_norm": 0.5855195869802828, + "learning_rate": 7.660071727427372e-06, + "loss": 0.3145, + "step": 7553 + }, + { + "epoch": 0.34188730482009505, + "grad_norm": 0.6448670926148414, + "learning_rate": 7.659451108723353e-06, + "loss": 0.3663, + "step": 7554 + }, + { + "epoch": 0.3419325639284906, + "grad_norm": 0.6840365201027633, + "learning_rate": 7.658830432875899e-06, + "loss": 0.373, + "step": 7555 + }, + { + "epoch": 0.3419778230368862, + "grad_norm": 0.6320095353665366, + "learning_rate": 7.658209699898344e-06, + "loss": 0.3696, + "step": 7556 + }, + { + "epoch": 0.34202308214528176, + "grad_norm": 0.6769106274619351, + "learning_rate": 7.657588909804028e-06, + "loss": 0.4076, + "step": 7557 + }, + { + "epoch": 0.3420683412536773, + "grad_norm": 0.6830176902563303, + "learning_rate": 7.656968062606288e-06, + "loss": 0.3546, + "step": 7558 + }, + { + "epoch": 0.3421136003620729, + "grad_norm": 0.6386112845278985, + "learning_rate": 7.656347158318462e-06, + "loss": 0.3308, + "step": 7559 + }, + { + "epoch": 0.3421588594704684, + "grad_norm": 0.6494201950324293, + "learning_rate": 7.655726196953898e-06, + "loss": 0.36, + "step": 7560 + }, + { + "epoch": 0.342204118578864, + "grad_norm": 0.6433055486507153, + "learning_rate": 7.655105178525932e-06, + "loss": 0.3533, + "step": 7561 + }, + { + "epoch": 0.34224937768725955, + "grad_norm": 0.5024231652031483, + "learning_rate": 7.65448410304791e-06, + "loss": 0.4686, + "step": 7562 + }, + { + "epoch": 0.34229463679565514, + "grad_norm": 0.6118199949238902, + "learning_rate": 7.653862970533179e-06, + "loss": 0.3816, + "step": 7563 + }, + { + "epoch": 0.3423398959040507, + "grad_norm": 0.6683841803842258, + "learning_rate": 7.653241780995083e-06, + "loss": 0.3528, + "step": 7564 + }, + { + "epoch": 0.34238515501244626, + "grad_norm": 0.32221683070115276, + "learning_rate": 7.652620534446968e-06, + "loss": 0.4729, + "step": 7565 + }, + { + "epoch": 0.3424304141208418, + "grad_norm": 0.6251898531891275, + "learning_rate": 7.651999230902186e-06, + "loss": 0.3703, + "step": 7566 + }, + { + "epoch": 0.3424756732292374, + "grad_norm": 0.7174094885394138, + "learning_rate": 7.651377870374087e-06, + "loss": 0.3363, + "step": 7567 + }, + { + "epoch": 0.3425209323376329, + "grad_norm": 0.5838536870265789, + "learning_rate": 7.650756452876019e-06, + "loss": 0.3548, + "step": 7568 + }, + { + "epoch": 0.3425661914460285, + "grad_norm": 0.4020356162026709, + "learning_rate": 7.650134978421335e-06, + "loss": 0.472, + "step": 7569 + }, + { + "epoch": 0.3426114505544241, + "grad_norm": 0.6858008302933762, + "learning_rate": 7.64951344702339e-06, + "loss": 0.3974, + "step": 7570 + }, + { + "epoch": 0.34265670966281964, + "grad_norm": 0.6300589867991939, + "learning_rate": 7.648891858695542e-06, + "loss": 0.3443, + "step": 7571 + }, + { + "epoch": 0.3427019687712152, + "grad_norm": 0.5986157076384225, + "learning_rate": 7.64827021345114e-06, + "loss": 0.3277, + "step": 7572 + }, + { + "epoch": 0.34274722787961076, + "grad_norm": 0.3073571483471491, + "learning_rate": 7.647648511303545e-06, + "loss": 0.494, + "step": 7573 + }, + { + "epoch": 0.34279248698800635, + "grad_norm": 0.6350792333445885, + "learning_rate": 7.647026752266114e-06, + "loss": 0.3809, + "step": 7574 + }, + { + "epoch": 0.3428377460964019, + "grad_norm": 0.666039449246502, + "learning_rate": 7.64640493635221e-06, + "loss": 0.3917, + "step": 7575 + }, + { + "epoch": 0.3428830052047975, + "grad_norm": 0.735552361903718, + "learning_rate": 7.64578306357519e-06, + "loss": 0.3641, + "step": 7576 + }, + { + "epoch": 0.342928264313193, + "grad_norm": 0.7251610813098117, + "learning_rate": 7.64516113394842e-06, + "loss": 0.3148, + "step": 7577 + }, + { + "epoch": 0.3429735234215886, + "grad_norm": 0.6705458498226945, + "learning_rate": 7.64453914748526e-06, + "loss": 0.3425, + "step": 7578 + }, + { + "epoch": 0.34301878252998413, + "grad_norm": 0.6576445664417572, + "learning_rate": 7.643917104199076e-06, + "loss": 0.366, + "step": 7579 + }, + { + "epoch": 0.3430640416383797, + "grad_norm": 0.35501970819457634, + "learning_rate": 7.643295004103232e-06, + "loss": 0.4816, + "step": 7580 + }, + { + "epoch": 0.3431093007467753, + "grad_norm": 0.763870455320076, + "learning_rate": 7.6426728472111e-06, + "loss": 0.3533, + "step": 7581 + }, + { + "epoch": 0.34315455985517085, + "grad_norm": 0.6554494379318401, + "learning_rate": 7.642050633536042e-06, + "loss": 0.3617, + "step": 7582 + }, + { + "epoch": 0.34319981896356644, + "grad_norm": 0.6627115772554796, + "learning_rate": 7.641428363091431e-06, + "loss": 0.3789, + "step": 7583 + }, + { + "epoch": 0.343245078071962, + "grad_norm": 0.6152424957501429, + "learning_rate": 7.640806035890637e-06, + "loss": 0.3426, + "step": 7584 + }, + { + "epoch": 0.34329033718035756, + "grad_norm": 0.6534631216532331, + "learning_rate": 7.640183651947033e-06, + "loss": 0.3383, + "step": 7585 + }, + { + "epoch": 0.3433355962887531, + "grad_norm": 0.7446778963304378, + "learning_rate": 7.639561211273989e-06, + "loss": 0.3749, + "step": 7586 + }, + { + "epoch": 0.3433808553971487, + "grad_norm": 0.31481139433953165, + "learning_rate": 7.638938713884883e-06, + "loss": 0.4847, + "step": 7587 + }, + { + "epoch": 0.3434261145055442, + "grad_norm": 0.5922040233371281, + "learning_rate": 7.638316159793089e-06, + "loss": 0.3569, + "step": 7588 + }, + { + "epoch": 0.3434713736139398, + "grad_norm": 0.6249165881142379, + "learning_rate": 7.637693549011983e-06, + "loss": 0.3517, + "step": 7589 + }, + { + "epoch": 0.34351663272233535, + "grad_norm": 0.3221901775760477, + "learning_rate": 7.637070881554944e-06, + "loss": 0.4812, + "step": 7590 + }, + { + "epoch": 0.34356189183073094, + "grad_norm": 0.3070442368662165, + "learning_rate": 7.63644815743535e-06, + "loss": 0.4885, + "step": 7591 + }, + { + "epoch": 0.3436071509391265, + "grad_norm": 0.6727977151365402, + "learning_rate": 7.635825376666584e-06, + "loss": 0.3514, + "step": 7592 + }, + { + "epoch": 0.34365241004752206, + "grad_norm": 0.6778367970179924, + "learning_rate": 7.635202539262025e-06, + "loss": 0.3718, + "step": 7593 + }, + { + "epoch": 0.34369766915591765, + "grad_norm": 0.6923427020612646, + "learning_rate": 7.634579645235056e-06, + "loss": 0.3678, + "step": 7594 + }, + { + "epoch": 0.3437429282643132, + "grad_norm": 0.6398153691505553, + "learning_rate": 7.633956694599063e-06, + "loss": 0.3305, + "step": 7595 + }, + { + "epoch": 0.3437881873727088, + "grad_norm": 0.658762949535757, + "learning_rate": 7.63333368736743e-06, + "loss": 0.4038, + "step": 7596 + }, + { + "epoch": 0.3438334464811043, + "grad_norm": 0.36541076844789505, + "learning_rate": 7.632710623553543e-06, + "loss": 0.4857, + "step": 7597 + }, + { + "epoch": 0.3438787055894999, + "grad_norm": 0.6564004935929247, + "learning_rate": 7.632087503170793e-06, + "loss": 0.3249, + "step": 7598 + }, + { + "epoch": 0.34392396469789543, + "grad_norm": 0.6711467037755176, + "learning_rate": 7.631464326232562e-06, + "loss": 0.3469, + "step": 7599 + }, + { + "epoch": 0.343969223806291, + "grad_norm": 0.645995639153286, + "learning_rate": 7.630841092752248e-06, + "loss": 0.3429, + "step": 7600 + }, + { + "epoch": 0.34401448291468656, + "grad_norm": 0.6707935052593224, + "learning_rate": 7.630217802743238e-06, + "loss": 0.3921, + "step": 7601 + }, + { + "epoch": 0.34405974202308215, + "grad_norm": 0.611645996050189, + "learning_rate": 7.629594456218926e-06, + "loss": 0.3883, + "step": 7602 + }, + { + "epoch": 0.3441050011314777, + "grad_norm": 0.7375476165970931, + "learning_rate": 7.628971053192705e-06, + "loss": 0.3792, + "step": 7603 + }, + { + "epoch": 0.3441502602398733, + "grad_norm": 0.9643445777519964, + "learning_rate": 7.628347593677969e-06, + "loss": 0.377, + "step": 7604 + }, + { + "epoch": 0.34419551934826886, + "grad_norm": 0.42861613394280557, + "learning_rate": 7.6277240776881175e-06, + "loss": 0.4879, + "step": 7605 + }, + { + "epoch": 0.3442407784566644, + "grad_norm": 0.6210216944863378, + "learning_rate": 7.6271005052365465e-06, + "loss": 0.3676, + "step": 7606 + }, + { + "epoch": 0.34428603756506, + "grad_norm": 0.636479508470854, + "learning_rate": 7.6264768763366525e-06, + "loss": 0.3545, + "step": 7607 + }, + { + "epoch": 0.3443312966734555, + "grad_norm": 0.6229405722005883, + "learning_rate": 7.6258531910018375e-06, + "loss": 0.3568, + "step": 7608 + }, + { + "epoch": 0.3443765557818511, + "grad_norm": 0.5915074305794326, + "learning_rate": 7.625229449245501e-06, + "loss": 0.323, + "step": 7609 + }, + { + "epoch": 0.34442181489024665, + "grad_norm": 0.5860717764241135, + "learning_rate": 7.624605651081049e-06, + "loss": 0.3412, + "step": 7610 + }, + { + "epoch": 0.34446707399864224, + "grad_norm": 0.7414526006320594, + "learning_rate": 7.62398179652188e-06, + "loss": 0.3505, + "step": 7611 + }, + { + "epoch": 0.34451233310703777, + "grad_norm": 0.6203614209578381, + "learning_rate": 7.623357885581403e-06, + "loss": 0.3978, + "step": 7612 + }, + { + "epoch": 0.34455759221543336, + "grad_norm": 0.7438136046629493, + "learning_rate": 7.622733918273021e-06, + "loss": 0.365, + "step": 7613 + }, + { + "epoch": 0.3446028513238289, + "grad_norm": 0.6351053778164822, + "learning_rate": 7.6221098946101415e-06, + "loss": 0.3576, + "step": 7614 + }, + { + "epoch": 0.3446481104322245, + "grad_norm": 0.6493836667947682, + "learning_rate": 7.621485814606175e-06, + "loss": 0.3144, + "step": 7615 + }, + { + "epoch": 0.3446933695406201, + "grad_norm": 0.4247953999482638, + "learning_rate": 7.62086167827453e-06, + "loss": 0.4993, + "step": 7616 + }, + { + "epoch": 0.3447386286490156, + "grad_norm": 0.6601210723159385, + "learning_rate": 7.620237485628614e-06, + "loss": 0.3643, + "step": 7617 + }, + { + "epoch": 0.3447838877574112, + "grad_norm": 0.618361473474604, + "learning_rate": 7.619613236681845e-06, + "loss": 0.3511, + "step": 7618 + }, + { + "epoch": 0.34482914686580673, + "grad_norm": 0.6427019523563446, + "learning_rate": 7.618988931447633e-06, + "loss": 0.3331, + "step": 7619 + }, + { + "epoch": 0.3448744059742023, + "grad_norm": 0.6811436509905098, + "learning_rate": 7.61836456993939e-06, + "loss": 0.3763, + "step": 7620 + }, + { + "epoch": 0.34491966508259786, + "grad_norm": 0.752962159483992, + "learning_rate": 7.617740152170536e-06, + "loss": 0.3439, + "step": 7621 + }, + { + "epoch": 0.34496492419099345, + "grad_norm": 0.320214585605932, + "learning_rate": 7.617115678154485e-06, + "loss": 0.4688, + "step": 7622 + }, + { + "epoch": 0.345010183299389, + "grad_norm": 0.6349661881515777, + "learning_rate": 7.616491147904657e-06, + "loss": 0.3607, + "step": 7623 + }, + { + "epoch": 0.3450554424077846, + "grad_norm": 0.6861805691638369, + "learning_rate": 7.615866561434468e-06, + "loss": 0.3703, + "step": 7624 + }, + { + "epoch": 0.3451007015161801, + "grad_norm": 0.6198289549983903, + "learning_rate": 7.615241918757343e-06, + "loss": 0.3272, + "step": 7625 + }, + { + "epoch": 0.3451459606245757, + "grad_norm": 0.2929009723502456, + "learning_rate": 7.614617219886699e-06, + "loss": 0.4649, + "step": 7626 + }, + { + "epoch": 0.34519121973297123, + "grad_norm": 0.6858863221642914, + "learning_rate": 7.613992464835964e-06, + "loss": 0.3646, + "step": 7627 + }, + { + "epoch": 0.3452364788413668, + "grad_norm": 0.6786937831026911, + "learning_rate": 7.613367653618558e-06, + "loss": 0.3563, + "step": 7628 + }, + { + "epoch": 0.3452817379497624, + "grad_norm": 0.7177119428531356, + "learning_rate": 7.612742786247906e-06, + "loss": 0.3916, + "step": 7629 + }, + { + "epoch": 0.34532699705815795, + "grad_norm": 0.603190059176882, + "learning_rate": 7.612117862737437e-06, + "loss": 0.3232, + "step": 7630 + }, + { + "epoch": 0.34537225616655354, + "grad_norm": 0.5944795316400319, + "learning_rate": 7.611492883100579e-06, + "loss": 0.3464, + "step": 7631 + }, + { + "epoch": 0.34541751527494907, + "grad_norm": 0.6558781026577434, + "learning_rate": 7.610867847350758e-06, + "loss": 0.3884, + "step": 7632 + }, + { + "epoch": 0.34546277438334466, + "grad_norm": 0.637930146367971, + "learning_rate": 7.610242755501404e-06, + "loss": 0.3355, + "step": 7633 + }, + { + "epoch": 0.3455080334917402, + "grad_norm": 0.5539742893944569, + "learning_rate": 7.6096176075659535e-06, + "loss": 0.341, + "step": 7634 + }, + { + "epoch": 0.3455532926001358, + "grad_norm": 0.6214660355339781, + "learning_rate": 7.608992403557833e-06, + "loss": 0.3933, + "step": 7635 + }, + { + "epoch": 0.3455985517085313, + "grad_norm": 0.6163769966528868, + "learning_rate": 7.60836714349048e-06, + "loss": 0.3566, + "step": 7636 + }, + { + "epoch": 0.3456438108169269, + "grad_norm": 0.6619757585034614, + "learning_rate": 7.607741827377329e-06, + "loss": 0.3892, + "step": 7637 + }, + { + "epoch": 0.34568906992532245, + "grad_norm": 0.6139244345551975, + "learning_rate": 7.607116455231811e-06, + "loss": 0.3253, + "step": 7638 + }, + { + "epoch": 0.34573432903371804, + "grad_norm": 1.0639117936927491, + "learning_rate": 7.606491027067372e-06, + "loss": 0.3575, + "step": 7639 + }, + { + "epoch": 0.3457795881421136, + "grad_norm": 0.71432928918362, + "learning_rate": 7.605865542897443e-06, + "loss": 0.3811, + "step": 7640 + }, + { + "epoch": 0.34582484725050916, + "grad_norm": 0.59747298091808, + "learning_rate": 7.605240002735469e-06, + "loss": 0.3792, + "step": 7641 + }, + { + "epoch": 0.34587010635890475, + "grad_norm": 0.6090730545711344, + "learning_rate": 7.604614406594888e-06, + "loss": 0.3769, + "step": 7642 + }, + { + "epoch": 0.3459153654673003, + "grad_norm": 0.6527221123705976, + "learning_rate": 7.603988754489142e-06, + "loss": 0.3977, + "step": 7643 + }, + { + "epoch": 0.3459606245756959, + "grad_norm": 0.6269383326587715, + "learning_rate": 7.603363046431676e-06, + "loss": 0.3345, + "step": 7644 + }, + { + "epoch": 0.3460058836840914, + "grad_norm": 0.6320898437814025, + "learning_rate": 7.6027372824359336e-06, + "loss": 0.383, + "step": 7645 + }, + { + "epoch": 0.346051142792487, + "grad_norm": 0.6182188002592361, + "learning_rate": 7.60211146251536e-06, + "loss": 0.369, + "step": 7646 + }, + { + "epoch": 0.34609640190088253, + "grad_norm": 0.6648400989717609, + "learning_rate": 7.601485586683404e-06, + "loss": 0.3778, + "step": 7647 + }, + { + "epoch": 0.3461416610092781, + "grad_norm": 0.640924490371323, + "learning_rate": 7.600859654953513e-06, + "loss": 0.3981, + "step": 7648 + }, + { + "epoch": 0.34618692011767366, + "grad_norm": 0.657510092475443, + "learning_rate": 7.600233667339134e-06, + "loss": 0.3515, + "step": 7649 + }, + { + "epoch": 0.34623217922606925, + "grad_norm": 0.6443967712541337, + "learning_rate": 7.599607623853722e-06, + "loss": 0.3721, + "step": 7650 + }, + { + "epoch": 0.34627743833446484, + "grad_norm": 0.3703216977234791, + "learning_rate": 7.5989815245107235e-06, + "loss": 0.4724, + "step": 7651 + }, + { + "epoch": 0.34632269744286037, + "grad_norm": 0.31900130505503843, + "learning_rate": 7.5983553693235955e-06, + "loss": 0.4883, + "step": 7652 + }, + { + "epoch": 0.34636795655125596, + "grad_norm": 0.668104250745774, + "learning_rate": 7.597729158305791e-06, + "loss": 0.366, + "step": 7653 + }, + { + "epoch": 0.3464132156596515, + "grad_norm": 0.6319615814150348, + "learning_rate": 7.597102891470766e-06, + "loss": 0.3459, + "step": 7654 + }, + { + "epoch": 0.3464584747680471, + "grad_norm": 0.62178518968914, + "learning_rate": 7.596476568831974e-06, + "loss": 0.3496, + "step": 7655 + }, + { + "epoch": 0.3465037338764426, + "grad_norm": 0.7020410645187021, + "learning_rate": 7.595850190402877e-06, + "loss": 0.3431, + "step": 7656 + }, + { + "epoch": 0.3465489929848382, + "grad_norm": 0.6481777084808226, + "learning_rate": 7.595223756196931e-06, + "loss": 0.3603, + "step": 7657 + }, + { + "epoch": 0.34659425209323375, + "grad_norm": 0.6725199795510061, + "learning_rate": 7.594597266227599e-06, + "loss": 0.3261, + "step": 7658 + }, + { + "epoch": 0.34663951120162934, + "grad_norm": 0.6695539890315392, + "learning_rate": 7.593970720508337e-06, + "loss": 0.3911, + "step": 7659 + }, + { + "epoch": 0.34668477031002487, + "grad_norm": 0.6354345747973272, + "learning_rate": 7.5933441190526146e-06, + "loss": 0.329, + "step": 7660 + }, + { + "epoch": 0.34673002941842046, + "grad_norm": 0.6397382074660813, + "learning_rate": 7.59271746187389e-06, + "loss": 0.3688, + "step": 7661 + }, + { + "epoch": 0.346775288526816, + "grad_norm": 0.6931351805945015, + "learning_rate": 7.59209074898563e-06, + "loss": 0.4006, + "step": 7662 + }, + { + "epoch": 0.3468205476352116, + "grad_norm": 0.6902782157655498, + "learning_rate": 7.591463980401302e-06, + "loss": 0.3619, + "step": 7663 + }, + { + "epoch": 0.3468658067436072, + "grad_norm": 0.6431808265068502, + "learning_rate": 7.59083715613437e-06, + "loss": 0.3402, + "step": 7664 + }, + { + "epoch": 0.3469110658520027, + "grad_norm": 0.6268809460813694, + "learning_rate": 7.590210276198305e-06, + "loss": 0.3691, + "step": 7665 + }, + { + "epoch": 0.3469563249603983, + "grad_norm": 0.5677407303711991, + "learning_rate": 7.589583340606579e-06, + "loss": 0.486, + "step": 7666 + }, + { + "epoch": 0.34700158406879383, + "grad_norm": 0.6379834997740906, + "learning_rate": 7.588956349372657e-06, + "loss": 0.3523, + "step": 7667 + }, + { + "epoch": 0.3470468431771894, + "grad_norm": 0.654715150775096, + "learning_rate": 7.588329302510017e-06, + "loss": 0.3662, + "step": 7668 + }, + { + "epoch": 0.34709210228558496, + "grad_norm": 0.6607985524070082, + "learning_rate": 7.5877022000321285e-06, + "loss": 0.3515, + "step": 7669 + }, + { + "epoch": 0.34713736139398055, + "grad_norm": 1.0173624912637775, + "learning_rate": 7.5870750419524675e-06, + "loss": 0.3463, + "step": 7670 + }, + { + "epoch": 0.3471826205023761, + "grad_norm": 0.6332869161692951, + "learning_rate": 7.586447828284509e-06, + "loss": 0.3976, + "step": 7671 + }, + { + "epoch": 0.3472278796107717, + "grad_norm": 0.64736306872132, + "learning_rate": 7.58582055904173e-06, + "loss": 0.3406, + "step": 7672 + }, + { + "epoch": 0.3472731387191672, + "grad_norm": 0.6573305867265749, + "learning_rate": 7.585193234237611e-06, + "loss": 0.4192, + "step": 7673 + }, + { + "epoch": 0.3473183978275628, + "grad_norm": 0.635163449801587, + "learning_rate": 7.584565853885627e-06, + "loss": 0.3202, + "step": 7674 + }, + { + "epoch": 0.3473636569359584, + "grad_norm": 0.6196122097101256, + "learning_rate": 7.583938417999261e-06, + "loss": 0.3622, + "step": 7675 + }, + { + "epoch": 0.3474089160443539, + "grad_norm": 0.6688456245347942, + "learning_rate": 7.5833109265919955e-06, + "loss": 0.3226, + "step": 7676 + }, + { + "epoch": 0.3474541751527495, + "grad_norm": 0.6659623665378279, + "learning_rate": 7.5826833796773115e-06, + "loss": 0.3486, + "step": 7677 + }, + { + "epoch": 0.34749943426114505, + "grad_norm": 0.6305891165742237, + "learning_rate": 7.582055777268693e-06, + "loss": 0.3452, + "step": 7678 + }, + { + "epoch": 0.34754469336954064, + "grad_norm": 0.6338385838715717, + "learning_rate": 7.581428119379628e-06, + "loss": 0.3282, + "step": 7679 + }, + { + "epoch": 0.34758995247793617, + "grad_norm": 0.6191310672713078, + "learning_rate": 7.5808004060235995e-06, + "loss": 0.3418, + "step": 7680 + }, + { + "epoch": 0.34763521158633176, + "grad_norm": 0.48046456954070565, + "learning_rate": 7.580172637214098e-06, + "loss": 0.4705, + "step": 7681 + }, + { + "epoch": 0.3476804706947273, + "grad_norm": 0.6075556600854566, + "learning_rate": 7.57954481296461e-06, + "loss": 0.347, + "step": 7682 + }, + { + "epoch": 0.3477257298031229, + "grad_norm": 0.7261897682432522, + "learning_rate": 7.5789169332886255e-06, + "loss": 0.3466, + "step": 7683 + }, + { + "epoch": 0.3477709889115184, + "grad_norm": 0.743757384841053, + "learning_rate": 7.578288998199638e-06, + "loss": 0.3595, + "step": 7684 + }, + { + "epoch": 0.347816248019914, + "grad_norm": 0.6174087207637071, + "learning_rate": 7.5776610077111375e-06, + "loss": 0.3439, + "step": 7685 + }, + { + "epoch": 0.3478615071283096, + "grad_norm": 0.7420086401146211, + "learning_rate": 7.577032961836619e-06, + "loss": 0.3457, + "step": 7686 + }, + { + "epoch": 0.34790676623670513, + "grad_norm": 0.6720111974672807, + "learning_rate": 7.576404860589579e-06, + "loss": 0.3644, + "step": 7687 + }, + { + "epoch": 0.3479520253451007, + "grad_norm": 0.39524473437736063, + "learning_rate": 7.575776703983508e-06, + "loss": 0.5039, + "step": 7688 + }, + { + "epoch": 0.34799728445349626, + "grad_norm": 0.6664363625429858, + "learning_rate": 7.575148492031908e-06, + "loss": 0.3552, + "step": 7689 + }, + { + "epoch": 0.34804254356189185, + "grad_norm": 0.7055820427991333, + "learning_rate": 7.574520224748276e-06, + "loss": 0.4008, + "step": 7690 + }, + { + "epoch": 0.3480878026702874, + "grad_norm": 0.8020328548076989, + "learning_rate": 7.573891902146111e-06, + "loss": 0.3893, + "step": 7691 + }, + { + "epoch": 0.348133061778683, + "grad_norm": 0.3070808301664086, + "learning_rate": 7.573263524238914e-06, + "loss": 0.4878, + "step": 7692 + }, + { + "epoch": 0.3481783208870785, + "grad_norm": 0.653831880465432, + "learning_rate": 7.572635091040188e-06, + "loss": 0.3793, + "step": 7693 + }, + { + "epoch": 0.3482235799954741, + "grad_norm": 0.5919767279825535, + "learning_rate": 7.572006602563434e-06, + "loss": 0.3251, + "step": 7694 + }, + { + "epoch": 0.34826883910386963, + "grad_norm": 0.6968740648600568, + "learning_rate": 7.571378058822159e-06, + "loss": 0.3583, + "step": 7695 + }, + { + "epoch": 0.3483140982122652, + "grad_norm": 0.6407502883602284, + "learning_rate": 7.570749459829865e-06, + "loss": 0.4116, + "step": 7696 + }, + { + "epoch": 0.34835935732066076, + "grad_norm": 0.32059540456082625, + "learning_rate": 7.570120805600063e-06, + "loss": 0.4956, + "step": 7697 + }, + { + "epoch": 0.34840461642905635, + "grad_norm": 0.3229611926125327, + "learning_rate": 7.569492096146256e-06, + "loss": 0.5189, + "step": 7698 + }, + { + "epoch": 0.34844987553745194, + "grad_norm": 1.1869411507300307, + "learning_rate": 7.568863331481957e-06, + "loss": 0.3237, + "step": 7699 + }, + { + "epoch": 0.34849513464584747, + "grad_norm": 0.650355888585891, + "learning_rate": 7.568234511620674e-06, + "loss": 0.3359, + "step": 7700 + }, + { + "epoch": 0.34854039375424306, + "grad_norm": 0.32701937546089865, + "learning_rate": 7.567605636575919e-06, + "loss": 0.4972, + "step": 7701 + }, + { + "epoch": 0.3485856528626386, + "grad_norm": 0.6480347956638689, + "learning_rate": 7.566976706361204e-06, + "loss": 0.3598, + "step": 7702 + }, + { + "epoch": 0.3486309119710342, + "grad_norm": 0.670768338920183, + "learning_rate": 7.566347720990044e-06, + "loss": 0.3799, + "step": 7703 + }, + { + "epoch": 0.3486761710794297, + "grad_norm": 0.614859753339743, + "learning_rate": 7.565718680475953e-06, + "loss": 0.339, + "step": 7704 + }, + { + "epoch": 0.3487214301878253, + "grad_norm": 0.8436747082401596, + "learning_rate": 7.565089584832448e-06, + "loss": 0.3403, + "step": 7705 + }, + { + "epoch": 0.34876668929622084, + "grad_norm": 0.6671309781608522, + "learning_rate": 7.564460434073047e-06, + "loss": 0.3566, + "step": 7706 + }, + { + "epoch": 0.34881194840461643, + "grad_norm": 0.7655776253067961, + "learning_rate": 7.563831228211266e-06, + "loss": 0.3943, + "step": 7707 + }, + { + "epoch": 0.34885720751301197, + "grad_norm": 0.6443313925396289, + "learning_rate": 7.563201967260627e-06, + "loss": 0.3185, + "step": 7708 + }, + { + "epoch": 0.34890246662140756, + "grad_norm": 0.6727203672910881, + "learning_rate": 7.562572651234649e-06, + "loss": 0.3712, + "step": 7709 + }, + { + "epoch": 0.34894772572980315, + "grad_norm": 0.6965609685868654, + "learning_rate": 7.561943280146856e-06, + "loss": 0.3689, + "step": 7710 + }, + { + "epoch": 0.3489929848381987, + "grad_norm": 0.6877882531696877, + "learning_rate": 7.56131385401077e-06, + "loss": 0.354, + "step": 7711 + }, + { + "epoch": 0.3490382439465943, + "grad_norm": 0.6516043811205681, + "learning_rate": 7.560684372839915e-06, + "loss": 0.3654, + "step": 7712 + }, + { + "epoch": 0.3490835030549898, + "grad_norm": 0.6131498119539656, + "learning_rate": 7.560054836647819e-06, + "loss": 0.3211, + "step": 7713 + }, + { + "epoch": 0.3491287621633854, + "grad_norm": 0.5903973295467017, + "learning_rate": 7.559425245448006e-06, + "loss": 0.3683, + "step": 7714 + }, + { + "epoch": 0.34917402127178093, + "grad_norm": 0.7211704582858878, + "learning_rate": 7.558795599254005e-06, + "loss": 0.3272, + "step": 7715 + }, + { + "epoch": 0.3492192803801765, + "grad_norm": 0.6511390209664222, + "learning_rate": 7.558165898079346e-06, + "loss": 0.3561, + "step": 7716 + }, + { + "epoch": 0.34926453948857206, + "grad_norm": 0.6037263667339409, + "learning_rate": 7.5575361419375585e-06, + "loss": 0.3496, + "step": 7717 + }, + { + "epoch": 0.34930979859696765, + "grad_norm": 0.6542931110891881, + "learning_rate": 7.556906330842174e-06, + "loss": 0.3612, + "step": 7718 + }, + { + "epoch": 0.3493550577053632, + "grad_norm": 0.6544269421961227, + "learning_rate": 7.556276464806725e-06, + "loss": 0.3447, + "step": 7719 + }, + { + "epoch": 0.34940031681375877, + "grad_norm": 0.6179559855338046, + "learning_rate": 7.555646543844747e-06, + "loss": 0.352, + "step": 7720 + }, + { + "epoch": 0.34944557592215436, + "grad_norm": 0.3694286494252386, + "learning_rate": 7.555016567969773e-06, + "loss": 0.4923, + "step": 7721 + }, + { + "epoch": 0.3494908350305499, + "grad_norm": 0.3571334190934288, + "learning_rate": 7.554386537195339e-06, + "loss": 0.4915, + "step": 7722 + }, + { + "epoch": 0.3495360941389455, + "grad_norm": 0.639870037309397, + "learning_rate": 7.553756451534984e-06, + "loss": 0.3545, + "step": 7723 + }, + { + "epoch": 0.349581353247341, + "grad_norm": 0.6321033788590497, + "learning_rate": 7.553126311002248e-06, + "loss": 0.3035, + "step": 7724 + }, + { + "epoch": 0.3496266123557366, + "grad_norm": 0.725958941617707, + "learning_rate": 7.552496115610668e-06, + "loss": 0.3517, + "step": 7725 + }, + { + "epoch": 0.34967187146413214, + "grad_norm": 0.35637223581074784, + "learning_rate": 7.5518658653737844e-06, + "loss": 0.5128, + "step": 7726 + }, + { + "epoch": 0.34971713057252773, + "grad_norm": 0.6564009265796242, + "learning_rate": 7.551235560305142e-06, + "loss": 0.368, + "step": 7727 + }, + { + "epoch": 0.34976238968092327, + "grad_norm": 0.6910617140704731, + "learning_rate": 7.550605200418283e-06, + "loss": 0.3633, + "step": 7728 + }, + { + "epoch": 0.34980764878931886, + "grad_norm": 0.7162155865789955, + "learning_rate": 7.549974785726753e-06, + "loss": 0.4214, + "step": 7729 + }, + { + "epoch": 0.3498529078977144, + "grad_norm": 0.6295258356979103, + "learning_rate": 7.549344316244094e-06, + "loss": 0.3669, + "step": 7730 + }, + { + "epoch": 0.34989816700611, + "grad_norm": 0.8504213878378942, + "learning_rate": 7.548713791983857e-06, + "loss": 0.3781, + "step": 7731 + }, + { + "epoch": 0.3499434261145055, + "grad_norm": 0.6899801762438271, + "learning_rate": 7.548083212959588e-06, + "loss": 0.3408, + "step": 7732 + }, + { + "epoch": 0.3499886852229011, + "grad_norm": 0.6768212134505426, + "learning_rate": 7.547452579184836e-06, + "loss": 0.3435, + "step": 7733 + }, + { + "epoch": 0.3500339443312967, + "grad_norm": 0.6615478961791598, + "learning_rate": 7.546821890673153e-06, + "loss": 0.3902, + "step": 7734 + }, + { + "epoch": 0.35007920343969223, + "grad_norm": 0.6393455474883397, + "learning_rate": 7.546191147438089e-06, + "loss": 0.3873, + "step": 7735 + }, + { + "epoch": 0.3501244625480878, + "grad_norm": 0.6611638513107325, + "learning_rate": 7.545560349493197e-06, + "loss": 0.3244, + "step": 7736 + }, + { + "epoch": 0.35016972165648336, + "grad_norm": 0.6593511958016057, + "learning_rate": 7.544929496852033e-06, + "loss": 0.37, + "step": 7737 + }, + { + "epoch": 0.35021498076487895, + "grad_norm": 0.9726619848240113, + "learning_rate": 7.544298589528148e-06, + "loss": 0.346, + "step": 7738 + }, + { + "epoch": 0.3502602398732745, + "grad_norm": 0.521846771020006, + "learning_rate": 7.5436676275351e-06, + "loss": 0.4948, + "step": 7739 + }, + { + "epoch": 0.35030549898167007, + "grad_norm": 0.6224165394621645, + "learning_rate": 7.54303661088645e-06, + "loss": 0.3474, + "step": 7740 + }, + { + "epoch": 0.3503507580900656, + "grad_norm": 0.6603027913044086, + "learning_rate": 7.542405539595752e-06, + "loss": 0.3299, + "step": 7741 + }, + { + "epoch": 0.3503960171984612, + "grad_norm": 0.6366304885536189, + "learning_rate": 7.541774413676566e-06, + "loss": 0.3338, + "step": 7742 + }, + { + "epoch": 0.35044127630685673, + "grad_norm": 0.623869279581973, + "learning_rate": 7.541143233142456e-06, + "loss": 0.3731, + "step": 7743 + }, + { + "epoch": 0.3504865354152523, + "grad_norm": 0.6733659163360821, + "learning_rate": 7.540511998006982e-06, + "loss": 0.3644, + "step": 7744 + }, + { + "epoch": 0.3505317945236479, + "grad_norm": 0.8206727756834944, + "learning_rate": 7.539880708283709e-06, + "loss": 0.3503, + "step": 7745 + }, + { + "epoch": 0.35057705363204344, + "grad_norm": 0.5872189014995534, + "learning_rate": 7.539249363986196e-06, + "loss": 0.3412, + "step": 7746 + }, + { + "epoch": 0.35062231274043903, + "grad_norm": 0.7261999016080413, + "learning_rate": 7.538617965128018e-06, + "loss": 0.3779, + "step": 7747 + }, + { + "epoch": 0.35066757184883457, + "grad_norm": 0.6220194173842661, + "learning_rate": 7.537986511722732e-06, + "loss": 0.3461, + "step": 7748 + }, + { + "epoch": 0.35071283095723016, + "grad_norm": 0.6140523177540189, + "learning_rate": 7.537355003783915e-06, + "loss": 0.3505, + "step": 7749 + }, + { + "epoch": 0.3507580900656257, + "grad_norm": 0.4861428500111197, + "learning_rate": 7.53672344132513e-06, + "loss": 0.497, + "step": 7750 + }, + { + "epoch": 0.3508033491740213, + "grad_norm": 0.752805323197179, + "learning_rate": 7.53609182435995e-06, + "loss": 0.3759, + "step": 7751 + }, + { + "epoch": 0.3508486082824168, + "grad_norm": 0.6070374149228687, + "learning_rate": 7.535460152901945e-06, + "loss": 0.3968, + "step": 7752 + }, + { + "epoch": 0.3508938673908124, + "grad_norm": 0.6517684478502604, + "learning_rate": 7.534828426964687e-06, + "loss": 0.3702, + "step": 7753 + }, + { + "epoch": 0.35093912649920794, + "grad_norm": 0.633839696078463, + "learning_rate": 7.534196646561754e-06, + "loss": 0.3961, + "step": 7754 + }, + { + "epoch": 0.35098438560760353, + "grad_norm": 0.34285147538363264, + "learning_rate": 7.533564811706715e-06, + "loss": 0.5144, + "step": 7755 + }, + { + "epoch": 0.35102964471599907, + "grad_norm": 0.5787212050900984, + "learning_rate": 7.532932922413152e-06, + "loss": 0.3658, + "step": 7756 + }, + { + "epoch": 0.35107490382439466, + "grad_norm": 0.6087775891664018, + "learning_rate": 7.532300978694639e-06, + "loss": 0.3936, + "step": 7757 + }, + { + "epoch": 0.35112016293279025, + "grad_norm": 0.3209188592804357, + "learning_rate": 7.531668980564757e-06, + "loss": 0.4857, + "step": 7758 + }, + { + "epoch": 0.3511654220411858, + "grad_norm": 0.6416155319043709, + "learning_rate": 7.531036928037081e-06, + "loss": 0.3587, + "step": 7759 + }, + { + "epoch": 0.35121068114958137, + "grad_norm": 0.29564737935085245, + "learning_rate": 7.530404821125197e-06, + "loss": 0.4763, + "step": 7760 + }, + { + "epoch": 0.3512559402579769, + "grad_norm": 0.6204602551572096, + "learning_rate": 7.529772659842685e-06, + "loss": 0.3247, + "step": 7761 + }, + { + "epoch": 0.3513011993663725, + "grad_norm": 0.7110343797456145, + "learning_rate": 7.529140444203127e-06, + "loss": 0.3382, + "step": 7762 + }, + { + "epoch": 0.35134645847476803, + "grad_norm": 0.5993280775274655, + "learning_rate": 7.5285081742201085e-06, + "loss": 0.3661, + "step": 7763 + }, + { + "epoch": 0.3513917175831636, + "grad_norm": 0.6200060790127362, + "learning_rate": 7.527875849907216e-06, + "loss": 0.3864, + "step": 7764 + }, + { + "epoch": 0.35143697669155916, + "grad_norm": 0.5925761739408619, + "learning_rate": 7.527243471278034e-06, + "loss": 0.3635, + "step": 7765 + }, + { + "epoch": 0.35148223579995475, + "grad_norm": 0.7199944515738541, + "learning_rate": 7.526611038346153e-06, + "loss": 0.3621, + "step": 7766 + }, + { + "epoch": 0.3515274949083503, + "grad_norm": 0.6114599544378431, + "learning_rate": 7.5259785511251595e-06, + "loss": 0.3113, + "step": 7767 + }, + { + "epoch": 0.35157275401674587, + "grad_norm": 0.3694562932467032, + "learning_rate": 7.525346009628647e-06, + "loss": 0.4663, + "step": 7768 + }, + { + "epoch": 0.35161801312514146, + "grad_norm": 0.6326146960746679, + "learning_rate": 7.524713413870201e-06, + "loss": 0.3314, + "step": 7769 + }, + { + "epoch": 0.351663272233537, + "grad_norm": 0.6443492767754994, + "learning_rate": 7.524080763863422e-06, + "loss": 0.3561, + "step": 7770 + }, + { + "epoch": 0.3517085313419326, + "grad_norm": 0.6459826223012621, + "learning_rate": 7.5234480596218965e-06, + "loss": 0.341, + "step": 7771 + }, + { + "epoch": 0.3517537904503281, + "grad_norm": 0.6059566286637352, + "learning_rate": 7.522815301159223e-06, + "loss": 0.3372, + "step": 7772 + }, + { + "epoch": 0.3517990495587237, + "grad_norm": 0.629434666765441, + "learning_rate": 7.522182488488999e-06, + "loss": 0.3587, + "step": 7773 + }, + { + "epoch": 0.35184430866711924, + "grad_norm": 0.6345649812348073, + "learning_rate": 7.5215496216248175e-06, + "loss": 0.3562, + "step": 7774 + }, + { + "epoch": 0.35188956777551483, + "grad_norm": 0.34756610246884706, + "learning_rate": 7.520916700580279e-06, + "loss": 0.4792, + "step": 7775 + }, + { + "epoch": 0.35193482688391037, + "grad_norm": 0.6288100576988347, + "learning_rate": 7.5202837253689845e-06, + "loss": 0.3657, + "step": 7776 + }, + { + "epoch": 0.35198008599230596, + "grad_norm": 0.6067182425724056, + "learning_rate": 7.51965069600453e-06, + "loss": 0.3395, + "step": 7777 + }, + { + "epoch": 0.3520253451007015, + "grad_norm": 0.6321301031695498, + "learning_rate": 7.519017612500524e-06, + "loss": 0.3289, + "step": 7778 + }, + { + "epoch": 0.3520706042090971, + "grad_norm": 0.6059871782734803, + "learning_rate": 7.5183844748705645e-06, + "loss": 0.3107, + "step": 7779 + }, + { + "epoch": 0.35211586331749267, + "grad_norm": 0.6111636119938987, + "learning_rate": 7.517751283128258e-06, + "loss": 0.3416, + "step": 7780 + }, + { + "epoch": 0.3521611224258882, + "grad_norm": 0.6488860545994534, + "learning_rate": 7.517118037287207e-06, + "loss": 0.3623, + "step": 7781 + }, + { + "epoch": 0.3522063815342838, + "grad_norm": 1.131189321473581, + "learning_rate": 7.516484737361023e-06, + "loss": 0.358, + "step": 7782 + }, + { + "epoch": 0.35225164064267933, + "grad_norm": 1.2056760182043709, + "learning_rate": 7.515851383363309e-06, + "loss": 0.3434, + "step": 7783 + }, + { + "epoch": 0.3522968997510749, + "grad_norm": 0.6553290062431891, + "learning_rate": 7.515217975307677e-06, + "loss": 0.3283, + "step": 7784 + }, + { + "epoch": 0.35234215885947046, + "grad_norm": 0.6224771498224234, + "learning_rate": 7.514584513207734e-06, + "loss": 0.3355, + "step": 7785 + }, + { + "epoch": 0.35238741796786605, + "grad_norm": 0.40038324245671597, + "learning_rate": 7.513950997077094e-06, + "loss": 0.5041, + "step": 7786 + }, + { + "epoch": 0.3524326770762616, + "grad_norm": 0.626659752232271, + "learning_rate": 7.513317426929369e-06, + "loss": 0.3412, + "step": 7787 + }, + { + "epoch": 0.35247793618465717, + "grad_norm": 0.3044550549445809, + "learning_rate": 7.512683802778169e-06, + "loss": 0.5069, + "step": 7788 + }, + { + "epoch": 0.3525231952930527, + "grad_norm": 0.719800344359679, + "learning_rate": 7.512050124637114e-06, + "loss": 0.3139, + "step": 7789 + }, + { + "epoch": 0.3525684544014483, + "grad_norm": 0.3408857775887603, + "learning_rate": 7.511416392519815e-06, + "loss": 0.4754, + "step": 7790 + }, + { + "epoch": 0.35261371350984383, + "grad_norm": 0.6812537517058064, + "learning_rate": 7.51078260643989e-06, + "loss": 0.3829, + "step": 7791 + }, + { + "epoch": 0.3526589726182394, + "grad_norm": 0.703618849968868, + "learning_rate": 7.5101487664109605e-06, + "loss": 0.36, + "step": 7792 + }, + { + "epoch": 0.352704231726635, + "grad_norm": 0.7611903498969986, + "learning_rate": 7.509514872446642e-06, + "loss": 0.3385, + "step": 7793 + }, + { + "epoch": 0.35274949083503054, + "grad_norm": 0.3892495760709264, + "learning_rate": 7.5088809245605555e-06, + "loss": 0.4742, + "step": 7794 + }, + { + "epoch": 0.35279474994342613, + "grad_norm": 0.3789789286921561, + "learning_rate": 7.508246922766326e-06, + "loss": 0.4939, + "step": 7795 + }, + { + "epoch": 0.35284000905182167, + "grad_norm": 0.6595053522544354, + "learning_rate": 7.507612867077571e-06, + "loss": 0.3883, + "step": 7796 + }, + { + "epoch": 0.35288526816021726, + "grad_norm": 0.2995636894582247, + "learning_rate": 7.506978757507919e-06, + "loss": 0.4853, + "step": 7797 + }, + { + "epoch": 0.3529305272686128, + "grad_norm": 0.6309037212275371, + "learning_rate": 7.506344594070991e-06, + "loss": 0.3525, + "step": 7798 + }, + { + "epoch": 0.3529757863770084, + "grad_norm": 0.6936880134731889, + "learning_rate": 7.5057103767804175e-06, + "loss": 0.3816, + "step": 7799 + }, + { + "epoch": 0.3530210454854039, + "grad_norm": 0.630733616335528, + "learning_rate": 7.505076105649822e-06, + "loss": 0.3118, + "step": 7800 + }, + { + "epoch": 0.3530663045937995, + "grad_norm": 0.6846633350976611, + "learning_rate": 7.504441780692836e-06, + "loss": 0.4105, + "step": 7801 + }, + { + "epoch": 0.35311156370219504, + "grad_norm": 0.6526336163178629, + "learning_rate": 7.5038074019230865e-06, + "loss": 0.4859, + "step": 7802 + }, + { + "epoch": 0.35315682281059063, + "grad_norm": 0.6354983121815554, + "learning_rate": 7.503172969354206e-06, + "loss": 0.3362, + "step": 7803 + }, + { + "epoch": 0.3532020819189862, + "grad_norm": 0.616840523677705, + "learning_rate": 7.502538482999829e-06, + "loss": 0.3745, + "step": 7804 + }, + { + "epoch": 0.35324734102738176, + "grad_norm": 0.6277516775555112, + "learning_rate": 7.501903942873584e-06, + "loss": 0.3786, + "step": 7805 + }, + { + "epoch": 0.35329260013577735, + "grad_norm": 0.6373393336769787, + "learning_rate": 7.5012693489891065e-06, + "loss": 0.3646, + "step": 7806 + }, + { + "epoch": 0.3533378592441729, + "grad_norm": 0.6169337950282993, + "learning_rate": 7.500634701360034e-06, + "loss": 0.332, + "step": 7807 + }, + { + "epoch": 0.35338311835256847, + "grad_norm": 0.3384868150450684, + "learning_rate": 7.500000000000001e-06, + "loss": 0.5114, + "step": 7808 + }, + { + "epoch": 0.353428377460964, + "grad_norm": 0.6187640351076604, + "learning_rate": 7.499365244922646e-06, + "loss": 0.3781, + "step": 7809 + }, + { + "epoch": 0.3534736365693596, + "grad_norm": 0.6555446380655447, + "learning_rate": 7.498730436141609e-06, + "loss": 0.3588, + "step": 7810 + }, + { + "epoch": 0.35351889567775513, + "grad_norm": 0.6239720714131426, + "learning_rate": 7.498095573670528e-06, + "loss": 0.3885, + "step": 7811 + }, + { + "epoch": 0.3535641547861507, + "grad_norm": 0.6362622564026761, + "learning_rate": 7.497460657523047e-06, + "loss": 0.391, + "step": 7812 + }, + { + "epoch": 0.35360941389454625, + "grad_norm": 0.7405890819555525, + "learning_rate": 7.496825687712805e-06, + "loss": 0.3378, + "step": 7813 + }, + { + "epoch": 0.35365467300294184, + "grad_norm": 0.6959052316973275, + "learning_rate": 7.496190664253449e-06, + "loss": 0.3531, + "step": 7814 + }, + { + "epoch": 0.35369993211133743, + "grad_norm": 0.6077571764611213, + "learning_rate": 7.495555587158622e-06, + "loss": 0.3421, + "step": 7815 + }, + { + "epoch": 0.35374519121973297, + "grad_norm": 0.6469514940955886, + "learning_rate": 7.49492045644197e-06, + "loss": 0.3424, + "step": 7816 + }, + { + "epoch": 0.35379045032812856, + "grad_norm": 0.3811569747548171, + "learning_rate": 7.494285272117139e-06, + "loss": 0.4962, + "step": 7817 + }, + { + "epoch": 0.3538357094365241, + "grad_norm": 0.3295220610717146, + "learning_rate": 7.493650034197779e-06, + "loss": 0.5015, + "step": 7818 + }, + { + "epoch": 0.3538809685449197, + "grad_norm": 0.6595865740341526, + "learning_rate": 7.493014742697537e-06, + "loss": 0.3612, + "step": 7819 + }, + { + "epoch": 0.3539262276533152, + "grad_norm": 0.2732336585442226, + "learning_rate": 7.4923793976300665e-06, + "loss": 0.4877, + "step": 7820 + }, + { + "epoch": 0.3539714867617108, + "grad_norm": 0.797829661466962, + "learning_rate": 7.4917439990090165e-06, + "loss": 0.3558, + "step": 7821 + }, + { + "epoch": 0.35401674587010634, + "grad_norm": 0.6327197220334714, + "learning_rate": 7.491108546848041e-06, + "loss": 0.3307, + "step": 7822 + }, + { + "epoch": 0.35406200497850193, + "grad_norm": 0.4456977492630711, + "learning_rate": 7.490473041160794e-06, + "loss": 0.4877, + "step": 7823 + }, + { + "epoch": 0.35410726408689747, + "grad_norm": 0.37679343862054404, + "learning_rate": 7.489837481960931e-06, + "loss": 0.512, + "step": 7824 + }, + { + "epoch": 0.35415252319529306, + "grad_norm": 0.688862193477084, + "learning_rate": 7.489201869262106e-06, + "loss": 0.3437, + "step": 7825 + }, + { + "epoch": 0.3541977823036886, + "grad_norm": 0.612011506777598, + "learning_rate": 7.48856620307798e-06, + "loss": 0.3613, + "step": 7826 + }, + { + "epoch": 0.3542430414120842, + "grad_norm": 0.6547800401174696, + "learning_rate": 7.487930483422206e-06, + "loss": 0.3514, + "step": 7827 + }, + { + "epoch": 0.35428830052047977, + "grad_norm": 0.7017144490010115, + "learning_rate": 7.4872947103084495e-06, + "loss": 0.3471, + "step": 7828 + }, + { + "epoch": 0.3543335596288753, + "grad_norm": 0.6039843313631512, + "learning_rate": 7.4866588837503686e-06, + "loss": 0.3258, + "step": 7829 + }, + { + "epoch": 0.3543788187372709, + "grad_norm": 0.6856001322498695, + "learning_rate": 7.486023003761625e-06, + "loss": 0.3539, + "step": 7830 + }, + { + "epoch": 0.35442407784566643, + "grad_norm": 0.6239377737887924, + "learning_rate": 7.48538707035588e-06, + "loss": 0.3605, + "step": 7831 + }, + { + "epoch": 0.354469336954062, + "grad_norm": 0.7392468682747669, + "learning_rate": 7.484751083546804e-06, + "loss": 0.3635, + "step": 7832 + }, + { + "epoch": 0.35451459606245755, + "grad_norm": 0.6336965745586026, + "learning_rate": 7.484115043348056e-06, + "loss": 0.349, + "step": 7833 + }, + { + "epoch": 0.35455985517085314, + "grad_norm": 0.6547582868007384, + "learning_rate": 7.4834789497733065e-06, + "loss": 0.4627, + "step": 7834 + }, + { + "epoch": 0.3546051142792487, + "grad_norm": 0.4528387555975036, + "learning_rate": 7.482842802836221e-06, + "loss": 0.4812, + "step": 7835 + }, + { + "epoch": 0.35465037338764427, + "grad_norm": 0.6591548404559565, + "learning_rate": 7.482206602550469e-06, + "loss": 0.4041, + "step": 7836 + }, + { + "epoch": 0.3546956324960398, + "grad_norm": 0.8262674444288512, + "learning_rate": 7.481570348929722e-06, + "loss": 0.3721, + "step": 7837 + }, + { + "epoch": 0.3547408916044354, + "grad_norm": 0.7090929955067138, + "learning_rate": 7.480934041987649e-06, + "loss": 0.3699, + "step": 7838 + }, + { + "epoch": 0.354786150712831, + "grad_norm": 1.111779718586593, + "learning_rate": 7.480297681737922e-06, + "loss": 0.3902, + "step": 7839 + }, + { + "epoch": 0.3548314098212265, + "grad_norm": 0.6493248511249113, + "learning_rate": 7.479661268194217e-06, + "loss": 0.3197, + "step": 7840 + }, + { + "epoch": 0.3548766689296221, + "grad_norm": 0.6947138559490463, + "learning_rate": 7.479024801370206e-06, + "loss": 0.3554, + "step": 7841 + }, + { + "epoch": 0.35492192803801764, + "grad_norm": 0.750127064883699, + "learning_rate": 7.478388281279566e-06, + "loss": 0.3553, + "step": 7842 + }, + { + "epoch": 0.35496718714641323, + "grad_norm": 0.6068588275400576, + "learning_rate": 7.477751707935974e-06, + "loss": 0.3219, + "step": 7843 + }, + { + "epoch": 0.35501244625480877, + "grad_norm": 0.6667280699696232, + "learning_rate": 7.477115081353107e-06, + "loss": 0.3476, + "step": 7844 + }, + { + "epoch": 0.35505770536320436, + "grad_norm": 0.6720148038383784, + "learning_rate": 7.476478401544647e-06, + "loss": 0.3364, + "step": 7845 + }, + { + "epoch": 0.3551029644715999, + "grad_norm": 0.6749944921167566, + "learning_rate": 7.475841668524268e-06, + "loss": 0.3578, + "step": 7846 + }, + { + "epoch": 0.3551482235799955, + "grad_norm": 0.6546624428916041, + "learning_rate": 7.475204882305659e-06, + "loss": 0.3407, + "step": 7847 + }, + { + "epoch": 0.355193482688391, + "grad_norm": 0.6187047516268036, + "learning_rate": 7.474568042902497e-06, + "loss": 0.3473, + "step": 7848 + }, + { + "epoch": 0.3552387417967866, + "grad_norm": 0.6449786289711059, + "learning_rate": 7.4739311503284695e-06, + "loss": 0.3457, + "step": 7849 + }, + { + "epoch": 0.35528400090518214, + "grad_norm": 0.6612723807417603, + "learning_rate": 7.473294204597259e-06, + "loss": 0.3385, + "step": 7850 + }, + { + "epoch": 0.35532926001357773, + "grad_norm": 0.6023672840038461, + "learning_rate": 7.472657205722551e-06, + "loss": 0.3679, + "step": 7851 + }, + { + "epoch": 0.3553745191219733, + "grad_norm": 0.682869276494719, + "learning_rate": 7.472020153718036e-06, + "loss": 0.3502, + "step": 7852 + }, + { + "epoch": 0.35541977823036885, + "grad_norm": 0.658596181793742, + "learning_rate": 7.471383048597399e-06, + "loss": 0.3598, + "step": 7853 + }, + { + "epoch": 0.35546503733876444, + "grad_norm": 1.4779270416964452, + "learning_rate": 7.47074589037433e-06, + "loss": 0.502, + "step": 7854 + }, + { + "epoch": 0.35551029644716, + "grad_norm": 0.6974218981775188, + "learning_rate": 7.470108679062521e-06, + "loss": 0.3551, + "step": 7855 + }, + { + "epoch": 0.35555555555555557, + "grad_norm": 0.7470662270954693, + "learning_rate": 7.469471414675662e-06, + "loss": 0.3752, + "step": 7856 + }, + { + "epoch": 0.3556008146639511, + "grad_norm": 0.698352138931862, + "learning_rate": 7.468834097227448e-06, + "loss": 0.3742, + "step": 7857 + }, + { + "epoch": 0.3556460737723467, + "grad_norm": 0.70489735180748, + "learning_rate": 7.4681967267315715e-06, + "loss": 0.3402, + "step": 7858 + }, + { + "epoch": 0.35569133288074223, + "grad_norm": 0.6708085387779145, + "learning_rate": 7.4675593032017266e-06, + "loss": 0.3232, + "step": 7859 + }, + { + "epoch": 0.3557365919891378, + "grad_norm": 0.7004600236705987, + "learning_rate": 7.466921826651612e-06, + "loss": 0.3686, + "step": 7860 + }, + { + "epoch": 0.35578185109753335, + "grad_norm": 0.6822899888012113, + "learning_rate": 7.466284297094922e-06, + "loss": 0.3684, + "step": 7861 + }, + { + "epoch": 0.35582711020592894, + "grad_norm": 0.6431997953727273, + "learning_rate": 7.46564671454536e-06, + "loss": 0.3443, + "step": 7862 + }, + { + "epoch": 0.35587236931432453, + "grad_norm": 0.6400738731668444, + "learning_rate": 7.46500907901662e-06, + "loss": 0.3652, + "step": 7863 + }, + { + "epoch": 0.35591762842272007, + "grad_norm": 0.6476788329341486, + "learning_rate": 7.4643713905224065e-06, + "loss": 0.3405, + "step": 7864 + }, + { + "epoch": 0.35596288753111566, + "grad_norm": 0.9223979899731618, + "learning_rate": 7.463733649076421e-06, + "loss": 0.4326, + "step": 7865 + }, + { + "epoch": 0.3560081466395112, + "grad_norm": 0.6300366430679043, + "learning_rate": 7.4630958546923674e-06, + "loss": 0.3519, + "step": 7866 + }, + { + "epoch": 0.3560534057479068, + "grad_norm": 1.2353016132302856, + "learning_rate": 7.462458007383946e-06, + "loss": 0.4952, + "step": 7867 + }, + { + "epoch": 0.3560986648563023, + "grad_norm": 0.5951207777646207, + "learning_rate": 7.461820107164867e-06, + "loss": 0.3544, + "step": 7868 + }, + { + "epoch": 0.3561439239646979, + "grad_norm": 1.5186979849658055, + "learning_rate": 7.461182154048832e-06, + "loss": 0.3552, + "step": 7869 + }, + { + "epoch": 0.35618918307309344, + "grad_norm": 0.4881240184949437, + "learning_rate": 7.460544148049555e-06, + "loss": 0.492, + "step": 7870 + }, + { + "epoch": 0.35623444218148903, + "grad_norm": 0.8171773576837302, + "learning_rate": 7.45990608918074e-06, + "loss": 0.3741, + "step": 7871 + }, + { + "epoch": 0.35627970128988456, + "grad_norm": 0.7048751897975343, + "learning_rate": 7.459267977456097e-06, + "loss": 0.3668, + "step": 7872 + }, + { + "epoch": 0.35632496039828015, + "grad_norm": 0.68364472816167, + "learning_rate": 7.45862981288934e-06, + "loss": 0.3963, + "step": 7873 + }, + { + "epoch": 0.35637021950667574, + "grad_norm": 0.6353143543372414, + "learning_rate": 7.457991595494178e-06, + "loss": 0.3283, + "step": 7874 + }, + { + "epoch": 0.3564154786150713, + "grad_norm": 0.7565528040727152, + "learning_rate": 7.457353325284327e-06, + "loss": 0.4874, + "step": 7875 + }, + { + "epoch": 0.35646073772346687, + "grad_norm": 0.6582202067423059, + "learning_rate": 7.4567150022735e-06, + "loss": 0.381, + "step": 7876 + }, + { + "epoch": 0.3565059968318624, + "grad_norm": 0.6378632328892668, + "learning_rate": 7.45607662647541e-06, + "loss": 0.3426, + "step": 7877 + }, + { + "epoch": 0.356551255940258, + "grad_norm": 0.4406770262620411, + "learning_rate": 7.45543819790378e-06, + "loss": 0.476, + "step": 7878 + }, + { + "epoch": 0.35659651504865353, + "grad_norm": 0.36194061351320284, + "learning_rate": 7.454799716572324e-06, + "loss": 0.496, + "step": 7879 + }, + { + "epoch": 0.3566417741570491, + "grad_norm": 0.6696797075276905, + "learning_rate": 7.45416118249476e-06, + "loss": 0.3941, + "step": 7880 + }, + { + "epoch": 0.35668703326544465, + "grad_norm": 0.4478173564882491, + "learning_rate": 7.4535225956848115e-06, + "loss": 0.4982, + "step": 7881 + }, + { + "epoch": 0.35673229237384024, + "grad_norm": 0.6653358361375153, + "learning_rate": 7.452883956156197e-06, + "loss": 0.3999, + "step": 7882 + }, + { + "epoch": 0.3567775514822358, + "grad_norm": 0.6890302334830477, + "learning_rate": 7.452245263922638e-06, + "loss": 0.3903, + "step": 7883 + }, + { + "epoch": 0.35682281059063137, + "grad_norm": 0.6117361768377864, + "learning_rate": 7.4516065189978625e-06, + "loss": 0.3289, + "step": 7884 + }, + { + "epoch": 0.3568680696990269, + "grad_norm": 0.6131998172217197, + "learning_rate": 7.45096772139559e-06, + "loss": 0.5051, + "step": 7885 + }, + { + "epoch": 0.3569133288074225, + "grad_norm": 0.6483207055632356, + "learning_rate": 7.450328871129551e-06, + "loss": 0.4946, + "step": 7886 + }, + { + "epoch": 0.3569585879158181, + "grad_norm": 0.7360880947446763, + "learning_rate": 7.4496899682134684e-06, + "loss": 0.3814, + "step": 7887 + }, + { + "epoch": 0.3570038470242136, + "grad_norm": 0.4969670850818326, + "learning_rate": 7.449051012661073e-06, + "loss": 0.5084, + "step": 7888 + }, + { + "epoch": 0.3570491061326092, + "grad_norm": 0.654029005722767, + "learning_rate": 7.4484120044860915e-06, + "loss": 0.3032, + "step": 7889 + }, + { + "epoch": 0.35709436524100474, + "grad_norm": 0.7367063069317463, + "learning_rate": 7.447772943702258e-06, + "loss": 0.3829, + "step": 7890 + }, + { + "epoch": 0.35713962434940033, + "grad_norm": 0.6245920443685149, + "learning_rate": 7.4471338303233e-06, + "loss": 0.3148, + "step": 7891 + }, + { + "epoch": 0.35718488345779587, + "grad_norm": 0.6574094557932094, + "learning_rate": 7.4464946643629535e-06, + "loss": 0.3702, + "step": 7892 + }, + { + "epoch": 0.35723014256619146, + "grad_norm": 0.5573694083411818, + "learning_rate": 7.4458554458349485e-06, + "loss": 0.344, + "step": 7893 + }, + { + "epoch": 0.357275401674587, + "grad_norm": 0.6893382885904867, + "learning_rate": 7.445216174753022e-06, + "loss": 0.3853, + "step": 7894 + }, + { + "epoch": 0.3573206607829826, + "grad_norm": 0.6443073771654781, + "learning_rate": 7.444576851130911e-06, + "loss": 0.3627, + "step": 7895 + }, + { + "epoch": 0.3573659198913781, + "grad_norm": 0.659029696855723, + "learning_rate": 7.443937474982351e-06, + "loss": 0.3524, + "step": 7896 + }, + { + "epoch": 0.3574111789997737, + "grad_norm": 0.6751787259300283, + "learning_rate": 7.443298046321082e-06, + "loss": 0.3294, + "step": 7897 + }, + { + "epoch": 0.3574564381081693, + "grad_norm": 0.6531740656256767, + "learning_rate": 7.442658565160838e-06, + "loss": 0.3308, + "step": 7898 + }, + { + "epoch": 0.35750169721656483, + "grad_norm": 0.7154670512286024, + "learning_rate": 7.442019031515368e-06, + "loss": 0.2847, + "step": 7899 + }, + { + "epoch": 0.3575469563249604, + "grad_norm": 1.021117071184924, + "learning_rate": 7.4413794453984065e-06, + "loss": 0.4848, + "step": 7900 + }, + { + "epoch": 0.35759221543335595, + "grad_norm": 0.880368866321953, + "learning_rate": 7.4407398068237e-06, + "loss": 0.4649, + "step": 7901 + }, + { + "epoch": 0.35763747454175154, + "grad_norm": 0.6463014724398792, + "learning_rate": 7.440100115804991e-06, + "loss": 0.3355, + "step": 7902 + }, + { + "epoch": 0.3576827336501471, + "grad_norm": 0.6454359331546151, + "learning_rate": 7.439460372356025e-06, + "loss": 0.3782, + "step": 7903 + }, + { + "epoch": 0.35772799275854267, + "grad_norm": 0.6732821923908577, + "learning_rate": 7.438820576490546e-06, + "loss": 0.3673, + "step": 7904 + }, + { + "epoch": 0.3577732518669382, + "grad_norm": 0.6885551892394901, + "learning_rate": 7.438180728222306e-06, + "loss": 0.3884, + "step": 7905 + }, + { + "epoch": 0.3578185109753338, + "grad_norm": 1.0716877491064174, + "learning_rate": 7.4375408275650475e-06, + "loss": 0.4863, + "step": 7906 + }, + { + "epoch": 0.3578637700837293, + "grad_norm": 0.6553793841227316, + "learning_rate": 7.436900874532526e-06, + "loss": 0.3715, + "step": 7907 + }, + { + "epoch": 0.3579090291921249, + "grad_norm": 0.8814940863816225, + "learning_rate": 7.436260869138486e-06, + "loss": 0.5012, + "step": 7908 + }, + { + "epoch": 0.3579542883005205, + "grad_norm": 0.6642419514151628, + "learning_rate": 7.435620811396684e-06, + "loss": 0.3533, + "step": 7909 + }, + { + "epoch": 0.35799954740891604, + "grad_norm": 0.661127123804159, + "learning_rate": 7.434980701320871e-06, + "loss": 0.3352, + "step": 7910 + }, + { + "epoch": 0.35804480651731163, + "grad_norm": 0.68747966185606, + "learning_rate": 7.4343405389248e-06, + "loss": 0.401, + "step": 7911 + }, + { + "epoch": 0.35809006562570717, + "grad_norm": 0.6979271552290265, + "learning_rate": 7.43370032422223e-06, + "loss": 0.3478, + "step": 7912 + }, + { + "epoch": 0.35813532473410276, + "grad_norm": 0.7081415200993405, + "learning_rate": 7.433060057226913e-06, + "loss": 0.3571, + "step": 7913 + }, + { + "epoch": 0.3581805838424983, + "grad_norm": 0.6564714199182704, + "learning_rate": 7.432419737952607e-06, + "loss": 0.3477, + "step": 7914 + }, + { + "epoch": 0.3582258429508939, + "grad_norm": 0.6726460485709904, + "learning_rate": 7.431779366413073e-06, + "loss": 0.3791, + "step": 7915 + }, + { + "epoch": 0.3582711020592894, + "grad_norm": 1.088140205794908, + "learning_rate": 7.431138942622069e-06, + "loss": 0.5026, + "step": 7916 + }, + { + "epoch": 0.358316361167685, + "grad_norm": 0.8023510194742443, + "learning_rate": 7.430498466593355e-06, + "loss": 0.5146, + "step": 7917 + }, + { + "epoch": 0.35836162027608054, + "grad_norm": 0.6175256937878455, + "learning_rate": 7.429857938340693e-06, + "loss": 0.32, + "step": 7918 + }, + { + "epoch": 0.35840687938447613, + "grad_norm": 0.6853174040711785, + "learning_rate": 7.429217357877848e-06, + "loss": 0.366, + "step": 7919 + }, + { + "epoch": 0.35845213849287166, + "grad_norm": 0.5864777621038076, + "learning_rate": 7.4285767252185824e-06, + "loss": 0.4921, + "step": 7920 + }, + { + "epoch": 0.35849739760126725, + "grad_norm": 0.7552732461077418, + "learning_rate": 7.427936040376662e-06, + "loss": 0.4547, + "step": 7921 + }, + { + "epoch": 0.35854265670966284, + "grad_norm": 0.6815214328335388, + "learning_rate": 7.427295303365851e-06, + "loss": 0.3188, + "step": 7922 + }, + { + "epoch": 0.3585879158180584, + "grad_norm": 0.6302372503519815, + "learning_rate": 7.426654514199921e-06, + "loss": 0.3368, + "step": 7923 + }, + { + "epoch": 0.35863317492645397, + "grad_norm": 0.7116580236180317, + "learning_rate": 7.426013672892639e-06, + "loss": 0.379, + "step": 7924 + }, + { + "epoch": 0.3586784340348495, + "grad_norm": 0.6324604196524789, + "learning_rate": 7.425372779457771e-06, + "loss": 0.3561, + "step": 7925 + }, + { + "epoch": 0.3587236931432451, + "grad_norm": 0.9011199431306274, + "learning_rate": 7.424731833909094e-06, + "loss": 0.3985, + "step": 7926 + }, + { + "epoch": 0.3587689522516406, + "grad_norm": 0.6233800357442847, + "learning_rate": 7.4240908362603745e-06, + "loss": 0.4937, + "step": 7927 + }, + { + "epoch": 0.3588142113600362, + "grad_norm": 0.6580323715547287, + "learning_rate": 7.423449786525391e-06, + "loss": 0.3669, + "step": 7928 + }, + { + "epoch": 0.35885947046843175, + "grad_norm": 0.6788612202436244, + "learning_rate": 7.422808684717913e-06, + "loss": 0.3993, + "step": 7929 + }, + { + "epoch": 0.35890472957682734, + "grad_norm": 0.45794257757974255, + "learning_rate": 7.422167530851716e-06, + "loss": 0.4668, + "step": 7930 + }, + { + "epoch": 0.3589499886852229, + "grad_norm": 0.7067797996645679, + "learning_rate": 7.42152632494058e-06, + "loss": 0.4117, + "step": 7931 + }, + { + "epoch": 0.35899524779361847, + "grad_norm": 0.7374060859848149, + "learning_rate": 7.42088506699828e-06, + "loss": 0.4242, + "step": 7932 + }, + { + "epoch": 0.35904050690201406, + "grad_norm": 0.6940096071197507, + "learning_rate": 7.420243757038593e-06, + "loss": 0.3541, + "step": 7933 + }, + { + "epoch": 0.3590857660104096, + "grad_norm": 0.46216186246289315, + "learning_rate": 7.419602395075304e-06, + "loss": 0.479, + "step": 7934 + }, + { + "epoch": 0.3591310251188052, + "grad_norm": 0.7182676372748836, + "learning_rate": 7.418960981122188e-06, + "loss": 0.3263, + "step": 7935 + }, + { + "epoch": 0.3591762842272007, + "grad_norm": 0.6259753858694471, + "learning_rate": 7.418319515193032e-06, + "loss": 0.3706, + "step": 7936 + }, + { + "epoch": 0.3592215433355963, + "grad_norm": 0.8776219507096461, + "learning_rate": 7.4176779973016156e-06, + "loss": 0.3708, + "step": 7937 + }, + { + "epoch": 0.35926680244399184, + "grad_norm": 0.41646167675109397, + "learning_rate": 7.417036427461726e-06, + "loss": 0.4882, + "step": 7938 + }, + { + "epoch": 0.35931206155238743, + "grad_norm": 0.7252858911467089, + "learning_rate": 7.416394805687145e-06, + "loss": 0.3602, + "step": 7939 + }, + { + "epoch": 0.35935732066078296, + "grad_norm": 0.3726637247353118, + "learning_rate": 7.415753131991661e-06, + "loss": 0.4785, + "step": 7940 + }, + { + "epoch": 0.35940257976917855, + "grad_norm": 0.740712850578873, + "learning_rate": 7.415111406389063e-06, + "loss": 0.3669, + "step": 7941 + }, + { + "epoch": 0.3594478388775741, + "grad_norm": 0.6646504393206524, + "learning_rate": 7.414469628893137e-06, + "loss": 0.3762, + "step": 7942 + }, + { + "epoch": 0.3594930979859697, + "grad_norm": 0.6356744766828022, + "learning_rate": 7.413827799517674e-06, + "loss": 0.3475, + "step": 7943 + }, + { + "epoch": 0.35953835709436527, + "grad_norm": 0.607166637688898, + "learning_rate": 7.413185918276467e-06, + "loss": 0.3348, + "step": 7944 + }, + { + "epoch": 0.3595836162027608, + "grad_norm": 0.7131483598239857, + "learning_rate": 7.412543985183306e-06, + "loss": 0.4069, + "step": 7945 + }, + { + "epoch": 0.3596288753111564, + "grad_norm": 0.6299425375662295, + "learning_rate": 7.411902000251983e-06, + "loss": 0.3566, + "step": 7946 + }, + { + "epoch": 0.3596741344195519, + "grad_norm": 0.5049556350096941, + "learning_rate": 7.411259963496294e-06, + "loss": 0.4626, + "step": 7947 + }, + { + "epoch": 0.3597193935279475, + "grad_norm": 0.6506714341238237, + "learning_rate": 7.410617874930034e-06, + "loss": 0.3619, + "step": 7948 + }, + { + "epoch": 0.35976465263634305, + "grad_norm": 0.639220736506604, + "learning_rate": 7.409975734566998e-06, + "loss": 0.323, + "step": 7949 + }, + { + "epoch": 0.35980991174473864, + "grad_norm": 0.7206397484804806, + "learning_rate": 7.4093335424209875e-06, + "loss": 0.3704, + "step": 7950 + }, + { + "epoch": 0.3598551708531342, + "grad_norm": 0.37597732159073716, + "learning_rate": 7.4086912985057976e-06, + "loss": 0.4924, + "step": 7951 + }, + { + "epoch": 0.35990042996152977, + "grad_norm": 0.6455469760980388, + "learning_rate": 7.40804900283523e-06, + "loss": 0.3768, + "step": 7952 + }, + { + "epoch": 0.3599456890699253, + "grad_norm": 0.6301467889728702, + "learning_rate": 7.407406655423086e-06, + "loss": 0.3262, + "step": 7953 + }, + { + "epoch": 0.3599909481783209, + "grad_norm": 0.6135304956212745, + "learning_rate": 7.4067642562831656e-06, + "loss": 0.3356, + "step": 7954 + }, + { + "epoch": 0.3600362072867164, + "grad_norm": 0.6199600583542145, + "learning_rate": 7.406121805429274e-06, + "loss": 0.3211, + "step": 7955 + }, + { + "epoch": 0.360081466395112, + "grad_norm": 0.39504063291210206, + "learning_rate": 7.405479302875212e-06, + "loss": 0.4783, + "step": 7956 + }, + { + "epoch": 0.3601267255035076, + "grad_norm": 0.6760457687153064, + "learning_rate": 7.404836748634791e-06, + "loss": 0.389, + "step": 7957 + }, + { + "epoch": 0.36017198461190314, + "grad_norm": 0.6950822303958237, + "learning_rate": 7.404194142721812e-06, + "loss": 0.3426, + "step": 7958 + }, + { + "epoch": 0.36021724372029873, + "grad_norm": 0.7108334017246865, + "learning_rate": 7.403551485150086e-06, + "loss": 0.345, + "step": 7959 + }, + { + "epoch": 0.36026250282869426, + "grad_norm": 0.5868546699602407, + "learning_rate": 7.402908775933419e-06, + "loss": 0.3175, + "step": 7960 + }, + { + "epoch": 0.36030776193708985, + "grad_norm": 0.34926700370057423, + "learning_rate": 7.402266015085624e-06, + "loss": 0.4923, + "step": 7961 + }, + { + "epoch": 0.3603530210454854, + "grad_norm": 0.31677625195627207, + "learning_rate": 7.401623202620509e-06, + "loss": 0.4733, + "step": 7962 + }, + { + "epoch": 0.360398280153881, + "grad_norm": 0.6652658900532104, + "learning_rate": 7.40098033855189e-06, + "loss": 0.3187, + "step": 7963 + }, + { + "epoch": 0.3604435392622765, + "grad_norm": 0.3583699712794321, + "learning_rate": 7.4003374228935746e-06, + "loss": 0.5076, + "step": 7964 + }, + { + "epoch": 0.3604887983706721, + "grad_norm": 0.6172428639056611, + "learning_rate": 7.399694455659382e-06, + "loss": 0.3531, + "step": 7965 + }, + { + "epoch": 0.36053405747906764, + "grad_norm": 0.30345325260551814, + "learning_rate": 7.399051436863125e-06, + "loss": 0.4982, + "step": 7966 + }, + { + "epoch": 0.36057931658746323, + "grad_norm": 0.7233199446249501, + "learning_rate": 7.39840836651862e-06, + "loss": 0.3837, + "step": 7967 + }, + { + "epoch": 0.3606245756958588, + "grad_norm": 0.6763101826128397, + "learning_rate": 7.3977652446396855e-06, + "loss": 0.3595, + "step": 7968 + }, + { + "epoch": 0.36066983480425435, + "grad_norm": 0.6170249152749456, + "learning_rate": 7.397122071240141e-06, + "loss": 0.3527, + "step": 7969 + }, + { + "epoch": 0.36071509391264994, + "grad_norm": 0.5710552406604417, + "learning_rate": 7.396478846333805e-06, + "loss": 0.3615, + "step": 7970 + }, + { + "epoch": 0.3607603530210455, + "grad_norm": 0.6389271121653773, + "learning_rate": 7.395835569934498e-06, + "loss": 0.3775, + "step": 7971 + }, + { + "epoch": 0.36080561212944107, + "grad_norm": 0.6142088904895509, + "learning_rate": 7.395192242056044e-06, + "loss": 0.3772, + "step": 7972 + }, + { + "epoch": 0.3608508712378366, + "grad_norm": 0.6090371660988895, + "learning_rate": 7.394548862712264e-06, + "loss": 0.3425, + "step": 7973 + }, + { + "epoch": 0.3608961303462322, + "grad_norm": 0.35945651666517714, + "learning_rate": 7.393905431916985e-06, + "loss": 0.5164, + "step": 7974 + }, + { + "epoch": 0.3609413894546277, + "grad_norm": 0.6718626629578323, + "learning_rate": 7.393261949684027e-06, + "loss": 0.3691, + "step": 7975 + }, + { + "epoch": 0.3609866485630233, + "grad_norm": 0.32284190264535156, + "learning_rate": 7.392618416027224e-06, + "loss": 0.4721, + "step": 7976 + }, + { + "epoch": 0.36103190767141885, + "grad_norm": 0.6600892140380253, + "learning_rate": 7.3919748309603965e-06, + "loss": 0.3766, + "step": 7977 + }, + { + "epoch": 0.36107716677981444, + "grad_norm": 0.6506458136837409, + "learning_rate": 7.391331194497379e-06, + "loss": 0.352, + "step": 7978 + }, + { + "epoch": 0.36112242588821, + "grad_norm": 0.6494080072148609, + "learning_rate": 7.3906875066519964e-06, + "loss": 0.3817, + "step": 7979 + }, + { + "epoch": 0.36116768499660556, + "grad_norm": 0.6758242870450416, + "learning_rate": 7.390043767438083e-06, + "loss": 0.31, + "step": 7980 + }, + { + "epoch": 0.36121294410500115, + "grad_norm": 0.6400939611532723, + "learning_rate": 7.389399976869469e-06, + "loss": 0.3563, + "step": 7981 + }, + { + "epoch": 0.3612582032133967, + "grad_norm": 0.673003801643574, + "learning_rate": 7.388756134959989e-06, + "loss": 0.318, + "step": 7982 + }, + { + "epoch": 0.3613034623217923, + "grad_norm": 0.609327398367172, + "learning_rate": 7.388112241723475e-06, + "loss": 0.3677, + "step": 7983 + }, + { + "epoch": 0.3613487214301878, + "grad_norm": 0.6815401431992644, + "learning_rate": 7.387468297173764e-06, + "loss": 0.3236, + "step": 7984 + }, + { + "epoch": 0.3613939805385834, + "grad_norm": 0.6689289187919621, + "learning_rate": 7.386824301324691e-06, + "loss": 0.4191, + "step": 7985 + }, + { + "epoch": 0.36143923964697894, + "grad_norm": 0.6081946398697645, + "learning_rate": 7.386180254190095e-06, + "loss": 0.351, + "step": 7986 + }, + { + "epoch": 0.36148449875537453, + "grad_norm": 0.7500202226223562, + "learning_rate": 7.3855361557838145e-06, + "loss": 0.3843, + "step": 7987 + }, + { + "epoch": 0.36152975786377006, + "grad_norm": 0.6559071916954051, + "learning_rate": 7.384892006119687e-06, + "loss": 0.3757, + "step": 7988 + }, + { + "epoch": 0.36157501697216565, + "grad_norm": 0.4872198828882111, + "learning_rate": 7.384247805211556e-06, + "loss": 0.4832, + "step": 7989 + }, + { + "epoch": 0.3616202760805612, + "grad_norm": 0.3916520443560128, + "learning_rate": 7.383603553073262e-06, + "loss": 0.4856, + "step": 7990 + }, + { + "epoch": 0.3616655351889568, + "grad_norm": 0.5976092297300865, + "learning_rate": 7.382959249718648e-06, + "loss": 0.3566, + "step": 7991 + }, + { + "epoch": 0.36171079429735237, + "grad_norm": 0.67388547399459, + "learning_rate": 7.3823148951615605e-06, + "loss": 0.3494, + "step": 7992 + }, + { + "epoch": 0.3617560534057479, + "grad_norm": 0.6173366485190657, + "learning_rate": 7.38167048941584e-06, + "loss": 0.3578, + "step": 7993 + }, + { + "epoch": 0.3618013125141435, + "grad_norm": 0.5931781342085648, + "learning_rate": 7.381026032495338e-06, + "loss": 0.348, + "step": 7994 + }, + { + "epoch": 0.361846571622539, + "grad_norm": 0.6809323889524895, + "learning_rate": 7.3803815244138976e-06, + "loss": 0.4096, + "step": 7995 + }, + { + "epoch": 0.3618918307309346, + "grad_norm": 0.6209283680560373, + "learning_rate": 7.379736965185369e-06, + "loss": 0.3388, + "step": 7996 + }, + { + "epoch": 0.36193708983933015, + "grad_norm": 0.6650202358554252, + "learning_rate": 7.379092354823602e-06, + "loss": 0.3515, + "step": 7997 + }, + { + "epoch": 0.36198234894772574, + "grad_norm": 0.7646399480977066, + "learning_rate": 7.378447693342447e-06, + "loss": 0.4754, + "step": 7998 + }, + { + "epoch": 0.3620276080561213, + "grad_norm": 0.6603043887038389, + "learning_rate": 7.377802980755756e-06, + "loss": 0.3863, + "step": 7999 + }, + { + "epoch": 0.36207286716451687, + "grad_norm": 0.42204413689316006, + "learning_rate": 7.377158217077381e-06, + "loss": 0.5074, + "step": 8000 + }, + { + "epoch": 0.3621181262729124, + "grad_norm": 0.32598493421669134, + "learning_rate": 7.3765134023211785e-06, + "loss": 0.499, + "step": 8001 + }, + { + "epoch": 0.362163385381308, + "grad_norm": 0.6719469428185199, + "learning_rate": 7.375868536501001e-06, + "loss": 0.325, + "step": 8002 + }, + { + "epoch": 0.3622086444897036, + "grad_norm": 0.6800561272443149, + "learning_rate": 7.3752236196307045e-06, + "loss": 0.3806, + "step": 8003 + }, + { + "epoch": 0.3622539035980991, + "grad_norm": 0.6730755539353572, + "learning_rate": 7.374578651724149e-06, + "loss": 0.3323, + "step": 8004 + }, + { + "epoch": 0.3622991627064947, + "grad_norm": 0.6339768447450999, + "learning_rate": 7.373933632795192e-06, + "loss": 0.354, + "step": 8005 + }, + { + "epoch": 0.36234442181489024, + "grad_norm": 0.651900578435686, + "learning_rate": 7.37328856285769e-06, + "loss": 0.3718, + "step": 8006 + }, + { + "epoch": 0.36238968092328583, + "grad_norm": 0.6331718571481463, + "learning_rate": 7.372643441925508e-06, + "loss": 0.3649, + "step": 8007 + }, + { + "epoch": 0.36243494003168136, + "grad_norm": 0.6150645079124384, + "learning_rate": 7.371998270012504e-06, + "loss": 0.3652, + "step": 8008 + }, + { + "epoch": 0.36248019914007695, + "grad_norm": 0.6634436052319046, + "learning_rate": 7.371353047132542e-06, + "loss": 0.3629, + "step": 8009 + }, + { + "epoch": 0.3625254582484725, + "grad_norm": 1.0406447821867784, + "learning_rate": 7.370707773299486e-06, + "loss": 0.5121, + "step": 8010 + }, + { + "epoch": 0.3625707173568681, + "grad_norm": 0.6452982035033098, + "learning_rate": 7.370062448527202e-06, + "loss": 0.3304, + "step": 8011 + }, + { + "epoch": 0.3626159764652636, + "grad_norm": 0.7508094008376939, + "learning_rate": 7.369417072829555e-06, + "loss": 0.4023, + "step": 8012 + }, + { + "epoch": 0.3626612355736592, + "grad_norm": 0.6473029618057764, + "learning_rate": 7.368771646220412e-06, + "loss": 0.3824, + "step": 8013 + }, + { + "epoch": 0.36270649468205474, + "grad_norm": 0.694529569892738, + "learning_rate": 7.36812616871364e-06, + "loss": 0.3445, + "step": 8014 + }, + { + "epoch": 0.3627517537904503, + "grad_norm": 0.6285840846990028, + "learning_rate": 7.367480640323113e-06, + "loss": 0.3497, + "step": 8015 + }, + { + "epoch": 0.3627970128988459, + "grad_norm": 0.4083667963474437, + "learning_rate": 7.366835061062696e-06, + "loss": 0.4819, + "step": 8016 + }, + { + "epoch": 0.36284227200724145, + "grad_norm": 0.39779569020588457, + "learning_rate": 7.366189430946262e-06, + "loss": 0.4857, + "step": 8017 + }, + { + "epoch": 0.36288753111563704, + "grad_norm": 0.33896363384718664, + "learning_rate": 7.365543749987685e-06, + "loss": 0.4672, + "step": 8018 + }, + { + "epoch": 0.3629327902240326, + "grad_norm": 0.3043889489229819, + "learning_rate": 7.364898018200839e-06, + "loss": 0.4865, + "step": 8019 + }, + { + "epoch": 0.36297804933242817, + "grad_norm": 0.2978844693860657, + "learning_rate": 7.364252235599596e-06, + "loss": 0.4573, + "step": 8020 + }, + { + "epoch": 0.3630233084408237, + "grad_norm": 0.841995713596338, + "learning_rate": 7.363606402197836e-06, + "loss": 0.3927, + "step": 8021 + }, + { + "epoch": 0.3630685675492193, + "grad_norm": 0.7853050599559405, + "learning_rate": 7.362960518009432e-06, + "loss": 0.3717, + "step": 8022 + }, + { + "epoch": 0.3631138266576148, + "grad_norm": 0.7351000709156122, + "learning_rate": 7.362314583048265e-06, + "loss": 0.3284, + "step": 8023 + }, + { + "epoch": 0.3631590857660104, + "grad_norm": 0.8623144464403313, + "learning_rate": 7.361668597328212e-06, + "loss": 0.3766, + "step": 8024 + }, + { + "epoch": 0.36320434487440595, + "grad_norm": 0.6196968848506009, + "learning_rate": 7.361022560863154e-06, + "loss": 0.4829, + "step": 8025 + }, + { + "epoch": 0.36324960398280154, + "grad_norm": 0.7176131906312808, + "learning_rate": 7.360376473666973e-06, + "loss": 0.3669, + "step": 8026 + }, + { + "epoch": 0.36329486309119713, + "grad_norm": 0.6777973099375537, + "learning_rate": 7.359730335753551e-06, + "loss": 0.3583, + "step": 8027 + }, + { + "epoch": 0.36334012219959266, + "grad_norm": 0.6718870397866715, + "learning_rate": 7.35908414713677e-06, + "loss": 0.3671, + "step": 8028 + }, + { + "epoch": 0.36338538130798825, + "grad_norm": 0.5794901646044115, + "learning_rate": 7.358437907830518e-06, + "loss": 0.3311, + "step": 8029 + }, + { + "epoch": 0.3634306404163838, + "grad_norm": 0.368659357946139, + "learning_rate": 7.3577916178486775e-06, + "loss": 0.4644, + "step": 8030 + }, + { + "epoch": 0.3634758995247794, + "grad_norm": 0.6997437666807766, + "learning_rate": 7.357145277205138e-06, + "loss": 0.3567, + "step": 8031 + }, + { + "epoch": 0.3635211586331749, + "grad_norm": 0.3570708240657923, + "learning_rate": 7.356498885913784e-06, + "loss": 0.4997, + "step": 8032 + }, + { + "epoch": 0.3635664177415705, + "grad_norm": 0.8237636191487573, + "learning_rate": 7.3558524439885075e-06, + "loss": 0.4016, + "step": 8033 + }, + { + "epoch": 0.36361167684996604, + "grad_norm": 0.6271151725782911, + "learning_rate": 7.3552059514431985e-06, + "loss": 0.3163, + "step": 8034 + }, + { + "epoch": 0.3636569359583616, + "grad_norm": 0.6676304941879442, + "learning_rate": 7.3545594082917435e-06, + "loss": 0.4084, + "step": 8035 + }, + { + "epoch": 0.36370219506675716, + "grad_norm": 0.7293712480186316, + "learning_rate": 7.353912814548042e-06, + "loss": 0.4009, + "step": 8036 + }, + { + "epoch": 0.36374745417515275, + "grad_norm": 0.7012141766795378, + "learning_rate": 7.353266170225982e-06, + "loss": 0.3365, + "step": 8037 + }, + { + "epoch": 0.36379271328354834, + "grad_norm": 0.7480133971533515, + "learning_rate": 7.35261947533946e-06, + "loss": 0.3778, + "step": 8038 + }, + { + "epoch": 0.3638379723919439, + "grad_norm": 0.5479419567796888, + "learning_rate": 7.35197272990237e-06, + "loss": 0.505, + "step": 8039 + }, + { + "epoch": 0.36388323150033947, + "grad_norm": 0.6619275487363858, + "learning_rate": 7.35132593392861e-06, + "loss": 0.3614, + "step": 8040 + }, + { + "epoch": 0.363928490608735, + "grad_norm": 0.4062441097479375, + "learning_rate": 7.350679087432078e-06, + "loss": 0.4791, + "step": 8041 + }, + { + "epoch": 0.3639737497171306, + "grad_norm": 0.29776999395359294, + "learning_rate": 7.3500321904266725e-06, + "loss": 0.5068, + "step": 8042 + }, + { + "epoch": 0.3640190088255261, + "grad_norm": 0.8457303726828063, + "learning_rate": 7.349385242926291e-06, + "loss": 0.3744, + "step": 8043 + }, + { + "epoch": 0.3640642679339217, + "grad_norm": 1.0320355475306249, + "learning_rate": 7.348738244944837e-06, + "loss": 0.3642, + "step": 8044 + }, + { + "epoch": 0.36410952704231725, + "grad_norm": 0.4901552884921019, + "learning_rate": 7.348091196496212e-06, + "loss": 0.4909, + "step": 8045 + }, + { + "epoch": 0.36415478615071284, + "grad_norm": 0.749738817323516, + "learning_rate": 7.3474440975943185e-06, + "loss": 0.3305, + "step": 8046 + }, + { + "epoch": 0.3642000452591084, + "grad_norm": 0.6980541542956954, + "learning_rate": 7.346796948253061e-06, + "loss": 0.3835, + "step": 8047 + }, + { + "epoch": 0.36424530436750396, + "grad_norm": 0.6926662082886041, + "learning_rate": 7.346149748486345e-06, + "loss": 0.3878, + "step": 8048 + }, + { + "epoch": 0.3642905634758995, + "grad_norm": 0.6511728917753308, + "learning_rate": 7.345502498308076e-06, + "loss": 0.3591, + "step": 8049 + }, + { + "epoch": 0.3643358225842951, + "grad_norm": 0.63481958151217, + "learning_rate": 7.3448551977321615e-06, + "loss": 0.3413, + "step": 8050 + }, + { + "epoch": 0.3643810816926907, + "grad_norm": 0.4470564181613816, + "learning_rate": 7.344207846772511e-06, + "loss": 0.4872, + "step": 8051 + }, + { + "epoch": 0.3644263408010862, + "grad_norm": 0.61445838152631, + "learning_rate": 7.3435604454430345e-06, + "loss": 0.3575, + "step": 8052 + }, + { + "epoch": 0.3644715999094818, + "grad_norm": 0.34039730000214496, + "learning_rate": 7.34291299375764e-06, + "loss": 0.4941, + "step": 8053 + }, + { + "epoch": 0.36451685901787734, + "grad_norm": 0.5926980612134997, + "learning_rate": 7.342265491730243e-06, + "loss": 0.3466, + "step": 8054 + }, + { + "epoch": 0.3645621181262729, + "grad_norm": 0.2837414537247516, + "learning_rate": 7.341617939374753e-06, + "loss": 0.4955, + "step": 8055 + }, + { + "epoch": 0.36460737723466846, + "grad_norm": 0.7716956234209372, + "learning_rate": 7.340970336705084e-06, + "loss": 0.36, + "step": 8056 + }, + { + "epoch": 0.36465263634306405, + "grad_norm": 0.6427150881432653, + "learning_rate": 7.340322683735155e-06, + "loss": 0.3592, + "step": 8057 + }, + { + "epoch": 0.3646978954514596, + "grad_norm": 0.3857297404798561, + "learning_rate": 7.339674980478878e-06, + "loss": 0.4854, + "step": 8058 + }, + { + "epoch": 0.3647431545598552, + "grad_norm": 0.601291663477463, + "learning_rate": 7.339027226950171e-06, + "loss": 0.3543, + "step": 8059 + }, + { + "epoch": 0.3647884136682507, + "grad_norm": 0.7298123559205156, + "learning_rate": 7.338379423162953e-06, + "loss": 0.3713, + "step": 8060 + }, + { + "epoch": 0.3648336727766463, + "grad_norm": 0.6565806633034736, + "learning_rate": 7.337731569131143e-06, + "loss": 0.3452, + "step": 8061 + }, + { + "epoch": 0.3648789318850419, + "grad_norm": 0.3229528406928706, + "learning_rate": 7.3370836648686616e-06, + "loss": 0.4727, + "step": 8062 + }, + { + "epoch": 0.3649241909934374, + "grad_norm": 0.6929570188379901, + "learning_rate": 7.33643571038943e-06, + "loss": 0.348, + "step": 8063 + }, + { + "epoch": 0.364969450101833, + "grad_norm": 0.6625662022938806, + "learning_rate": 7.33578770570737e-06, + "loss": 0.3899, + "step": 8064 + }, + { + "epoch": 0.36501470921022855, + "grad_norm": 0.7017583007840354, + "learning_rate": 7.335139650836407e-06, + "loss": 0.4089, + "step": 8065 + }, + { + "epoch": 0.36505996831862414, + "grad_norm": 0.662474726091609, + "learning_rate": 7.3344915457904655e-06, + "loss": 0.3396, + "step": 8066 + }, + { + "epoch": 0.3651052274270197, + "grad_norm": 0.601157599169405, + "learning_rate": 7.3338433905834685e-06, + "loss": 0.3234, + "step": 8067 + }, + { + "epoch": 0.36515048653541526, + "grad_norm": 0.6897480223541927, + "learning_rate": 7.333195185229346e-06, + "loss": 0.3668, + "step": 8068 + }, + { + "epoch": 0.3651957456438108, + "grad_norm": 0.5994620628157344, + "learning_rate": 7.3325469297420246e-06, + "loss": 0.3894, + "step": 8069 + }, + { + "epoch": 0.3652410047522064, + "grad_norm": 0.6392091647460895, + "learning_rate": 7.331898624135434e-06, + "loss": 0.3513, + "step": 8070 + }, + { + "epoch": 0.3652862638606019, + "grad_norm": 0.3701285705436311, + "learning_rate": 7.331250268423505e-06, + "loss": 0.4707, + "step": 8071 + }, + { + "epoch": 0.3653315229689975, + "grad_norm": 0.5766710018646587, + "learning_rate": 7.330601862620164e-06, + "loss": 0.34, + "step": 8072 + }, + { + "epoch": 0.3653767820773931, + "grad_norm": 0.6618459831944085, + "learning_rate": 7.3299534067393495e-06, + "loss": 0.3681, + "step": 8073 + }, + { + "epoch": 0.36542204118578864, + "grad_norm": 0.8322945398975357, + "learning_rate": 7.329304900794991e-06, + "loss": 0.3479, + "step": 8074 + }, + { + "epoch": 0.3654673002941842, + "grad_norm": 0.8158659005341743, + "learning_rate": 7.328656344801025e-06, + "loss": 0.3681, + "step": 8075 + }, + { + "epoch": 0.36551255940257976, + "grad_norm": 0.8539346306396185, + "learning_rate": 7.328007738771385e-06, + "loss": 0.362, + "step": 8076 + }, + { + "epoch": 0.36555781851097535, + "grad_norm": 0.6563458590862646, + "learning_rate": 7.32735908272001e-06, + "loss": 0.3609, + "step": 8077 + }, + { + "epoch": 0.3656030776193709, + "grad_norm": 0.3209877402422395, + "learning_rate": 7.326710376660836e-06, + "loss": 0.502, + "step": 8078 + }, + { + "epoch": 0.3656483367277665, + "grad_norm": 0.675428159697903, + "learning_rate": 7.326061620607801e-06, + "loss": 0.3865, + "step": 8079 + }, + { + "epoch": 0.365693595836162, + "grad_norm": 0.6703471467189661, + "learning_rate": 7.325412814574847e-06, + "loss": 0.3579, + "step": 8080 + }, + { + "epoch": 0.3657388549445576, + "grad_norm": 0.602632944758576, + "learning_rate": 7.324763958575913e-06, + "loss": 0.3544, + "step": 8081 + }, + { + "epoch": 0.36578411405295314, + "grad_norm": 0.31864481918066306, + "learning_rate": 7.324115052624941e-06, + "loss": 0.5226, + "step": 8082 + }, + { + "epoch": 0.3658293731613487, + "grad_norm": 0.6424378043768086, + "learning_rate": 7.323466096735875e-06, + "loss": 0.3867, + "step": 8083 + }, + { + "epoch": 0.36587463226974426, + "grad_norm": 0.6450470744304372, + "learning_rate": 7.322817090922659e-06, + "loss": 0.359, + "step": 8084 + }, + { + "epoch": 0.36591989137813985, + "grad_norm": 0.7814567957687872, + "learning_rate": 7.322168035199237e-06, + "loss": 0.3518, + "step": 8085 + }, + { + "epoch": 0.36596515048653544, + "grad_norm": 0.6107592968368141, + "learning_rate": 7.3215189295795565e-06, + "loss": 0.3286, + "step": 8086 + }, + { + "epoch": 0.366010409594931, + "grad_norm": 0.6116002376209456, + "learning_rate": 7.320869774077564e-06, + "loss": 0.3235, + "step": 8087 + }, + { + "epoch": 0.36605566870332656, + "grad_norm": 0.3665436413370661, + "learning_rate": 7.320220568707207e-06, + "loss": 0.4684, + "step": 8088 + }, + { + "epoch": 0.3661009278117221, + "grad_norm": 0.6819908460809249, + "learning_rate": 7.319571313482437e-06, + "loss": 0.3676, + "step": 8089 + }, + { + "epoch": 0.3661461869201177, + "grad_norm": 0.6976100390540673, + "learning_rate": 7.318922008417203e-06, + "loss": 0.346, + "step": 8090 + }, + { + "epoch": 0.3661914460285132, + "grad_norm": 0.32398500017315196, + "learning_rate": 7.318272653525457e-06, + "loss": 0.4916, + "step": 8091 + }, + { + "epoch": 0.3662367051369088, + "grad_norm": 0.6873578858329197, + "learning_rate": 7.317623248821153e-06, + "loss": 0.3403, + "step": 8092 + }, + { + "epoch": 0.36628196424530435, + "grad_norm": 0.6234197882322604, + "learning_rate": 7.316973794318242e-06, + "loss": 0.3249, + "step": 8093 + }, + { + "epoch": 0.36632722335369994, + "grad_norm": 0.40565248593002673, + "learning_rate": 7.316324290030682e-06, + "loss": 0.4955, + "step": 8094 + }, + { + "epoch": 0.3663724824620955, + "grad_norm": 0.7118511193486391, + "learning_rate": 7.315674735972426e-06, + "loss": 0.3717, + "step": 8095 + }, + { + "epoch": 0.36641774157049106, + "grad_norm": 0.6902695636733798, + "learning_rate": 7.315025132157432e-06, + "loss": 0.386, + "step": 8096 + }, + { + "epoch": 0.36646300067888665, + "grad_norm": 0.3724437368013699, + "learning_rate": 7.314375478599657e-06, + "loss": 0.4877, + "step": 8097 + }, + { + "epoch": 0.3665082597872822, + "grad_norm": 0.6294176595203217, + "learning_rate": 7.313725775313061e-06, + "loss": 0.3668, + "step": 8098 + }, + { + "epoch": 0.3665535188956778, + "grad_norm": 0.6185721391255002, + "learning_rate": 7.313076022311605e-06, + "loss": 0.3367, + "step": 8099 + }, + { + "epoch": 0.3665987780040733, + "grad_norm": 0.2993327159764945, + "learning_rate": 7.31242621960925e-06, + "loss": 0.4597, + "step": 8100 + }, + { + "epoch": 0.3666440371124689, + "grad_norm": 0.2986381957076049, + "learning_rate": 7.311776367219956e-06, + "loss": 0.4973, + "step": 8101 + }, + { + "epoch": 0.36668929622086444, + "grad_norm": 0.6804572154313598, + "learning_rate": 7.3111264651576895e-06, + "loss": 0.3448, + "step": 8102 + }, + { + "epoch": 0.36673455532926, + "grad_norm": 0.8765735545656241, + "learning_rate": 7.310476513436412e-06, + "loss": 0.3784, + "step": 8103 + }, + { + "epoch": 0.36677981443765556, + "grad_norm": 0.2952493580258756, + "learning_rate": 7.3098265120700915e-06, + "loss": 0.4805, + "step": 8104 + }, + { + "epoch": 0.36682507354605115, + "grad_norm": 0.6513747567476289, + "learning_rate": 7.3091764610726935e-06, + "loss": 0.332, + "step": 8105 + }, + { + "epoch": 0.3668703326544467, + "grad_norm": 0.8286346174779458, + "learning_rate": 7.308526360458185e-06, + "loss": 0.3715, + "step": 8106 + }, + { + "epoch": 0.3669155917628423, + "grad_norm": 0.6412950037424189, + "learning_rate": 7.307876210240534e-06, + "loss": 0.3814, + "step": 8107 + }, + { + "epoch": 0.3669608508712378, + "grad_norm": 0.741223434168854, + "learning_rate": 7.3072260104337124e-06, + "loss": 0.3169, + "step": 8108 + }, + { + "epoch": 0.3670061099796334, + "grad_norm": 0.6052335633310483, + "learning_rate": 7.3065757610516895e-06, + "loss": 0.3636, + "step": 8109 + }, + { + "epoch": 0.367051369088029, + "grad_norm": 0.635252692031835, + "learning_rate": 7.305925462108439e-06, + "loss": 0.3614, + "step": 8110 + }, + { + "epoch": 0.3670966281964245, + "grad_norm": 0.634580241462948, + "learning_rate": 7.30527511361793e-06, + "loss": 0.3766, + "step": 8111 + }, + { + "epoch": 0.3671418873048201, + "grad_norm": 0.39177715832680615, + "learning_rate": 7.30462471559414e-06, + "loss": 0.5032, + "step": 8112 + }, + { + "epoch": 0.36718714641321565, + "grad_norm": 0.6742823922848321, + "learning_rate": 7.303974268051044e-06, + "loss": 0.3195, + "step": 8113 + }, + { + "epoch": 0.36723240552161124, + "grad_norm": 0.6544761152892868, + "learning_rate": 7.303323771002615e-06, + "loss": 0.3331, + "step": 8114 + }, + { + "epoch": 0.3672776646300068, + "grad_norm": 0.6472868999827004, + "learning_rate": 7.302673224462835e-06, + "loss": 0.3105, + "step": 8115 + }, + { + "epoch": 0.36732292373840236, + "grad_norm": 0.30055019896322244, + "learning_rate": 7.302022628445678e-06, + "loss": 0.4879, + "step": 8116 + }, + { + "epoch": 0.3673681828467979, + "grad_norm": 0.6749211147111039, + "learning_rate": 7.301371982965125e-06, + "loss": 0.3763, + "step": 8117 + }, + { + "epoch": 0.3674134419551935, + "grad_norm": 0.6467312875354413, + "learning_rate": 7.3007212880351565e-06, + "loss": 0.333, + "step": 8118 + }, + { + "epoch": 0.367458701063589, + "grad_norm": 0.6935178709365164, + "learning_rate": 7.3000705436697525e-06, + "loss": 0.3862, + "step": 8119 + }, + { + "epoch": 0.3675039601719846, + "grad_norm": 0.6099660368039522, + "learning_rate": 7.2994197498828975e-06, + "loss": 0.3278, + "step": 8120 + }, + { + "epoch": 0.3675492192803802, + "grad_norm": 0.6233582404645013, + "learning_rate": 7.298768906688576e-06, + "loss": 0.3747, + "step": 8121 + }, + { + "epoch": 0.36759447838877574, + "grad_norm": 0.6454675108502524, + "learning_rate": 7.298118014100766e-06, + "loss": 0.3304, + "step": 8122 + }, + { + "epoch": 0.3676397374971713, + "grad_norm": 0.6187331614448284, + "learning_rate": 7.297467072133463e-06, + "loss": 0.3657, + "step": 8123 + }, + { + "epoch": 0.36768499660556686, + "grad_norm": 0.6252633414228883, + "learning_rate": 7.296816080800646e-06, + "loss": 0.3546, + "step": 8124 + }, + { + "epoch": 0.36773025571396245, + "grad_norm": 0.6552389003007436, + "learning_rate": 7.296165040116308e-06, + "loss": 0.3447, + "step": 8125 + }, + { + "epoch": 0.367775514822358, + "grad_norm": 0.6660391799782329, + "learning_rate": 7.295513950094433e-06, + "loss": 0.3703, + "step": 8126 + }, + { + "epoch": 0.3678207739307536, + "grad_norm": 0.5997289999195763, + "learning_rate": 7.294862810749014e-06, + "loss": 0.3484, + "step": 8127 + }, + { + "epoch": 0.3678660330391491, + "grad_norm": 0.6706146748720235, + "learning_rate": 7.2942116220940406e-06, + "loss": 0.3371, + "step": 8128 + }, + { + "epoch": 0.3679112921475447, + "grad_norm": 0.6006983753253756, + "learning_rate": 7.293560384143506e-06, + "loss": 0.3094, + "step": 8129 + }, + { + "epoch": 0.36795655125594023, + "grad_norm": 0.631994678606701, + "learning_rate": 7.292909096911403e-06, + "loss": 0.3352, + "step": 8130 + }, + { + "epoch": 0.3680018103643358, + "grad_norm": 0.7523108426437606, + "learning_rate": 7.292257760411726e-06, + "loss": 0.37, + "step": 8131 + }, + { + "epoch": 0.3680470694727314, + "grad_norm": 0.6309566047038871, + "learning_rate": 7.29160637465847e-06, + "loss": 0.3563, + "step": 8132 + }, + { + "epoch": 0.36809232858112695, + "grad_norm": 0.37387998050983035, + "learning_rate": 7.290954939665632e-06, + "loss": 0.4873, + "step": 8133 + }, + { + "epoch": 0.36813758768952254, + "grad_norm": 0.6851452506465928, + "learning_rate": 7.290303455447208e-06, + "loss": 0.3671, + "step": 8134 + }, + { + "epoch": 0.3681828467979181, + "grad_norm": 0.7434686822496943, + "learning_rate": 7.289651922017195e-06, + "loss": 0.373, + "step": 8135 + }, + { + "epoch": 0.36822810590631366, + "grad_norm": 0.5914979977178154, + "learning_rate": 7.289000339389596e-06, + "loss": 0.337, + "step": 8136 + }, + { + "epoch": 0.3682733650147092, + "grad_norm": 0.6321514469503917, + "learning_rate": 7.288348707578409e-06, + "loss": 0.363, + "step": 8137 + }, + { + "epoch": 0.3683186241231048, + "grad_norm": 0.6629606564514017, + "learning_rate": 7.2876970265976365e-06, + "loss": 0.3691, + "step": 8138 + }, + { + "epoch": 0.3683638832315003, + "grad_norm": 0.6713936840695812, + "learning_rate": 7.287045296461281e-06, + "loss": 0.3425, + "step": 8139 + }, + { + "epoch": 0.3684091423398959, + "grad_norm": 0.6120971669362468, + "learning_rate": 7.2863935171833465e-06, + "loss": 0.3648, + "step": 8140 + }, + { + "epoch": 0.36845440144829145, + "grad_norm": 0.6659355828139842, + "learning_rate": 7.285741688777838e-06, + "loss": 0.3662, + "step": 8141 + }, + { + "epoch": 0.36849966055668704, + "grad_norm": 0.4104427222421775, + "learning_rate": 7.285089811258761e-06, + "loss": 0.502, + "step": 8142 + }, + { + "epoch": 0.36854491966508257, + "grad_norm": 0.6590165373916503, + "learning_rate": 7.28443788464012e-06, + "loss": 0.33, + "step": 8143 + }, + { + "epoch": 0.36859017877347816, + "grad_norm": 0.6633311656110775, + "learning_rate": 7.283785908935927e-06, + "loss": 0.3849, + "step": 8144 + }, + { + "epoch": 0.36863543788187375, + "grad_norm": 0.6063659937084452, + "learning_rate": 7.283133884160187e-06, + "loss": 0.316, + "step": 8145 + }, + { + "epoch": 0.3686806969902693, + "grad_norm": 0.5980474521604764, + "learning_rate": 7.282481810326915e-06, + "loss": 0.3796, + "step": 8146 + }, + { + "epoch": 0.3687259560986649, + "grad_norm": 0.5990355897144266, + "learning_rate": 7.281829687450117e-06, + "loss": 0.3323, + "step": 8147 + }, + { + "epoch": 0.3687712152070604, + "grad_norm": 0.6745356562340838, + "learning_rate": 7.281177515543807e-06, + "loss": 0.387, + "step": 8148 + }, + { + "epoch": 0.368816474315456, + "grad_norm": 0.6158116775354936, + "learning_rate": 7.280525294621999e-06, + "loss": 0.3219, + "step": 8149 + }, + { + "epoch": 0.36886173342385153, + "grad_norm": 0.6545144180352372, + "learning_rate": 7.2798730246987056e-06, + "loss": 0.3637, + "step": 8150 + }, + { + "epoch": 0.3689069925322471, + "grad_norm": 0.4375279106063995, + "learning_rate": 7.279220705787943e-06, + "loss": 0.471, + "step": 8151 + }, + { + "epoch": 0.36895225164064266, + "grad_norm": 0.38159927496792784, + "learning_rate": 7.278568337903729e-06, + "loss": 0.4683, + "step": 8152 + }, + { + "epoch": 0.36899751074903825, + "grad_norm": 0.6542186893818985, + "learning_rate": 7.2779159210600765e-06, + "loss": 0.3343, + "step": 8153 + }, + { + "epoch": 0.3690427698574338, + "grad_norm": 0.29272977656676263, + "learning_rate": 7.277263455271011e-06, + "loss": 0.4718, + "step": 8154 + }, + { + "epoch": 0.3690880289658294, + "grad_norm": 0.6501785373959793, + "learning_rate": 7.2766109405505445e-06, + "loss": 0.3524, + "step": 8155 + }, + { + "epoch": 0.36913328807422496, + "grad_norm": 0.6013988426375781, + "learning_rate": 7.275958376912703e-06, + "loss": 0.334, + "step": 8156 + }, + { + "epoch": 0.3691785471826205, + "grad_norm": 0.620811403055435, + "learning_rate": 7.275305764371505e-06, + "loss": 0.3269, + "step": 8157 + }, + { + "epoch": 0.3692238062910161, + "grad_norm": 0.6162970042756091, + "learning_rate": 7.274653102940974e-06, + "loss": 0.3129, + "step": 8158 + }, + { + "epoch": 0.3692690653994116, + "grad_norm": 0.6723016335543197, + "learning_rate": 7.274000392635134e-06, + "loss": 0.3956, + "step": 8159 + }, + { + "epoch": 0.3693143245078072, + "grad_norm": 0.6754868476325461, + "learning_rate": 7.273347633468011e-06, + "loss": 0.3703, + "step": 8160 + }, + { + "epoch": 0.36935958361620275, + "grad_norm": 0.6573254874606067, + "learning_rate": 7.272694825453628e-06, + "loss": 0.3852, + "step": 8161 + }, + { + "epoch": 0.36940484272459834, + "grad_norm": 0.7382278556803853, + "learning_rate": 7.272041968606014e-06, + "loss": 0.381, + "step": 8162 + }, + { + "epoch": 0.36945010183299387, + "grad_norm": 0.6845167040529891, + "learning_rate": 7.271389062939196e-06, + "loss": 0.3502, + "step": 8163 + }, + { + "epoch": 0.36949536094138946, + "grad_norm": 0.6551708802117328, + "learning_rate": 7.270736108467202e-06, + "loss": 0.3503, + "step": 8164 + }, + { + "epoch": 0.369540620049785, + "grad_norm": 0.6211818518982469, + "learning_rate": 7.2700831052040656e-06, + "loss": 0.3727, + "step": 8165 + }, + { + "epoch": 0.3695858791581806, + "grad_norm": 0.6544819832232823, + "learning_rate": 7.269430053163813e-06, + "loss": 0.3592, + "step": 8166 + }, + { + "epoch": 0.3696311382665762, + "grad_norm": 0.6687579884282855, + "learning_rate": 7.268776952360479e-06, + "loss": 0.3526, + "step": 8167 + }, + { + "epoch": 0.3696763973749717, + "grad_norm": 0.6643304048573219, + "learning_rate": 7.268123802808097e-06, + "loss": 0.3713, + "step": 8168 + }, + { + "epoch": 0.3697216564833673, + "grad_norm": 0.6462119618190477, + "learning_rate": 7.2674706045207e-06, + "loss": 0.3163, + "step": 8169 + }, + { + "epoch": 0.36976691559176283, + "grad_norm": 0.6632924357449026, + "learning_rate": 7.2668173575123234e-06, + "loss": 0.382, + "step": 8170 + }, + { + "epoch": 0.3698121747001584, + "grad_norm": 0.7171910384022454, + "learning_rate": 7.2661640617970054e-06, + "loss": 0.4129, + "step": 8171 + }, + { + "epoch": 0.36985743380855396, + "grad_norm": 0.6723190617891283, + "learning_rate": 7.26551071738878e-06, + "loss": 0.3729, + "step": 8172 + }, + { + "epoch": 0.36990269291694955, + "grad_norm": 0.6634564049143907, + "learning_rate": 7.264857324301688e-06, + "loss": 0.4029, + "step": 8173 + }, + { + "epoch": 0.3699479520253451, + "grad_norm": 0.7272365330599319, + "learning_rate": 7.264203882549766e-06, + "loss": 0.4852, + "step": 8174 + }, + { + "epoch": 0.3699932111337407, + "grad_norm": 0.6412200563909634, + "learning_rate": 7.26355039214706e-06, + "loss": 0.3433, + "step": 8175 + }, + { + "epoch": 0.3700384702421362, + "grad_norm": 0.3903053709652611, + "learning_rate": 7.262896853107606e-06, + "loss": 0.4617, + "step": 8176 + }, + { + "epoch": 0.3700837293505318, + "grad_norm": 0.6341750980091954, + "learning_rate": 7.262243265445449e-06, + "loss": 0.3362, + "step": 8177 + }, + { + "epoch": 0.37012898845892733, + "grad_norm": 0.6452881582463291, + "learning_rate": 7.261589629174632e-06, + "loss": 0.3275, + "step": 8178 + }, + { + "epoch": 0.3701742475673229, + "grad_norm": 0.6057918110267597, + "learning_rate": 7.260935944309201e-06, + "loss": 0.3315, + "step": 8179 + }, + { + "epoch": 0.3702195066757185, + "grad_norm": 0.6226973497002455, + "learning_rate": 7.260282210863199e-06, + "loss": 0.3658, + "step": 8180 + }, + { + "epoch": 0.37026476578411405, + "grad_norm": 0.6717952130988063, + "learning_rate": 7.2596284288506745e-06, + "loss": 0.3743, + "step": 8181 + }, + { + "epoch": 0.37031002489250964, + "grad_norm": 0.7272312458191902, + "learning_rate": 7.258974598285674e-06, + "loss": 0.4841, + "step": 8182 + }, + { + "epoch": 0.37035528400090517, + "grad_norm": 0.6751799745883079, + "learning_rate": 7.25832071918225e-06, + "loss": 0.3406, + "step": 8183 + }, + { + "epoch": 0.37040054310930076, + "grad_norm": 0.6424543775896905, + "learning_rate": 7.257666791554448e-06, + "loss": 0.3633, + "step": 8184 + }, + { + "epoch": 0.3704458022176963, + "grad_norm": 0.653516097898005, + "learning_rate": 7.25701281541632e-06, + "loss": 0.325, + "step": 8185 + }, + { + "epoch": 0.3704910613260919, + "grad_norm": 0.5794649639415843, + "learning_rate": 7.2563587907819185e-06, + "loss": 0.3244, + "step": 8186 + }, + { + "epoch": 0.3705363204344874, + "grad_norm": 0.7157715260093064, + "learning_rate": 7.255704717665298e-06, + "loss": 0.4209, + "step": 8187 + }, + { + "epoch": 0.370581579542883, + "grad_norm": 0.5884653553265714, + "learning_rate": 7.25505059608051e-06, + "loss": 0.3363, + "step": 8188 + }, + { + "epoch": 0.37062683865127855, + "grad_norm": 0.6741184230110532, + "learning_rate": 7.25439642604161e-06, + "loss": 0.3378, + "step": 8189 + }, + { + "epoch": 0.37067209775967414, + "grad_norm": 0.9005729167867248, + "learning_rate": 7.253742207562655e-06, + "loss": 0.3587, + "step": 8190 + }, + { + "epoch": 0.3707173568680697, + "grad_norm": 0.7030014372266986, + "learning_rate": 7.253087940657702e-06, + "loss": 0.3348, + "step": 8191 + }, + { + "epoch": 0.37076261597646526, + "grad_norm": 0.38392013327619495, + "learning_rate": 7.252433625340811e-06, + "loss": 0.5037, + "step": 8192 + }, + { + "epoch": 0.37080787508486085, + "grad_norm": 0.6781324653726816, + "learning_rate": 7.251779261626035e-06, + "loss": 0.3572, + "step": 8193 + }, + { + "epoch": 0.3708531341932564, + "grad_norm": 0.6761517906134753, + "learning_rate": 7.251124849527442e-06, + "loss": 0.3056, + "step": 8194 + }, + { + "epoch": 0.370898393301652, + "grad_norm": 0.6428503014211182, + "learning_rate": 7.250470389059088e-06, + "loss": 0.3996, + "step": 8195 + }, + { + "epoch": 0.3709436524100475, + "grad_norm": 0.6354730026376507, + "learning_rate": 7.2498158802350385e-06, + "loss": 0.3845, + "step": 8196 + }, + { + "epoch": 0.3709889115184431, + "grad_norm": 0.6703399462653616, + "learning_rate": 7.249161323069355e-06, + "loss": 0.3168, + "step": 8197 + }, + { + "epoch": 0.37103417062683863, + "grad_norm": 0.7792293419436107, + "learning_rate": 7.248506717576102e-06, + "loss": 0.3511, + "step": 8198 + }, + { + "epoch": 0.3710794297352342, + "grad_norm": 0.6482685165776548, + "learning_rate": 7.247852063769345e-06, + "loss": 0.4104, + "step": 8199 + }, + { + "epoch": 0.37112468884362976, + "grad_norm": 0.6355574305326335, + "learning_rate": 7.247197361663152e-06, + "loss": 0.3241, + "step": 8200 + }, + { + "epoch": 0.37116994795202535, + "grad_norm": 0.6312939670347133, + "learning_rate": 7.246542611271587e-06, + "loss": 0.3608, + "step": 8201 + }, + { + "epoch": 0.3712152070604209, + "grad_norm": 0.6416849686675478, + "learning_rate": 7.245887812608725e-06, + "loss": 0.425, + "step": 8202 + }, + { + "epoch": 0.37126046616881647, + "grad_norm": 0.690208012943487, + "learning_rate": 7.245232965688629e-06, + "loss": 0.3636, + "step": 8203 + }, + { + "epoch": 0.37130572527721206, + "grad_norm": 0.8421190596902749, + "learning_rate": 7.244578070525373e-06, + "loss": 0.3582, + "step": 8204 + }, + { + "epoch": 0.3713509843856076, + "grad_norm": 0.6219765356087398, + "learning_rate": 7.243923127133028e-06, + "loss": 0.3884, + "step": 8205 + }, + { + "epoch": 0.3713962434940032, + "grad_norm": 0.8775162184518147, + "learning_rate": 7.243268135525666e-06, + "loss": 0.3319, + "step": 8206 + }, + { + "epoch": 0.3714415026023987, + "grad_norm": 0.3858290275054255, + "learning_rate": 7.242613095717361e-06, + "loss": 0.5203, + "step": 8207 + }, + { + "epoch": 0.3714867617107943, + "grad_norm": 0.6141720874361678, + "learning_rate": 7.2419580077221906e-06, + "loss": 0.3723, + "step": 8208 + }, + { + "epoch": 0.37153202081918985, + "grad_norm": 0.6414007757700619, + "learning_rate": 7.241302871554226e-06, + "loss": 0.3723, + "step": 8209 + }, + { + "epoch": 0.37157727992758544, + "grad_norm": 0.6300823776488501, + "learning_rate": 7.240647687227547e-06, + "loss": 0.377, + "step": 8210 + }, + { + "epoch": 0.37162253903598097, + "grad_norm": 0.7237074014093474, + "learning_rate": 7.23999245475623e-06, + "loss": 0.4208, + "step": 8211 + }, + { + "epoch": 0.37166779814437656, + "grad_norm": 0.7325475420895977, + "learning_rate": 7.239337174154357e-06, + "loss": 0.3537, + "step": 8212 + }, + { + "epoch": 0.3717130572527721, + "grad_norm": 0.3137186371943764, + "learning_rate": 7.238681845436004e-06, + "loss": 0.4822, + "step": 8213 + }, + { + "epoch": 0.3717583163611677, + "grad_norm": 0.6413702247460596, + "learning_rate": 7.238026468615255e-06, + "loss": 0.3426, + "step": 8214 + }, + { + "epoch": 0.3718035754695633, + "grad_norm": 0.670904099564202, + "learning_rate": 7.23737104370619e-06, + "loss": 0.3609, + "step": 8215 + }, + { + "epoch": 0.3718488345779588, + "grad_norm": 0.6297418857803184, + "learning_rate": 7.236715570722892e-06, + "loss": 0.3477, + "step": 8216 + }, + { + "epoch": 0.3718940936863544, + "grad_norm": 0.5637446500275521, + "learning_rate": 7.236060049679446e-06, + "loss": 0.308, + "step": 8217 + }, + { + "epoch": 0.37193935279474993, + "grad_norm": 0.7496022031912113, + "learning_rate": 7.2354044805899385e-06, + "loss": 0.3905, + "step": 8218 + }, + { + "epoch": 0.3719846119031455, + "grad_norm": 0.6394406128286074, + "learning_rate": 7.234748863468453e-06, + "loss": 0.3696, + "step": 8219 + }, + { + "epoch": 0.37202987101154106, + "grad_norm": 0.5950512693336792, + "learning_rate": 7.234093198329078e-06, + "loss": 0.351, + "step": 8220 + }, + { + "epoch": 0.37207513011993665, + "grad_norm": 0.3256096310149524, + "learning_rate": 7.233437485185904e-06, + "loss": 0.483, + "step": 8221 + }, + { + "epoch": 0.3721203892283322, + "grad_norm": 0.28917444198334696, + "learning_rate": 7.232781724053014e-06, + "loss": 0.4734, + "step": 8222 + }, + { + "epoch": 0.3721656483367278, + "grad_norm": 0.5907937577751626, + "learning_rate": 7.232125914944506e-06, + "loss": 0.3499, + "step": 8223 + }, + { + "epoch": 0.3722109074451233, + "grad_norm": 0.2959842299878078, + "learning_rate": 7.2314700578744635e-06, + "loss": 0.503, + "step": 8224 + }, + { + "epoch": 0.3722561665535189, + "grad_norm": 0.6258794371939906, + "learning_rate": 7.230814152856986e-06, + "loss": 0.3641, + "step": 8225 + }, + { + "epoch": 0.3723014256619145, + "grad_norm": 0.6253935976814171, + "learning_rate": 7.230158199906163e-06, + "loss": 0.3553, + "step": 8226 + }, + { + "epoch": 0.37234668477031, + "grad_norm": 0.3048760301862293, + "learning_rate": 7.2295021990360896e-06, + "loss": 0.4685, + "step": 8227 + }, + { + "epoch": 0.3723919438787056, + "grad_norm": 0.6456964042297493, + "learning_rate": 7.228846150260861e-06, + "loss": 0.3783, + "step": 8228 + }, + { + "epoch": 0.37243720298710115, + "grad_norm": 0.6633579512268112, + "learning_rate": 7.228190053594575e-06, + "loss": 0.3743, + "step": 8229 + }, + { + "epoch": 0.37248246209549674, + "grad_norm": 0.6322349637412092, + "learning_rate": 7.227533909051327e-06, + "loss": 0.3664, + "step": 8230 + }, + { + "epoch": 0.37252772120389227, + "grad_norm": 0.6594001063414584, + "learning_rate": 7.2268777166452175e-06, + "loss": 0.3535, + "step": 8231 + }, + { + "epoch": 0.37257298031228786, + "grad_norm": 0.6914287196399008, + "learning_rate": 7.226221476390344e-06, + "loss": 0.3593, + "step": 8232 + }, + { + "epoch": 0.3726182394206834, + "grad_norm": 0.6762918894508828, + "learning_rate": 7.22556518830081e-06, + "loss": 0.3559, + "step": 8233 + }, + { + "epoch": 0.372663498529079, + "grad_norm": 0.6233630699017264, + "learning_rate": 7.224908852390714e-06, + "loss": 0.3231, + "step": 8234 + }, + { + "epoch": 0.3727087576374745, + "grad_norm": 0.32035678978033805, + "learning_rate": 7.224252468674161e-06, + "loss": 0.4999, + "step": 8235 + }, + { + "epoch": 0.3727540167458701, + "grad_norm": 0.32533747372396, + "learning_rate": 7.223596037165252e-06, + "loss": 0.4856, + "step": 8236 + }, + { + "epoch": 0.37279927585426564, + "grad_norm": 0.7078979169891529, + "learning_rate": 7.2229395578780955e-06, + "loss": 0.3852, + "step": 8237 + }, + { + "epoch": 0.37284453496266123, + "grad_norm": 0.7705429790577362, + "learning_rate": 7.222283030826795e-06, + "loss": 0.3419, + "step": 8238 + }, + { + "epoch": 0.3728897940710568, + "grad_norm": 0.5864709746736614, + "learning_rate": 7.221626456025456e-06, + "loss": 0.3375, + "step": 8239 + }, + { + "epoch": 0.37293505317945236, + "grad_norm": 0.6161853610664281, + "learning_rate": 7.220969833488188e-06, + "loss": 0.3456, + "step": 8240 + }, + { + "epoch": 0.37298031228784795, + "grad_norm": 0.6767120084765911, + "learning_rate": 7.2203131632291e-06, + "loss": 0.3624, + "step": 8241 + }, + { + "epoch": 0.3730255713962435, + "grad_norm": 0.673082152049448, + "learning_rate": 7.2196564452623015e-06, + "loss": 0.3455, + "step": 8242 + }, + { + "epoch": 0.3730708305046391, + "grad_norm": 0.6447050864070746, + "learning_rate": 7.218999679601903e-06, + "loss": 0.3499, + "step": 8243 + }, + { + "epoch": 0.3731160896130346, + "grad_norm": 0.6341690830643045, + "learning_rate": 7.2183428662620155e-06, + "loss": 0.3635, + "step": 8244 + }, + { + "epoch": 0.3731613487214302, + "grad_norm": 0.6284278281076813, + "learning_rate": 7.217686005256755e-06, + "loss": 0.3529, + "step": 8245 + }, + { + "epoch": 0.37320660782982573, + "grad_norm": 0.6416020521885557, + "learning_rate": 7.217029096600231e-06, + "loss": 0.3424, + "step": 8246 + }, + { + "epoch": 0.3732518669382213, + "grad_norm": 0.5983683465068236, + "learning_rate": 7.216372140306563e-06, + "loss": 0.3332, + "step": 8247 + }, + { + "epoch": 0.37329712604661686, + "grad_norm": 0.6559567018480142, + "learning_rate": 7.215715136389862e-06, + "loss": 0.356, + "step": 8248 + }, + { + "epoch": 0.37334238515501245, + "grad_norm": 0.6773549771240448, + "learning_rate": 7.21505808486425e-06, + "loss": 0.3941, + "step": 8249 + }, + { + "epoch": 0.37338764426340804, + "grad_norm": 0.6604796043184018, + "learning_rate": 7.2144009857438436e-06, + "loss": 0.3708, + "step": 8250 + }, + { + "epoch": 0.37343290337180357, + "grad_norm": 0.6423405444166955, + "learning_rate": 7.213743839042757e-06, + "loss": 0.3573, + "step": 8251 + }, + { + "epoch": 0.37347816248019916, + "grad_norm": 0.6249343983434719, + "learning_rate": 7.213086644775118e-06, + "loss": 0.3797, + "step": 8252 + }, + { + "epoch": 0.3735234215885947, + "grad_norm": 0.4517351308111027, + "learning_rate": 7.212429402955043e-06, + "loss": 0.4988, + "step": 8253 + }, + { + "epoch": 0.3735686806969903, + "grad_norm": 0.622628015585213, + "learning_rate": 7.211772113596656e-06, + "loss": 0.3425, + "step": 8254 + }, + { + "epoch": 0.3736139398053858, + "grad_norm": 0.333877141476755, + "learning_rate": 7.211114776714077e-06, + "loss": 0.4689, + "step": 8255 + }, + { + "epoch": 0.3736591989137814, + "grad_norm": 0.6688247811686822, + "learning_rate": 7.210457392321434e-06, + "loss": 0.332, + "step": 8256 + }, + { + "epoch": 0.37370445802217694, + "grad_norm": 0.5636180479977778, + "learning_rate": 7.209799960432851e-06, + "loss": 0.3083, + "step": 8257 + }, + { + "epoch": 0.37374971713057253, + "grad_norm": 0.5817093516022838, + "learning_rate": 7.209142481062452e-06, + "loss": 0.3139, + "step": 8258 + }, + { + "epoch": 0.37379497623896807, + "grad_norm": 0.35989511000783425, + "learning_rate": 7.208484954224366e-06, + "loss": 0.4904, + "step": 8259 + }, + { + "epoch": 0.37384023534736366, + "grad_norm": 0.3762427932277179, + "learning_rate": 7.207827379932724e-06, + "loss": 0.4874, + "step": 8260 + }, + { + "epoch": 0.37388549445575925, + "grad_norm": 0.3321377843546181, + "learning_rate": 7.207169758201649e-06, + "loss": 0.5235, + "step": 8261 + }, + { + "epoch": 0.3739307535641548, + "grad_norm": 0.8598727794773364, + "learning_rate": 7.206512089045277e-06, + "loss": 0.3389, + "step": 8262 + }, + { + "epoch": 0.3739760126725504, + "grad_norm": 0.6917048591356012, + "learning_rate": 7.205854372477735e-06, + "loss": 0.3633, + "step": 8263 + }, + { + "epoch": 0.3740212717809459, + "grad_norm": 0.6357038317196191, + "learning_rate": 7.2051966085131584e-06, + "loss": 0.3523, + "step": 8264 + }, + { + "epoch": 0.3740665308893415, + "grad_norm": 0.7597069027593606, + "learning_rate": 7.20453879716568e-06, + "loss": 0.2921, + "step": 8265 + }, + { + "epoch": 0.37411178999773703, + "grad_norm": 0.7653677810020674, + "learning_rate": 7.203880938449432e-06, + "loss": 0.3254, + "step": 8266 + }, + { + "epoch": 0.3741570491061326, + "grad_norm": 0.7374295563959377, + "learning_rate": 7.203223032378552e-06, + "loss": 0.3984, + "step": 8267 + }, + { + "epoch": 0.37420230821452816, + "grad_norm": 0.6887589709014885, + "learning_rate": 7.202565078967176e-06, + "loss": 0.3443, + "step": 8268 + }, + { + "epoch": 0.37424756732292375, + "grad_norm": 0.8163425475345482, + "learning_rate": 7.201907078229442e-06, + "loss": 0.3364, + "step": 8269 + }, + { + "epoch": 0.3742928264313193, + "grad_norm": 0.6668493386525876, + "learning_rate": 7.201249030179487e-06, + "loss": 0.3452, + "step": 8270 + }, + { + "epoch": 0.37433808553971487, + "grad_norm": 0.6931544278501053, + "learning_rate": 7.200590934831451e-06, + "loss": 0.3949, + "step": 8271 + }, + { + "epoch": 0.3743833446481104, + "grad_norm": 0.6957182852097032, + "learning_rate": 7.1999327921994735e-06, + "loss": 0.3592, + "step": 8272 + }, + { + "epoch": 0.374428603756506, + "grad_norm": 0.8084175042085993, + "learning_rate": 7.199274602297698e-06, + "loss": 0.2903, + "step": 8273 + }, + { + "epoch": 0.3744738628649016, + "grad_norm": 0.5942923954714772, + "learning_rate": 7.198616365140264e-06, + "loss": 0.3224, + "step": 8274 + }, + { + "epoch": 0.3745191219732971, + "grad_norm": 0.6934474912796738, + "learning_rate": 7.197958080741319e-06, + "loss": 0.3889, + "step": 8275 + }, + { + "epoch": 0.3745643810816927, + "grad_norm": 0.6741172782103588, + "learning_rate": 7.1972997491150046e-06, + "loss": 0.3787, + "step": 8276 + }, + { + "epoch": 0.37460964019008824, + "grad_norm": 0.7482604130933004, + "learning_rate": 7.196641370275467e-06, + "loss": 0.4912, + "step": 8277 + }, + { + "epoch": 0.37465489929848383, + "grad_norm": 0.6585374578882484, + "learning_rate": 7.195982944236853e-06, + "loss": 0.3698, + "step": 8278 + }, + { + "epoch": 0.37470015840687937, + "grad_norm": 0.6341317832916422, + "learning_rate": 7.195324471013309e-06, + "loss": 0.3301, + "step": 8279 + }, + { + "epoch": 0.37474541751527496, + "grad_norm": 0.6603462506256079, + "learning_rate": 7.194665950618986e-06, + "loss": 0.3892, + "step": 8280 + }, + { + "epoch": 0.3747906766236705, + "grad_norm": 0.6211242207857619, + "learning_rate": 7.194007383068031e-06, + "loss": 0.3192, + "step": 8281 + }, + { + "epoch": 0.3748359357320661, + "grad_norm": 0.7203308404997607, + "learning_rate": 7.193348768374595e-06, + "loss": 0.3717, + "step": 8282 + }, + { + "epoch": 0.3748811948404616, + "grad_norm": 0.6878629374944645, + "learning_rate": 7.192690106552833e-06, + "loss": 0.3376, + "step": 8283 + }, + { + "epoch": 0.3749264539488572, + "grad_norm": 0.6750065790075197, + "learning_rate": 7.1920313976168935e-06, + "loss": 0.3733, + "step": 8284 + }, + { + "epoch": 0.3749717130572528, + "grad_norm": 0.723135413194556, + "learning_rate": 7.191372641580931e-06, + "loss": 0.3331, + "step": 8285 + }, + { + "epoch": 0.37501697216564833, + "grad_norm": 0.5944902240791072, + "learning_rate": 7.190713838459101e-06, + "loss": 0.2828, + "step": 8286 + }, + { + "epoch": 0.3750622312740439, + "grad_norm": 0.6726537174612722, + "learning_rate": 7.190054988265559e-06, + "loss": 0.3305, + "step": 8287 + }, + { + "epoch": 0.37510749038243946, + "grad_norm": 0.6223775834738695, + "learning_rate": 7.189396091014462e-06, + "loss": 0.3667, + "step": 8288 + }, + { + "epoch": 0.37515274949083505, + "grad_norm": 0.6676977292485492, + "learning_rate": 7.188737146719967e-06, + "loss": 0.3757, + "step": 8289 + }, + { + "epoch": 0.3751980085992306, + "grad_norm": 0.6564201500650513, + "learning_rate": 7.188078155396232e-06, + "loss": 0.331, + "step": 8290 + }, + { + "epoch": 0.37524326770762617, + "grad_norm": 0.6770082220237673, + "learning_rate": 7.187419117057419e-06, + "loss": 0.3548, + "step": 8291 + }, + { + "epoch": 0.3752885268160217, + "grad_norm": 0.6494451263198261, + "learning_rate": 7.1867600317176875e-06, + "loss": 0.3185, + "step": 8292 + }, + { + "epoch": 0.3753337859244173, + "grad_norm": 0.7351655528808541, + "learning_rate": 7.186100899391198e-06, + "loss": 0.5227, + "step": 8293 + }, + { + "epoch": 0.37537904503281283, + "grad_norm": 0.6711745935920342, + "learning_rate": 7.185441720092114e-06, + "loss": 0.3454, + "step": 8294 + }, + { + "epoch": 0.3754243041412084, + "grad_norm": 0.6135920701861901, + "learning_rate": 7.1847824938346e-06, + "loss": 0.3194, + "step": 8295 + }, + { + "epoch": 0.375469563249604, + "grad_norm": 0.6005901565788068, + "learning_rate": 7.18412322063282e-06, + "loss": 0.391, + "step": 8296 + }, + { + "epoch": 0.37551482235799954, + "grad_norm": 0.6292338562077902, + "learning_rate": 7.183463900500941e-06, + "loss": 0.3364, + "step": 8297 + }, + { + "epoch": 0.37556008146639513, + "grad_norm": 0.34141101180389294, + "learning_rate": 7.182804533453127e-06, + "loss": 0.5301, + "step": 8298 + }, + { + "epoch": 0.37560534057479067, + "grad_norm": 0.6197088953157461, + "learning_rate": 7.182145119503549e-06, + "loss": 0.3473, + "step": 8299 + }, + { + "epoch": 0.37565059968318626, + "grad_norm": 0.33265449349879456, + "learning_rate": 7.181485658666375e-06, + "loss": 0.4761, + "step": 8300 + }, + { + "epoch": 0.3756958587915818, + "grad_norm": 0.640304362055324, + "learning_rate": 7.180826150955772e-06, + "loss": 0.3073, + "step": 8301 + }, + { + "epoch": 0.3757411178999774, + "grad_norm": 0.6860724167861825, + "learning_rate": 7.180166596385915e-06, + "loss": 0.4338, + "step": 8302 + }, + { + "epoch": 0.3757863770083729, + "grad_norm": 0.6019944334202502, + "learning_rate": 7.179506994970972e-06, + "loss": 0.3085, + "step": 8303 + }, + { + "epoch": 0.3758316361167685, + "grad_norm": 0.656435666847941, + "learning_rate": 7.178847346725119e-06, + "loss": 0.3716, + "step": 8304 + }, + { + "epoch": 0.37587689522516404, + "grad_norm": 0.6424257873205942, + "learning_rate": 7.178187651662527e-06, + "loss": 0.3585, + "step": 8305 + }, + { + "epoch": 0.37592215433355963, + "grad_norm": 0.6525988216561274, + "learning_rate": 7.177527909797373e-06, + "loss": 0.4092, + "step": 8306 + }, + { + "epoch": 0.37596741344195517, + "grad_norm": 0.679056074665069, + "learning_rate": 7.176868121143831e-06, + "loss": 0.3302, + "step": 8307 + }, + { + "epoch": 0.37601267255035076, + "grad_norm": 0.6405564739339455, + "learning_rate": 7.176208285716079e-06, + "loss": 0.3311, + "step": 8308 + }, + { + "epoch": 0.37605793165874635, + "grad_norm": 0.7141348763998681, + "learning_rate": 7.175548403528295e-06, + "loss": 0.399, + "step": 8309 + }, + { + "epoch": 0.3761031907671419, + "grad_norm": 0.7062128340100978, + "learning_rate": 7.174888474594659e-06, + "loss": 0.3938, + "step": 8310 + }, + { + "epoch": 0.37614844987553747, + "grad_norm": 0.6758179480303619, + "learning_rate": 7.174228498929347e-06, + "loss": 0.3702, + "step": 8311 + }, + { + "epoch": 0.376193708983933, + "grad_norm": 0.5505683467660143, + "learning_rate": 7.1735684765465444e-06, + "loss": 0.4848, + "step": 8312 + }, + { + "epoch": 0.3762389680923286, + "grad_norm": 0.45578075324415335, + "learning_rate": 7.172908407460429e-06, + "loss": 0.4858, + "step": 8313 + }, + { + "epoch": 0.37628422720072413, + "grad_norm": 0.7382639843039384, + "learning_rate": 7.172248291685187e-06, + "loss": 0.316, + "step": 8314 + }, + { + "epoch": 0.3763294863091197, + "grad_norm": 0.8101613462731673, + "learning_rate": 7.171588129234999e-06, + "loss": 0.3486, + "step": 8315 + }, + { + "epoch": 0.37637474541751526, + "grad_norm": 0.6506764101308028, + "learning_rate": 7.170927920124052e-06, + "loss": 0.3294, + "step": 8316 + }, + { + "epoch": 0.37642000452591085, + "grad_norm": 0.5881378075645671, + "learning_rate": 7.1702676643665325e-06, + "loss": 0.3211, + "step": 8317 + }, + { + "epoch": 0.3764652636343064, + "grad_norm": 0.6630934221929646, + "learning_rate": 7.169607361976627e-06, + "loss": 0.3051, + "step": 8318 + }, + { + "epoch": 0.37651052274270197, + "grad_norm": 0.6771901923424336, + "learning_rate": 7.16894701296852e-06, + "loss": 0.3842, + "step": 8319 + }, + { + "epoch": 0.37655578185109756, + "grad_norm": 0.6014323203917278, + "learning_rate": 7.168286617356406e-06, + "loss": 0.3436, + "step": 8320 + }, + { + "epoch": 0.3766010409594931, + "grad_norm": 0.6818623234030072, + "learning_rate": 7.1676261751544696e-06, + "loss": 0.3776, + "step": 8321 + }, + { + "epoch": 0.3766463000678887, + "grad_norm": 0.6291103487682812, + "learning_rate": 7.1669656863769055e-06, + "loss": 0.3312, + "step": 8322 + }, + { + "epoch": 0.3766915591762842, + "grad_norm": 0.5881389565856336, + "learning_rate": 7.166305151037905e-06, + "loss": 0.3592, + "step": 8323 + }, + { + "epoch": 0.3767368182846798, + "grad_norm": 0.6840081359541433, + "learning_rate": 7.165644569151658e-06, + "loss": 0.3734, + "step": 8324 + }, + { + "epoch": 0.37678207739307534, + "grad_norm": 0.6440224446417455, + "learning_rate": 7.1649839407323606e-06, + "loss": 0.3562, + "step": 8325 + }, + { + "epoch": 0.37682733650147093, + "grad_norm": 0.6498735609301556, + "learning_rate": 7.164323265794209e-06, + "loss": 0.363, + "step": 8326 + }, + { + "epoch": 0.37687259560986647, + "grad_norm": 0.641845120340163, + "learning_rate": 7.163662544351396e-06, + "loss": 0.3671, + "step": 8327 + }, + { + "epoch": 0.37691785471826206, + "grad_norm": 0.6982652323012907, + "learning_rate": 7.163001776418121e-06, + "loss": 0.3521, + "step": 8328 + }, + { + "epoch": 0.3769631138266576, + "grad_norm": 0.6243317055112345, + "learning_rate": 7.162340962008581e-06, + "loss": 0.3521, + "step": 8329 + }, + { + "epoch": 0.3770083729350532, + "grad_norm": 0.6649935914439344, + "learning_rate": 7.1616801011369755e-06, + "loss": 0.3369, + "step": 8330 + }, + { + "epoch": 0.3770536320434487, + "grad_norm": 0.6427306330278062, + "learning_rate": 7.161019193817503e-06, + "loss": 0.3221, + "step": 8331 + }, + { + "epoch": 0.3770988911518443, + "grad_norm": 0.6021879131102966, + "learning_rate": 7.1603582400643646e-06, + "loss": 0.3212, + "step": 8332 + }, + { + "epoch": 0.3771441502602399, + "grad_norm": 1.1681307049239376, + "learning_rate": 7.159697239891764e-06, + "loss": 0.4842, + "step": 8333 + }, + { + "epoch": 0.37718940936863543, + "grad_norm": 0.7385597893203592, + "learning_rate": 7.159036193313902e-06, + "loss": 0.3408, + "step": 8334 + }, + { + "epoch": 0.377234668477031, + "grad_norm": 0.6559267542328838, + "learning_rate": 7.158375100344983e-06, + "loss": 0.3593, + "step": 8335 + }, + { + "epoch": 0.37727992758542656, + "grad_norm": 0.6265308322487304, + "learning_rate": 7.157713960999212e-06, + "loss": 0.3517, + "step": 8336 + }, + { + "epoch": 0.37732518669382215, + "grad_norm": 0.6300808220892394, + "learning_rate": 7.157052775290795e-06, + "loss": 0.3205, + "step": 8337 + }, + { + "epoch": 0.3773704458022177, + "grad_norm": 0.7314504650540607, + "learning_rate": 7.156391543233938e-06, + "loss": 0.3674, + "step": 8338 + }, + { + "epoch": 0.37741570491061327, + "grad_norm": 0.625857506921556, + "learning_rate": 7.155730264842852e-06, + "loss": 0.3975, + "step": 8339 + }, + { + "epoch": 0.3774609640190088, + "grad_norm": 0.6983912978077121, + "learning_rate": 7.155068940131741e-06, + "loss": 0.3646, + "step": 8340 + }, + { + "epoch": 0.3775062231274044, + "grad_norm": 0.6330685645971086, + "learning_rate": 7.154407569114818e-06, + "loss": 0.3419, + "step": 8341 + }, + { + "epoch": 0.37755148223579993, + "grad_norm": 0.6612187254544507, + "learning_rate": 7.153746151806293e-06, + "loss": 0.4922, + "step": 8342 + }, + { + "epoch": 0.3775967413441955, + "grad_norm": 0.616178122020675, + "learning_rate": 7.153084688220379e-06, + "loss": 0.3582, + "step": 8343 + }, + { + "epoch": 0.3776420004525911, + "grad_norm": 0.5975510050770201, + "learning_rate": 7.152423178371286e-06, + "loss": 0.343, + "step": 8344 + }, + { + "epoch": 0.37768725956098664, + "grad_norm": 0.48499907097437206, + "learning_rate": 7.15176162227323e-06, + "loss": 0.4942, + "step": 8345 + }, + { + "epoch": 0.37773251866938223, + "grad_norm": 0.6269278584054762, + "learning_rate": 7.151100019940427e-06, + "loss": 0.3072, + "step": 8346 + }, + { + "epoch": 0.37777777777777777, + "grad_norm": 1.0530027938874988, + "learning_rate": 7.1504383713870895e-06, + "loss": 0.3565, + "step": 8347 + }, + { + "epoch": 0.37782303688617336, + "grad_norm": 0.6296408615092338, + "learning_rate": 7.149776676627436e-06, + "loss": 0.3581, + "step": 8348 + }, + { + "epoch": 0.3778682959945689, + "grad_norm": 0.38352804422665704, + "learning_rate": 7.149114935675685e-06, + "loss": 0.4654, + "step": 8349 + }, + { + "epoch": 0.3779135551029645, + "grad_norm": 0.3512682890162911, + "learning_rate": 7.148453148546055e-06, + "loss": 0.4577, + "step": 8350 + }, + { + "epoch": 0.37795881421136, + "grad_norm": 0.6552021817776984, + "learning_rate": 7.1477913152527635e-06, + "loss": 0.3564, + "step": 8351 + }, + { + "epoch": 0.3780040733197556, + "grad_norm": 0.6176790702307078, + "learning_rate": 7.1471294358100344e-06, + "loss": 0.3423, + "step": 8352 + }, + { + "epoch": 0.37804933242815114, + "grad_norm": 0.765039383704981, + "learning_rate": 7.146467510232088e-06, + "loss": 0.3545, + "step": 8353 + }, + { + "epoch": 0.37809459153654673, + "grad_norm": 0.6006632425414946, + "learning_rate": 7.145805538533146e-06, + "loss": 0.3008, + "step": 8354 + }, + { + "epoch": 0.3781398506449423, + "grad_norm": 0.6770490817071343, + "learning_rate": 7.145143520727434e-06, + "loss": 0.3485, + "step": 8355 + }, + { + "epoch": 0.37818510975333786, + "grad_norm": 0.7613669358884392, + "learning_rate": 7.144481456829178e-06, + "loss": 0.3462, + "step": 8356 + }, + { + "epoch": 0.37823036886173345, + "grad_norm": 0.6814959868650572, + "learning_rate": 7.1438193468525986e-06, + "loss": 0.3768, + "step": 8357 + }, + { + "epoch": 0.378275627970129, + "grad_norm": 0.5110195752010532, + "learning_rate": 7.143157190811927e-06, + "loss": 0.4857, + "step": 8358 + }, + { + "epoch": 0.37832088707852457, + "grad_norm": 0.6838577856291563, + "learning_rate": 7.14249498872139e-06, + "loss": 0.3829, + "step": 8359 + }, + { + "epoch": 0.3783661461869201, + "grad_norm": 0.746673282701903, + "learning_rate": 7.141832740595217e-06, + "loss": 0.3886, + "step": 8360 + }, + { + "epoch": 0.3784114052953157, + "grad_norm": 0.4042609860628919, + "learning_rate": 7.141170446447634e-06, + "loss": 0.4868, + "step": 8361 + }, + { + "epoch": 0.37845666440371123, + "grad_norm": 0.6604856741415678, + "learning_rate": 7.140508106292876e-06, + "loss": 0.3452, + "step": 8362 + }, + { + "epoch": 0.3785019235121068, + "grad_norm": 0.6152179001971164, + "learning_rate": 7.139845720145172e-06, + "loss": 0.3489, + "step": 8363 + }, + { + "epoch": 0.37854718262050235, + "grad_norm": 0.6226880833935763, + "learning_rate": 7.139183288018756e-06, + "loss": 0.3523, + "step": 8364 + }, + { + "epoch": 0.37859244172889794, + "grad_norm": 0.747242633729006, + "learning_rate": 7.13852080992786e-06, + "loss": 0.324, + "step": 8365 + }, + { + "epoch": 0.3786377008372935, + "grad_norm": 0.6172830431009755, + "learning_rate": 7.137858285886721e-06, + "loss": 0.3626, + "step": 8366 + }, + { + "epoch": 0.37868295994568907, + "grad_norm": 0.6480631555125602, + "learning_rate": 7.137195715909573e-06, + "loss": 0.4062, + "step": 8367 + }, + { + "epoch": 0.37872821905408466, + "grad_norm": 0.6270206468414993, + "learning_rate": 7.136533100010654e-06, + "loss": 0.3615, + "step": 8368 + }, + { + "epoch": 0.3787734781624802, + "grad_norm": 0.5960545309428391, + "learning_rate": 7.135870438204198e-06, + "loss": 0.3309, + "step": 8369 + }, + { + "epoch": 0.3788187372708758, + "grad_norm": 0.6132416090139773, + "learning_rate": 7.1352077305044485e-06, + "loss": 0.368, + "step": 8370 + }, + { + "epoch": 0.3788639963792713, + "grad_norm": 0.5923204264613755, + "learning_rate": 7.1345449769256416e-06, + "loss": 0.3448, + "step": 8371 + }, + { + "epoch": 0.3789092554876669, + "grad_norm": 0.522084772545119, + "learning_rate": 7.133882177482019e-06, + "loss": 0.4879, + "step": 8372 + }, + { + "epoch": 0.37895451459606244, + "grad_norm": 0.4472915138713644, + "learning_rate": 7.133219332187823e-06, + "loss": 0.4873, + "step": 8373 + }, + { + "epoch": 0.37899977370445803, + "grad_norm": 0.5607250276819523, + "learning_rate": 7.132556441057294e-06, + "loss": 0.2654, + "step": 8374 + }, + { + "epoch": 0.37904503281285357, + "grad_norm": 0.6821875143652115, + "learning_rate": 7.131893504104677e-06, + "loss": 0.3619, + "step": 8375 + }, + { + "epoch": 0.37909029192124916, + "grad_norm": 0.6041315305261291, + "learning_rate": 7.131230521344217e-06, + "loss": 0.353, + "step": 8376 + }, + { + "epoch": 0.3791355510296447, + "grad_norm": 0.6264230011816776, + "learning_rate": 7.130567492790157e-06, + "loss": 0.3891, + "step": 8377 + }, + { + "epoch": 0.3791808101380403, + "grad_norm": 0.6333311023261349, + "learning_rate": 7.129904418456745e-06, + "loss": 0.3123, + "step": 8378 + }, + { + "epoch": 0.37922606924643587, + "grad_norm": 0.699733653598983, + "learning_rate": 7.129241298358231e-06, + "loss": 0.3426, + "step": 8379 + }, + { + "epoch": 0.3792713283548314, + "grad_norm": 0.6604529388686061, + "learning_rate": 7.128578132508859e-06, + "loss": 0.3427, + "step": 8380 + }, + { + "epoch": 0.379316587463227, + "grad_norm": 0.6022601095193064, + "learning_rate": 7.127914920922883e-06, + "loss": 0.3482, + "step": 8381 + }, + { + "epoch": 0.37936184657162253, + "grad_norm": 0.7434609203247454, + "learning_rate": 7.127251663614547e-06, + "loss": 0.3629, + "step": 8382 + }, + { + "epoch": 0.3794071056800181, + "grad_norm": 0.6220340421846623, + "learning_rate": 7.126588360598109e-06, + "loss": 0.3559, + "step": 8383 + }, + { + "epoch": 0.37945236478841365, + "grad_norm": 0.6740928356810203, + "learning_rate": 7.125925011887818e-06, + "loss": 0.335, + "step": 8384 + }, + { + "epoch": 0.37949762389680924, + "grad_norm": 0.7475687257974962, + "learning_rate": 7.125261617497926e-06, + "loss": 0.3686, + "step": 8385 + }, + { + "epoch": 0.3795428830052048, + "grad_norm": 0.8291350729963307, + "learning_rate": 7.12459817744269e-06, + "loss": 0.4895, + "step": 8386 + }, + { + "epoch": 0.37958814211360037, + "grad_norm": 0.632240955498442, + "learning_rate": 7.123934691736365e-06, + "loss": 0.3739, + "step": 8387 + }, + { + "epoch": 0.3796334012219959, + "grad_norm": 0.6842845487643655, + "learning_rate": 7.123271160393206e-06, + "loss": 0.3869, + "step": 8388 + }, + { + "epoch": 0.3796786603303915, + "grad_norm": 0.6127478816563668, + "learning_rate": 7.122607583427472e-06, + "loss": 0.3238, + "step": 8389 + }, + { + "epoch": 0.3797239194387871, + "grad_norm": 0.6564050899226402, + "learning_rate": 7.121943960853418e-06, + "loss": 0.3773, + "step": 8390 + }, + { + "epoch": 0.3797691785471826, + "grad_norm": 0.6262183560049303, + "learning_rate": 7.121280292685307e-06, + "loss": 0.3562, + "step": 8391 + }, + { + "epoch": 0.3798144376555782, + "grad_norm": 0.636568518471423, + "learning_rate": 7.120616578937397e-06, + "loss": 0.3793, + "step": 8392 + }, + { + "epoch": 0.37985969676397374, + "grad_norm": 0.6503707596443266, + "learning_rate": 7.1199528196239495e-06, + "loss": 0.3582, + "step": 8393 + }, + { + "epoch": 0.37990495587236933, + "grad_norm": 0.648733837093838, + "learning_rate": 7.119289014759228e-06, + "loss": 0.3641, + "step": 8394 + }, + { + "epoch": 0.37995021498076487, + "grad_norm": 0.6033527463965486, + "learning_rate": 7.118625164357493e-06, + "loss": 0.3614, + "step": 8395 + }, + { + "epoch": 0.37999547408916046, + "grad_norm": 0.6515459622908713, + "learning_rate": 7.117961268433012e-06, + "loss": 0.387, + "step": 8396 + }, + { + "epoch": 0.380040733197556, + "grad_norm": 0.4871667020278753, + "learning_rate": 7.117297327000046e-06, + "loss": 0.4947, + "step": 8397 + }, + { + "epoch": 0.3800859923059516, + "grad_norm": 0.6267015745529267, + "learning_rate": 7.116633340072863e-06, + "loss": 0.3506, + "step": 8398 + }, + { + "epoch": 0.3801312514143471, + "grad_norm": 0.6194526502875497, + "learning_rate": 7.115969307665733e-06, + "loss": 0.3351, + "step": 8399 + }, + { + "epoch": 0.3801765105227427, + "grad_norm": 0.6102449278498785, + "learning_rate": 7.115305229792918e-06, + "loss": 0.3439, + "step": 8400 + }, + { + "epoch": 0.38022176963113824, + "grad_norm": 0.7654057374127621, + "learning_rate": 7.114641106468692e-06, + "loss": 0.3679, + "step": 8401 + }, + { + "epoch": 0.38026702873953383, + "grad_norm": 0.6773434947716912, + "learning_rate": 7.113976937707324e-06, + "loss": 0.3582, + "step": 8402 + }, + { + "epoch": 0.3803122878479294, + "grad_norm": 0.31799243148880396, + "learning_rate": 7.1133127235230825e-06, + "loss": 0.4777, + "step": 8403 + }, + { + "epoch": 0.38035754695632495, + "grad_norm": 0.6289377071621696, + "learning_rate": 7.1126484639302425e-06, + "loss": 0.3615, + "step": 8404 + }, + { + "epoch": 0.38040280606472054, + "grad_norm": 0.6278500233355753, + "learning_rate": 7.111984158943075e-06, + "loss": 0.3255, + "step": 8405 + }, + { + "epoch": 0.3804480651731161, + "grad_norm": 0.6312913692634605, + "learning_rate": 7.1113198085758535e-06, + "loss": 0.3665, + "step": 8406 + }, + { + "epoch": 0.38049332428151167, + "grad_norm": 0.6265823361581598, + "learning_rate": 7.110655412842855e-06, + "loss": 0.3658, + "step": 8407 + }, + { + "epoch": 0.3805385833899072, + "grad_norm": 0.6374687650436551, + "learning_rate": 7.109990971758354e-06, + "loss": 0.3447, + "step": 8408 + }, + { + "epoch": 0.3805838424983028, + "grad_norm": 0.6293191892231478, + "learning_rate": 7.109326485336626e-06, + "loss": 0.337, + "step": 8409 + }, + { + "epoch": 0.38062910160669833, + "grad_norm": 0.7138126845674831, + "learning_rate": 7.108661953591953e-06, + "loss": 0.3398, + "step": 8410 + }, + { + "epoch": 0.3806743607150939, + "grad_norm": 0.7703350926583172, + "learning_rate": 7.107997376538606e-06, + "loss": 0.3442, + "step": 8411 + }, + { + "epoch": 0.38071961982348945, + "grad_norm": 0.658339137124922, + "learning_rate": 7.107332754190874e-06, + "loss": 0.3825, + "step": 8412 + }, + { + "epoch": 0.38076487893188504, + "grad_norm": 0.6314047841513415, + "learning_rate": 7.1066680865630335e-06, + "loss": 0.4002, + "step": 8413 + }, + { + "epoch": 0.38081013804028063, + "grad_norm": 0.6477014404457618, + "learning_rate": 7.106003373669363e-06, + "loss": 0.3591, + "step": 8414 + }, + { + "epoch": 0.38085539714867617, + "grad_norm": 0.6041802367550553, + "learning_rate": 7.10533861552415e-06, + "loss": 0.33, + "step": 8415 + }, + { + "epoch": 0.38090065625707176, + "grad_norm": 0.6430900504898354, + "learning_rate": 7.104673812141676e-06, + "loss": 0.3893, + "step": 8416 + }, + { + "epoch": 0.3809459153654673, + "grad_norm": 0.6314476922290938, + "learning_rate": 7.104008963536224e-06, + "loss": 0.3551, + "step": 8417 + }, + { + "epoch": 0.3809911744738629, + "grad_norm": 0.7428431343765651, + "learning_rate": 7.1033440697220845e-06, + "loss": 0.3344, + "step": 8418 + }, + { + "epoch": 0.3810364335822584, + "grad_norm": 0.6049014863415836, + "learning_rate": 7.102679130713538e-06, + "loss": 0.354, + "step": 8419 + }, + { + "epoch": 0.381081692690654, + "grad_norm": 0.6490441683398328, + "learning_rate": 7.102014146524877e-06, + "loss": 0.3793, + "step": 8420 + }, + { + "epoch": 0.38112695179904954, + "grad_norm": 0.6654902860757159, + "learning_rate": 7.101349117170386e-06, + "loss": 0.3522, + "step": 8421 + }, + { + "epoch": 0.38117221090744513, + "grad_norm": 0.6555822112102216, + "learning_rate": 7.1006840426643576e-06, + "loss": 0.3493, + "step": 8422 + }, + { + "epoch": 0.38121747001584066, + "grad_norm": 0.6365048153101841, + "learning_rate": 7.10001892302108e-06, + "loss": 0.3384, + "step": 8423 + }, + { + "epoch": 0.38126272912423625, + "grad_norm": 0.6887085895296732, + "learning_rate": 7.099353758254846e-06, + "loss": 0.3476, + "step": 8424 + }, + { + "epoch": 0.38130798823263184, + "grad_norm": 0.6080229886754357, + "learning_rate": 7.0986885483799475e-06, + "loss": 0.3124, + "step": 8425 + }, + { + "epoch": 0.3813532473410274, + "grad_norm": 0.647329971750059, + "learning_rate": 7.098023293410677e-06, + "loss": 0.3001, + "step": 8426 + }, + { + "epoch": 0.38139850644942297, + "grad_norm": 0.6508641604197013, + "learning_rate": 7.09735799336133e-06, + "loss": 0.3491, + "step": 8427 + }, + { + "epoch": 0.3814437655578185, + "grad_norm": 0.6191855403182719, + "learning_rate": 7.096692648246203e-06, + "loss": 0.327, + "step": 8428 + }, + { + "epoch": 0.3814890246662141, + "grad_norm": 1.0524445853788542, + "learning_rate": 7.096027258079587e-06, + "loss": 0.3401, + "step": 8429 + }, + { + "epoch": 0.38153428377460963, + "grad_norm": 0.6245006137232687, + "learning_rate": 7.095361822875786e-06, + "loss": 0.3223, + "step": 8430 + }, + { + "epoch": 0.3815795428830052, + "grad_norm": 0.6383255613126531, + "learning_rate": 7.094696342649092e-06, + "loss": 0.3883, + "step": 8431 + }, + { + "epoch": 0.38162480199140075, + "grad_norm": 0.6874734938881214, + "learning_rate": 7.094030817413808e-06, + "loss": 0.3414, + "step": 8432 + }, + { + "epoch": 0.38167006109979634, + "grad_norm": 0.5845736916154386, + "learning_rate": 7.093365247184234e-06, + "loss": 0.3289, + "step": 8433 + }, + { + "epoch": 0.3817153202081919, + "grad_norm": 0.5131578573043148, + "learning_rate": 7.09269963197467e-06, + "loss": 0.4684, + "step": 8434 + }, + { + "epoch": 0.38176057931658747, + "grad_norm": 0.6426976817130162, + "learning_rate": 7.092033971799417e-06, + "loss": 0.4068, + "step": 8435 + }, + { + "epoch": 0.381805838424983, + "grad_norm": 0.3146884550883189, + "learning_rate": 7.09136826667278e-06, + "loss": 0.4816, + "step": 8436 + }, + { + "epoch": 0.3818510975333786, + "grad_norm": 0.8773610470531031, + "learning_rate": 7.0907025166090615e-06, + "loss": 0.3525, + "step": 8437 + }, + { + "epoch": 0.3818963566417742, + "grad_norm": 0.6356233415643519, + "learning_rate": 7.090036721622567e-06, + "loss": 0.3579, + "step": 8438 + }, + { + "epoch": 0.3819416157501697, + "grad_norm": 0.7375475936847615, + "learning_rate": 7.089370881727604e-06, + "loss": 0.3432, + "step": 8439 + }, + { + "epoch": 0.3819868748585653, + "grad_norm": 0.6364433797110283, + "learning_rate": 7.0887049969384756e-06, + "loss": 0.3353, + "step": 8440 + }, + { + "epoch": 0.38203213396696084, + "grad_norm": 0.5537782812413454, + "learning_rate": 7.088039067269493e-06, + "loss": 0.2949, + "step": 8441 + }, + { + "epoch": 0.38207739307535643, + "grad_norm": 0.6125051256367956, + "learning_rate": 7.087373092734964e-06, + "loss": 0.355, + "step": 8442 + }, + { + "epoch": 0.38212265218375197, + "grad_norm": 0.5627303676401337, + "learning_rate": 7.086707073349197e-06, + "loss": 0.4963, + "step": 8443 + }, + { + "epoch": 0.38216791129214756, + "grad_norm": 0.6606337857741633, + "learning_rate": 7.086041009126504e-06, + "loss": 0.3288, + "step": 8444 + }, + { + "epoch": 0.3822131704005431, + "grad_norm": 0.6542767650447026, + "learning_rate": 7.0853749000811965e-06, + "loss": 0.317, + "step": 8445 + }, + { + "epoch": 0.3822584295089387, + "grad_norm": 0.40963340986258956, + "learning_rate": 7.084708746227589e-06, + "loss": 0.4834, + "step": 8446 + }, + { + "epoch": 0.3823036886173342, + "grad_norm": 0.5833579676406702, + "learning_rate": 7.084042547579992e-06, + "loss": 0.3572, + "step": 8447 + }, + { + "epoch": 0.3823489477257298, + "grad_norm": 0.5936420943321252, + "learning_rate": 7.08337630415272e-06, + "loss": 0.3346, + "step": 8448 + }, + { + "epoch": 0.3823942068341254, + "grad_norm": 0.6331350642510168, + "learning_rate": 7.082710015960091e-06, + "loss": 0.36, + "step": 8449 + }, + { + "epoch": 0.38243946594252093, + "grad_norm": 0.6776388853726919, + "learning_rate": 7.08204368301642e-06, + "loss": 0.3742, + "step": 8450 + }, + { + "epoch": 0.3824847250509165, + "grad_norm": 0.6536913237318658, + "learning_rate": 7.081377305336025e-06, + "loss": 0.3375, + "step": 8451 + }, + { + "epoch": 0.38252998415931205, + "grad_norm": 0.6744660832075964, + "learning_rate": 7.080710882933225e-06, + "loss": 0.3457, + "step": 8452 + }, + { + "epoch": 0.38257524326770764, + "grad_norm": 0.620253089835594, + "learning_rate": 7.080044415822337e-06, + "loss": 0.3678, + "step": 8453 + }, + { + "epoch": 0.3826205023761032, + "grad_norm": 0.6134172858330584, + "learning_rate": 7.079377904017683e-06, + "loss": 0.3096, + "step": 8454 + }, + { + "epoch": 0.38266576148449877, + "grad_norm": 0.6199238081498524, + "learning_rate": 7.078711347533585e-06, + "loss": 0.3486, + "step": 8455 + }, + { + "epoch": 0.3827110205928943, + "grad_norm": 0.6391699024069745, + "learning_rate": 7.078044746384365e-06, + "loss": 0.3599, + "step": 8456 + }, + { + "epoch": 0.3827562797012899, + "grad_norm": 0.6803022292367258, + "learning_rate": 7.077378100584344e-06, + "loss": 0.3583, + "step": 8457 + }, + { + "epoch": 0.3828015388096854, + "grad_norm": 0.7097666551654821, + "learning_rate": 7.076711410147849e-06, + "loss": 0.5134, + "step": 8458 + }, + { + "epoch": 0.382846797918081, + "grad_norm": 0.6725298205711432, + "learning_rate": 7.076044675089203e-06, + "loss": 0.3769, + "step": 8459 + }, + { + "epoch": 0.38289205702647655, + "grad_norm": 0.7210712947175153, + "learning_rate": 7.075377895422735e-06, + "loss": 0.3512, + "step": 8460 + }, + { + "epoch": 0.38293731613487214, + "grad_norm": 0.5975178708495634, + "learning_rate": 7.074711071162768e-06, + "loss": 0.3757, + "step": 8461 + }, + { + "epoch": 0.38298257524326773, + "grad_norm": 0.5915727400449279, + "learning_rate": 7.074044202323632e-06, + "loss": 0.3438, + "step": 8462 + }, + { + "epoch": 0.38302783435166327, + "grad_norm": 0.6853253841017686, + "learning_rate": 7.073377288919657e-06, + "loss": 0.3433, + "step": 8463 + }, + { + "epoch": 0.38307309346005886, + "grad_norm": 2.0075612522721986, + "learning_rate": 7.072710330965171e-06, + "loss": 0.3868, + "step": 8464 + }, + { + "epoch": 0.3831183525684544, + "grad_norm": 0.649754434745739, + "learning_rate": 7.072043328474507e-06, + "loss": 0.3257, + "step": 8465 + }, + { + "epoch": 0.38316361167685, + "grad_norm": 0.5866531935730779, + "learning_rate": 7.071376281461994e-06, + "loss": 0.3428, + "step": 8466 + }, + { + "epoch": 0.3832088707852455, + "grad_norm": 0.6168824482768775, + "learning_rate": 7.0707091899419685e-06, + "loss": 0.3747, + "step": 8467 + }, + { + "epoch": 0.3832541298936411, + "grad_norm": 0.6362663525465235, + "learning_rate": 7.070042053928763e-06, + "loss": 0.3468, + "step": 8468 + }, + { + "epoch": 0.38329938900203664, + "grad_norm": 0.6595243836995929, + "learning_rate": 7.0693748734367076e-06, + "loss": 0.3614, + "step": 8469 + }, + { + "epoch": 0.38334464811043223, + "grad_norm": 0.6817648534737756, + "learning_rate": 7.068707648480145e-06, + "loss": 0.3327, + "step": 8470 + }, + { + "epoch": 0.38338990721882776, + "grad_norm": 0.6195886664641986, + "learning_rate": 7.068040379073406e-06, + "loss": 0.3607, + "step": 8471 + }, + { + "epoch": 0.38343516632722335, + "grad_norm": 0.6799828912841049, + "learning_rate": 7.067373065230834e-06, + "loss": 0.3758, + "step": 8472 + }, + { + "epoch": 0.38348042543561894, + "grad_norm": 0.6652330351884505, + "learning_rate": 7.0667057069667625e-06, + "loss": 0.3508, + "step": 8473 + }, + { + "epoch": 0.3835256845440145, + "grad_norm": 0.704107025489184, + "learning_rate": 7.066038304295533e-06, + "loss": 0.3389, + "step": 8474 + }, + { + "epoch": 0.38357094365241007, + "grad_norm": 0.5711102874307746, + "learning_rate": 7.065370857231484e-06, + "loss": 0.3261, + "step": 8475 + }, + { + "epoch": 0.3836162027608056, + "grad_norm": 0.7373487417259906, + "learning_rate": 7.064703365788961e-06, + "loss": 0.372, + "step": 8476 + }, + { + "epoch": 0.3836614618692012, + "grad_norm": 0.616650070917948, + "learning_rate": 7.064035829982302e-06, + "loss": 0.2955, + "step": 8477 + }, + { + "epoch": 0.3837067209775967, + "grad_norm": 1.5530480359895966, + "learning_rate": 7.063368249825855e-06, + "loss": 0.3735, + "step": 8478 + }, + { + "epoch": 0.3837519800859923, + "grad_norm": 0.5846369271162104, + "learning_rate": 7.062700625333958e-06, + "loss": 0.4856, + "step": 8479 + }, + { + "epoch": 0.38379723919438785, + "grad_norm": 0.5626364168272818, + "learning_rate": 7.0620329565209625e-06, + "loss": 0.4911, + "step": 8480 + }, + { + "epoch": 0.38384249830278344, + "grad_norm": 0.6561269380631167, + "learning_rate": 7.06136524340121e-06, + "loss": 0.3438, + "step": 8481 + }, + { + "epoch": 0.383887757411179, + "grad_norm": 0.7824785353303435, + "learning_rate": 7.06069748598905e-06, + "loss": 0.3875, + "step": 8482 + }, + { + "epoch": 0.38393301651957457, + "grad_norm": 0.6290979318250164, + "learning_rate": 7.0600296842988305e-06, + "loss": 0.3339, + "step": 8483 + }, + { + "epoch": 0.38397827562797016, + "grad_norm": 0.6420260562777924, + "learning_rate": 7.0593618383448995e-06, + "loss": 0.3701, + "step": 8484 + }, + { + "epoch": 0.3840235347363657, + "grad_norm": 0.6896202946580808, + "learning_rate": 7.0586939481416065e-06, + "loss": 0.3389, + "step": 8485 + }, + { + "epoch": 0.3840687938447613, + "grad_norm": 0.6372493711656572, + "learning_rate": 7.058026013703304e-06, + "loss": 0.3515, + "step": 8486 + }, + { + "epoch": 0.3841140529531568, + "grad_norm": 0.7460403703740545, + "learning_rate": 7.057358035044344e-06, + "loss": 0.4759, + "step": 8487 + }, + { + "epoch": 0.3841593120615524, + "grad_norm": 0.6708392710946648, + "learning_rate": 7.0566900121790775e-06, + "loss": 0.3474, + "step": 8488 + }, + { + "epoch": 0.38420457116994794, + "grad_norm": 0.6826571122233077, + "learning_rate": 7.05602194512186e-06, + "loss": 0.3688, + "step": 8489 + }, + { + "epoch": 0.38424983027834353, + "grad_norm": 0.6349015181977148, + "learning_rate": 7.055353833887045e-06, + "loss": 0.3285, + "step": 8490 + }, + { + "epoch": 0.38429508938673906, + "grad_norm": 0.6544983112078551, + "learning_rate": 7.054685678488991e-06, + "loss": 0.3661, + "step": 8491 + }, + { + "epoch": 0.38434034849513465, + "grad_norm": 0.6218008745167531, + "learning_rate": 7.054017478942048e-06, + "loss": 0.3361, + "step": 8492 + }, + { + "epoch": 0.3843856076035302, + "grad_norm": 0.6182357777520696, + "learning_rate": 7.05334923526058e-06, + "loss": 0.3383, + "step": 8493 + }, + { + "epoch": 0.3844308667119258, + "grad_norm": 0.37265872321485444, + "learning_rate": 7.052680947458944e-06, + "loss": 0.4587, + "step": 8494 + }, + { + "epoch": 0.3844761258203213, + "grad_norm": 0.7152783685098152, + "learning_rate": 7.052012615551498e-06, + "loss": 0.3411, + "step": 8495 + }, + { + "epoch": 0.3845213849287169, + "grad_norm": 0.6712838532770757, + "learning_rate": 7.051344239552603e-06, + "loss": 0.3328, + "step": 8496 + }, + { + "epoch": 0.3845666440371125, + "grad_norm": 0.6847543279933849, + "learning_rate": 7.050675819476623e-06, + "loss": 0.3337, + "step": 8497 + }, + { + "epoch": 0.384611903145508, + "grad_norm": 0.6356767753615933, + "learning_rate": 7.0500073553379136e-06, + "loss": 0.3469, + "step": 8498 + }, + { + "epoch": 0.3846571622539036, + "grad_norm": 0.6343150294252268, + "learning_rate": 7.049338847150845e-06, + "loss": 0.364, + "step": 8499 + }, + { + "epoch": 0.38470242136229915, + "grad_norm": 0.6221401451155776, + "learning_rate": 7.048670294929777e-06, + "loss": 0.3565, + "step": 8500 + }, + { + "epoch": 0.38474768047069474, + "grad_norm": 0.38006128196027994, + "learning_rate": 7.0480016986890775e-06, + "loss": 0.4921, + "step": 8501 + }, + { + "epoch": 0.3847929395790903, + "grad_norm": 0.6303947949330196, + "learning_rate": 7.047333058443111e-06, + "loss": 0.3808, + "step": 8502 + }, + { + "epoch": 0.38483819868748587, + "grad_norm": 0.3335813258860991, + "learning_rate": 7.046664374206246e-06, + "loss": 0.4623, + "step": 8503 + }, + { + "epoch": 0.3848834577958814, + "grad_norm": 0.6438951118125865, + "learning_rate": 7.045995645992848e-06, + "loss": 0.3664, + "step": 8504 + }, + { + "epoch": 0.384928716904277, + "grad_norm": 1.1040704150941836, + "learning_rate": 7.045326873817289e-06, + "loss": 0.3756, + "step": 8505 + }, + { + "epoch": 0.3849739760126725, + "grad_norm": 0.7376731985285496, + "learning_rate": 7.0446580576939346e-06, + "loss": 0.3627, + "step": 8506 + }, + { + "epoch": 0.3850192351210681, + "grad_norm": 0.31769986927945976, + "learning_rate": 7.043989197637161e-06, + "loss": 0.5044, + "step": 8507 + }, + { + "epoch": 0.3850644942294637, + "grad_norm": 0.6709946462444102, + "learning_rate": 7.043320293661335e-06, + "loss": 0.3845, + "step": 8508 + }, + { + "epoch": 0.38510975333785924, + "grad_norm": 0.6800794170963768, + "learning_rate": 7.0426513457808334e-06, + "loss": 0.3091, + "step": 8509 + }, + { + "epoch": 0.38515501244625483, + "grad_norm": 0.6348048129635555, + "learning_rate": 7.041982354010026e-06, + "loss": 0.3342, + "step": 8510 + }, + { + "epoch": 0.38520027155465036, + "grad_norm": 0.3253351493544958, + "learning_rate": 7.041313318363291e-06, + "loss": 0.4947, + "step": 8511 + }, + { + "epoch": 0.38524553066304595, + "grad_norm": 0.6502405066779691, + "learning_rate": 7.0406442388550016e-06, + "loss": 0.3766, + "step": 8512 + }, + { + "epoch": 0.3852907897714415, + "grad_norm": 0.7237918307112422, + "learning_rate": 7.039975115499534e-06, + "loss": 0.3657, + "step": 8513 + }, + { + "epoch": 0.3853360488798371, + "grad_norm": 0.2887575351537154, + "learning_rate": 7.039305948311268e-06, + "loss": 0.5045, + "step": 8514 + }, + { + "epoch": 0.3853813079882326, + "grad_norm": 0.2819195233385298, + "learning_rate": 7.038636737304578e-06, + "loss": 0.4686, + "step": 8515 + }, + { + "epoch": 0.3854265670966282, + "grad_norm": 0.6932569182724204, + "learning_rate": 7.037967482493848e-06, + "loss": 0.2888, + "step": 8516 + }, + { + "epoch": 0.38547182620502374, + "grad_norm": 0.6763109238832349, + "learning_rate": 7.037298183893455e-06, + "loss": 0.3678, + "step": 8517 + }, + { + "epoch": 0.3855170853134193, + "grad_norm": 0.6190293228159512, + "learning_rate": 7.036628841517783e-06, + "loss": 0.3418, + "step": 8518 + }, + { + "epoch": 0.3855623444218149, + "grad_norm": 0.6782949993433552, + "learning_rate": 7.03595945538121e-06, + "loss": 0.2971, + "step": 8519 + }, + { + "epoch": 0.38560760353021045, + "grad_norm": 0.73709177330135, + "learning_rate": 7.035290025498121e-06, + "loss": 0.3748, + "step": 8520 + }, + { + "epoch": 0.38565286263860604, + "grad_norm": 0.34987588010434106, + "learning_rate": 7.0346205518829015e-06, + "loss": 0.5048, + "step": 8521 + }, + { + "epoch": 0.3856981217470016, + "grad_norm": 0.6244611959658506, + "learning_rate": 7.033951034549935e-06, + "loss": 0.3662, + "step": 8522 + }, + { + "epoch": 0.38574338085539717, + "grad_norm": 0.30869497121445927, + "learning_rate": 7.033281473513608e-06, + "loss": 0.4954, + "step": 8523 + }, + { + "epoch": 0.3857886399637927, + "grad_norm": 0.6578803599813655, + "learning_rate": 7.032611868788306e-06, + "loss": 0.3213, + "step": 8524 + }, + { + "epoch": 0.3858338990721883, + "grad_norm": 0.6887830147701085, + "learning_rate": 7.031942220388418e-06, + "loss": 0.3556, + "step": 8525 + }, + { + "epoch": 0.3858791581805838, + "grad_norm": 0.6383950468224833, + "learning_rate": 7.031272528328332e-06, + "loss": 0.3319, + "step": 8526 + }, + { + "epoch": 0.3859244172889794, + "grad_norm": 0.6276590559652488, + "learning_rate": 7.030602792622439e-06, + "loss": 0.3399, + "step": 8527 + }, + { + "epoch": 0.38596967639737495, + "grad_norm": 0.6297693881364205, + "learning_rate": 7.029933013285127e-06, + "loss": 0.3898, + "step": 8528 + }, + { + "epoch": 0.38601493550577054, + "grad_norm": 0.6446800167295498, + "learning_rate": 7.0292631903307895e-06, + "loss": 0.3021, + "step": 8529 + }, + { + "epoch": 0.3860601946141661, + "grad_norm": 0.6276385632566854, + "learning_rate": 7.028593323773819e-06, + "loss": 0.3825, + "step": 8530 + }, + { + "epoch": 0.38610545372256166, + "grad_norm": 0.8475822563749026, + "learning_rate": 7.027923413628608e-06, + "loss": 0.3399, + "step": 8531 + }, + { + "epoch": 0.38615071283095725, + "grad_norm": 0.5886263362806824, + "learning_rate": 7.027253459909551e-06, + "loss": 0.3282, + "step": 8532 + }, + { + "epoch": 0.3861959719393528, + "grad_norm": 0.5911191875338679, + "learning_rate": 7.026583462631044e-06, + "loss": 0.3683, + "step": 8533 + }, + { + "epoch": 0.3862412310477484, + "grad_norm": 0.5994274018430233, + "learning_rate": 7.025913421807482e-06, + "loss": 0.3613, + "step": 8534 + }, + { + "epoch": 0.3862864901561439, + "grad_norm": 0.6328861964249055, + "learning_rate": 7.025243337453263e-06, + "loss": 0.296, + "step": 8535 + }, + { + "epoch": 0.3863317492645395, + "grad_norm": 0.7749002298447163, + "learning_rate": 7.024573209582783e-06, + "loss": 0.3296, + "step": 8536 + }, + { + "epoch": 0.38637700837293504, + "grad_norm": 0.6125646428390575, + "learning_rate": 7.0239030382104445e-06, + "loss": 0.3091, + "step": 8537 + }, + { + "epoch": 0.38642226748133063, + "grad_norm": 0.6023846423567603, + "learning_rate": 7.023232823350646e-06, + "loss": 0.345, + "step": 8538 + }, + { + "epoch": 0.38646752658972616, + "grad_norm": 0.6524052798822022, + "learning_rate": 7.022562565017788e-06, + "loss": 0.351, + "step": 8539 + }, + { + "epoch": 0.38651278569812175, + "grad_norm": 0.6046955830979769, + "learning_rate": 7.021892263226271e-06, + "loss": 0.3365, + "step": 8540 + }, + { + "epoch": 0.3865580448065173, + "grad_norm": 0.46937747458957113, + "learning_rate": 7.0212219179904996e-06, + "loss": 0.5064, + "step": 8541 + }, + { + "epoch": 0.3866033039149129, + "grad_norm": 0.583346339177339, + "learning_rate": 7.020551529324877e-06, + "loss": 0.3157, + "step": 8542 + }, + { + "epoch": 0.38664856302330847, + "grad_norm": 0.637042671091487, + "learning_rate": 7.019881097243808e-06, + "loss": 0.3187, + "step": 8543 + }, + { + "epoch": 0.386693822131704, + "grad_norm": 0.6792643299062936, + "learning_rate": 7.019210621761698e-06, + "loss": 0.3934, + "step": 8544 + }, + { + "epoch": 0.3867390812400996, + "grad_norm": 0.6082188060902372, + "learning_rate": 7.018540102892952e-06, + "loss": 0.3355, + "step": 8545 + }, + { + "epoch": 0.3867843403484951, + "grad_norm": 0.6264804930140524, + "learning_rate": 7.017869540651979e-06, + "loss": 0.3702, + "step": 8546 + }, + { + "epoch": 0.3868295994568907, + "grad_norm": 0.2948388318861362, + "learning_rate": 7.017198935053189e-06, + "loss": 0.4738, + "step": 8547 + }, + { + "epoch": 0.38687485856528625, + "grad_norm": 0.6400740222345551, + "learning_rate": 7.016528286110986e-06, + "loss": 0.3663, + "step": 8548 + }, + { + "epoch": 0.38692011767368184, + "grad_norm": 0.6173682417693278, + "learning_rate": 7.0158575938397856e-06, + "loss": 0.3538, + "step": 8549 + }, + { + "epoch": 0.3869653767820774, + "grad_norm": 0.6417881388009231, + "learning_rate": 7.015186858253995e-06, + "loss": 0.377, + "step": 8550 + }, + { + "epoch": 0.38701063589047296, + "grad_norm": 0.7749490309522027, + "learning_rate": 7.01451607936803e-06, + "loss": 0.3569, + "step": 8551 + }, + { + "epoch": 0.3870558949988685, + "grad_norm": 0.6523873906515983, + "learning_rate": 7.013845257196301e-06, + "loss": 0.3697, + "step": 8552 + }, + { + "epoch": 0.3871011541072641, + "grad_norm": 0.6536056364088692, + "learning_rate": 7.013174391753222e-06, + "loss": 0.3814, + "step": 8553 + }, + { + "epoch": 0.3871464132156596, + "grad_norm": 0.6198002088630558, + "learning_rate": 7.012503483053209e-06, + "loss": 0.3477, + "step": 8554 + }, + { + "epoch": 0.3871916723240552, + "grad_norm": 0.6534747588713392, + "learning_rate": 7.0118325311106774e-06, + "loss": 0.3823, + "step": 8555 + }, + { + "epoch": 0.3872369314324508, + "grad_norm": 0.5953562968043516, + "learning_rate": 7.011161535940042e-06, + "loss": 0.3591, + "step": 8556 + }, + { + "epoch": 0.38728219054084634, + "grad_norm": 0.7085087214260861, + "learning_rate": 7.0104904975557245e-06, + "loss": 0.4196, + "step": 8557 + }, + { + "epoch": 0.38732744964924193, + "grad_norm": 0.6173288924149105, + "learning_rate": 7.009819415972136e-06, + "loss": 0.3636, + "step": 8558 + }, + { + "epoch": 0.38737270875763746, + "grad_norm": 0.39121363517546165, + "learning_rate": 7.009148291203707e-06, + "loss": 0.4874, + "step": 8559 + }, + { + "epoch": 0.38741796786603305, + "grad_norm": 0.35145612881645605, + "learning_rate": 7.008477123264849e-06, + "loss": 0.4868, + "step": 8560 + }, + { + "epoch": 0.3874632269744286, + "grad_norm": 0.6610092433572923, + "learning_rate": 7.007805912169985e-06, + "loss": 0.3342, + "step": 8561 + }, + { + "epoch": 0.3875084860828242, + "grad_norm": 0.2939613276299066, + "learning_rate": 7.00713465793354e-06, + "loss": 0.4758, + "step": 8562 + }, + { + "epoch": 0.3875537451912197, + "grad_norm": 0.7630658549598067, + "learning_rate": 7.006463360569935e-06, + "loss": 0.3631, + "step": 8563 + }, + { + "epoch": 0.3875990042996153, + "grad_norm": 0.6235937228967168, + "learning_rate": 7.005792020093596e-06, + "loss": 0.343, + "step": 8564 + }, + { + "epoch": 0.38764426340801084, + "grad_norm": 0.5722417308890443, + "learning_rate": 7.005120636518945e-06, + "loss": 0.2882, + "step": 8565 + }, + { + "epoch": 0.3876895225164064, + "grad_norm": 0.7099392150866518, + "learning_rate": 7.004449209860411e-06, + "loss": 0.3704, + "step": 8566 + }, + { + "epoch": 0.387734781624802, + "grad_norm": 0.6039098044963439, + "learning_rate": 7.003777740132419e-06, + "loss": 0.3677, + "step": 8567 + }, + { + "epoch": 0.38778004073319755, + "grad_norm": 0.4846309482443427, + "learning_rate": 7.003106227349399e-06, + "loss": 0.4731, + "step": 8568 + }, + { + "epoch": 0.38782529984159314, + "grad_norm": 0.6206185445623791, + "learning_rate": 7.002434671525776e-06, + "loss": 0.316, + "step": 8569 + }, + { + "epoch": 0.3878705589499887, + "grad_norm": 0.6252646740494667, + "learning_rate": 7.001763072675984e-06, + "loss": 0.3469, + "step": 8570 + }, + { + "epoch": 0.38791581805838427, + "grad_norm": 0.5977188730549259, + "learning_rate": 7.0010914308144495e-06, + "loss": 0.3514, + "step": 8571 + }, + { + "epoch": 0.3879610771667798, + "grad_norm": 0.7207959710427151, + "learning_rate": 7.000419745955608e-06, + "loss": 0.3466, + "step": 8572 + }, + { + "epoch": 0.3880063362751754, + "grad_norm": 0.3444553025472076, + "learning_rate": 6.999748018113889e-06, + "loss": 0.518, + "step": 8573 + }, + { + "epoch": 0.3880515953835709, + "grad_norm": 0.5920163103863829, + "learning_rate": 6.999076247303727e-06, + "loss": 0.3185, + "step": 8574 + }, + { + "epoch": 0.3880968544919665, + "grad_norm": 0.6407433001736258, + "learning_rate": 6.998404433539556e-06, + "loss": 0.3491, + "step": 8575 + }, + { + "epoch": 0.38814211360036205, + "grad_norm": 0.6195459294965006, + "learning_rate": 6.997732576835812e-06, + "loss": 0.3723, + "step": 8576 + }, + { + "epoch": 0.38818737270875764, + "grad_norm": 0.29890055951620087, + "learning_rate": 6.997060677206928e-06, + "loss": 0.494, + "step": 8577 + }, + { + "epoch": 0.38823263181715323, + "grad_norm": 0.6119850633665948, + "learning_rate": 6.996388734667347e-06, + "loss": 0.3426, + "step": 8578 + }, + { + "epoch": 0.38827789092554876, + "grad_norm": 0.7260265378995115, + "learning_rate": 6.995716749231501e-06, + "loss": 0.3454, + "step": 8579 + }, + { + "epoch": 0.38832315003394435, + "grad_norm": 0.5750007638619133, + "learning_rate": 6.995044720913834e-06, + "loss": 0.3197, + "step": 8580 + }, + { + "epoch": 0.3883684091423399, + "grad_norm": 0.3534386399426898, + "learning_rate": 6.994372649728781e-06, + "loss": 0.5086, + "step": 8581 + }, + { + "epoch": 0.3884136682507355, + "grad_norm": 0.7465663173943224, + "learning_rate": 6.993700535690786e-06, + "loss": 0.377, + "step": 8582 + }, + { + "epoch": 0.388458927359131, + "grad_norm": 0.6635549051184639, + "learning_rate": 6.993028378814288e-06, + "loss": 0.3151, + "step": 8583 + }, + { + "epoch": 0.3885041864675266, + "grad_norm": 0.6300073254729769, + "learning_rate": 6.992356179113735e-06, + "loss": 0.367, + "step": 8584 + }, + { + "epoch": 0.38854944557592214, + "grad_norm": 0.6398951088006489, + "learning_rate": 6.991683936603562e-06, + "loss": 0.3958, + "step": 8585 + }, + { + "epoch": 0.3885947046843177, + "grad_norm": 0.6160243481480446, + "learning_rate": 6.991011651298223e-06, + "loss": 0.376, + "step": 8586 + }, + { + "epoch": 0.38863996379271326, + "grad_norm": 0.6365641272355594, + "learning_rate": 6.990339323212154e-06, + "loss": 0.2957, + "step": 8587 + }, + { + "epoch": 0.38868522290110885, + "grad_norm": 0.640848693473412, + "learning_rate": 6.989666952359809e-06, + "loss": 0.3649, + "step": 8588 + }, + { + "epoch": 0.3887304820095044, + "grad_norm": 0.7008769461942187, + "learning_rate": 6.988994538755631e-06, + "loss": 0.3437, + "step": 8589 + }, + { + "epoch": 0.3887757411179, + "grad_norm": 0.601194932850971, + "learning_rate": 6.988322082414069e-06, + "loss": 0.3158, + "step": 8590 + }, + { + "epoch": 0.38882100022629557, + "grad_norm": 0.6295313129212234, + "learning_rate": 6.987649583349572e-06, + "loss": 0.3819, + "step": 8591 + }, + { + "epoch": 0.3888662593346911, + "grad_norm": 0.674097830312063, + "learning_rate": 6.98697704157659e-06, + "loss": 0.3393, + "step": 8592 + }, + { + "epoch": 0.3889115184430867, + "grad_norm": 0.6002185493604161, + "learning_rate": 6.986304457109574e-06, + "loss": 0.3284, + "step": 8593 + }, + { + "epoch": 0.3889567775514822, + "grad_norm": 0.669798338747915, + "learning_rate": 6.9856318299629755e-06, + "loss": 0.337, + "step": 8594 + }, + { + "epoch": 0.3890020366598778, + "grad_norm": 0.6334498651193496, + "learning_rate": 6.984959160151248e-06, + "loss": 0.3352, + "step": 8595 + }, + { + "epoch": 0.38904729576827335, + "grad_norm": 0.610674143561136, + "learning_rate": 6.984286447688844e-06, + "loss": 0.3368, + "step": 8596 + }, + { + "epoch": 0.38909255487666894, + "grad_norm": 0.34689978420777917, + "learning_rate": 6.983613692590219e-06, + "loss": 0.4979, + "step": 8597 + }, + { + "epoch": 0.3891378139850645, + "grad_norm": 0.6095161885854828, + "learning_rate": 6.9829408948698274e-06, + "loss": 0.3903, + "step": 8598 + }, + { + "epoch": 0.38918307309346006, + "grad_norm": 0.3022837257767364, + "learning_rate": 6.982268054542127e-06, + "loss": 0.4885, + "step": 8599 + }, + { + "epoch": 0.3892283322018556, + "grad_norm": 0.6723165210046169, + "learning_rate": 6.981595171621572e-06, + "loss": 0.344, + "step": 8600 + }, + { + "epoch": 0.3892735913102512, + "grad_norm": 0.7296296760079936, + "learning_rate": 6.980922246122626e-06, + "loss": 0.367, + "step": 8601 + }, + { + "epoch": 0.3893188504186468, + "grad_norm": 0.6500560720045101, + "learning_rate": 6.980249278059742e-06, + "loss": 0.337, + "step": 8602 + }, + { + "epoch": 0.3893641095270423, + "grad_norm": 0.6182250761658985, + "learning_rate": 6.979576267447385e-06, + "loss": 0.3465, + "step": 8603 + }, + { + "epoch": 0.3894093686354379, + "grad_norm": 0.6968747034851669, + "learning_rate": 6.9789032143000125e-06, + "loss": 0.3402, + "step": 8604 + }, + { + "epoch": 0.38945462774383344, + "grad_norm": 0.6643017799130763, + "learning_rate": 6.978230118632088e-06, + "loss": 0.3708, + "step": 8605 + }, + { + "epoch": 0.389499886852229, + "grad_norm": 0.6227026827677045, + "learning_rate": 6.977556980458073e-06, + "loss": 0.3538, + "step": 8606 + }, + { + "epoch": 0.38954514596062456, + "grad_norm": 0.6742507578789183, + "learning_rate": 6.976883799792434e-06, + "loss": 0.3822, + "step": 8607 + }, + { + "epoch": 0.38959040506902015, + "grad_norm": 0.6814209280655347, + "learning_rate": 6.9762105766496315e-06, + "loss": 0.3246, + "step": 8608 + }, + { + "epoch": 0.3896356641774157, + "grad_norm": 0.647736370344086, + "learning_rate": 6.975537311044136e-06, + "loss": 0.334, + "step": 8609 + }, + { + "epoch": 0.3896809232858113, + "grad_norm": 0.6228079659433321, + "learning_rate": 6.974864002990409e-06, + "loss": 0.3683, + "step": 8610 + }, + { + "epoch": 0.3897261823942068, + "grad_norm": 0.6380735975515563, + "learning_rate": 6.97419065250292e-06, + "loss": 0.3641, + "step": 8611 + }, + { + "epoch": 0.3897714415026024, + "grad_norm": 1.1646045621050385, + "learning_rate": 6.973517259596138e-06, + "loss": 0.3522, + "step": 8612 + }, + { + "epoch": 0.389816700610998, + "grad_norm": 0.5993642665906557, + "learning_rate": 6.9728438242845295e-06, + "loss": 0.3384, + "step": 8613 + }, + { + "epoch": 0.3898619597193935, + "grad_norm": 0.4615567787347811, + "learning_rate": 6.972170346582568e-06, + "loss": 0.4898, + "step": 8614 + }, + { + "epoch": 0.3899072188277891, + "grad_norm": 0.6679530107934812, + "learning_rate": 6.9714968265047234e-06, + "loss": 0.3456, + "step": 8615 + }, + { + "epoch": 0.38995247793618465, + "grad_norm": 0.6438760491238555, + "learning_rate": 6.9708232640654646e-06, + "loss": 0.3786, + "step": 8616 + }, + { + "epoch": 0.38999773704458024, + "grad_norm": 0.6502734654050656, + "learning_rate": 6.9701496592792695e-06, + "loss": 0.3107, + "step": 8617 + }, + { + "epoch": 0.3900429961529758, + "grad_norm": 0.655581936435849, + "learning_rate": 6.969476012160607e-06, + "loss": 0.3594, + "step": 8618 + }, + { + "epoch": 0.39008825526137136, + "grad_norm": 0.5840458237076638, + "learning_rate": 6.9688023227239555e-06, + "loss": 0.3011, + "step": 8619 + }, + { + "epoch": 0.3901335143697669, + "grad_norm": 0.3520744592844673, + "learning_rate": 6.968128590983787e-06, + "loss": 0.4994, + "step": 8620 + }, + { + "epoch": 0.3901787734781625, + "grad_norm": 0.6239584092422918, + "learning_rate": 6.967454816954581e-06, + "loss": 0.3088, + "step": 8621 + }, + { + "epoch": 0.390224032586558, + "grad_norm": 0.5966980508763927, + "learning_rate": 6.966781000650813e-06, + "loss": 0.3539, + "step": 8622 + }, + { + "epoch": 0.3902692916949536, + "grad_norm": 0.6395866873179475, + "learning_rate": 6.966107142086962e-06, + "loss": 0.3503, + "step": 8623 + }, + { + "epoch": 0.39031455080334915, + "grad_norm": 0.9190596121602028, + "learning_rate": 6.965433241277506e-06, + "loss": 0.4116, + "step": 8624 + }, + { + "epoch": 0.39035980991174474, + "grad_norm": 0.6402288106541163, + "learning_rate": 6.964759298236927e-06, + "loss": 0.3258, + "step": 8625 + }, + { + "epoch": 0.3904050690201403, + "grad_norm": 0.6504348325632919, + "learning_rate": 6.964085312979706e-06, + "loss": 0.3668, + "step": 8626 + }, + { + "epoch": 0.39045032812853586, + "grad_norm": 0.6216577096624377, + "learning_rate": 6.963411285520322e-06, + "loss": 0.312, + "step": 8627 + }, + { + "epoch": 0.39049558723693145, + "grad_norm": 0.6559684801547805, + "learning_rate": 6.962737215873261e-06, + "loss": 0.3615, + "step": 8628 + }, + { + "epoch": 0.390540846345327, + "grad_norm": 0.6719182999512758, + "learning_rate": 6.962063104053003e-06, + "loss": 0.3883, + "step": 8629 + }, + { + "epoch": 0.3905861054537226, + "grad_norm": 0.6345286639652378, + "learning_rate": 6.961388950074038e-06, + "loss": 0.3304, + "step": 8630 + }, + { + "epoch": 0.3906313645621181, + "grad_norm": 0.7763447214288505, + "learning_rate": 6.960714753950847e-06, + "loss": 0.3543, + "step": 8631 + }, + { + "epoch": 0.3906766236705137, + "grad_norm": 0.6207165825510894, + "learning_rate": 6.960040515697918e-06, + "loss": 0.3274, + "step": 8632 + }, + { + "epoch": 0.39072188277890924, + "grad_norm": 0.6863537745023689, + "learning_rate": 6.9593662353297375e-06, + "loss": 0.3376, + "step": 8633 + }, + { + "epoch": 0.3907671418873048, + "grad_norm": 0.6378501990610004, + "learning_rate": 6.958691912860794e-06, + "loss": 0.3066, + "step": 8634 + }, + { + "epoch": 0.39081240099570036, + "grad_norm": 0.618393802299301, + "learning_rate": 6.958017548305578e-06, + "loss": 0.3535, + "step": 8635 + }, + { + "epoch": 0.39085766010409595, + "grad_norm": 0.6521097916175539, + "learning_rate": 6.95734314167858e-06, + "loss": 0.3512, + "step": 8636 + }, + { + "epoch": 0.39090291921249154, + "grad_norm": 0.35284015932868484, + "learning_rate": 6.956668692994286e-06, + "loss": 0.4716, + "step": 8637 + }, + { + "epoch": 0.3909481783208871, + "grad_norm": 0.6396524024828847, + "learning_rate": 6.955994202267193e-06, + "loss": 0.3446, + "step": 8638 + }, + { + "epoch": 0.39099343742928266, + "grad_norm": 0.6206237459503144, + "learning_rate": 6.955319669511793e-06, + "loss": 0.321, + "step": 8639 + }, + { + "epoch": 0.3910386965376782, + "grad_norm": 0.29714977861285985, + "learning_rate": 6.954645094742577e-06, + "loss": 0.4887, + "step": 8640 + }, + { + "epoch": 0.3910839556460738, + "grad_norm": 0.6168756752989212, + "learning_rate": 6.9539704779740415e-06, + "loss": 0.3238, + "step": 8641 + }, + { + "epoch": 0.3911292147544693, + "grad_norm": 0.593369640280946, + "learning_rate": 6.953295819220681e-06, + "loss": 0.3272, + "step": 8642 + }, + { + "epoch": 0.3911744738628649, + "grad_norm": 0.287094237240238, + "learning_rate": 6.952621118496994e-06, + "loss": 0.4801, + "step": 8643 + }, + { + "epoch": 0.39121973297126045, + "grad_norm": 0.6246467794851839, + "learning_rate": 6.9519463758174745e-06, + "loss": 0.3513, + "step": 8644 + }, + { + "epoch": 0.39126499207965604, + "grad_norm": 0.6384595000969184, + "learning_rate": 6.951271591196623e-06, + "loss": 0.3522, + "step": 8645 + }, + { + "epoch": 0.39131025118805157, + "grad_norm": 0.6290461760291466, + "learning_rate": 6.950596764648938e-06, + "loss": 0.3511, + "step": 8646 + }, + { + "epoch": 0.39135551029644716, + "grad_norm": 0.6447982036114861, + "learning_rate": 6.9499218961889205e-06, + "loss": 0.3602, + "step": 8647 + }, + { + "epoch": 0.39140076940484275, + "grad_norm": 0.6259492232560252, + "learning_rate": 6.949246985831069e-06, + "loss": 0.3224, + "step": 8648 + }, + { + "epoch": 0.3914460285132383, + "grad_norm": 0.644176305316592, + "learning_rate": 6.948572033589887e-06, + "loss": 0.2964, + "step": 8649 + }, + { + "epoch": 0.3914912876216339, + "grad_norm": 0.6377018378945989, + "learning_rate": 6.9478970394798755e-06, + "loss": 0.2948, + "step": 8650 + }, + { + "epoch": 0.3915365467300294, + "grad_norm": 0.34507467046136586, + "learning_rate": 6.9472220035155394e-06, + "loss": 0.5053, + "step": 8651 + }, + { + "epoch": 0.391581805838425, + "grad_norm": 0.32562521184348303, + "learning_rate": 6.9465469257113825e-06, + "loss": 0.4863, + "step": 8652 + }, + { + "epoch": 0.39162706494682054, + "grad_norm": 0.7729315088269995, + "learning_rate": 6.945871806081911e-06, + "loss": 0.3197, + "step": 8653 + }, + { + "epoch": 0.3916723240552161, + "grad_norm": 0.6479846752585515, + "learning_rate": 6.945196644641631e-06, + "loss": 0.3031, + "step": 8654 + }, + { + "epoch": 0.39171758316361166, + "grad_norm": 0.6411110092285045, + "learning_rate": 6.944521441405049e-06, + "loss": 0.3529, + "step": 8655 + }, + { + "epoch": 0.39176284227200725, + "grad_norm": 0.7137929377143929, + "learning_rate": 6.943846196386673e-06, + "loss": 0.3444, + "step": 8656 + }, + { + "epoch": 0.3918081013804028, + "grad_norm": 0.6422030826957724, + "learning_rate": 6.943170909601013e-06, + "loss": 0.3802, + "step": 8657 + }, + { + "epoch": 0.3918533604887984, + "grad_norm": 0.6520353535812056, + "learning_rate": 6.942495581062578e-06, + "loss": 0.3379, + "step": 8658 + }, + { + "epoch": 0.3918986195971939, + "grad_norm": 0.6514855160159058, + "learning_rate": 6.94182021078588e-06, + "loss": 0.3506, + "step": 8659 + }, + { + "epoch": 0.3919438787055895, + "grad_norm": 0.6215040971747843, + "learning_rate": 6.941144798785429e-06, + "loss": 0.3467, + "step": 8660 + }, + { + "epoch": 0.3919891378139851, + "grad_norm": 0.6040186673839965, + "learning_rate": 6.9404693450757366e-06, + "loss": 0.3085, + "step": 8661 + }, + { + "epoch": 0.3920343969223806, + "grad_norm": 0.6247800042275801, + "learning_rate": 6.939793849671318e-06, + "loss": 0.3384, + "step": 8662 + }, + { + "epoch": 0.3920796560307762, + "grad_norm": 0.6908979460847018, + "learning_rate": 6.939118312586688e-06, + "loss": 0.3421, + "step": 8663 + }, + { + "epoch": 0.39212491513917175, + "grad_norm": 0.610891920990332, + "learning_rate": 6.938442733836361e-06, + "loss": 0.3246, + "step": 8664 + }, + { + "epoch": 0.39217017424756734, + "grad_norm": 0.6193622434028262, + "learning_rate": 6.9377671134348535e-06, + "loss": 0.317, + "step": 8665 + }, + { + "epoch": 0.3922154333559629, + "grad_norm": 0.6122163861863653, + "learning_rate": 6.93709145139668e-06, + "loss": 0.3232, + "step": 8666 + }, + { + "epoch": 0.39226069246435846, + "grad_norm": 0.6713130434464446, + "learning_rate": 6.936415747736363e-06, + "loss": 0.273, + "step": 8667 + }, + { + "epoch": 0.392305951572754, + "grad_norm": 0.5804329365473563, + "learning_rate": 6.935740002468417e-06, + "loss": 0.3356, + "step": 8668 + }, + { + "epoch": 0.3923512106811496, + "grad_norm": 0.6704525479055023, + "learning_rate": 6.935064215607364e-06, + "loss": 0.3619, + "step": 8669 + }, + { + "epoch": 0.3923964697895451, + "grad_norm": 0.5214643061521009, + "learning_rate": 6.934388387167726e-06, + "loss": 0.4794, + "step": 8670 + }, + { + "epoch": 0.3924417288979407, + "grad_norm": 0.7548003104336839, + "learning_rate": 6.933712517164019e-06, + "loss": 0.3655, + "step": 8671 + }, + { + "epoch": 0.3924869880063363, + "grad_norm": 0.6111333579007964, + "learning_rate": 6.933036605610773e-06, + "loss": 0.2902, + "step": 8672 + }, + { + "epoch": 0.39253224711473184, + "grad_norm": 0.6062223905435875, + "learning_rate": 6.932360652522504e-06, + "loss": 0.3162, + "step": 8673 + }, + { + "epoch": 0.3925775062231274, + "grad_norm": 0.6587197514595092, + "learning_rate": 6.93168465791374e-06, + "loss": 0.3598, + "step": 8674 + }, + { + "epoch": 0.39262276533152296, + "grad_norm": 0.6525325821139949, + "learning_rate": 6.931008621799007e-06, + "loss": 0.3691, + "step": 8675 + }, + { + "epoch": 0.39266802443991855, + "grad_norm": 0.6237709662578704, + "learning_rate": 6.930332544192829e-06, + "loss": 0.3762, + "step": 8676 + }, + { + "epoch": 0.3927132835483141, + "grad_norm": 0.6245083049472832, + "learning_rate": 6.929656425109731e-06, + "loss": 0.3514, + "step": 8677 + }, + { + "epoch": 0.3927585426567097, + "grad_norm": 0.6767809573399547, + "learning_rate": 6.9289802645642455e-06, + "loss": 0.3382, + "step": 8678 + }, + { + "epoch": 0.3928038017651052, + "grad_norm": 0.41616801093824357, + "learning_rate": 6.928304062570897e-06, + "loss": 0.4813, + "step": 8679 + }, + { + "epoch": 0.3928490608735008, + "grad_norm": 0.7707353902927744, + "learning_rate": 6.927627819144217e-06, + "loss": 0.3297, + "step": 8680 + }, + { + "epoch": 0.39289431998189633, + "grad_norm": 0.6385764521466519, + "learning_rate": 6.926951534298736e-06, + "loss": 0.3728, + "step": 8681 + }, + { + "epoch": 0.3929395790902919, + "grad_norm": 0.6644952441279728, + "learning_rate": 6.926275208048984e-06, + "loss": 0.3528, + "step": 8682 + }, + { + "epoch": 0.39298483819868746, + "grad_norm": 0.6429728839288491, + "learning_rate": 6.925598840409493e-06, + "loss": 0.3732, + "step": 8683 + }, + { + "epoch": 0.39303009730708305, + "grad_norm": 0.6111124585554257, + "learning_rate": 6.924922431394798e-06, + "loss": 0.349, + "step": 8684 + }, + { + "epoch": 0.39307535641547864, + "grad_norm": 0.2904134706836162, + "learning_rate": 6.924245981019432e-06, + "loss": 0.4543, + "step": 8685 + }, + { + "epoch": 0.3931206155238742, + "grad_norm": 0.6445296325024885, + "learning_rate": 6.92356948929793e-06, + "loss": 0.365, + "step": 8686 + }, + { + "epoch": 0.39316587463226976, + "grad_norm": 0.622289117555611, + "learning_rate": 6.922892956244827e-06, + "loss": 0.3587, + "step": 8687 + }, + { + "epoch": 0.3932111337406653, + "grad_norm": 0.2850696554299266, + "learning_rate": 6.92221638187466e-06, + "loss": 0.5007, + "step": 8688 + }, + { + "epoch": 0.3932563928490609, + "grad_norm": 0.6122601998647502, + "learning_rate": 6.921539766201967e-06, + "loss": 0.3236, + "step": 8689 + }, + { + "epoch": 0.3933016519574564, + "grad_norm": 0.709577675183887, + "learning_rate": 6.920863109241285e-06, + "loss": 0.3563, + "step": 8690 + }, + { + "epoch": 0.393346911065852, + "grad_norm": 0.34339414934632356, + "learning_rate": 6.920186411007155e-06, + "loss": 0.4917, + "step": 8691 + }, + { + "epoch": 0.39339217017424755, + "grad_norm": 0.7456381059814225, + "learning_rate": 6.919509671514116e-06, + "loss": 0.3629, + "step": 8692 + }, + { + "epoch": 0.39343742928264314, + "grad_norm": 0.6745920091564857, + "learning_rate": 6.91883289077671e-06, + "loss": 0.3314, + "step": 8693 + }, + { + "epoch": 0.39348268839103867, + "grad_norm": 0.6477926625245785, + "learning_rate": 6.918156068809479e-06, + "loss": 0.384, + "step": 8694 + }, + { + "epoch": 0.39352794749943426, + "grad_norm": 0.2827842762719348, + "learning_rate": 6.917479205626965e-06, + "loss": 0.4691, + "step": 8695 + }, + { + "epoch": 0.39357320660782985, + "grad_norm": 0.2858251259802762, + "learning_rate": 6.916802301243711e-06, + "loss": 0.4701, + "step": 8696 + }, + { + "epoch": 0.3936184657162254, + "grad_norm": 0.6481176264184711, + "learning_rate": 6.916125355674264e-06, + "loss": 0.3364, + "step": 8697 + }, + { + "epoch": 0.393663724824621, + "grad_norm": 0.6563203748286447, + "learning_rate": 6.915448368933166e-06, + "loss": 0.3543, + "step": 8698 + }, + { + "epoch": 0.3937089839330165, + "grad_norm": 0.6412880506103583, + "learning_rate": 6.914771341034967e-06, + "loss": 0.3515, + "step": 8699 + }, + { + "epoch": 0.3937542430414121, + "grad_norm": 0.29259987008716926, + "learning_rate": 6.914094271994211e-06, + "loss": 0.4955, + "step": 8700 + }, + { + "epoch": 0.39379950214980763, + "grad_norm": 0.6285536609147793, + "learning_rate": 6.913417161825449e-06, + "loss": 0.3612, + "step": 8701 + }, + { + "epoch": 0.3938447612582032, + "grad_norm": 0.2906222931727884, + "learning_rate": 6.912740010543229e-06, + "loss": 0.4956, + "step": 8702 + }, + { + "epoch": 0.39389002036659876, + "grad_norm": 0.7385466443098395, + "learning_rate": 6.912062818162101e-06, + "loss": 0.3475, + "step": 8703 + }, + { + "epoch": 0.39393527947499435, + "grad_norm": 0.2980880050919162, + "learning_rate": 6.911385584696615e-06, + "loss": 0.4687, + "step": 8704 + }, + { + "epoch": 0.3939805385833899, + "grad_norm": 0.3011430205086542, + "learning_rate": 6.910708310161323e-06, + "loss": 0.4903, + "step": 8705 + }, + { + "epoch": 0.3940257976917855, + "grad_norm": 0.6786333339665385, + "learning_rate": 6.910030994570778e-06, + "loss": 0.3609, + "step": 8706 + }, + { + "epoch": 0.39407105680018106, + "grad_norm": 0.6605605300378226, + "learning_rate": 6.909353637939533e-06, + "loss": 0.3728, + "step": 8707 + }, + { + "epoch": 0.3941163159085766, + "grad_norm": 0.3013435270774591, + "learning_rate": 6.908676240282141e-06, + "loss": 0.498, + "step": 8708 + }, + { + "epoch": 0.3941615750169722, + "grad_norm": 0.6301341462442973, + "learning_rate": 6.907998801613162e-06, + "loss": 0.3521, + "step": 8709 + }, + { + "epoch": 0.3942068341253677, + "grad_norm": 0.28087605899378787, + "learning_rate": 6.907321321947146e-06, + "loss": 0.4472, + "step": 8710 + }, + { + "epoch": 0.3942520932337633, + "grad_norm": 0.7817288977807025, + "learning_rate": 6.906643801298654e-06, + "loss": 0.3586, + "step": 8711 + }, + { + "epoch": 0.39429735234215885, + "grad_norm": 1.1246420663438081, + "learning_rate": 6.9059662396822415e-06, + "loss": 0.3506, + "step": 8712 + }, + { + "epoch": 0.39434261145055444, + "grad_norm": 0.6710933590669182, + "learning_rate": 6.905288637112468e-06, + "loss": 0.3839, + "step": 8713 + }, + { + "epoch": 0.39438787055894997, + "grad_norm": 0.7314548660479658, + "learning_rate": 6.904610993603894e-06, + "loss": 0.3217, + "step": 8714 + }, + { + "epoch": 0.39443312966734556, + "grad_norm": 0.3041015740060619, + "learning_rate": 6.90393330917108e-06, + "loss": 0.497, + "step": 8715 + }, + { + "epoch": 0.3944783887757411, + "grad_norm": 0.6279941312234719, + "learning_rate": 6.903255583828585e-06, + "loss": 0.3684, + "step": 8716 + }, + { + "epoch": 0.3945236478841367, + "grad_norm": 0.7164162219907889, + "learning_rate": 6.902577817590975e-06, + "loss": 0.3792, + "step": 8717 + }, + { + "epoch": 0.3945689069925322, + "grad_norm": 0.6107098021250627, + "learning_rate": 6.901900010472811e-06, + "loss": 0.3403, + "step": 8718 + }, + { + "epoch": 0.3946141661009278, + "grad_norm": 0.5809393957416973, + "learning_rate": 6.901222162488655e-06, + "loss": 0.35, + "step": 8719 + }, + { + "epoch": 0.3946594252093234, + "grad_norm": 0.6329370924311692, + "learning_rate": 6.9005442736530745e-06, + "loss": 0.3235, + "step": 8720 + }, + { + "epoch": 0.39470468431771893, + "grad_norm": 0.6260093623120035, + "learning_rate": 6.899866343980635e-06, + "loss": 0.3836, + "step": 8721 + }, + { + "epoch": 0.3947499434261145, + "grad_norm": 0.6883826862977819, + "learning_rate": 6.899188373485903e-06, + "loss": 0.3625, + "step": 8722 + }, + { + "epoch": 0.39479520253451006, + "grad_norm": 0.5849484026828342, + "learning_rate": 6.8985103621834455e-06, + "loss": 0.3133, + "step": 8723 + }, + { + "epoch": 0.39484046164290565, + "grad_norm": 0.623484541700811, + "learning_rate": 6.8978323100878305e-06, + "loss": 0.3496, + "step": 8724 + }, + { + "epoch": 0.3948857207513012, + "grad_norm": 0.5962752422609053, + "learning_rate": 6.897154217213629e-06, + "loss": 0.3508, + "step": 8725 + }, + { + "epoch": 0.3949309798596968, + "grad_norm": 0.6707375328358476, + "learning_rate": 6.8964760835754095e-06, + "loss": 0.3668, + "step": 8726 + }, + { + "epoch": 0.3949762389680923, + "grad_norm": 0.32723708194093143, + "learning_rate": 6.895797909187745e-06, + "loss": 0.5035, + "step": 8727 + }, + { + "epoch": 0.3950214980764879, + "grad_norm": 0.5904918431761856, + "learning_rate": 6.8951196940652045e-06, + "loss": 0.3286, + "step": 8728 + }, + { + "epoch": 0.39506675718488343, + "grad_norm": 0.6513250106633817, + "learning_rate": 6.894441438222362e-06, + "loss": 0.3425, + "step": 8729 + }, + { + "epoch": 0.395112016293279, + "grad_norm": 0.7455422173357793, + "learning_rate": 6.89376314167379e-06, + "loss": 0.3482, + "step": 8730 + }, + { + "epoch": 0.3951572754016746, + "grad_norm": 0.6774642915248136, + "learning_rate": 6.893084804434067e-06, + "loss": 0.3727, + "step": 8731 + }, + { + "epoch": 0.39520253451007015, + "grad_norm": 0.6096825832240897, + "learning_rate": 6.892406426517764e-06, + "loss": 0.3227, + "step": 8732 + }, + { + "epoch": 0.39524779361846574, + "grad_norm": 0.666446310795665, + "learning_rate": 6.8917280079394596e-06, + "loss": 0.3622, + "step": 8733 + }, + { + "epoch": 0.39529305272686127, + "grad_norm": 0.6059322710118061, + "learning_rate": 6.891049548713731e-06, + "loss": 0.3496, + "step": 8734 + }, + { + "epoch": 0.39533831183525686, + "grad_norm": 0.3275420726550203, + "learning_rate": 6.8903710488551544e-06, + "loss": 0.4777, + "step": 8735 + }, + { + "epoch": 0.3953835709436524, + "grad_norm": 0.6373966786981582, + "learning_rate": 6.889692508378312e-06, + "loss": 0.382, + "step": 8736 + }, + { + "epoch": 0.395428830052048, + "grad_norm": 0.657915152352585, + "learning_rate": 6.889013927297778e-06, + "loss": 0.3628, + "step": 8737 + }, + { + "epoch": 0.3954740891604435, + "grad_norm": 0.43447516800031616, + "learning_rate": 6.888335305628138e-06, + "loss": 0.4972, + "step": 8738 + }, + { + "epoch": 0.3955193482688391, + "grad_norm": 0.294283044657983, + "learning_rate": 6.887656643383972e-06, + "loss": 0.4785, + "step": 8739 + }, + { + "epoch": 0.39556460737723464, + "grad_norm": 0.701929657079196, + "learning_rate": 6.886977940579862e-06, + "loss": 0.3414, + "step": 8740 + }, + { + "epoch": 0.39560986648563023, + "grad_norm": 0.6942881894781736, + "learning_rate": 6.886299197230391e-06, + "loss": 0.3436, + "step": 8741 + }, + { + "epoch": 0.3956551255940258, + "grad_norm": 0.5906622336773468, + "learning_rate": 6.885620413350145e-06, + "loss": 0.3261, + "step": 8742 + }, + { + "epoch": 0.39570038470242136, + "grad_norm": 0.732763832599632, + "learning_rate": 6.884941588953706e-06, + "loss": 0.3508, + "step": 8743 + }, + { + "epoch": 0.39574564381081695, + "grad_norm": 0.7552325739031256, + "learning_rate": 6.884262724055663e-06, + "loss": 0.3474, + "step": 8744 + }, + { + "epoch": 0.3957909029192125, + "grad_norm": 0.6542164691291017, + "learning_rate": 6.8835838186705985e-06, + "loss": 0.3221, + "step": 8745 + }, + { + "epoch": 0.3958361620276081, + "grad_norm": 0.6364917758168952, + "learning_rate": 6.8829048728131056e-06, + "loss": 0.3626, + "step": 8746 + }, + { + "epoch": 0.3958814211360036, + "grad_norm": 0.6307626027957765, + "learning_rate": 6.882225886497768e-06, + "loss": 0.3339, + "step": 8747 + }, + { + "epoch": 0.3959266802443992, + "grad_norm": 0.6846426846072403, + "learning_rate": 6.8815468597391785e-06, + "loss": 0.3451, + "step": 8748 + }, + { + "epoch": 0.39597193935279473, + "grad_norm": 0.6193145573313631, + "learning_rate": 6.880867792551924e-06, + "loss": 0.3539, + "step": 8749 + }, + { + "epoch": 0.3960171984611903, + "grad_norm": 0.6411114657727086, + "learning_rate": 6.880188684950599e-06, + "loss": 0.3727, + "step": 8750 + }, + { + "epoch": 0.39606245756958586, + "grad_norm": 0.676463974354279, + "learning_rate": 6.879509536949792e-06, + "loss": 0.3605, + "step": 8751 + }, + { + "epoch": 0.39610771667798145, + "grad_norm": 0.6642196025191914, + "learning_rate": 6.878830348564098e-06, + "loss": 0.3631, + "step": 8752 + }, + { + "epoch": 0.396152975786377, + "grad_norm": 0.6307656132854766, + "learning_rate": 6.878151119808111e-06, + "loss": 0.3443, + "step": 8753 + }, + { + "epoch": 0.39619823489477257, + "grad_norm": 0.6247080085850564, + "learning_rate": 6.8774718506964245e-06, + "loss": 0.3222, + "step": 8754 + }, + { + "epoch": 0.39624349400316816, + "grad_norm": 0.9383560582951295, + "learning_rate": 6.876792541243633e-06, + "loss": 0.3564, + "step": 8755 + }, + { + "epoch": 0.3962887531115637, + "grad_norm": 0.6281135643962057, + "learning_rate": 6.876113191464336e-06, + "loss": 0.364, + "step": 8756 + }, + { + "epoch": 0.3963340122199593, + "grad_norm": 0.7782318894620474, + "learning_rate": 6.875433801373128e-06, + "loss": 0.289, + "step": 8757 + }, + { + "epoch": 0.3963792713283548, + "grad_norm": 0.4686477527301178, + "learning_rate": 6.8747543709846064e-06, + "loss": 0.4773, + "step": 8758 + }, + { + "epoch": 0.3964245304367504, + "grad_norm": 0.5931616218567658, + "learning_rate": 6.8740749003133725e-06, + "loss": 0.3568, + "step": 8759 + }, + { + "epoch": 0.39646978954514595, + "grad_norm": 0.6620622851903066, + "learning_rate": 6.873395389374024e-06, + "loss": 0.3537, + "step": 8760 + }, + { + "epoch": 0.39651504865354154, + "grad_norm": 0.6560883533650138, + "learning_rate": 6.872715838181161e-06, + "loss": 0.3823, + "step": 8761 + }, + { + "epoch": 0.39656030776193707, + "grad_norm": 0.6712679049437771, + "learning_rate": 6.872036246749387e-06, + "loss": 0.3656, + "step": 8762 + }, + { + "epoch": 0.39660556687033266, + "grad_norm": 0.576014190977509, + "learning_rate": 6.871356615093306e-06, + "loss": 0.3273, + "step": 8763 + }, + { + "epoch": 0.3966508259787282, + "grad_norm": 0.6717922727634008, + "learning_rate": 6.870676943227516e-06, + "loss": 0.3706, + "step": 8764 + }, + { + "epoch": 0.3966960850871238, + "grad_norm": 0.40196289686369996, + "learning_rate": 6.869997231166625e-06, + "loss": 0.4814, + "step": 8765 + }, + { + "epoch": 0.3967413441955194, + "grad_norm": 0.39845981511037476, + "learning_rate": 6.869317478925236e-06, + "loss": 0.4755, + "step": 8766 + }, + { + "epoch": 0.3967866033039149, + "grad_norm": 0.7263105208145871, + "learning_rate": 6.8686376865179576e-06, + "loss": 0.3497, + "step": 8767 + }, + { + "epoch": 0.3968318624123105, + "grad_norm": 0.6656152094364597, + "learning_rate": 6.867957853959392e-06, + "loss": 0.3601, + "step": 8768 + }, + { + "epoch": 0.39687712152070603, + "grad_norm": 0.6096887779698981, + "learning_rate": 6.86727798126415e-06, + "loss": 0.3859, + "step": 8769 + }, + { + "epoch": 0.3969223806291016, + "grad_norm": 0.7120773328838061, + "learning_rate": 6.866598068446839e-06, + "loss": 0.3555, + "step": 8770 + }, + { + "epoch": 0.39696763973749716, + "grad_norm": 0.9733028374256942, + "learning_rate": 6.8659181155220674e-06, + "loss": 0.3321, + "step": 8771 + }, + { + "epoch": 0.39701289884589275, + "grad_norm": 0.7205067740179895, + "learning_rate": 6.865238122504449e-06, + "loss": 0.3416, + "step": 8772 + }, + { + "epoch": 0.3970581579542883, + "grad_norm": 0.7190878853947535, + "learning_rate": 6.86455808940859e-06, + "loss": 0.3867, + "step": 8773 + }, + { + "epoch": 0.39710341706268387, + "grad_norm": 0.5962105622474269, + "learning_rate": 6.863878016249103e-06, + "loss": 0.3064, + "step": 8774 + }, + { + "epoch": 0.3971486761710794, + "grad_norm": 0.4949437489129078, + "learning_rate": 6.8631979030406045e-06, + "loss": 0.4991, + "step": 8775 + }, + { + "epoch": 0.397193935279475, + "grad_norm": 0.7032902018583524, + "learning_rate": 6.862517749797703e-06, + "loss": 0.3329, + "step": 8776 + }, + { + "epoch": 0.3972391943878706, + "grad_norm": 0.6427673222676692, + "learning_rate": 6.861837556535018e-06, + "loss": 0.3734, + "step": 8777 + }, + { + "epoch": 0.3972844534962661, + "grad_norm": 0.6180141133170143, + "learning_rate": 6.86115732326716e-06, + "loss": 0.3366, + "step": 8778 + }, + { + "epoch": 0.3973297126046617, + "grad_norm": 0.6308046882524578, + "learning_rate": 6.860477050008749e-06, + "loss": 0.3522, + "step": 8779 + }, + { + "epoch": 0.39737497171305725, + "grad_norm": 0.6888985871886418, + "learning_rate": 6.859796736774399e-06, + "loss": 0.4036, + "step": 8780 + }, + { + "epoch": 0.39742023082145284, + "grad_norm": 0.6252466017722336, + "learning_rate": 6.859116383578729e-06, + "loss": 0.3372, + "step": 8781 + }, + { + "epoch": 0.39746548992984837, + "grad_norm": 0.6339694577973134, + "learning_rate": 6.858435990436357e-06, + "loss": 0.3682, + "step": 8782 + }, + { + "epoch": 0.39751074903824396, + "grad_norm": 0.6378813574811948, + "learning_rate": 6.857755557361904e-06, + "loss": 0.3422, + "step": 8783 + }, + { + "epoch": 0.3975560081466395, + "grad_norm": 0.6418165335242025, + "learning_rate": 6.8570750843699906e-06, + "loss": 0.3562, + "step": 8784 + }, + { + "epoch": 0.3976012672550351, + "grad_norm": 0.6998441305303985, + "learning_rate": 6.856394571475236e-06, + "loss": 0.3942, + "step": 8785 + }, + { + "epoch": 0.3976465263634306, + "grad_norm": 0.6763851297883828, + "learning_rate": 6.855714018692266e-06, + "loss": 0.3044, + "step": 8786 + }, + { + "epoch": 0.3976917854718262, + "grad_norm": 0.3356185758187047, + "learning_rate": 6.855033426035698e-06, + "loss": 0.4853, + "step": 8787 + }, + { + "epoch": 0.39773704458022174, + "grad_norm": 0.6597605889712057, + "learning_rate": 6.854352793520161e-06, + "loss": 0.3482, + "step": 8788 + }, + { + "epoch": 0.39778230368861733, + "grad_norm": 0.30797573717135995, + "learning_rate": 6.853672121160277e-06, + "loss": 0.5004, + "step": 8789 + }, + { + "epoch": 0.3978275627970129, + "grad_norm": 0.29981839225811097, + "learning_rate": 6.852991408970673e-06, + "loss": 0.4789, + "step": 8790 + }, + { + "epoch": 0.39787282190540846, + "grad_norm": 0.7316998984685005, + "learning_rate": 6.852310656965973e-06, + "loss": 0.3341, + "step": 8791 + }, + { + "epoch": 0.39791808101380405, + "grad_norm": 0.6594598143900187, + "learning_rate": 6.8516298651608075e-06, + "loss": 0.3401, + "step": 8792 + }, + { + "epoch": 0.3979633401221996, + "grad_norm": 0.647731276075123, + "learning_rate": 6.850949033569802e-06, + "loss": 0.2948, + "step": 8793 + }, + { + "epoch": 0.3980085992305952, + "grad_norm": 0.6491077344661909, + "learning_rate": 6.850268162207587e-06, + "loss": 0.3411, + "step": 8794 + }, + { + "epoch": 0.3980538583389907, + "grad_norm": 0.6955332825784432, + "learning_rate": 6.84958725108879e-06, + "loss": 0.3782, + "step": 8795 + }, + { + "epoch": 0.3980991174473863, + "grad_norm": 0.6033407972613656, + "learning_rate": 6.848906300228047e-06, + "loss": 0.3427, + "step": 8796 + }, + { + "epoch": 0.39814437655578183, + "grad_norm": 0.6390814926999342, + "learning_rate": 6.8482253096399835e-06, + "loss": 0.3563, + "step": 8797 + }, + { + "epoch": 0.3981896356641774, + "grad_norm": 0.8164864194155153, + "learning_rate": 6.847544279339235e-06, + "loss": 0.4011, + "step": 8798 + }, + { + "epoch": 0.39823489477257296, + "grad_norm": 0.6229443416891212, + "learning_rate": 6.8468632093404356e-06, + "loss": 0.3459, + "step": 8799 + }, + { + "epoch": 0.39828015388096855, + "grad_norm": 0.6137351192779077, + "learning_rate": 6.846182099658216e-06, + "loss": 0.3232, + "step": 8800 + }, + { + "epoch": 0.39832541298936414, + "grad_norm": 0.6480828136217117, + "learning_rate": 6.845500950307215e-06, + "loss": 0.3585, + "step": 8801 + }, + { + "epoch": 0.39837067209775967, + "grad_norm": 0.6361253861236457, + "learning_rate": 6.8448197613020664e-06, + "loss": 0.3287, + "step": 8802 + }, + { + "epoch": 0.39841593120615526, + "grad_norm": 0.6415046705485342, + "learning_rate": 6.844138532657405e-06, + "loss": 0.3557, + "step": 8803 + }, + { + "epoch": 0.3984611903145508, + "grad_norm": 0.6278134895515164, + "learning_rate": 6.843457264387874e-06, + "loss": 0.3274, + "step": 8804 + }, + { + "epoch": 0.3985064494229464, + "grad_norm": 0.6363884007409067, + "learning_rate": 6.842775956508104e-06, + "loss": 0.3383, + "step": 8805 + }, + { + "epoch": 0.3985517085313419, + "grad_norm": 0.837352345152312, + "learning_rate": 6.8420946090327416e-06, + "loss": 0.335, + "step": 8806 + }, + { + "epoch": 0.3985969676397375, + "grad_norm": 0.5971161813211178, + "learning_rate": 6.841413221976422e-06, + "loss": 0.3698, + "step": 8807 + }, + { + "epoch": 0.39864222674813304, + "grad_norm": 0.7140643777217771, + "learning_rate": 6.840731795353788e-06, + "loss": 0.3433, + "step": 8808 + }, + { + "epoch": 0.39868748585652863, + "grad_norm": 0.523852046512976, + "learning_rate": 6.840050329179481e-06, + "loss": 0.4989, + "step": 8809 + }, + { + "epoch": 0.39873274496492417, + "grad_norm": 0.4224525675611614, + "learning_rate": 6.839368823468144e-06, + "loss": 0.4669, + "step": 8810 + }, + { + "epoch": 0.39877800407331976, + "grad_norm": 0.6361397108087635, + "learning_rate": 6.838687278234419e-06, + "loss": 0.3543, + "step": 8811 + }, + { + "epoch": 0.3988232631817153, + "grad_norm": 0.3237349798154476, + "learning_rate": 6.838005693492953e-06, + "loss": 0.4774, + "step": 8812 + }, + { + "epoch": 0.3988685222901109, + "grad_norm": 0.6200239783378564, + "learning_rate": 6.837324069258389e-06, + "loss": 0.3643, + "step": 8813 + }, + { + "epoch": 0.3989137813985065, + "grad_norm": 0.742821653188746, + "learning_rate": 6.836642405545374e-06, + "loss": 0.3307, + "step": 8814 + }, + { + "epoch": 0.398959040506902, + "grad_norm": 0.5872253694957739, + "learning_rate": 6.8359607023685544e-06, + "loss": 0.3176, + "step": 8815 + }, + { + "epoch": 0.3990042996152976, + "grad_norm": 0.6693686789127838, + "learning_rate": 6.835278959742577e-06, + "loss": 0.3363, + "step": 8816 + }, + { + "epoch": 0.39904955872369313, + "grad_norm": 0.5871827507512425, + "learning_rate": 6.8345971776820944e-06, + "loss": 0.3102, + "step": 8817 + }, + { + "epoch": 0.3990948178320887, + "grad_norm": 0.6081941918857661, + "learning_rate": 6.833915356201749e-06, + "loss": 0.4883, + "step": 8818 + }, + { + "epoch": 0.39914007694048426, + "grad_norm": 0.542069814873775, + "learning_rate": 6.833233495316198e-06, + "loss": 0.5231, + "step": 8819 + }, + { + "epoch": 0.39918533604887985, + "grad_norm": 0.6396501003092583, + "learning_rate": 6.832551595040089e-06, + "loss": 0.3471, + "step": 8820 + }, + { + "epoch": 0.3992305951572754, + "grad_norm": 0.6680543486040922, + "learning_rate": 6.8318696553880736e-06, + "loss": 0.3832, + "step": 8821 + }, + { + "epoch": 0.39927585426567097, + "grad_norm": 0.651301525618315, + "learning_rate": 6.831187676374807e-06, + "loss": 0.3086, + "step": 8822 + }, + { + "epoch": 0.3993211133740665, + "grad_norm": 0.6727077273084098, + "learning_rate": 6.83050565801494e-06, + "loss": 0.3631, + "step": 8823 + }, + { + "epoch": 0.3993663724824621, + "grad_norm": 0.6163866932954916, + "learning_rate": 6.8298236003231264e-06, + "loss": 0.3353, + "step": 8824 + }, + { + "epoch": 0.3994116315908577, + "grad_norm": 0.6197188225438527, + "learning_rate": 6.829141503314027e-06, + "loss": 0.3233, + "step": 8825 + }, + { + "epoch": 0.3994568906992532, + "grad_norm": 0.6424342060833129, + "learning_rate": 6.8284593670022925e-06, + "loss": 0.3059, + "step": 8826 + }, + { + "epoch": 0.3995021498076488, + "grad_norm": 0.6480051962242951, + "learning_rate": 6.827777191402584e-06, + "loss": 0.3934, + "step": 8827 + }, + { + "epoch": 0.39954740891604434, + "grad_norm": 0.9945980509456495, + "learning_rate": 6.827094976529555e-06, + "loss": 0.5059, + "step": 8828 + }, + { + "epoch": 0.39959266802443993, + "grad_norm": 0.6656078511701508, + "learning_rate": 6.826412722397867e-06, + "loss": 0.3237, + "step": 8829 + }, + { + "epoch": 0.39963792713283547, + "grad_norm": 0.6621313999952261, + "learning_rate": 6.8257304290221794e-06, + "loss": 0.3523, + "step": 8830 + }, + { + "epoch": 0.39968318624123106, + "grad_norm": 0.6759962831435172, + "learning_rate": 6.8250480964171526e-06, + "loss": 0.3349, + "step": 8831 + }, + { + "epoch": 0.3997284453496266, + "grad_norm": 0.6632127212233728, + "learning_rate": 6.824365724597446e-06, + "loss": 0.3805, + "step": 8832 + }, + { + "epoch": 0.3997737044580222, + "grad_norm": 0.33370362668382353, + "learning_rate": 6.823683313577725e-06, + "loss": 0.4721, + "step": 8833 + }, + { + "epoch": 0.3998189635664177, + "grad_norm": 0.5915639289253449, + "learning_rate": 6.823000863372649e-06, + "loss": 0.3339, + "step": 8834 + }, + { + "epoch": 0.3998642226748133, + "grad_norm": 0.6474460312332149, + "learning_rate": 6.822318373996884e-06, + "loss": 0.3628, + "step": 8835 + }, + { + "epoch": 0.3999094817832089, + "grad_norm": 0.3924738120952629, + "learning_rate": 6.8216358454650935e-06, + "loss": 0.4712, + "step": 8836 + }, + { + "epoch": 0.39995474089160443, + "grad_norm": 0.6035930899859268, + "learning_rate": 6.820953277791944e-06, + "loss": 0.313, + "step": 8837 + }, + { + "epoch": 0.4, + "grad_norm": 0.6635310443361517, + "learning_rate": 6.8202706709921e-06, + "loss": 0.3697, + "step": 8838 + }, + { + "epoch": 0.40004525910839556, + "grad_norm": 0.6681270122548083, + "learning_rate": 6.81958802508023e-06, + "loss": 0.3649, + "step": 8839 + }, + { + "epoch": 0.40009051821679115, + "grad_norm": 0.5962241863603737, + "learning_rate": 6.818905340071004e-06, + "loss": 0.3629, + "step": 8840 + }, + { + "epoch": 0.4001357773251867, + "grad_norm": 0.6215104277591916, + "learning_rate": 6.818222615979087e-06, + "loss": 0.328, + "step": 8841 + }, + { + "epoch": 0.40018103643358227, + "grad_norm": 0.6175036142464018, + "learning_rate": 6.817539852819149e-06, + "loss": 0.3285, + "step": 8842 + }, + { + "epoch": 0.4002262955419778, + "grad_norm": 0.705849429599764, + "learning_rate": 6.816857050605864e-06, + "loss": 0.3486, + "step": 8843 + }, + { + "epoch": 0.4002715546503734, + "grad_norm": 0.6307225714621325, + "learning_rate": 6.8161742093539005e-06, + "loss": 0.3474, + "step": 8844 + }, + { + "epoch": 0.40031681375876893, + "grad_norm": 0.6726724488003374, + "learning_rate": 6.81549132907793e-06, + "loss": 0.354, + "step": 8845 + }, + { + "epoch": 0.4003620728671645, + "grad_norm": 0.48933163712064615, + "learning_rate": 6.814808409792628e-06, + "loss": 0.5051, + "step": 8846 + }, + { + "epoch": 0.40040733197556005, + "grad_norm": 0.6639468435423697, + "learning_rate": 6.814125451512666e-06, + "loss": 0.3468, + "step": 8847 + }, + { + "epoch": 0.40045259108395564, + "grad_norm": 0.6380662284908794, + "learning_rate": 6.8134424542527215e-06, + "loss": 0.3581, + "step": 8848 + }, + { + "epoch": 0.40049785019235123, + "grad_norm": 0.6132429048830895, + "learning_rate": 6.812759418027466e-06, + "loss": 0.3805, + "step": 8849 + }, + { + "epoch": 0.40054310930074677, + "grad_norm": 0.6307231742366833, + "learning_rate": 6.812076342851579e-06, + "loss": 0.3224, + "step": 8850 + }, + { + "epoch": 0.40058836840914236, + "grad_norm": 0.6191078091758072, + "learning_rate": 6.811393228739737e-06, + "loss": 0.3307, + "step": 8851 + }, + { + "epoch": 0.4006336275175379, + "grad_norm": 0.32682970215003815, + "learning_rate": 6.810710075706618e-06, + "loss": 0.4633, + "step": 8852 + }, + { + "epoch": 0.4006788866259335, + "grad_norm": 0.6713896907402072, + "learning_rate": 6.8100268837669e-06, + "loss": 0.3689, + "step": 8853 + }, + { + "epoch": 0.400724145734329, + "grad_norm": 0.66965729091601, + "learning_rate": 6.809343652935263e-06, + "loss": 0.3655, + "step": 8854 + }, + { + "epoch": 0.4007694048427246, + "grad_norm": 0.3073650613234617, + "learning_rate": 6.808660383226388e-06, + "loss": 0.4726, + "step": 8855 + }, + { + "epoch": 0.40081466395112014, + "grad_norm": 0.3061154699957953, + "learning_rate": 6.807977074654957e-06, + "loss": 0.4807, + "step": 8856 + }, + { + "epoch": 0.40085992305951573, + "grad_norm": 0.6343734983995349, + "learning_rate": 6.807293727235651e-06, + "loss": 0.3332, + "step": 8857 + }, + { + "epoch": 0.40090518216791127, + "grad_norm": 0.6222786895743901, + "learning_rate": 6.806610340983154e-06, + "loss": 0.3331, + "step": 8858 + }, + { + "epoch": 0.40095044127630686, + "grad_norm": 0.5848984802642063, + "learning_rate": 6.8059269159121484e-06, + "loss": 0.3793, + "step": 8859 + }, + { + "epoch": 0.40099570038470245, + "grad_norm": 0.6518113755263946, + "learning_rate": 6.8052434520373204e-06, + "loss": 0.3325, + "step": 8860 + }, + { + "epoch": 0.401040959493098, + "grad_norm": 0.6355972797900781, + "learning_rate": 6.804559949373355e-06, + "loss": 0.348, + "step": 8861 + }, + { + "epoch": 0.40108621860149357, + "grad_norm": 0.6869756142935315, + "learning_rate": 6.803876407934939e-06, + "loss": 0.3771, + "step": 8862 + }, + { + "epoch": 0.4011314777098891, + "grad_norm": 0.3713660385866207, + "learning_rate": 6.803192827736758e-06, + "loss": 0.4636, + "step": 8863 + }, + { + "epoch": 0.4011767368182847, + "grad_norm": 0.649725866030626, + "learning_rate": 6.802509208793502e-06, + "loss": 0.314, + "step": 8864 + }, + { + "epoch": 0.40122199592668023, + "grad_norm": 0.6290007862830288, + "learning_rate": 6.80182555111986e-06, + "loss": 0.3146, + "step": 8865 + }, + { + "epoch": 0.4012672550350758, + "grad_norm": 0.6416736202002891, + "learning_rate": 6.80114185473052e-06, + "loss": 0.3255, + "step": 8866 + }, + { + "epoch": 0.40131251414347135, + "grad_norm": 0.646691722276954, + "learning_rate": 6.800458119640172e-06, + "loss": 0.3269, + "step": 8867 + }, + { + "epoch": 0.40135777325186694, + "grad_norm": 0.601916942813255, + "learning_rate": 6.79977434586351e-06, + "loss": 0.3505, + "step": 8868 + }, + { + "epoch": 0.4014030323602625, + "grad_norm": 0.6052031851252415, + "learning_rate": 6.799090533415225e-06, + "loss": 0.3107, + "step": 8869 + }, + { + "epoch": 0.40144829146865807, + "grad_norm": 0.6420105058443195, + "learning_rate": 6.798406682310009e-06, + "loss": 0.3625, + "step": 8870 + }, + { + "epoch": 0.40149355057705366, + "grad_norm": 0.6665437907809222, + "learning_rate": 6.797722792562558e-06, + "loss": 0.3505, + "step": 8871 + }, + { + "epoch": 0.4015388096854492, + "grad_norm": 0.6927302844427357, + "learning_rate": 6.797038864187564e-06, + "loss": 0.3484, + "step": 8872 + }, + { + "epoch": 0.4015840687938448, + "grad_norm": 0.30249294964986345, + "learning_rate": 6.796354897199726e-06, + "loss": 0.4747, + "step": 8873 + }, + { + "epoch": 0.4016293279022403, + "grad_norm": 0.6182420381926265, + "learning_rate": 6.795670891613737e-06, + "loss": 0.3636, + "step": 8874 + }, + { + "epoch": 0.4016745870106359, + "grad_norm": 0.6310058170885068, + "learning_rate": 6.794986847444296e-06, + "loss": 0.3258, + "step": 8875 + }, + { + "epoch": 0.40171984611903144, + "grad_norm": 0.6888547574317977, + "learning_rate": 6.7943027647061e-06, + "loss": 0.3617, + "step": 8876 + }, + { + "epoch": 0.40176510522742703, + "grad_norm": 0.5791059330086225, + "learning_rate": 6.793618643413848e-06, + "loss": 0.3105, + "step": 8877 + }, + { + "epoch": 0.40181036433582257, + "grad_norm": 0.6450799542232147, + "learning_rate": 6.792934483582242e-06, + "loss": 0.4149, + "step": 8878 + }, + { + "epoch": 0.40185562344421816, + "grad_norm": 0.3069299614041707, + "learning_rate": 6.792250285225978e-06, + "loss": 0.4816, + "step": 8879 + }, + { + "epoch": 0.4019008825526137, + "grad_norm": 0.6801140072717393, + "learning_rate": 6.791566048359761e-06, + "loss": 0.3549, + "step": 8880 + }, + { + "epoch": 0.4019461416610093, + "grad_norm": 0.6316838717075581, + "learning_rate": 6.7908817729982936e-06, + "loss": 0.3048, + "step": 8881 + }, + { + "epoch": 0.4019914007694048, + "grad_norm": 0.6214297573389774, + "learning_rate": 6.790197459156275e-06, + "loss": 0.3788, + "step": 8882 + }, + { + "epoch": 0.4020366598778004, + "grad_norm": 0.6234239462090553, + "learning_rate": 6.789513106848412e-06, + "loss": 0.3267, + "step": 8883 + }, + { + "epoch": 0.402081918986196, + "grad_norm": 0.29946524483125314, + "learning_rate": 6.788828716089407e-06, + "loss": 0.4826, + "step": 8884 + }, + { + "epoch": 0.40212717809459153, + "grad_norm": 0.612541727714678, + "learning_rate": 6.78814428689397e-06, + "loss": 0.3385, + "step": 8885 + }, + { + "epoch": 0.4021724372029871, + "grad_norm": 0.5987562227498454, + "learning_rate": 6.787459819276802e-06, + "loss": 0.3526, + "step": 8886 + }, + { + "epoch": 0.40221769631138266, + "grad_norm": 0.6365973311112314, + "learning_rate": 6.786775313252611e-06, + "loss": 0.3332, + "step": 8887 + }, + { + "epoch": 0.40226295541977825, + "grad_norm": 0.5884045771732902, + "learning_rate": 6.7860907688361084e-06, + "loss": 0.3377, + "step": 8888 + }, + { + "epoch": 0.4023082145281738, + "grad_norm": 0.6385536028054486, + "learning_rate": 6.785406186042e-06, + "loss": 0.3605, + "step": 8889 + }, + { + "epoch": 0.40235347363656937, + "grad_norm": 0.5846199958871605, + "learning_rate": 6.7847215648849964e-06, + "loss": 0.3168, + "step": 8890 + }, + { + "epoch": 0.4023987327449649, + "grad_norm": 0.6891479504414567, + "learning_rate": 6.784036905379807e-06, + "loss": 0.383, + "step": 8891 + }, + { + "epoch": 0.4024439918533605, + "grad_norm": 0.6574568805801104, + "learning_rate": 6.783352207541144e-06, + "loss": 0.3155, + "step": 8892 + }, + { + "epoch": 0.40248925096175603, + "grad_norm": 0.34285635218528, + "learning_rate": 6.782667471383719e-06, + "loss": 0.4899, + "step": 8893 + }, + { + "epoch": 0.4025345100701516, + "grad_norm": 0.5997713023380722, + "learning_rate": 6.7819826969222465e-06, + "loss": 0.3361, + "step": 8894 + }, + { + "epoch": 0.4025797691785472, + "grad_norm": 0.6014580200566753, + "learning_rate": 6.781297884171436e-06, + "loss": 0.3324, + "step": 8895 + }, + { + "epoch": 0.40262502828694274, + "grad_norm": 0.7053990750107237, + "learning_rate": 6.780613033146008e-06, + "loss": 0.3372, + "step": 8896 + }, + { + "epoch": 0.40267028739533833, + "grad_norm": 0.6005162280243345, + "learning_rate": 6.779928143860672e-06, + "loss": 0.3809, + "step": 8897 + }, + { + "epoch": 0.40271554650373387, + "grad_norm": 0.6332156859771381, + "learning_rate": 6.779243216330149e-06, + "loss": 0.3489, + "step": 8898 + }, + { + "epoch": 0.40276080561212946, + "grad_norm": 0.5848052680601269, + "learning_rate": 6.7785582505691525e-06, + "loss": 0.3228, + "step": 8899 + }, + { + "epoch": 0.402806064720525, + "grad_norm": 0.3297464573181845, + "learning_rate": 6.777873246592403e-06, + "loss": 0.4915, + "step": 8900 + }, + { + "epoch": 0.4028513238289206, + "grad_norm": 0.6603865664890434, + "learning_rate": 6.777188204414615e-06, + "loss": 0.3845, + "step": 8901 + }, + { + "epoch": 0.4028965829373161, + "grad_norm": 0.6331108359086557, + "learning_rate": 6.776503124050514e-06, + "loss": 0.3845, + "step": 8902 + }, + { + "epoch": 0.4029418420457117, + "grad_norm": 0.6393073819687393, + "learning_rate": 6.775818005514815e-06, + "loss": 0.3873, + "step": 8903 + }, + { + "epoch": 0.40298710115410724, + "grad_norm": 0.2893071946967361, + "learning_rate": 6.7751328488222414e-06, + "loss": 0.4827, + "step": 8904 + }, + { + "epoch": 0.40303236026250283, + "grad_norm": 0.6510819344466393, + "learning_rate": 6.774447653987515e-06, + "loss": 0.3215, + "step": 8905 + }, + { + "epoch": 0.40307761937089837, + "grad_norm": 0.7675879619243432, + "learning_rate": 6.773762421025359e-06, + "loss": 0.3793, + "step": 8906 + }, + { + "epoch": 0.40312287847929396, + "grad_norm": 0.29502548677594626, + "learning_rate": 6.773077149950494e-06, + "loss": 0.4829, + "step": 8907 + }, + { + "epoch": 0.40316813758768955, + "grad_norm": 0.6446546984632263, + "learning_rate": 6.772391840777648e-06, + "loss": 0.3047, + "step": 8908 + }, + { + "epoch": 0.4032133966960851, + "grad_norm": 0.7347921913071014, + "learning_rate": 6.771706493521546e-06, + "loss": 0.3229, + "step": 8909 + }, + { + "epoch": 0.40325865580448067, + "grad_norm": 0.6707052281228602, + "learning_rate": 6.771021108196912e-06, + "loss": 0.3785, + "step": 8910 + }, + { + "epoch": 0.4033039149128762, + "grad_norm": 0.6264078396574432, + "learning_rate": 6.770335684818472e-06, + "loss": 0.3774, + "step": 8911 + }, + { + "epoch": 0.4033491740212718, + "grad_norm": 0.2943032704082629, + "learning_rate": 6.7696502234009576e-06, + "loss": 0.4636, + "step": 8912 + }, + { + "epoch": 0.40339443312966733, + "grad_norm": 0.6016668486483328, + "learning_rate": 6.768964723959093e-06, + "loss": 0.3458, + "step": 8913 + }, + { + "epoch": 0.4034396922380629, + "grad_norm": 0.8107831458714004, + "learning_rate": 6.768279186507611e-06, + "loss": 0.3216, + "step": 8914 + }, + { + "epoch": 0.40348495134645845, + "grad_norm": 0.640569774825471, + "learning_rate": 6.7675936110612405e-06, + "loss": 0.3378, + "step": 8915 + }, + { + "epoch": 0.40353021045485404, + "grad_norm": 0.5799696440458336, + "learning_rate": 6.766907997634711e-06, + "loss": 0.3228, + "step": 8916 + }, + { + "epoch": 0.4035754695632496, + "grad_norm": 0.6273281359680126, + "learning_rate": 6.766222346242755e-06, + "loss": 0.3377, + "step": 8917 + }, + { + "epoch": 0.40362072867164517, + "grad_norm": 0.8178978298391293, + "learning_rate": 6.765536656900105e-06, + "loss": 0.3666, + "step": 8918 + }, + { + "epoch": 0.40366598778004076, + "grad_norm": 0.318327709742364, + "learning_rate": 6.764850929621496e-06, + "loss": 0.4742, + "step": 8919 + }, + { + "epoch": 0.4037112468884363, + "grad_norm": 0.5859553890996915, + "learning_rate": 6.764165164421661e-06, + "loss": 0.3087, + "step": 8920 + }, + { + "epoch": 0.4037565059968319, + "grad_norm": 0.6387102179408355, + "learning_rate": 6.763479361315334e-06, + "loss": 0.3398, + "step": 8921 + }, + { + "epoch": 0.4038017651052274, + "grad_norm": 0.5772994972957782, + "learning_rate": 6.762793520317251e-06, + "loss": 0.331, + "step": 8922 + }, + { + "epoch": 0.403847024213623, + "grad_norm": 0.5899030881067611, + "learning_rate": 6.7621076414421505e-06, + "loss": 0.3443, + "step": 8923 + }, + { + "epoch": 0.40389228332201854, + "grad_norm": 0.669877351584218, + "learning_rate": 6.761421724704768e-06, + "loss": 0.3731, + "step": 8924 + }, + { + "epoch": 0.40393754243041413, + "grad_norm": 0.6487265557529385, + "learning_rate": 6.760735770119843e-06, + "loss": 0.3242, + "step": 8925 + }, + { + "epoch": 0.40398280153880967, + "grad_norm": 0.6209192812599255, + "learning_rate": 6.7600497777021125e-06, + "loss": 0.311, + "step": 8926 + }, + { + "epoch": 0.40402806064720526, + "grad_norm": 0.6228468323866984, + "learning_rate": 6.7593637474663195e-06, + "loss": 0.2858, + "step": 8927 + }, + { + "epoch": 0.4040733197556008, + "grad_norm": 0.6184542504966413, + "learning_rate": 6.758677679427204e-06, + "loss": 0.369, + "step": 8928 + }, + { + "epoch": 0.4041185788639964, + "grad_norm": 0.3459435067429982, + "learning_rate": 6.757991573599504e-06, + "loss": 0.4819, + "step": 8929 + }, + { + "epoch": 0.40416383797239197, + "grad_norm": 0.6301670896170039, + "learning_rate": 6.7573054299979655e-06, + "loss": 0.3685, + "step": 8930 + }, + { + "epoch": 0.4042090970807875, + "grad_norm": 0.6889789268158945, + "learning_rate": 6.756619248637331e-06, + "loss": 0.3526, + "step": 8931 + }, + { + "epoch": 0.4042543561891831, + "grad_norm": 0.2939816782313439, + "learning_rate": 6.755933029532342e-06, + "loss": 0.4792, + "step": 8932 + }, + { + "epoch": 0.40429961529757863, + "grad_norm": 0.8951588454851417, + "learning_rate": 6.755246772697748e-06, + "loss": 0.3159, + "step": 8933 + }, + { + "epoch": 0.4043448744059742, + "grad_norm": 0.7378167619676134, + "learning_rate": 6.754560478148289e-06, + "loss": 0.3452, + "step": 8934 + }, + { + "epoch": 0.40439013351436975, + "grad_norm": 0.6214978287086871, + "learning_rate": 6.753874145898716e-06, + "loss": 0.3129, + "step": 8935 + }, + { + "epoch": 0.40443539262276534, + "grad_norm": 0.6569656789319982, + "learning_rate": 6.753187775963773e-06, + "loss": 0.3781, + "step": 8936 + }, + { + "epoch": 0.4044806517311609, + "grad_norm": 0.32579131965062763, + "learning_rate": 6.752501368358209e-06, + "loss": 0.4666, + "step": 8937 + }, + { + "epoch": 0.40452591083955647, + "grad_norm": 0.6825464587480135, + "learning_rate": 6.751814923096773e-06, + "loss": 0.368, + "step": 8938 + }, + { + "epoch": 0.404571169947952, + "grad_norm": 0.6420022976480055, + "learning_rate": 6.751128440194216e-06, + "loss": 0.3341, + "step": 8939 + }, + { + "epoch": 0.4046164290563476, + "grad_norm": 0.6656853771714591, + "learning_rate": 6.750441919665286e-06, + "loss": 0.3565, + "step": 8940 + }, + { + "epoch": 0.4046616881647431, + "grad_norm": 0.6230241276435788, + "learning_rate": 6.7497553615247355e-06, + "loss": 0.3591, + "step": 8941 + }, + { + "epoch": 0.4047069472731387, + "grad_norm": 0.29656768184411453, + "learning_rate": 6.749068765787316e-06, + "loss": 0.4804, + "step": 8942 + }, + { + "epoch": 0.4047522063815343, + "grad_norm": 0.6631787295713072, + "learning_rate": 6.748382132467781e-06, + "loss": 0.3531, + "step": 8943 + }, + { + "epoch": 0.40479746548992984, + "grad_norm": 0.616176087322102, + "learning_rate": 6.7476954615808835e-06, + "loss": 0.3612, + "step": 8944 + }, + { + "epoch": 0.40484272459832543, + "grad_norm": 0.6365449640829076, + "learning_rate": 6.747008753141377e-06, + "loss": 0.3362, + "step": 8945 + }, + { + "epoch": 0.40488798370672097, + "grad_norm": 0.646535504292674, + "learning_rate": 6.74632200716402e-06, + "loss": 0.3492, + "step": 8946 + }, + { + "epoch": 0.40493324281511656, + "grad_norm": 0.6643896596439289, + "learning_rate": 6.745635223663565e-06, + "loss": 0.3331, + "step": 8947 + }, + { + "epoch": 0.4049785019235121, + "grad_norm": 0.6066751912169945, + "learning_rate": 6.7449484026547705e-06, + "loss": 0.3664, + "step": 8948 + }, + { + "epoch": 0.4050237610319077, + "grad_norm": 0.6417521544879048, + "learning_rate": 6.744261544152395e-06, + "loss": 0.3496, + "step": 8949 + }, + { + "epoch": 0.4050690201403032, + "grad_norm": 0.6291110101036786, + "learning_rate": 6.743574648171195e-06, + "loss": 0.2984, + "step": 8950 + }, + { + "epoch": 0.4051142792486988, + "grad_norm": 0.3148204187264297, + "learning_rate": 6.7428877147259305e-06, + "loss": 0.4766, + "step": 8951 + }, + { + "epoch": 0.40515953835709434, + "grad_norm": 0.30458788376917484, + "learning_rate": 6.742200743831364e-06, + "loss": 0.4854, + "step": 8952 + }, + { + "epoch": 0.40520479746548993, + "grad_norm": 0.6258531165395406, + "learning_rate": 6.741513735502252e-06, + "loss": 0.3461, + "step": 8953 + }, + { + "epoch": 0.4052500565738855, + "grad_norm": 0.6165429605530012, + "learning_rate": 6.740826689753359e-06, + "loss": 0.3262, + "step": 8954 + }, + { + "epoch": 0.40529531568228105, + "grad_norm": 0.6414593000502342, + "learning_rate": 6.740139606599448e-06, + "loss": 0.2975, + "step": 8955 + }, + { + "epoch": 0.40534057479067664, + "grad_norm": 0.27098158133221006, + "learning_rate": 6.73945248605528e-06, + "loss": 0.493, + "step": 8956 + }, + { + "epoch": 0.4053858338990722, + "grad_norm": 0.6298077983608283, + "learning_rate": 6.738765328135621e-06, + "loss": 0.3464, + "step": 8957 + }, + { + "epoch": 0.40543109300746777, + "grad_norm": 0.6444828431474185, + "learning_rate": 6.7380781328552346e-06, + "loss": 0.3491, + "step": 8958 + }, + { + "epoch": 0.4054763521158633, + "grad_norm": 0.2905457425379055, + "learning_rate": 6.737390900228888e-06, + "loss": 0.4622, + "step": 8959 + }, + { + "epoch": 0.4055216112242589, + "grad_norm": 0.694225612326219, + "learning_rate": 6.736703630271347e-06, + "loss": 0.3944, + "step": 8960 + }, + { + "epoch": 0.40556687033265443, + "grad_norm": 0.6095087960379569, + "learning_rate": 6.736016322997379e-06, + "loss": 0.3612, + "step": 8961 + }, + { + "epoch": 0.40561212944105, + "grad_norm": 0.7793100472578198, + "learning_rate": 6.7353289784217525e-06, + "loss": 0.328, + "step": 8962 + }, + { + "epoch": 0.40565738854944555, + "grad_norm": 0.5994659358964869, + "learning_rate": 6.734641596559234e-06, + "loss": 0.3221, + "step": 8963 + }, + { + "epoch": 0.40570264765784114, + "grad_norm": 0.7179033980482736, + "learning_rate": 6.733954177424598e-06, + "loss": 0.3511, + "step": 8964 + }, + { + "epoch": 0.40574790676623673, + "grad_norm": 0.6262554659821052, + "learning_rate": 6.733266721032609e-06, + "loss": 0.3766, + "step": 8965 + }, + { + "epoch": 0.40579316587463227, + "grad_norm": 0.6368406550673319, + "learning_rate": 6.732579227398043e-06, + "loss": 0.3574, + "step": 8966 + }, + { + "epoch": 0.40583842498302786, + "grad_norm": 0.6136063256982253, + "learning_rate": 6.731891696535671e-06, + "loss": 0.3499, + "step": 8967 + }, + { + "epoch": 0.4058836840914234, + "grad_norm": 0.33916521123273174, + "learning_rate": 6.731204128460265e-06, + "loss": 0.4881, + "step": 8968 + }, + { + "epoch": 0.405928943199819, + "grad_norm": 0.6510734811700759, + "learning_rate": 6.730516523186599e-06, + "loss": 0.3713, + "step": 8969 + }, + { + "epoch": 0.4059742023082145, + "grad_norm": 0.6554821074225733, + "learning_rate": 6.729828880729448e-06, + "loss": 0.3469, + "step": 8970 + }, + { + "epoch": 0.4060194614166101, + "grad_norm": 0.29512133094170623, + "learning_rate": 6.7291412011035866e-06, + "loss": 0.4643, + "step": 8971 + }, + { + "epoch": 0.40606472052500564, + "grad_norm": 0.2946429814813781, + "learning_rate": 6.728453484323791e-06, + "loss": 0.452, + "step": 8972 + }, + { + "epoch": 0.40610997963340123, + "grad_norm": 0.7983353679920422, + "learning_rate": 6.727765730404841e-06, + "loss": 0.3698, + "step": 8973 + }, + { + "epoch": 0.40615523874179676, + "grad_norm": 0.6758477176684954, + "learning_rate": 6.7270779393615095e-06, + "loss": 0.3641, + "step": 8974 + }, + { + "epoch": 0.40620049785019235, + "grad_norm": 0.589628555008071, + "learning_rate": 6.726390111208579e-06, + "loss": 0.2701, + "step": 8975 + }, + { + "epoch": 0.4062457569585879, + "grad_norm": 0.6707201662154986, + "learning_rate": 6.725702245960827e-06, + "loss": 0.2902, + "step": 8976 + }, + { + "epoch": 0.4062910160669835, + "grad_norm": 0.6308128620949878, + "learning_rate": 6.725014343633033e-06, + "loss": 0.3468, + "step": 8977 + }, + { + "epoch": 0.40633627517537907, + "grad_norm": 0.6225704778634961, + "learning_rate": 6.7243264042399795e-06, + "loss": 0.2778, + "step": 8978 + }, + { + "epoch": 0.4063815342837746, + "grad_norm": 0.6543361066301392, + "learning_rate": 6.7236384277964465e-06, + "loss": 0.349, + "step": 8979 + }, + { + "epoch": 0.4064267933921702, + "grad_norm": 0.6259696940928321, + "learning_rate": 6.722950414317218e-06, + "loss": 0.3477, + "step": 8980 + }, + { + "epoch": 0.40647205250056573, + "grad_norm": 0.66971951479067, + "learning_rate": 6.722262363817077e-06, + "loss": 0.3543, + "step": 8981 + }, + { + "epoch": 0.4065173116089613, + "grad_norm": 0.3774940043813065, + "learning_rate": 6.721574276310807e-06, + "loss": 0.4749, + "step": 8982 + }, + { + "epoch": 0.40656257071735685, + "grad_norm": 0.706138680641507, + "learning_rate": 6.720886151813194e-06, + "loss": 0.3383, + "step": 8983 + }, + { + "epoch": 0.40660782982575244, + "grad_norm": 0.6063299043643507, + "learning_rate": 6.720197990339022e-06, + "loss": 0.41, + "step": 8984 + }, + { + "epoch": 0.406653088934148, + "grad_norm": 0.6917969396568446, + "learning_rate": 6.719509791903078e-06, + "loss": 0.3464, + "step": 8985 + }, + { + "epoch": 0.40669834804254357, + "grad_norm": 0.5925119104848456, + "learning_rate": 6.718821556520151e-06, + "loss": 0.3329, + "step": 8986 + }, + { + "epoch": 0.4067436071509391, + "grad_norm": 0.7172368950413045, + "learning_rate": 6.718133284205026e-06, + "loss": 0.3235, + "step": 8987 + }, + { + "epoch": 0.4067888662593347, + "grad_norm": 0.35530670970656764, + "learning_rate": 6.717444974972495e-06, + "loss": 0.482, + "step": 8988 + }, + { + "epoch": 0.4068341253677303, + "grad_norm": 0.6179833011689086, + "learning_rate": 6.716756628837345e-06, + "loss": 0.3467, + "step": 8989 + }, + { + "epoch": 0.4068793844761258, + "grad_norm": 0.6400629806758316, + "learning_rate": 6.716068245814369e-06, + "loss": 0.3698, + "step": 8990 + }, + { + "epoch": 0.4069246435845214, + "grad_norm": 0.5903144512944397, + "learning_rate": 6.715379825918357e-06, + "loss": 0.3034, + "step": 8991 + }, + { + "epoch": 0.40696990269291694, + "grad_norm": 0.5593568955536351, + "learning_rate": 6.714691369164099e-06, + "loss": 0.3332, + "step": 8992 + }, + { + "epoch": 0.40701516180131253, + "grad_norm": 0.6698171715141726, + "learning_rate": 6.714002875566392e-06, + "loss": 0.3597, + "step": 8993 + }, + { + "epoch": 0.40706042090970807, + "grad_norm": 0.4575285327706009, + "learning_rate": 6.713314345140025e-06, + "loss": 0.4941, + "step": 8994 + }, + { + "epoch": 0.40710568001810366, + "grad_norm": 0.29855367789502585, + "learning_rate": 6.712625777899797e-06, + "loss": 0.4714, + "step": 8995 + }, + { + "epoch": 0.4071509391264992, + "grad_norm": 0.6371976573051277, + "learning_rate": 6.7119371738605e-06, + "loss": 0.3387, + "step": 8996 + }, + { + "epoch": 0.4071961982348948, + "grad_norm": 0.5841837035168561, + "learning_rate": 6.711248533036931e-06, + "loss": 0.3558, + "step": 8997 + }, + { + "epoch": 0.4072414573432903, + "grad_norm": 0.5992793271836137, + "learning_rate": 6.710559855443885e-06, + "loss": 0.3371, + "step": 8998 + }, + { + "epoch": 0.4072867164516859, + "grad_norm": 0.6635349205549409, + "learning_rate": 6.709871141096164e-06, + "loss": 0.3865, + "step": 8999 + }, + { + "epoch": 0.4073319755600815, + "grad_norm": 0.5703140692706825, + "learning_rate": 6.709182390008563e-06, + "loss": 0.3479, + "step": 9000 + }, + { + "epoch": 0.40737723466847703, + "grad_norm": 0.3243766357473611, + "learning_rate": 6.70849360219588e-06, + "loss": 0.494, + "step": 9001 + }, + { + "epoch": 0.4074224937768726, + "grad_norm": 0.6563326984838999, + "learning_rate": 6.70780477767292e-06, + "loss": 0.3719, + "step": 9002 + }, + { + "epoch": 0.40746775288526815, + "grad_norm": 0.6227235866029053, + "learning_rate": 6.7071159164544775e-06, + "loss": 0.3754, + "step": 9003 + }, + { + "epoch": 0.40751301199366374, + "grad_norm": 0.6609459209436451, + "learning_rate": 6.706427018555359e-06, + "loss": 0.3696, + "step": 9004 + }, + { + "epoch": 0.4075582711020593, + "grad_norm": 0.5852792648844204, + "learning_rate": 6.705738083990363e-06, + "loss": 0.2979, + "step": 9005 + }, + { + "epoch": 0.40760353021045487, + "grad_norm": 0.30829767078260123, + "learning_rate": 6.705049112774295e-06, + "loss": 0.5131, + "step": 9006 + }, + { + "epoch": 0.4076487893188504, + "grad_norm": 0.6404114897481298, + "learning_rate": 6.704360104921959e-06, + "loss": 0.3456, + "step": 9007 + }, + { + "epoch": 0.407694048427246, + "grad_norm": 0.6660237164001803, + "learning_rate": 6.703671060448158e-06, + "loss": 0.3363, + "step": 9008 + }, + { + "epoch": 0.4077393075356415, + "grad_norm": 0.2834674880371996, + "learning_rate": 6.702981979367699e-06, + "loss": 0.4687, + "step": 9009 + }, + { + "epoch": 0.4077845666440371, + "grad_norm": 0.5858283436495668, + "learning_rate": 6.7022928616953865e-06, + "loss": 0.3304, + "step": 9010 + }, + { + "epoch": 0.40782982575243265, + "grad_norm": 0.5975401010177234, + "learning_rate": 6.701603707446029e-06, + "loss": 0.3362, + "step": 9011 + }, + { + "epoch": 0.40787508486082824, + "grad_norm": 0.6278500455622565, + "learning_rate": 6.7009145166344355e-06, + "loss": 0.3549, + "step": 9012 + }, + { + "epoch": 0.40792034396922383, + "grad_norm": 0.5997105715074218, + "learning_rate": 6.700225289275411e-06, + "loss": 0.3401, + "step": 9013 + }, + { + "epoch": 0.40796560307761937, + "grad_norm": 0.6259148881571266, + "learning_rate": 6.699536025383768e-06, + "loss": 0.3463, + "step": 9014 + }, + { + "epoch": 0.40801086218601496, + "grad_norm": 0.6228649006571026, + "learning_rate": 6.698846724974315e-06, + "loss": 0.335, + "step": 9015 + }, + { + "epoch": 0.4080561212944105, + "grad_norm": 0.30266594936736707, + "learning_rate": 6.6981573880618636e-06, + "loss": 0.4701, + "step": 9016 + }, + { + "epoch": 0.4081013804028061, + "grad_norm": 0.2953912308079833, + "learning_rate": 6.697468014661226e-06, + "loss": 0.465, + "step": 9017 + }, + { + "epoch": 0.4081466395112016, + "grad_norm": 0.6024204480029517, + "learning_rate": 6.696778604787213e-06, + "loss": 0.3318, + "step": 9018 + }, + { + "epoch": 0.4081918986195972, + "grad_norm": 0.6727613447472707, + "learning_rate": 6.69608915845464e-06, + "loss": 0.3546, + "step": 9019 + }, + { + "epoch": 0.40823715772799274, + "grad_norm": 0.6725825317612492, + "learning_rate": 6.69539967567832e-06, + "loss": 0.3267, + "step": 9020 + }, + { + "epoch": 0.40828241683638833, + "grad_norm": 0.6251203031313752, + "learning_rate": 6.694710156473067e-06, + "loss": 0.3287, + "step": 9021 + }, + { + "epoch": 0.40832767594478386, + "grad_norm": 0.5948194157228681, + "learning_rate": 6.694020600853699e-06, + "loss": 0.3493, + "step": 9022 + }, + { + "epoch": 0.40837293505317945, + "grad_norm": 0.6262507575282605, + "learning_rate": 6.69333100883503e-06, + "loss": 0.3588, + "step": 9023 + }, + { + "epoch": 0.40841819416157504, + "grad_norm": 0.35450308304949074, + "learning_rate": 6.692641380431879e-06, + "loss": 0.4705, + "step": 9024 + }, + { + "epoch": 0.4084634532699706, + "grad_norm": 0.607371939659254, + "learning_rate": 6.691951715659063e-06, + "loss": 0.3682, + "step": 9025 + }, + { + "epoch": 0.40850871237836617, + "grad_norm": 0.3412133730192881, + "learning_rate": 6.691262014531401e-06, + "loss": 0.4836, + "step": 9026 + }, + { + "epoch": 0.4085539714867617, + "grad_norm": 0.6366134224531387, + "learning_rate": 6.690572277063711e-06, + "loss": 0.3402, + "step": 9027 + }, + { + "epoch": 0.4085992305951573, + "grad_norm": 0.2912418177907017, + "learning_rate": 6.689882503270818e-06, + "loss": 0.5028, + "step": 9028 + }, + { + "epoch": 0.4086444897035528, + "grad_norm": 0.3163477138902858, + "learning_rate": 6.689192693167539e-06, + "loss": 0.4761, + "step": 9029 + }, + { + "epoch": 0.4086897488119484, + "grad_norm": 0.28719296847973913, + "learning_rate": 6.688502846768697e-06, + "loss": 0.473, + "step": 9030 + }, + { + "epoch": 0.40873500792034395, + "grad_norm": 0.6244954493296053, + "learning_rate": 6.6878129640891135e-06, + "loss": 0.3251, + "step": 9031 + }, + { + "epoch": 0.40878026702873954, + "grad_norm": 0.6516279469831836, + "learning_rate": 6.687123045143613e-06, + "loss": 0.3849, + "step": 9032 + }, + { + "epoch": 0.4088255261371351, + "grad_norm": 0.5998224028305761, + "learning_rate": 6.686433089947022e-06, + "loss": 0.2979, + "step": 9033 + }, + { + "epoch": 0.40887078524553067, + "grad_norm": 0.3103766703711543, + "learning_rate": 6.685743098514161e-06, + "loss": 0.4565, + "step": 9034 + }, + { + "epoch": 0.4089160443539262, + "grad_norm": 0.6139132238441113, + "learning_rate": 6.685053070859861e-06, + "loss": 0.3101, + "step": 9035 + }, + { + "epoch": 0.4089613034623218, + "grad_norm": 0.6160606143251894, + "learning_rate": 6.684363006998944e-06, + "loss": 0.3024, + "step": 9036 + }, + { + "epoch": 0.4090065625707174, + "grad_norm": 0.30752656365509423, + "learning_rate": 6.683672906946239e-06, + "loss": 0.4851, + "step": 9037 + }, + { + "epoch": 0.4090518216791129, + "grad_norm": 0.6513482944547165, + "learning_rate": 6.682982770716575e-06, + "loss": 0.369, + "step": 9038 + }, + { + "epoch": 0.4090970807875085, + "grad_norm": 0.7791012897019023, + "learning_rate": 6.682292598324779e-06, + "loss": 0.3203, + "step": 9039 + }, + { + "epoch": 0.40914233989590404, + "grad_norm": 0.6894452263955075, + "learning_rate": 6.681602389785683e-06, + "loss": 0.3487, + "step": 9040 + }, + { + "epoch": 0.40918759900429963, + "grad_norm": 0.6752996121282719, + "learning_rate": 6.680912145114116e-06, + "loss": 0.3672, + "step": 9041 + }, + { + "epoch": 0.40923285811269516, + "grad_norm": 0.6197637473794768, + "learning_rate": 6.680221864324908e-06, + "loss": 0.3466, + "step": 9042 + }, + { + "epoch": 0.40927811722109075, + "grad_norm": 0.6856247512459379, + "learning_rate": 6.679531547432896e-06, + "loss": 0.3436, + "step": 9043 + }, + { + "epoch": 0.4093233763294863, + "grad_norm": 0.6567848141481791, + "learning_rate": 6.6788411944529064e-06, + "loss": 0.3553, + "step": 9044 + }, + { + "epoch": 0.4093686354378819, + "grad_norm": 0.7058855753044633, + "learning_rate": 6.678150805399777e-06, + "loss": 0.3257, + "step": 9045 + }, + { + "epoch": 0.4094138945462774, + "grad_norm": 0.588293854134537, + "learning_rate": 6.67746038028834e-06, + "loss": 0.3346, + "step": 9046 + }, + { + "epoch": 0.409459153654673, + "grad_norm": 0.6538508362128201, + "learning_rate": 6.676769919133431e-06, + "loss": 0.3763, + "step": 9047 + }, + { + "epoch": 0.4095044127630686, + "grad_norm": 0.4019729609796205, + "learning_rate": 6.6760794219498874e-06, + "loss": 0.4864, + "step": 9048 + }, + { + "epoch": 0.4095496718714641, + "grad_norm": 0.65655875129064, + "learning_rate": 6.675388888752544e-06, + "loss": 0.3817, + "step": 9049 + }, + { + "epoch": 0.4095949309798597, + "grad_norm": 0.6169079869153397, + "learning_rate": 6.674698319556239e-06, + "loss": 0.3708, + "step": 9050 + }, + { + "epoch": 0.40964019008825525, + "grad_norm": 0.6588170636410992, + "learning_rate": 6.674007714375812e-06, + "loss": 0.3638, + "step": 9051 + }, + { + "epoch": 0.40968544919665084, + "grad_norm": 0.6373846978424119, + "learning_rate": 6.673317073226097e-06, + "loss": 0.3221, + "step": 9052 + }, + { + "epoch": 0.4097307083050464, + "grad_norm": 0.6560858708014767, + "learning_rate": 6.672626396121942e-06, + "loss": 0.3713, + "step": 9053 + }, + { + "epoch": 0.40977596741344197, + "grad_norm": 0.6087462390017654, + "learning_rate": 6.671935683078179e-06, + "loss": 0.3882, + "step": 9054 + }, + { + "epoch": 0.4098212265218375, + "grad_norm": 0.705119345393188, + "learning_rate": 6.6712449341096555e-06, + "loss": 0.3496, + "step": 9055 + }, + { + "epoch": 0.4098664856302331, + "grad_norm": 0.6344883167152161, + "learning_rate": 6.67055414923121e-06, + "loss": 0.3491, + "step": 9056 + }, + { + "epoch": 0.4099117447386286, + "grad_norm": 0.6210838758555896, + "learning_rate": 6.669863328457686e-06, + "loss": 0.3344, + "step": 9057 + }, + { + "epoch": 0.4099570038470242, + "grad_norm": 0.3234130847164188, + "learning_rate": 6.6691724718039285e-06, + "loss": 0.4915, + "step": 9058 + }, + { + "epoch": 0.4100022629554198, + "grad_norm": 0.6170558332327496, + "learning_rate": 6.668481579284781e-06, + "loss": 0.3855, + "step": 9059 + }, + { + "epoch": 0.41004752206381534, + "grad_norm": 0.6614143628085448, + "learning_rate": 6.667790650915089e-06, + "loss": 0.3475, + "step": 9060 + }, + { + "epoch": 0.41009278117221093, + "grad_norm": 0.642262702067851, + "learning_rate": 6.667099686709697e-06, + "loss": 0.3502, + "step": 9061 + }, + { + "epoch": 0.41013804028060646, + "grad_norm": 0.6701479057169873, + "learning_rate": 6.666408686683455e-06, + "loss": 0.3367, + "step": 9062 + }, + { + "epoch": 0.41018329938900205, + "grad_norm": 0.5870143826263454, + "learning_rate": 6.665717650851205e-06, + "loss": 0.298, + "step": 9063 + }, + { + "epoch": 0.4102285584973976, + "grad_norm": 0.6748879525929066, + "learning_rate": 6.665026579227802e-06, + "loss": 0.3733, + "step": 9064 + }, + { + "epoch": 0.4102738176057932, + "grad_norm": 0.6586658160921205, + "learning_rate": 6.66433547182809e-06, + "loss": 0.3482, + "step": 9065 + }, + { + "epoch": 0.4103190767141887, + "grad_norm": 0.7078301453617645, + "learning_rate": 6.663644328666921e-06, + "loss": 0.3387, + "step": 9066 + }, + { + "epoch": 0.4103643358225843, + "grad_norm": 0.6306545682547016, + "learning_rate": 6.662953149759144e-06, + "loss": 0.332, + "step": 9067 + }, + { + "epoch": 0.41040959493097984, + "grad_norm": 0.6647220500569193, + "learning_rate": 6.6622619351196115e-06, + "loss": 0.3714, + "step": 9068 + }, + { + "epoch": 0.4104548540393754, + "grad_norm": 0.6229576833143935, + "learning_rate": 6.661570684763175e-06, + "loss": 0.3428, + "step": 9069 + }, + { + "epoch": 0.41050011314777096, + "grad_norm": 0.6352150216178791, + "learning_rate": 6.660879398704689e-06, + "loss": 0.3541, + "step": 9070 + }, + { + "epoch": 0.41054537225616655, + "grad_norm": 0.6452785895743032, + "learning_rate": 6.660188076959004e-06, + "loss": 0.3341, + "step": 9071 + }, + { + "epoch": 0.41059063136456214, + "grad_norm": 0.6412904402714849, + "learning_rate": 6.659496719540976e-06, + "loss": 0.3371, + "step": 9072 + }, + { + "epoch": 0.4106358904729577, + "grad_norm": 0.6635652999800646, + "learning_rate": 6.658805326465462e-06, + "loss": 0.3943, + "step": 9073 + }, + { + "epoch": 0.41068114958135327, + "grad_norm": 0.31807832099944305, + "learning_rate": 6.658113897747315e-06, + "loss": 0.4658, + "step": 9074 + }, + { + "epoch": 0.4107264086897488, + "grad_norm": 0.6611497775536742, + "learning_rate": 6.657422433401392e-06, + "loss": 0.306, + "step": 9075 + }, + { + "epoch": 0.4107716677981444, + "grad_norm": 0.6214667092573483, + "learning_rate": 6.656730933442552e-06, + "loss": 0.354, + "step": 9076 + }, + { + "epoch": 0.4108169269065399, + "grad_norm": 0.6319617771618785, + "learning_rate": 6.656039397885653e-06, + "loss": 0.3566, + "step": 9077 + }, + { + "epoch": 0.4108621860149355, + "grad_norm": 0.6248363665958656, + "learning_rate": 6.6553478267455526e-06, + "loss": 0.3404, + "step": 9078 + }, + { + "epoch": 0.41090744512333105, + "grad_norm": 0.6214439842268216, + "learning_rate": 6.654656220037112e-06, + "loss": 0.3264, + "step": 9079 + }, + { + "epoch": 0.41095270423172664, + "grad_norm": 0.6171175050479654, + "learning_rate": 6.653964577775192e-06, + "loss": 0.3447, + "step": 9080 + }, + { + "epoch": 0.4109979633401222, + "grad_norm": 0.3451405243141214, + "learning_rate": 6.653272899974652e-06, + "loss": 0.4523, + "step": 9081 + }, + { + "epoch": 0.41104322244851776, + "grad_norm": 0.6359812714302097, + "learning_rate": 6.652581186650355e-06, + "loss": 0.3703, + "step": 9082 + }, + { + "epoch": 0.41108848155691335, + "grad_norm": 0.624058737568414, + "learning_rate": 6.651889437817165e-06, + "loss": 0.3389, + "step": 9083 + }, + { + "epoch": 0.4111337406653089, + "grad_norm": 0.574939779007028, + "learning_rate": 6.6511976534899414e-06, + "loss": 0.2987, + "step": 9084 + }, + { + "epoch": 0.4111789997737045, + "grad_norm": 0.6465268244781667, + "learning_rate": 6.650505833683555e-06, + "loss": 0.352, + "step": 9085 + }, + { + "epoch": 0.4112242588821, + "grad_norm": 0.6275208315388503, + "learning_rate": 6.649813978412866e-06, + "loss": 0.3659, + "step": 9086 + }, + { + "epoch": 0.4112695179904956, + "grad_norm": 0.6279978110455504, + "learning_rate": 6.6491220876927406e-06, + "loss": 0.3388, + "step": 9087 + }, + { + "epoch": 0.41131477709889114, + "grad_norm": 0.36609485100695516, + "learning_rate": 6.648430161538047e-06, + "loss": 0.4842, + "step": 9088 + }, + { + "epoch": 0.41136003620728673, + "grad_norm": 0.6285456556219239, + "learning_rate": 6.6477381999636525e-06, + "loss": 0.3161, + "step": 9089 + }, + { + "epoch": 0.41140529531568226, + "grad_norm": 0.6172940666972289, + "learning_rate": 6.647046202984424e-06, + "loss": 0.3104, + "step": 9090 + }, + { + "epoch": 0.41145055442407785, + "grad_norm": 0.266702196880152, + "learning_rate": 6.646354170615232e-06, + "loss": 0.4603, + "step": 9091 + }, + { + "epoch": 0.4114958135324734, + "grad_norm": 0.6172185599650262, + "learning_rate": 6.645662102870944e-06, + "loss": 0.3367, + "step": 9092 + }, + { + "epoch": 0.411541072640869, + "grad_norm": 0.6231255908509027, + "learning_rate": 6.644969999766434e-06, + "loss": 0.3369, + "step": 9093 + }, + { + "epoch": 0.41158633174926457, + "grad_norm": 0.6711560277546211, + "learning_rate": 6.644277861316569e-06, + "loss": 0.2951, + "step": 9094 + }, + { + "epoch": 0.4116315908576601, + "grad_norm": 0.7050844540839191, + "learning_rate": 6.643585687536224e-06, + "loss": 0.3233, + "step": 9095 + }, + { + "epoch": 0.4116768499660557, + "grad_norm": 0.33534558413590837, + "learning_rate": 6.642893478440269e-06, + "loss": 0.4817, + "step": 9096 + }, + { + "epoch": 0.4117221090744512, + "grad_norm": 0.6289465132386788, + "learning_rate": 6.6422012340435796e-06, + "loss": 0.3155, + "step": 9097 + }, + { + "epoch": 0.4117673681828468, + "grad_norm": 0.6164854466612605, + "learning_rate": 6.641508954361029e-06, + "loss": 0.3155, + "step": 9098 + }, + { + "epoch": 0.41181262729124235, + "grad_norm": 0.9583668980218552, + "learning_rate": 6.640816639407494e-06, + "loss": 0.3495, + "step": 9099 + }, + { + "epoch": 0.41185788639963794, + "grad_norm": 0.6494624743247183, + "learning_rate": 6.640124289197845e-06, + "loss": 0.3603, + "step": 9100 + }, + { + "epoch": 0.4119031455080335, + "grad_norm": 0.3167337648889493, + "learning_rate": 6.639431903746967e-06, + "loss": 0.495, + "step": 9101 + }, + { + "epoch": 0.41194840461642906, + "grad_norm": 0.6147842236862651, + "learning_rate": 6.638739483069729e-06, + "loss": 0.4093, + "step": 9102 + }, + { + "epoch": 0.4119936637248246, + "grad_norm": 0.6248505173002057, + "learning_rate": 6.6380470271810146e-06, + "loss": 0.3575, + "step": 9103 + }, + { + "epoch": 0.4120389228332202, + "grad_norm": 0.6509884609769292, + "learning_rate": 6.637354536095699e-06, + "loss": 0.3657, + "step": 9104 + }, + { + "epoch": 0.4120841819416157, + "grad_norm": 0.31781228423799274, + "learning_rate": 6.636662009828665e-06, + "loss": 0.5212, + "step": 9105 + }, + { + "epoch": 0.4121294410500113, + "grad_norm": 0.2928137845304512, + "learning_rate": 6.635969448394789e-06, + "loss": 0.4782, + "step": 9106 + }, + { + "epoch": 0.4121747001584069, + "grad_norm": 0.6442762188793938, + "learning_rate": 6.635276851808955e-06, + "loss": 0.3403, + "step": 9107 + }, + { + "epoch": 0.41221995926680244, + "grad_norm": 0.6580738386452342, + "learning_rate": 6.634584220086043e-06, + "loss": 0.3659, + "step": 9108 + }, + { + "epoch": 0.41226521837519803, + "grad_norm": 0.6581215553798003, + "learning_rate": 6.633891553240938e-06, + "loss": 0.3583, + "step": 9109 + }, + { + "epoch": 0.41231047748359356, + "grad_norm": 0.6474338516977602, + "learning_rate": 6.63319885128852e-06, + "loss": 0.3061, + "step": 9110 + }, + { + "epoch": 0.41235573659198915, + "grad_norm": 0.6093922450524109, + "learning_rate": 6.632506114243676e-06, + "loss": 0.339, + "step": 9111 + }, + { + "epoch": 0.4124009957003847, + "grad_norm": 0.6672804842611628, + "learning_rate": 6.631813342121289e-06, + "loss": 0.3515, + "step": 9112 + }, + { + "epoch": 0.4124462548087803, + "grad_norm": 0.6295667090545991, + "learning_rate": 6.631120534936244e-06, + "loss": 0.341, + "step": 9113 + }, + { + "epoch": 0.4124915139171758, + "grad_norm": 0.6006838717938614, + "learning_rate": 6.6304276927034305e-06, + "loss": 0.312, + "step": 9114 + }, + { + "epoch": 0.4125367730255714, + "grad_norm": 0.7046041718813307, + "learning_rate": 6.629734815437731e-06, + "loss": 0.3529, + "step": 9115 + }, + { + "epoch": 0.41258203213396694, + "grad_norm": 0.6305552420053111, + "learning_rate": 6.629041903154038e-06, + "loss": 0.3505, + "step": 9116 + }, + { + "epoch": 0.4126272912423625, + "grad_norm": 0.6647842074571177, + "learning_rate": 6.628348955867237e-06, + "loss": 0.342, + "step": 9117 + }, + { + "epoch": 0.4126725503507581, + "grad_norm": 0.7025522272858689, + "learning_rate": 6.627655973592216e-06, + "loss": 0.3299, + "step": 9118 + }, + { + "epoch": 0.41271780945915365, + "grad_norm": 0.4316424212966116, + "learning_rate": 6.626962956343868e-06, + "loss": 0.4953, + "step": 9119 + }, + { + "epoch": 0.41276306856754924, + "grad_norm": 0.6497356997555592, + "learning_rate": 6.626269904137086e-06, + "loss": 0.3277, + "step": 9120 + }, + { + "epoch": 0.4128083276759448, + "grad_norm": 0.6221328793676323, + "learning_rate": 6.625576816986754e-06, + "loss": 0.3319, + "step": 9121 + }, + { + "epoch": 0.41285358678434037, + "grad_norm": 0.6172935644265816, + "learning_rate": 6.624883694907772e-06, + "loss": 0.3294, + "step": 9122 + }, + { + "epoch": 0.4128988458927359, + "grad_norm": 0.6632952225923628, + "learning_rate": 6.624190537915028e-06, + "loss": 0.3276, + "step": 9123 + }, + { + "epoch": 0.4129441050011315, + "grad_norm": 0.6007638726533377, + "learning_rate": 6.6234973460234184e-06, + "loss": 0.3668, + "step": 9124 + }, + { + "epoch": 0.412989364109527, + "grad_norm": 0.6670716039159356, + "learning_rate": 6.6228041192478365e-06, + "loss": 0.3211, + "step": 9125 + }, + { + "epoch": 0.4130346232179226, + "grad_norm": 0.608979806865586, + "learning_rate": 6.622110857603179e-06, + "loss": 0.3393, + "step": 9126 + }, + { + "epoch": 0.41307988232631815, + "grad_norm": 0.6191617627583516, + "learning_rate": 6.6214175611043395e-06, + "loss": 0.3359, + "step": 9127 + }, + { + "epoch": 0.41312514143471374, + "grad_norm": 0.7211852075791401, + "learning_rate": 6.620724229766219e-06, + "loss": 0.3426, + "step": 9128 + }, + { + "epoch": 0.4131704005431093, + "grad_norm": 0.3787113096246672, + "learning_rate": 6.62003086360371e-06, + "loss": 0.4728, + "step": 9129 + }, + { + "epoch": 0.41321565965150486, + "grad_norm": 0.7311943998256455, + "learning_rate": 6.6193374626317155e-06, + "loss": 0.3416, + "step": 9130 + }, + { + "epoch": 0.41326091875990045, + "grad_norm": 0.3177175282849726, + "learning_rate": 6.61864402686513e-06, + "loss": 0.4925, + "step": 9131 + }, + { + "epoch": 0.413306177868296, + "grad_norm": 0.2790578219806817, + "learning_rate": 6.617950556318858e-06, + "loss": 0.4782, + "step": 9132 + }, + { + "epoch": 0.4133514369766916, + "grad_norm": 0.7027309997642883, + "learning_rate": 6.617257051007796e-06, + "loss": 0.3752, + "step": 9133 + }, + { + "epoch": 0.4133966960850871, + "grad_norm": 0.6393780180131584, + "learning_rate": 6.616563510946848e-06, + "loss": 0.3496, + "step": 9134 + }, + { + "epoch": 0.4134419551934827, + "grad_norm": 0.593594767525673, + "learning_rate": 6.615869936150914e-06, + "loss": 0.3649, + "step": 9135 + }, + { + "epoch": 0.41348721430187824, + "grad_norm": 0.610098381858523, + "learning_rate": 6.6151763266348975e-06, + "loss": 0.3348, + "step": 9136 + }, + { + "epoch": 0.4135324734102738, + "grad_norm": 0.6090898027798755, + "learning_rate": 6.614482682413703e-06, + "loss": 0.3531, + "step": 9137 + }, + { + "epoch": 0.41357773251866936, + "grad_norm": 0.6430454610132569, + "learning_rate": 6.613789003502236e-06, + "loss": 0.3304, + "step": 9138 + }, + { + "epoch": 0.41362299162706495, + "grad_norm": 0.6193389709370466, + "learning_rate": 6.6130952899153966e-06, + "loss": 0.3471, + "step": 9139 + }, + { + "epoch": 0.4136682507354605, + "grad_norm": 0.6107043710984218, + "learning_rate": 6.6124015416680955e-06, + "loss": 0.3211, + "step": 9140 + }, + { + "epoch": 0.4137135098438561, + "grad_norm": 0.6248234817107684, + "learning_rate": 6.611707758775238e-06, + "loss": 0.3417, + "step": 9141 + }, + { + "epoch": 0.41375876895225167, + "grad_norm": 0.6257364165127264, + "learning_rate": 6.611013941251728e-06, + "loss": 0.3247, + "step": 9142 + }, + { + "epoch": 0.4138040280606472, + "grad_norm": 0.6544488321986621, + "learning_rate": 6.61032008911248e-06, + "loss": 0.3538, + "step": 9143 + }, + { + "epoch": 0.4138492871690428, + "grad_norm": 0.5954957989123458, + "learning_rate": 6.609626202372396e-06, + "loss": 0.3456, + "step": 9144 + }, + { + "epoch": 0.4138945462774383, + "grad_norm": 0.49113248105346125, + "learning_rate": 6.6089322810463895e-06, + "loss": 0.4729, + "step": 9145 + }, + { + "epoch": 0.4139398053858339, + "grad_norm": 0.8460178834179111, + "learning_rate": 6.60823832514937e-06, + "loss": 0.3611, + "step": 9146 + }, + { + "epoch": 0.41398506449422945, + "grad_norm": 0.6319870355690369, + "learning_rate": 6.6075443346962475e-06, + "loss": 0.3432, + "step": 9147 + }, + { + "epoch": 0.41403032360262504, + "grad_norm": 0.6076256702515779, + "learning_rate": 6.606850309701936e-06, + "loss": 0.3163, + "step": 9148 + }, + { + "epoch": 0.4140755827110206, + "grad_norm": 0.696995594690972, + "learning_rate": 6.606156250181346e-06, + "loss": 0.3641, + "step": 9149 + }, + { + "epoch": 0.41412084181941616, + "grad_norm": 0.3384968033801821, + "learning_rate": 6.6054621561493896e-06, + "loss": 0.4977, + "step": 9150 + }, + { + "epoch": 0.4141661009278117, + "grad_norm": 0.6415829878760637, + "learning_rate": 6.604768027620984e-06, + "loss": 0.3514, + "step": 9151 + }, + { + "epoch": 0.4142113600362073, + "grad_norm": 0.5922317216981312, + "learning_rate": 6.60407386461104e-06, + "loss": 0.3517, + "step": 9152 + }, + { + "epoch": 0.4142566191446029, + "grad_norm": 0.33405495227931115, + "learning_rate": 6.603379667134478e-06, + "loss": 0.4755, + "step": 9153 + }, + { + "epoch": 0.4143018782529984, + "grad_norm": 0.6350463815012574, + "learning_rate": 6.602685435206209e-06, + "loss": 0.3481, + "step": 9154 + }, + { + "epoch": 0.414347137361394, + "grad_norm": 0.6168553855298148, + "learning_rate": 6.6019911688411535e-06, + "loss": 0.3343, + "step": 9155 + }, + { + "epoch": 0.41439239646978954, + "grad_norm": 0.7275595796187577, + "learning_rate": 6.601296868054227e-06, + "loss": 0.3139, + "step": 9156 + }, + { + "epoch": 0.4144376555781851, + "grad_norm": 0.3305580939019304, + "learning_rate": 6.600602532860349e-06, + "loss": 0.4838, + "step": 9157 + }, + { + "epoch": 0.41448291468658066, + "grad_norm": 0.5836929686129411, + "learning_rate": 6.599908163274439e-06, + "loss": 0.3148, + "step": 9158 + }, + { + "epoch": 0.41452817379497625, + "grad_norm": 0.5992491283122371, + "learning_rate": 6.599213759311416e-06, + "loss": 0.3648, + "step": 9159 + }, + { + "epoch": 0.4145734329033718, + "grad_norm": 0.6499845187153163, + "learning_rate": 6.598519320986201e-06, + "loss": 0.384, + "step": 9160 + }, + { + "epoch": 0.4146186920117674, + "grad_norm": 0.6498925304476317, + "learning_rate": 6.5978248483137165e-06, + "loss": 0.3766, + "step": 9161 + }, + { + "epoch": 0.4146639511201629, + "grad_norm": 0.6344836570957235, + "learning_rate": 6.597130341308881e-06, + "loss": 0.3851, + "step": 9162 + }, + { + "epoch": 0.4147092102285585, + "grad_norm": 0.31294675135178723, + "learning_rate": 6.5964357999866214e-06, + "loss": 0.4889, + "step": 9163 + }, + { + "epoch": 0.41475446933695403, + "grad_norm": 0.6378095099327816, + "learning_rate": 6.595741224361858e-06, + "loss": 0.3172, + "step": 9164 + }, + { + "epoch": 0.4147997284453496, + "grad_norm": 0.6152584122949394, + "learning_rate": 6.595046614449518e-06, + "loss": 0.3677, + "step": 9165 + }, + { + "epoch": 0.4148449875537452, + "grad_norm": 0.2874984063973487, + "learning_rate": 6.594351970264525e-06, + "loss": 0.4763, + "step": 9166 + }, + { + "epoch": 0.41489024666214075, + "grad_norm": 0.816731893554995, + "learning_rate": 6.593657291821804e-06, + "loss": 0.3501, + "step": 9167 + }, + { + "epoch": 0.41493550577053634, + "grad_norm": 0.30283741282069704, + "learning_rate": 6.592962579136283e-06, + "loss": 0.5067, + "step": 9168 + }, + { + "epoch": 0.4149807648789319, + "grad_norm": 0.6416453864713597, + "learning_rate": 6.592267832222888e-06, + "loss": 0.3793, + "step": 9169 + }, + { + "epoch": 0.41502602398732746, + "grad_norm": 0.6468314186517801, + "learning_rate": 6.591573051096549e-06, + "loss": 0.3798, + "step": 9170 + }, + { + "epoch": 0.415071283095723, + "grad_norm": 0.6383963610937056, + "learning_rate": 6.5908782357721914e-06, + "loss": 0.3458, + "step": 9171 + }, + { + "epoch": 0.4151165422041186, + "grad_norm": 0.6506341832672858, + "learning_rate": 6.590183386264748e-06, + "loss": 0.3717, + "step": 9172 + }, + { + "epoch": 0.4151618013125141, + "grad_norm": 0.7739364454783211, + "learning_rate": 6.5894885025891455e-06, + "loss": 0.3672, + "step": 9173 + }, + { + "epoch": 0.4152070604209097, + "grad_norm": 0.6417639748068534, + "learning_rate": 6.5887935847603204e-06, + "loss": 0.2971, + "step": 9174 + }, + { + "epoch": 0.41525231952930525, + "grad_norm": 0.5913598035198431, + "learning_rate": 6.588098632793197e-06, + "loss": 0.3405, + "step": 9175 + }, + { + "epoch": 0.41529757863770084, + "grad_norm": 0.6306761921126739, + "learning_rate": 6.5874036467027135e-06, + "loss": 0.3168, + "step": 9176 + }, + { + "epoch": 0.4153428377460964, + "grad_norm": 0.6407055478791495, + "learning_rate": 6.5867086265038005e-06, + "loss": 0.3436, + "step": 9177 + }, + { + "epoch": 0.41538809685449196, + "grad_norm": 0.7739259144316215, + "learning_rate": 6.586013572211394e-06, + "loss": 0.3196, + "step": 9178 + }, + { + "epoch": 0.41543335596288755, + "grad_norm": 0.3843724146831492, + "learning_rate": 6.585318483840424e-06, + "loss": 0.4897, + "step": 9179 + }, + { + "epoch": 0.4154786150712831, + "grad_norm": 0.6610425816406361, + "learning_rate": 6.58462336140583e-06, + "loss": 0.3899, + "step": 9180 + }, + { + "epoch": 0.4155238741796787, + "grad_norm": 0.28457472167734965, + "learning_rate": 6.583928204922546e-06, + "loss": 0.4788, + "step": 9181 + }, + { + "epoch": 0.4155691332880742, + "grad_norm": 0.62001978773301, + "learning_rate": 6.5832330144055116e-06, + "loss": 0.3485, + "step": 9182 + }, + { + "epoch": 0.4156143923964698, + "grad_norm": 0.6396613108916812, + "learning_rate": 6.58253778986966e-06, + "loss": 0.3399, + "step": 9183 + }, + { + "epoch": 0.41565965150486534, + "grad_norm": 0.6411496367755108, + "learning_rate": 6.5818425313299325e-06, + "loss": 0.3741, + "step": 9184 + }, + { + "epoch": 0.4157049106132609, + "grad_norm": 0.6754167372367614, + "learning_rate": 6.581147238801268e-06, + "loss": 0.3171, + "step": 9185 + }, + { + "epoch": 0.41575016972165646, + "grad_norm": 0.6254106383490038, + "learning_rate": 6.5804519122986045e-06, + "loss": 0.3366, + "step": 9186 + }, + { + "epoch": 0.41579542883005205, + "grad_norm": 0.6739536124384865, + "learning_rate": 6.5797565518368835e-06, + "loss": 0.3561, + "step": 9187 + }, + { + "epoch": 0.41584068793844764, + "grad_norm": 0.7134668711042326, + "learning_rate": 6.579061157431046e-06, + "loss": 0.3764, + "step": 9188 + }, + { + "epoch": 0.4158859470468432, + "grad_norm": 0.630690784551514, + "learning_rate": 6.578365729096034e-06, + "loss": 0.3168, + "step": 9189 + }, + { + "epoch": 0.41593120615523876, + "grad_norm": 0.631189148468012, + "learning_rate": 6.57767026684679e-06, + "loss": 0.3313, + "step": 9190 + }, + { + "epoch": 0.4159764652636343, + "grad_norm": 0.6650612871222172, + "learning_rate": 6.576974770698259e-06, + "loss": 0.3645, + "step": 9191 + }, + { + "epoch": 0.4160217243720299, + "grad_norm": 0.6459197229536661, + "learning_rate": 6.576279240665381e-06, + "loss": 0.3724, + "step": 9192 + }, + { + "epoch": 0.4160669834804254, + "grad_norm": 0.583243594845825, + "learning_rate": 6.575583676763105e-06, + "loss": 0.3587, + "step": 9193 + }, + { + "epoch": 0.416112242588821, + "grad_norm": 0.6114416733865374, + "learning_rate": 6.574888079006374e-06, + "loss": 0.349, + "step": 9194 + }, + { + "epoch": 0.41615750169721655, + "grad_norm": 0.6310044548859889, + "learning_rate": 6.574192447410136e-06, + "loss": 0.3292, + "step": 9195 + }, + { + "epoch": 0.41620276080561214, + "grad_norm": 0.6890581030769432, + "learning_rate": 6.573496781989336e-06, + "loss": 0.2981, + "step": 9196 + }, + { + "epoch": 0.41624801991400767, + "grad_norm": 0.6077206380083984, + "learning_rate": 6.572801082758923e-06, + "loss": 0.3722, + "step": 9197 + }, + { + "epoch": 0.41629327902240326, + "grad_norm": 0.5266079459598753, + "learning_rate": 6.5721053497338464e-06, + "loss": 0.5054, + "step": 9198 + }, + { + "epoch": 0.4163385381307988, + "grad_norm": 0.6082102498170532, + "learning_rate": 6.571409582929053e-06, + "loss": 0.3137, + "step": 9199 + }, + { + "epoch": 0.4163837972391944, + "grad_norm": 0.6345993046401832, + "learning_rate": 6.570713782359493e-06, + "loss": 0.3673, + "step": 9200 + }, + { + "epoch": 0.41642905634759, + "grad_norm": 0.6354457264023634, + "learning_rate": 6.57001794804012e-06, + "loss": 0.3619, + "step": 9201 + }, + { + "epoch": 0.4164743154559855, + "grad_norm": 0.7378579848599901, + "learning_rate": 6.569322079985881e-06, + "loss": 0.3376, + "step": 9202 + }, + { + "epoch": 0.4165195745643811, + "grad_norm": 0.6618244842604736, + "learning_rate": 6.568626178211732e-06, + "loss": 0.3416, + "step": 9203 + }, + { + "epoch": 0.41656483367277664, + "grad_norm": 0.6361598077473152, + "learning_rate": 6.567930242732624e-06, + "loss": 0.3417, + "step": 9204 + }, + { + "epoch": 0.4166100927811722, + "grad_norm": 0.6876223638053927, + "learning_rate": 6.5672342735635095e-06, + "loss": 0.3632, + "step": 9205 + }, + { + "epoch": 0.41665535188956776, + "grad_norm": 0.3977714891906502, + "learning_rate": 6.566538270719345e-06, + "loss": 0.4684, + "step": 9206 + }, + { + "epoch": 0.41670061099796335, + "grad_norm": 0.31331673993273423, + "learning_rate": 6.565842234215085e-06, + "loss": 0.4653, + "step": 9207 + }, + { + "epoch": 0.4167458701063589, + "grad_norm": 0.7117005769691125, + "learning_rate": 6.5651461640656825e-06, + "loss": 0.3515, + "step": 9208 + }, + { + "epoch": 0.4167911292147545, + "grad_norm": 0.6359816588793948, + "learning_rate": 6.564450060286098e-06, + "loss": 0.3227, + "step": 9209 + }, + { + "epoch": 0.41683638832315, + "grad_norm": 1.259017491550393, + "learning_rate": 6.563753922891284e-06, + "loss": 0.3229, + "step": 9210 + }, + { + "epoch": 0.4168816474315456, + "grad_norm": 0.4617368788687044, + "learning_rate": 6.563057751896204e-06, + "loss": 0.4819, + "step": 9211 + }, + { + "epoch": 0.4169269065399412, + "grad_norm": 0.6432718249738655, + "learning_rate": 6.562361547315811e-06, + "loss": 0.331, + "step": 9212 + }, + { + "epoch": 0.4169721656483367, + "grad_norm": 0.6933318021267154, + "learning_rate": 6.561665309165067e-06, + "loss": 0.3203, + "step": 9213 + }, + { + "epoch": 0.4170174247567323, + "grad_norm": 0.31445652021371545, + "learning_rate": 6.560969037458933e-06, + "loss": 0.4685, + "step": 9214 + }, + { + "epoch": 0.41706268386512785, + "grad_norm": 0.6828889036220073, + "learning_rate": 6.5602727322123675e-06, + "loss": 0.3184, + "step": 9215 + }, + { + "epoch": 0.41710794297352344, + "grad_norm": 0.6778087903452624, + "learning_rate": 6.5595763934403335e-06, + "loss": 0.3864, + "step": 9216 + }, + { + "epoch": 0.417153202081919, + "grad_norm": 0.68361582002995, + "learning_rate": 6.5588800211577915e-06, + "loss": 0.3403, + "step": 9217 + }, + { + "epoch": 0.41719846119031456, + "grad_norm": 0.6222438144782241, + "learning_rate": 6.558183615379708e-06, + "loss": 0.3495, + "step": 9218 + }, + { + "epoch": 0.4172437202987101, + "grad_norm": 0.6665841861318481, + "learning_rate": 6.557487176121042e-06, + "loss": 0.3167, + "step": 9219 + }, + { + "epoch": 0.4172889794071057, + "grad_norm": 0.6534720487634615, + "learning_rate": 6.5567907033967616e-06, + "loss": 0.403, + "step": 9220 + }, + { + "epoch": 0.4173342385155012, + "grad_norm": 0.4525851786579055, + "learning_rate": 6.556094197221828e-06, + "loss": 0.4636, + "step": 9221 + }, + { + "epoch": 0.4173794976238968, + "grad_norm": 0.35344989041189595, + "learning_rate": 6.5553976576112124e-06, + "loss": 0.4762, + "step": 9222 + }, + { + "epoch": 0.4174247567322924, + "grad_norm": 0.6190618498454409, + "learning_rate": 6.554701084579876e-06, + "loss": 0.2829, + "step": 9223 + }, + { + "epoch": 0.41747001584068794, + "grad_norm": 0.6814680694155807, + "learning_rate": 6.554004478142789e-06, + "loss": 0.3461, + "step": 9224 + }, + { + "epoch": 0.4175152749490835, + "grad_norm": 0.67215300592011, + "learning_rate": 6.553307838314919e-06, + "loss": 0.3507, + "step": 9225 + }, + { + "epoch": 0.41756053405747906, + "grad_norm": 0.6642982648054744, + "learning_rate": 6.552611165111233e-06, + "loss": 0.3373, + "step": 9226 + }, + { + "epoch": 0.41760579316587465, + "grad_norm": 0.5308400151907808, + "learning_rate": 6.551914458546702e-06, + "loss": 0.4814, + "step": 9227 + }, + { + "epoch": 0.4176510522742702, + "grad_norm": 0.6098024845830202, + "learning_rate": 6.5512177186362956e-06, + "loss": 0.3097, + "step": 9228 + }, + { + "epoch": 0.4176963113826658, + "grad_norm": 0.3971927840088478, + "learning_rate": 6.5505209453949844e-06, + "loss": 0.4894, + "step": 9229 + }, + { + "epoch": 0.4177415704910613, + "grad_norm": 0.6215690645807083, + "learning_rate": 6.5498241388377415e-06, + "loss": 0.304, + "step": 9230 + }, + { + "epoch": 0.4177868295994569, + "grad_norm": 0.6501272460909904, + "learning_rate": 6.549127298979535e-06, + "loss": 0.3561, + "step": 9231 + }, + { + "epoch": 0.41783208870785243, + "grad_norm": 1.047121293479635, + "learning_rate": 6.5484304258353435e-06, + "loss": 0.3503, + "step": 9232 + }, + { + "epoch": 0.417877347816248, + "grad_norm": 0.6911381850116922, + "learning_rate": 6.547733519420136e-06, + "loss": 0.3353, + "step": 9233 + }, + { + "epoch": 0.41792260692464356, + "grad_norm": 0.6769210876746538, + "learning_rate": 6.54703657974889e-06, + "loss": 0.3237, + "step": 9234 + }, + { + "epoch": 0.41796786603303915, + "grad_norm": 0.6153345028518419, + "learning_rate": 6.546339606836578e-06, + "loss": 0.3453, + "step": 9235 + }, + { + "epoch": 0.41801312514143474, + "grad_norm": 0.6601778241017583, + "learning_rate": 6.545642600698179e-06, + "loss": 0.358, + "step": 9236 + }, + { + "epoch": 0.4180583842498303, + "grad_norm": 0.6643447241178514, + "learning_rate": 6.544945561348665e-06, + "loss": 0.3401, + "step": 9237 + }, + { + "epoch": 0.41810364335822586, + "grad_norm": 0.6757724192115454, + "learning_rate": 6.544248488803017e-06, + "loss": 0.3554, + "step": 9238 + }, + { + "epoch": 0.4181489024666214, + "grad_norm": 0.611764580109547, + "learning_rate": 6.5435513830762125e-06, + "loss": 0.4217, + "step": 9239 + }, + { + "epoch": 0.418194161575017, + "grad_norm": 0.5760886846843914, + "learning_rate": 6.542854244183229e-06, + "loss": 0.3357, + "step": 9240 + }, + { + "epoch": 0.4182394206834125, + "grad_norm": 0.6448729001164268, + "learning_rate": 6.542157072139046e-06, + "loss": 0.3631, + "step": 9241 + }, + { + "epoch": 0.4182846797918081, + "grad_norm": 0.6129535632789035, + "learning_rate": 6.541459866958644e-06, + "loss": 0.3061, + "step": 9242 + }, + { + "epoch": 0.41832993890020365, + "grad_norm": 0.617909176415105, + "learning_rate": 6.540762628657003e-06, + "loss": 0.3362, + "step": 9243 + }, + { + "epoch": 0.41837519800859924, + "grad_norm": 0.6611050290972347, + "learning_rate": 6.5400653572491055e-06, + "loss": 0.3623, + "step": 9244 + }, + { + "epoch": 0.41842045711699477, + "grad_norm": 0.5944838215802871, + "learning_rate": 6.539368052749935e-06, + "loss": 0.3086, + "step": 9245 + }, + { + "epoch": 0.41846571622539036, + "grad_norm": 0.6339489370126075, + "learning_rate": 6.538670715174471e-06, + "loss": 0.3581, + "step": 9246 + }, + { + "epoch": 0.41851097533378595, + "grad_norm": 0.600420415879879, + "learning_rate": 6.537973344537699e-06, + "loss": 0.3388, + "step": 9247 + }, + { + "epoch": 0.4185562344421815, + "grad_norm": 0.6029103116209852, + "learning_rate": 6.537275940854604e-06, + "loss": 0.3529, + "step": 9248 + }, + { + "epoch": 0.4186014935505771, + "grad_norm": 0.6447280673198621, + "learning_rate": 6.536578504140172e-06, + "loss": 0.4934, + "step": 9249 + }, + { + "epoch": 0.4186467526589726, + "grad_norm": 0.6194822194770873, + "learning_rate": 6.535881034409384e-06, + "loss": 0.3363, + "step": 9250 + }, + { + "epoch": 0.4186920117673682, + "grad_norm": 0.6502620940642008, + "learning_rate": 6.535183531677232e-06, + "loss": 0.3372, + "step": 9251 + }, + { + "epoch": 0.41873727087576373, + "grad_norm": 0.6700261335411145, + "learning_rate": 6.534485995958699e-06, + "loss": 0.3933, + "step": 9252 + }, + { + "epoch": 0.4187825299841593, + "grad_norm": 0.349521227256, + "learning_rate": 6.533788427268777e-06, + "loss": 0.492, + "step": 9253 + }, + { + "epoch": 0.41882778909255486, + "grad_norm": 0.6771202574472491, + "learning_rate": 6.533090825622451e-06, + "loss": 0.3542, + "step": 9254 + }, + { + "epoch": 0.41887304820095045, + "grad_norm": 0.6251717433479416, + "learning_rate": 6.532393191034711e-06, + "loss": 0.3166, + "step": 9255 + }, + { + "epoch": 0.418918307309346, + "grad_norm": 0.6941485466388639, + "learning_rate": 6.53169552352055e-06, + "loss": 0.3458, + "step": 9256 + }, + { + "epoch": 0.4189635664177416, + "grad_norm": 0.6536270290477265, + "learning_rate": 6.530997823094956e-06, + "loss": 0.3605, + "step": 9257 + }, + { + "epoch": 0.4190088255261371, + "grad_norm": 0.6279317153068226, + "learning_rate": 6.530300089772918e-06, + "loss": 0.35, + "step": 9258 + }, + { + "epoch": 0.4190540846345327, + "grad_norm": 0.6270067678352912, + "learning_rate": 6.529602323569435e-06, + "loss": 0.3297, + "step": 9259 + }, + { + "epoch": 0.4190993437429283, + "grad_norm": 0.6002916694129556, + "learning_rate": 6.528904524499492e-06, + "loss": 0.3209, + "step": 9260 + }, + { + "epoch": 0.4191446028513238, + "grad_norm": 0.5958028898914056, + "learning_rate": 6.5282066925780896e-06, + "loss": 0.3243, + "step": 9261 + }, + { + "epoch": 0.4191898619597194, + "grad_norm": 0.41948313179471364, + "learning_rate": 6.527508827820217e-06, + "loss": 0.5249, + "step": 9262 + }, + { + "epoch": 0.41923512106811495, + "grad_norm": 0.6650697757035404, + "learning_rate": 6.526810930240872e-06, + "loss": 0.3436, + "step": 9263 + }, + { + "epoch": 0.41928038017651054, + "grad_norm": 0.5925651770758924, + "learning_rate": 6.526112999855049e-06, + "loss": 0.3139, + "step": 9264 + }, + { + "epoch": 0.41932563928490607, + "grad_norm": 0.6675276809606014, + "learning_rate": 6.525415036677745e-06, + "loss": 0.3447, + "step": 9265 + }, + { + "epoch": 0.41937089839330166, + "grad_norm": 0.6131476470944407, + "learning_rate": 6.524717040723956e-06, + "loss": 0.3409, + "step": 9266 + }, + { + "epoch": 0.4194161575016972, + "grad_norm": 0.6353899230609578, + "learning_rate": 6.524019012008681e-06, + "loss": 0.3838, + "step": 9267 + }, + { + "epoch": 0.4194614166100928, + "grad_norm": 0.3245527559211798, + "learning_rate": 6.523320950546919e-06, + "loss": 0.4586, + "step": 9268 + }, + { + "epoch": 0.4195066757184883, + "grad_norm": 0.705392549434997, + "learning_rate": 6.522622856353667e-06, + "loss": 0.366, + "step": 9269 + }, + { + "epoch": 0.4195519348268839, + "grad_norm": 0.3172452665983041, + "learning_rate": 6.521924729443928e-06, + "loss": 0.5068, + "step": 9270 + }, + { + "epoch": 0.4195971939352795, + "grad_norm": 0.6478833118357851, + "learning_rate": 6.521226569832699e-06, + "loss": 0.3742, + "step": 9271 + }, + { + "epoch": 0.41964245304367503, + "grad_norm": 0.6690294381143386, + "learning_rate": 6.520528377534984e-06, + "loss": 0.3379, + "step": 9272 + }, + { + "epoch": 0.4196877121520706, + "grad_norm": 0.5924226382675255, + "learning_rate": 6.519830152565784e-06, + "loss": 0.3108, + "step": 9273 + }, + { + "epoch": 0.41973297126046616, + "grad_norm": 0.8549559792432717, + "learning_rate": 6.5191318949401005e-06, + "loss": 0.3508, + "step": 9274 + }, + { + "epoch": 0.41977823036886175, + "grad_norm": 0.6788708282638739, + "learning_rate": 6.51843360467294e-06, + "loss": 0.3518, + "step": 9275 + }, + { + "epoch": 0.4198234894772573, + "grad_norm": 0.6608690918320347, + "learning_rate": 6.517735281779304e-06, + "loss": 0.3614, + "step": 9276 + }, + { + "epoch": 0.4198687485856529, + "grad_norm": 0.644622363537874, + "learning_rate": 6.517036926274198e-06, + "loss": 0.3568, + "step": 9277 + }, + { + "epoch": 0.4199140076940484, + "grad_norm": 0.672102533674108, + "learning_rate": 6.51633853817263e-06, + "loss": 0.3761, + "step": 9278 + }, + { + "epoch": 0.419959266802444, + "grad_norm": 0.6160561545018294, + "learning_rate": 6.5156401174896e-06, + "loss": 0.317, + "step": 9279 + }, + { + "epoch": 0.42000452591083953, + "grad_norm": 0.6409995525830237, + "learning_rate": 6.514941664240122e-06, + "loss": 0.3363, + "step": 9280 + }, + { + "epoch": 0.4200497850192351, + "grad_norm": 0.6213300786894572, + "learning_rate": 6.5142431784391976e-06, + "loss": 0.2734, + "step": 9281 + }, + { + "epoch": 0.4200950441276307, + "grad_norm": 0.6648642571051393, + "learning_rate": 6.513544660101841e-06, + "loss": 0.3613, + "step": 9282 + }, + { + "epoch": 0.42014030323602625, + "grad_norm": 0.4573516667736759, + "learning_rate": 6.512846109243056e-06, + "loss": 0.4955, + "step": 9283 + }, + { + "epoch": 0.42018556234442184, + "grad_norm": 0.6287135334931294, + "learning_rate": 6.512147525877856e-06, + "loss": 0.3704, + "step": 9284 + }, + { + "epoch": 0.42023082145281737, + "grad_norm": 0.6254628698269683, + "learning_rate": 6.5114489100212485e-06, + "loss": 0.3798, + "step": 9285 + }, + { + "epoch": 0.42027608056121296, + "grad_norm": 0.6000869513094843, + "learning_rate": 6.510750261688246e-06, + "loss": 0.3456, + "step": 9286 + }, + { + "epoch": 0.4203213396696085, + "grad_norm": 0.6583406363645683, + "learning_rate": 6.510051580893861e-06, + "loss": 0.3511, + "step": 9287 + }, + { + "epoch": 0.4203665987780041, + "grad_norm": 0.30321244352810744, + "learning_rate": 6.509352867653106e-06, + "loss": 0.479, + "step": 9288 + }, + { + "epoch": 0.4204118578863996, + "grad_norm": 0.6540125283971915, + "learning_rate": 6.508654121980992e-06, + "loss": 0.3494, + "step": 9289 + }, + { + "epoch": 0.4204571169947952, + "grad_norm": 0.6378658016227312, + "learning_rate": 6.507955343892536e-06, + "loss": 0.3639, + "step": 9290 + }, + { + "epoch": 0.42050237610319074, + "grad_norm": 0.6512258485466873, + "learning_rate": 6.507256533402749e-06, + "loss": 0.3373, + "step": 9291 + }, + { + "epoch": 0.42054763521158633, + "grad_norm": 0.6278434316788836, + "learning_rate": 6.506557690526649e-06, + "loss": 0.3492, + "step": 9292 + }, + { + "epoch": 0.42059289431998187, + "grad_norm": 0.6065740516331564, + "learning_rate": 6.5058588152792516e-06, + "loss": 0.365, + "step": 9293 + }, + { + "epoch": 0.42063815342837746, + "grad_norm": 0.6239073368527323, + "learning_rate": 6.5051599076755735e-06, + "loss": 0.3135, + "step": 9294 + }, + { + "epoch": 0.42068341253677305, + "grad_norm": 0.6390514081870159, + "learning_rate": 6.50446096773063e-06, + "loss": 0.3243, + "step": 9295 + }, + { + "epoch": 0.4207286716451686, + "grad_norm": 0.6657578724741773, + "learning_rate": 6.503761995459443e-06, + "loss": 0.304, + "step": 9296 + }, + { + "epoch": 0.4207739307535642, + "grad_norm": 0.608600570311915, + "learning_rate": 6.503062990877028e-06, + "loss": 0.2998, + "step": 9297 + }, + { + "epoch": 0.4208191898619597, + "grad_norm": 0.5960942254073808, + "learning_rate": 6.502363953998406e-06, + "loss": 0.3535, + "step": 9298 + }, + { + "epoch": 0.4208644489703553, + "grad_norm": 0.5951803109195455, + "learning_rate": 6.501664884838597e-06, + "loss": 0.3158, + "step": 9299 + }, + { + "epoch": 0.42090970807875083, + "grad_norm": 0.3982193169565782, + "learning_rate": 6.500965783412621e-06, + "loss": 0.4864, + "step": 9300 + }, + { + "epoch": 0.4209549671871464, + "grad_norm": 0.6354665260421296, + "learning_rate": 6.5002666497355015e-06, + "loss": 0.3853, + "step": 9301 + }, + { + "epoch": 0.42100022629554196, + "grad_norm": 0.7922319996822186, + "learning_rate": 6.4995674838222575e-06, + "loss": 0.3432, + "step": 9302 + }, + { + "epoch": 0.42104548540393755, + "grad_norm": 0.720572508906418, + "learning_rate": 6.498868285687916e-06, + "loss": 0.3436, + "step": 9303 + }, + { + "epoch": 0.4210907445123331, + "grad_norm": 1.0918756163190484, + "learning_rate": 6.498169055347498e-06, + "loss": 0.3878, + "step": 9304 + }, + { + "epoch": 0.42113600362072867, + "grad_norm": 0.6265199496335899, + "learning_rate": 6.497469792816027e-06, + "loss": 0.3567, + "step": 9305 + }, + { + "epoch": 0.42118126272912426, + "grad_norm": 0.6569177766236907, + "learning_rate": 6.49677049810853e-06, + "loss": 0.3204, + "step": 9306 + }, + { + "epoch": 0.4212265218375198, + "grad_norm": 0.5878224560887056, + "learning_rate": 6.4960711712400314e-06, + "loss": 0.345, + "step": 9307 + }, + { + "epoch": 0.4212717809459154, + "grad_norm": 0.6379857860705639, + "learning_rate": 6.4953718122255584e-06, + "loss": 0.3928, + "step": 9308 + }, + { + "epoch": 0.4213170400543109, + "grad_norm": 0.3598080210324236, + "learning_rate": 6.494672421080139e-06, + "loss": 0.464, + "step": 9309 + }, + { + "epoch": 0.4213622991627065, + "grad_norm": 0.33433311563012347, + "learning_rate": 6.493972997818798e-06, + "loss": 0.501, + "step": 9310 + }, + { + "epoch": 0.42140755827110205, + "grad_norm": 0.6860341505203552, + "learning_rate": 6.493273542456567e-06, + "loss": 0.3916, + "step": 9311 + }, + { + "epoch": 0.42145281737949764, + "grad_norm": 0.7154400037378822, + "learning_rate": 6.492574055008474e-06, + "loss": 0.3437, + "step": 9312 + }, + { + "epoch": 0.42149807648789317, + "grad_norm": 0.6669624091081103, + "learning_rate": 6.491874535489547e-06, + "loss": 0.351, + "step": 9313 + }, + { + "epoch": 0.42154333559628876, + "grad_norm": 0.4255153708413403, + "learning_rate": 6.4911749839148195e-06, + "loss": 0.492, + "step": 9314 + }, + { + "epoch": 0.4215885947046843, + "grad_norm": 0.6582232973062351, + "learning_rate": 6.490475400299321e-06, + "loss": 0.3438, + "step": 9315 + }, + { + "epoch": 0.4216338538130799, + "grad_norm": 0.35842591242246086, + "learning_rate": 6.489775784658083e-06, + "loss": 0.4968, + "step": 9316 + }, + { + "epoch": 0.4216791129214755, + "grad_norm": 0.6233884916153226, + "learning_rate": 6.489076137006141e-06, + "loss": 0.3256, + "step": 9317 + }, + { + "epoch": 0.421724372029871, + "grad_norm": 0.626987885497307, + "learning_rate": 6.488376457358525e-06, + "loss": 0.3612, + "step": 9318 + }, + { + "epoch": 0.4217696311382666, + "grad_norm": 0.7825176726361815, + "learning_rate": 6.487676745730271e-06, + "loss": 0.3438, + "step": 9319 + }, + { + "epoch": 0.42181489024666213, + "grad_norm": 0.7636418718424111, + "learning_rate": 6.4869770021364105e-06, + "loss": 0.3372, + "step": 9320 + }, + { + "epoch": 0.4218601493550577, + "grad_norm": 0.6968989386721643, + "learning_rate": 6.486277226591982e-06, + "loss": 0.3473, + "step": 9321 + }, + { + "epoch": 0.42190540846345326, + "grad_norm": 0.41793635105649984, + "learning_rate": 6.4855774191120215e-06, + "loss": 0.4952, + "step": 9322 + }, + { + "epoch": 0.42195066757184885, + "grad_norm": 0.6678593798332022, + "learning_rate": 6.484877579711565e-06, + "loss": 0.3457, + "step": 9323 + }, + { + "epoch": 0.4219959266802444, + "grad_norm": 0.634953767633606, + "learning_rate": 6.484177708405649e-06, + "loss": 0.34, + "step": 9324 + }, + { + "epoch": 0.42204118578863997, + "grad_norm": 0.3158100492797705, + "learning_rate": 6.4834778052093125e-06, + "loss": 0.482, + "step": 9325 + }, + { + "epoch": 0.4220864448970355, + "grad_norm": 0.5584665390209338, + "learning_rate": 6.482777870137594e-06, + "loss": 0.329, + "step": 9326 + }, + { + "epoch": 0.4221317040054311, + "grad_norm": 0.5845534166457408, + "learning_rate": 6.4820779032055335e-06, + "loss": 0.3376, + "step": 9327 + }, + { + "epoch": 0.42217696311382663, + "grad_norm": 0.6833774180728347, + "learning_rate": 6.481377904428171e-06, + "loss": 0.341, + "step": 9328 + }, + { + "epoch": 0.4222222222222222, + "grad_norm": 0.6828282315122735, + "learning_rate": 6.4806778738205455e-06, + "loss": 0.3291, + "step": 9329 + }, + { + "epoch": 0.4222674813306178, + "grad_norm": 0.613631604631963, + "learning_rate": 6.479977811397702e-06, + "loss": 0.319, + "step": 9330 + }, + { + "epoch": 0.42231274043901335, + "grad_norm": 0.6997442427758623, + "learning_rate": 6.479277717174679e-06, + "loss": 0.3601, + "step": 9331 + }, + { + "epoch": 0.42235799954740894, + "grad_norm": 0.6189173652158538, + "learning_rate": 6.478577591166523e-06, + "loss": 0.3411, + "step": 9332 + }, + { + "epoch": 0.42240325865580447, + "grad_norm": 0.5729779897205061, + "learning_rate": 6.477877433388274e-06, + "loss": 0.2974, + "step": 9333 + }, + { + "epoch": 0.42244851776420006, + "grad_norm": 0.3622437132156431, + "learning_rate": 6.477177243854978e-06, + "loss": 0.4943, + "step": 9334 + }, + { + "epoch": 0.4224937768725956, + "grad_norm": 0.32321408167241117, + "learning_rate": 6.476477022581681e-06, + "loss": 0.483, + "step": 9335 + }, + { + "epoch": 0.4225390359809912, + "grad_norm": 0.6281626093772569, + "learning_rate": 6.475776769583426e-06, + "loss": 0.326, + "step": 9336 + }, + { + "epoch": 0.4225842950893867, + "grad_norm": 0.6403466744410745, + "learning_rate": 6.475076484875262e-06, + "loss": 0.3697, + "step": 9337 + }, + { + "epoch": 0.4226295541977823, + "grad_norm": 0.6502452275171857, + "learning_rate": 6.4743761684722354e-06, + "loss": 0.3769, + "step": 9338 + }, + { + "epoch": 0.42267481330617784, + "grad_norm": 0.5923763294515152, + "learning_rate": 6.4736758203893915e-06, + "loss": 0.339, + "step": 9339 + }, + { + "epoch": 0.42272007241457343, + "grad_norm": 0.6721740786041263, + "learning_rate": 6.472975440641781e-06, + "loss": 0.3564, + "step": 9340 + }, + { + "epoch": 0.422765331522969, + "grad_norm": 0.6428716930000375, + "learning_rate": 6.472275029244452e-06, + "loss": 0.3491, + "step": 9341 + }, + { + "epoch": 0.42281059063136456, + "grad_norm": 0.4617371834957663, + "learning_rate": 6.471574586212454e-06, + "loss": 0.4759, + "step": 9342 + }, + { + "epoch": 0.42285584973976015, + "grad_norm": 0.39874311204630764, + "learning_rate": 6.470874111560837e-06, + "loss": 0.4809, + "step": 9343 + }, + { + "epoch": 0.4229011088481557, + "grad_norm": 0.3015463366870361, + "learning_rate": 6.470173605304655e-06, + "loss": 0.4753, + "step": 9344 + }, + { + "epoch": 0.4229463679565513, + "grad_norm": 0.6940333918436483, + "learning_rate": 6.469473067458956e-06, + "loss": 0.3577, + "step": 9345 + }, + { + "epoch": 0.4229916270649468, + "grad_norm": 0.3984963380341798, + "learning_rate": 6.468772498038795e-06, + "loss": 0.4999, + "step": 9346 + }, + { + "epoch": 0.4230368861733424, + "grad_norm": 0.6775492573141172, + "learning_rate": 6.468071897059222e-06, + "loss": 0.3329, + "step": 9347 + }, + { + "epoch": 0.42308214528173793, + "grad_norm": 0.7675894133760427, + "learning_rate": 6.467371264535295e-06, + "loss": 0.339, + "step": 9348 + }, + { + "epoch": 0.4231274043901335, + "grad_norm": 0.658077152410635, + "learning_rate": 6.466670600482065e-06, + "loss": 0.4015, + "step": 9349 + }, + { + "epoch": 0.42317266349852906, + "grad_norm": 0.6308682728934399, + "learning_rate": 6.465969904914589e-06, + "loss": 0.3805, + "step": 9350 + }, + { + "epoch": 0.42321792260692465, + "grad_norm": 0.5976569284225053, + "learning_rate": 6.4652691778479215e-06, + "loss": 0.3121, + "step": 9351 + }, + { + "epoch": 0.42326318171532024, + "grad_norm": 0.7100140072873342, + "learning_rate": 6.4645684192971195e-06, + "loss": 0.3197, + "step": 9352 + }, + { + "epoch": 0.42330844082371577, + "grad_norm": 0.6518239354126296, + "learning_rate": 6.463867629277241e-06, + "loss": 0.4765, + "step": 9353 + }, + { + "epoch": 0.42335369993211136, + "grad_norm": 0.6269798987646074, + "learning_rate": 6.463166807803342e-06, + "loss": 0.3341, + "step": 9354 + }, + { + "epoch": 0.4233989590405069, + "grad_norm": 0.7638670907037846, + "learning_rate": 6.462465954890482e-06, + "loss": 0.338, + "step": 9355 + }, + { + "epoch": 0.4234442181489025, + "grad_norm": 0.6502171356431934, + "learning_rate": 6.46176507055372e-06, + "loss": 0.3279, + "step": 9356 + }, + { + "epoch": 0.423489477257298, + "grad_norm": 0.33969432269480376, + "learning_rate": 6.461064154808118e-06, + "loss": 0.4782, + "step": 9357 + }, + { + "epoch": 0.4235347363656936, + "grad_norm": 0.7200506948589684, + "learning_rate": 6.460363207668734e-06, + "loss": 0.3244, + "step": 9358 + }, + { + "epoch": 0.42357999547408914, + "grad_norm": 0.59014599078168, + "learning_rate": 6.45966222915063e-06, + "loss": 0.2856, + "step": 9359 + }, + { + "epoch": 0.42362525458248473, + "grad_norm": 0.7047584502504611, + "learning_rate": 6.4589612192688656e-06, + "loss": 0.3552, + "step": 9360 + }, + { + "epoch": 0.42367051369088027, + "grad_norm": 0.6219126607206661, + "learning_rate": 6.458260178038508e-06, + "loss": 0.3337, + "step": 9361 + }, + { + "epoch": 0.42371577279927586, + "grad_norm": 0.6525766470892861, + "learning_rate": 6.457559105474617e-06, + "loss": 0.3628, + "step": 9362 + }, + { + "epoch": 0.4237610319076714, + "grad_norm": 0.49286730889390995, + "learning_rate": 6.456858001592257e-06, + "loss": 0.5076, + "step": 9363 + }, + { + "epoch": 0.423806291016067, + "grad_norm": 0.43228591511802883, + "learning_rate": 6.456156866406493e-06, + "loss": 0.5279, + "step": 9364 + }, + { + "epoch": 0.4238515501244626, + "grad_norm": 0.7548313358238873, + "learning_rate": 6.45545569993239e-06, + "loss": 0.3571, + "step": 9365 + }, + { + "epoch": 0.4238968092328581, + "grad_norm": 0.6385538040486504, + "learning_rate": 6.454754502185015e-06, + "loss": 0.2961, + "step": 9366 + }, + { + "epoch": 0.4239420683412537, + "grad_norm": 0.7150213127800817, + "learning_rate": 6.454053273179435e-06, + "loss": 0.3793, + "step": 9367 + }, + { + "epoch": 0.42398732744964923, + "grad_norm": 0.42403833084216347, + "learning_rate": 6.453352012930713e-06, + "loss": 0.4684, + "step": 9368 + }, + { + "epoch": 0.4240325865580448, + "grad_norm": 0.8437666534491314, + "learning_rate": 6.452650721453921e-06, + "loss": 0.4078, + "step": 9369 + }, + { + "epoch": 0.42407784566644036, + "grad_norm": 0.44738550576229486, + "learning_rate": 6.451949398764127e-06, + "loss": 0.5022, + "step": 9370 + }, + { + "epoch": 0.42412310477483595, + "grad_norm": 0.6883373927932057, + "learning_rate": 6.451248044876399e-06, + "loss": 0.3763, + "step": 9371 + }, + { + "epoch": 0.4241683638832315, + "grad_norm": 0.6465158845984057, + "learning_rate": 6.450546659805807e-06, + "loss": 0.3658, + "step": 9372 + }, + { + "epoch": 0.42421362299162707, + "grad_norm": 0.5976605595425212, + "learning_rate": 6.449845243567424e-06, + "loss": 0.3262, + "step": 9373 + }, + { + "epoch": 0.4242588821000226, + "grad_norm": 0.6650479113683798, + "learning_rate": 6.449143796176318e-06, + "loss": 0.3406, + "step": 9374 + }, + { + "epoch": 0.4243041412084182, + "grad_norm": 0.6812372296874605, + "learning_rate": 6.448442317647563e-06, + "loss": 0.3514, + "step": 9375 + }, + { + "epoch": 0.4243494003168138, + "grad_norm": 0.5904246462988874, + "learning_rate": 6.447740807996232e-06, + "loss": 0.3203, + "step": 9376 + }, + { + "epoch": 0.4243946594252093, + "grad_norm": 0.6512807613106717, + "learning_rate": 6.447039267237397e-06, + "loss": 0.3109, + "step": 9377 + }, + { + "epoch": 0.4244399185336049, + "grad_norm": 0.7020948553285277, + "learning_rate": 6.446337695386132e-06, + "loss": 0.3841, + "step": 9378 + }, + { + "epoch": 0.42448517764200044, + "grad_norm": 0.6360695297590829, + "learning_rate": 6.445636092457512e-06, + "loss": 0.3559, + "step": 9379 + }, + { + "epoch": 0.42453043675039603, + "grad_norm": 0.6904837117783633, + "learning_rate": 6.444934458466614e-06, + "loss": 0.303, + "step": 9380 + }, + { + "epoch": 0.42457569585879157, + "grad_norm": 0.7228940949355944, + "learning_rate": 6.444232793428511e-06, + "loss": 0.3771, + "step": 9381 + }, + { + "epoch": 0.42462095496718716, + "grad_norm": 0.6104398617473498, + "learning_rate": 6.4435310973582795e-06, + "loss": 0.3762, + "step": 9382 + }, + { + "epoch": 0.4246662140755827, + "grad_norm": 0.579187174280872, + "learning_rate": 6.442829370271e-06, + "loss": 0.3394, + "step": 9383 + }, + { + "epoch": 0.4247114731839783, + "grad_norm": 0.7785916025725149, + "learning_rate": 6.442127612181747e-06, + "loss": 0.3552, + "step": 9384 + }, + { + "epoch": 0.4247567322923738, + "grad_norm": 0.6105297157822821, + "learning_rate": 6.441425823105603e-06, + "loss": 0.3298, + "step": 9385 + }, + { + "epoch": 0.4248019914007694, + "grad_norm": 0.6995238406533221, + "learning_rate": 6.440724003057643e-06, + "loss": 0.3274, + "step": 9386 + }, + { + "epoch": 0.42484725050916494, + "grad_norm": 0.6808658548787668, + "learning_rate": 6.440022152052951e-06, + "loss": 0.3037, + "step": 9387 + }, + { + "epoch": 0.42489250961756053, + "grad_norm": 0.6164624205567748, + "learning_rate": 6.4393202701066046e-06, + "loss": 0.3627, + "step": 9388 + }, + { + "epoch": 0.4249377687259561, + "grad_norm": 0.5725999624006355, + "learning_rate": 6.4386183572336854e-06, + "loss": 0.4658, + "step": 9389 + }, + { + "epoch": 0.42498302783435166, + "grad_norm": 0.6049117002644915, + "learning_rate": 6.437916413449278e-06, + "loss": 0.3545, + "step": 9390 + }, + { + "epoch": 0.42502828694274725, + "grad_norm": 0.6477856887287778, + "learning_rate": 6.437214438768462e-06, + "loss": 0.3626, + "step": 9391 + }, + { + "epoch": 0.4250735460511428, + "grad_norm": 0.6093961114293333, + "learning_rate": 6.436512433206321e-06, + "loss": 0.3455, + "step": 9392 + }, + { + "epoch": 0.42511880515953837, + "grad_norm": 0.6660332275889961, + "learning_rate": 6.435810396777941e-06, + "loss": 0.3519, + "step": 9393 + }, + { + "epoch": 0.4251640642679339, + "grad_norm": 0.6843726330753063, + "learning_rate": 6.435108329498404e-06, + "loss": 0.3201, + "step": 9394 + }, + { + "epoch": 0.4252093233763295, + "grad_norm": 0.6189098497854676, + "learning_rate": 6.434406231382797e-06, + "loss": 0.3305, + "step": 9395 + }, + { + "epoch": 0.42525458248472503, + "grad_norm": 0.6627815625904901, + "learning_rate": 6.433704102446207e-06, + "loss": 0.3814, + "step": 9396 + }, + { + "epoch": 0.4252998415931206, + "grad_norm": 0.6338700019878458, + "learning_rate": 6.433001942703717e-06, + "loss": 0.3447, + "step": 9397 + }, + { + "epoch": 0.42534510070151615, + "grad_norm": 0.4237312118309754, + "learning_rate": 6.432299752170419e-06, + "loss": 0.4892, + "step": 9398 + }, + { + "epoch": 0.42539035980991174, + "grad_norm": 0.37646514307108286, + "learning_rate": 6.431597530861396e-06, + "loss": 0.5054, + "step": 9399 + }, + { + "epoch": 0.42543561891830733, + "grad_norm": 0.7563459022781303, + "learning_rate": 6.430895278791739e-06, + "loss": 0.3495, + "step": 9400 + }, + { + "epoch": 0.42548087802670287, + "grad_norm": 0.6547677278910567, + "learning_rate": 6.4301929959765375e-06, + "loss": 0.3455, + "step": 9401 + }, + { + "epoch": 0.42552613713509846, + "grad_norm": 0.6676633441951189, + "learning_rate": 6.429490682430881e-06, + "loss": 0.3177, + "step": 9402 + }, + { + "epoch": 0.425571396243494, + "grad_norm": 0.36912778358643694, + "learning_rate": 6.42878833816986e-06, + "loss": 0.4467, + "step": 9403 + }, + { + "epoch": 0.4256166553518896, + "grad_norm": 0.7687649341155516, + "learning_rate": 6.428085963208567e-06, + "loss": 0.3217, + "step": 9404 + }, + { + "epoch": 0.4256619144602851, + "grad_norm": 0.6299817488297312, + "learning_rate": 6.427383557562091e-06, + "loss": 0.3548, + "step": 9405 + }, + { + "epoch": 0.4257071735686807, + "grad_norm": 0.624422589718829, + "learning_rate": 6.426681121245527e-06, + "loss": 0.3691, + "step": 9406 + }, + { + "epoch": 0.42575243267707624, + "grad_norm": 0.6652405335093032, + "learning_rate": 6.4259786542739676e-06, + "loss": 0.3522, + "step": 9407 + }, + { + "epoch": 0.42579769178547183, + "grad_norm": 0.6581907622201878, + "learning_rate": 6.425276156662506e-06, + "loss": 0.4053, + "step": 9408 + }, + { + "epoch": 0.42584295089386737, + "grad_norm": 0.6736635664062993, + "learning_rate": 6.424573628426239e-06, + "loss": 0.3439, + "step": 9409 + }, + { + "epoch": 0.42588821000226296, + "grad_norm": 0.35440063532291827, + "learning_rate": 6.423871069580256e-06, + "loss": 0.4577, + "step": 9410 + }, + { + "epoch": 0.42593346911065855, + "grad_norm": 0.6730631122694064, + "learning_rate": 6.423168480139661e-06, + "loss": 0.3462, + "step": 9411 + }, + { + "epoch": 0.4259787282190541, + "grad_norm": 0.2975473247448006, + "learning_rate": 6.4224658601195445e-06, + "loss": 0.4678, + "step": 9412 + }, + { + "epoch": 0.42602398732744967, + "grad_norm": 0.7888191764002732, + "learning_rate": 6.4217632095350046e-06, + "loss": 0.3254, + "step": 9413 + }, + { + "epoch": 0.4260692464358452, + "grad_norm": 0.6764281596247608, + "learning_rate": 6.421060528401141e-06, + "loss": 0.3372, + "step": 9414 + }, + { + "epoch": 0.4261145055442408, + "grad_norm": 0.6446197101185022, + "learning_rate": 6.42035781673305e-06, + "loss": 0.3741, + "step": 9415 + }, + { + "epoch": 0.42615976465263633, + "grad_norm": 0.6554610625181327, + "learning_rate": 6.419655074545833e-06, + "loss": 0.3316, + "step": 9416 + }, + { + "epoch": 0.4262050237610319, + "grad_norm": 0.7530874118558856, + "learning_rate": 6.41895230185459e-06, + "loss": 0.3527, + "step": 9417 + }, + { + "epoch": 0.42625028286942745, + "grad_norm": 0.670199306873179, + "learning_rate": 6.418249498674417e-06, + "loss": 0.3683, + "step": 9418 + }, + { + "epoch": 0.42629554197782304, + "grad_norm": 0.36679226234773377, + "learning_rate": 6.41754666502042e-06, + "loss": 0.4751, + "step": 9419 + }, + { + "epoch": 0.4263408010862186, + "grad_norm": 0.6648165630423366, + "learning_rate": 6.416843800907698e-06, + "loss": 0.3224, + "step": 9420 + }, + { + "epoch": 0.42638606019461417, + "grad_norm": 0.34829911958308424, + "learning_rate": 6.416140906351355e-06, + "loss": 0.4722, + "step": 9421 + }, + { + "epoch": 0.4264313193030097, + "grad_norm": 0.6661627189350696, + "learning_rate": 6.4154379813664926e-06, + "loss": 0.3925, + "step": 9422 + }, + { + "epoch": 0.4264765784114053, + "grad_norm": 0.6552253402331372, + "learning_rate": 6.4147350259682155e-06, + "loss": 0.3515, + "step": 9423 + }, + { + "epoch": 0.4265218375198009, + "grad_norm": 0.6309157437493499, + "learning_rate": 6.414032040171627e-06, + "loss": 0.3435, + "step": 9424 + }, + { + "epoch": 0.4265670966281964, + "grad_norm": 0.6562280887615455, + "learning_rate": 6.413329023991834e-06, + "loss": 0.3759, + "step": 9425 + }, + { + "epoch": 0.426612355736592, + "grad_norm": 0.6730454798509388, + "learning_rate": 6.412625977443939e-06, + "loss": 0.3487, + "step": 9426 + }, + { + "epoch": 0.42665761484498754, + "grad_norm": 0.3568611834228988, + "learning_rate": 6.411922900543053e-06, + "loss": 0.5055, + "step": 9427 + }, + { + "epoch": 0.42670287395338313, + "grad_norm": 0.3593917177276333, + "learning_rate": 6.411219793304278e-06, + "loss": 0.4881, + "step": 9428 + }, + { + "epoch": 0.42674813306177867, + "grad_norm": 0.6544861807828374, + "learning_rate": 6.410516655742725e-06, + "loss": 0.3341, + "step": 9429 + }, + { + "epoch": 0.42679339217017426, + "grad_norm": 0.655777602680432, + "learning_rate": 6.4098134878735005e-06, + "loss": 0.3393, + "step": 9430 + }, + { + "epoch": 0.4268386512785698, + "grad_norm": 0.6226018999883778, + "learning_rate": 6.409110289711715e-06, + "loss": 0.2939, + "step": 9431 + }, + { + "epoch": 0.4268839103869654, + "grad_norm": 0.7018429768891283, + "learning_rate": 6.4084070612724765e-06, + "loss": 0.364, + "step": 9432 + }, + { + "epoch": 0.4269291694953609, + "grad_norm": 0.34061795632785796, + "learning_rate": 6.407703802570896e-06, + "loss": 0.4937, + "step": 9433 + }, + { + "epoch": 0.4269744286037565, + "grad_norm": 0.7167933010917638, + "learning_rate": 6.407000513622083e-06, + "loss": 0.3767, + "step": 9434 + }, + { + "epoch": 0.4270196877121521, + "grad_norm": 0.643637606655535, + "learning_rate": 6.4062971944411514e-06, + "loss": 0.3876, + "step": 9435 + }, + { + "epoch": 0.42706494682054763, + "grad_norm": 0.6931332491271285, + "learning_rate": 6.405593845043212e-06, + "loss": 0.3753, + "step": 9436 + }, + { + "epoch": 0.4271102059289432, + "grad_norm": 0.3162355029302319, + "learning_rate": 6.4048904654433785e-06, + "loss": 0.4618, + "step": 9437 + }, + { + "epoch": 0.42715546503733876, + "grad_norm": 0.7194911038746254, + "learning_rate": 6.4041870556567645e-06, + "loss": 0.347, + "step": 9438 + }, + { + "epoch": 0.42720072414573435, + "grad_norm": 0.6319541792320363, + "learning_rate": 6.4034836156984805e-06, + "loss": 0.3973, + "step": 9439 + }, + { + "epoch": 0.4272459832541299, + "grad_norm": 0.6732680070654008, + "learning_rate": 6.4027801455836466e-06, + "loss": 0.3761, + "step": 9440 + }, + { + "epoch": 0.42729124236252547, + "grad_norm": 0.5858062417249877, + "learning_rate": 6.402076645327374e-06, + "loss": 0.3256, + "step": 9441 + }, + { + "epoch": 0.427336501470921, + "grad_norm": 0.6912276975662498, + "learning_rate": 6.401373114944781e-06, + "loss": 0.3255, + "step": 9442 + }, + { + "epoch": 0.4273817605793166, + "grad_norm": 0.5969928258655933, + "learning_rate": 6.400669554450985e-06, + "loss": 0.3305, + "step": 9443 + }, + { + "epoch": 0.42742701968771213, + "grad_norm": 0.3981362523108663, + "learning_rate": 6.3999659638611e-06, + "loss": 0.4703, + "step": 9444 + }, + { + "epoch": 0.4274722787961077, + "grad_norm": 0.6625477549277766, + "learning_rate": 6.399262343190247e-06, + "loss": 0.3542, + "step": 9445 + }, + { + "epoch": 0.4275175379045033, + "grad_norm": 1.0213337138421563, + "learning_rate": 6.398558692453545e-06, + "loss": 0.3116, + "step": 9446 + }, + { + "epoch": 0.42756279701289884, + "grad_norm": 0.6018532829667804, + "learning_rate": 6.397855011666109e-06, + "loss": 0.3276, + "step": 9447 + }, + { + "epoch": 0.42760805612129443, + "grad_norm": 0.6584790078653553, + "learning_rate": 6.397151300843065e-06, + "loss": 0.3211, + "step": 9448 + }, + { + "epoch": 0.42765331522968997, + "grad_norm": 1.0077965139681038, + "learning_rate": 6.396447559999528e-06, + "loss": 0.3514, + "step": 9449 + }, + { + "epoch": 0.42769857433808556, + "grad_norm": 0.6114458956301414, + "learning_rate": 6.3957437891506236e-06, + "loss": 0.3492, + "step": 9450 + }, + { + "epoch": 0.4277438334464811, + "grad_norm": 0.3438870869900401, + "learning_rate": 6.395039988311472e-06, + "loss": 0.4868, + "step": 9451 + }, + { + "epoch": 0.4277890925548767, + "grad_norm": 0.6191567022098997, + "learning_rate": 6.394336157497195e-06, + "loss": 0.3622, + "step": 9452 + }, + { + "epoch": 0.4278343516632722, + "grad_norm": 0.654114259431335, + "learning_rate": 6.393632296722916e-06, + "loss": 0.3345, + "step": 9453 + }, + { + "epoch": 0.4278796107716678, + "grad_norm": 0.631728787366428, + "learning_rate": 6.39292840600376e-06, + "loss": 0.3388, + "step": 9454 + }, + { + "epoch": 0.42792486988006334, + "grad_norm": 0.6681836940210151, + "learning_rate": 6.39222448535485e-06, + "loss": 0.3387, + "step": 9455 + }, + { + "epoch": 0.42797012898845893, + "grad_norm": 0.6476944324988394, + "learning_rate": 6.3915205347913124e-06, + "loss": 0.3091, + "step": 9456 + }, + { + "epoch": 0.42801538809685447, + "grad_norm": 0.5720201070992996, + "learning_rate": 6.3908165543282706e-06, + "loss": 0.3336, + "step": 9457 + }, + { + "epoch": 0.42806064720525006, + "grad_norm": 0.5846592923118611, + "learning_rate": 6.390112543980854e-06, + "loss": 0.2851, + "step": 9458 + }, + { + "epoch": 0.42810590631364565, + "grad_norm": 0.6430948603061865, + "learning_rate": 6.389408503764188e-06, + "loss": 0.3803, + "step": 9459 + }, + { + "epoch": 0.4281511654220412, + "grad_norm": 0.6632950837946977, + "learning_rate": 6.3887044336934005e-06, + "loss": 0.3493, + "step": 9460 + }, + { + "epoch": 0.42819642453043677, + "grad_norm": 0.6500691181724101, + "learning_rate": 6.38800033378362e-06, + "loss": 0.3627, + "step": 9461 + }, + { + "epoch": 0.4282416836388323, + "grad_norm": 0.6254439914297663, + "learning_rate": 6.387296204049975e-06, + "loss": 0.3479, + "step": 9462 + }, + { + "epoch": 0.4282869427472279, + "grad_norm": 0.6662471119654548, + "learning_rate": 6.386592044507595e-06, + "loss": 0.3588, + "step": 9463 + }, + { + "epoch": 0.42833220185562343, + "grad_norm": 0.6723755867111733, + "learning_rate": 6.385887855171611e-06, + "loss": 0.3142, + "step": 9464 + }, + { + "epoch": 0.428377460964019, + "grad_norm": 0.6204698827060886, + "learning_rate": 6.3851836360571525e-06, + "loss": 0.3371, + "step": 9465 + }, + { + "epoch": 0.42842272007241455, + "grad_norm": 0.7133235517253447, + "learning_rate": 6.384479387179353e-06, + "loss": 0.3384, + "step": 9466 + }, + { + "epoch": 0.42846797918081014, + "grad_norm": 0.37723556554414667, + "learning_rate": 6.383775108553344e-06, + "loss": 0.4872, + "step": 9467 + }, + { + "epoch": 0.4285132382892057, + "grad_norm": 0.7365255621440524, + "learning_rate": 6.383070800194257e-06, + "loss": 0.372, + "step": 9468 + }, + { + "epoch": 0.42855849739760127, + "grad_norm": 0.5662704539748532, + "learning_rate": 6.382366462117227e-06, + "loss": 0.3212, + "step": 9469 + }, + { + "epoch": 0.42860375650599686, + "grad_norm": 0.6703587861944948, + "learning_rate": 6.381662094337385e-06, + "loss": 0.3255, + "step": 9470 + }, + { + "epoch": 0.4286490156143924, + "grad_norm": 0.6570286513757565, + "learning_rate": 6.380957696869872e-06, + "loss": 0.3454, + "step": 9471 + }, + { + "epoch": 0.428694274722788, + "grad_norm": 0.653602329680775, + "learning_rate": 6.380253269729816e-06, + "loss": 0.3366, + "step": 9472 + }, + { + "epoch": 0.4287395338311835, + "grad_norm": 0.6247528482411291, + "learning_rate": 6.379548812932358e-06, + "loss": 0.3405, + "step": 9473 + }, + { + "epoch": 0.4287847929395791, + "grad_norm": 0.6564823764144673, + "learning_rate": 6.3788443264926325e-06, + "loss": 0.3396, + "step": 9474 + }, + { + "epoch": 0.42883005204797464, + "grad_norm": 0.6338050514675024, + "learning_rate": 6.378139810425777e-06, + "loss": 0.3855, + "step": 9475 + }, + { + "epoch": 0.42887531115637023, + "grad_norm": 0.6320833873764625, + "learning_rate": 6.37743526474693e-06, + "loss": 0.343, + "step": 9476 + }, + { + "epoch": 0.42892057026476577, + "grad_norm": 0.6692924545788045, + "learning_rate": 6.37673068947123e-06, + "loss": 0.3721, + "step": 9477 + }, + { + "epoch": 0.42896582937316136, + "grad_norm": 0.6481553878042078, + "learning_rate": 6.376026084613813e-06, + "loss": 0.3149, + "step": 9478 + }, + { + "epoch": 0.4290110884815569, + "grad_norm": 0.6127879848183435, + "learning_rate": 6.375321450189826e-06, + "loss": 0.3226, + "step": 9479 + }, + { + "epoch": 0.4290563475899525, + "grad_norm": 0.6519182770593378, + "learning_rate": 6.374616786214402e-06, + "loss": 0.3491, + "step": 9480 + }, + { + "epoch": 0.429101606698348, + "grad_norm": 0.6238691493270582, + "learning_rate": 6.373912092702686e-06, + "loss": 0.339, + "step": 9481 + }, + { + "epoch": 0.4291468658067436, + "grad_norm": 0.6332219367972534, + "learning_rate": 6.3732073696698194e-06, + "loss": 0.312, + "step": 9482 + }, + { + "epoch": 0.4291921249151392, + "grad_norm": 0.3263672533628197, + "learning_rate": 6.372502617130942e-06, + "loss": 0.4572, + "step": 9483 + }, + { + "epoch": 0.42923738402353473, + "grad_norm": 0.5997874965565261, + "learning_rate": 6.371797835101201e-06, + "loss": 0.352, + "step": 9484 + }, + { + "epoch": 0.4292826431319303, + "grad_norm": 0.31386368068126863, + "learning_rate": 6.371093023595736e-06, + "loss": 0.4785, + "step": 9485 + }, + { + "epoch": 0.42932790224032585, + "grad_norm": 0.5963847286606663, + "learning_rate": 6.370388182629693e-06, + "loss": 0.3197, + "step": 9486 + }, + { + "epoch": 0.42937316134872144, + "grad_norm": 0.6467820210257044, + "learning_rate": 6.3696833122182175e-06, + "loss": 0.3258, + "step": 9487 + }, + { + "epoch": 0.429418420457117, + "grad_norm": 0.601051314895126, + "learning_rate": 6.368978412376456e-06, + "loss": 0.3583, + "step": 9488 + }, + { + "epoch": 0.42946367956551257, + "grad_norm": 0.5897040599319083, + "learning_rate": 6.3682734831195495e-06, + "loss": 0.3357, + "step": 9489 + }, + { + "epoch": 0.4295089386739081, + "grad_norm": 0.5789772461006665, + "learning_rate": 6.367568524462651e-06, + "loss": 0.2893, + "step": 9490 + }, + { + "epoch": 0.4295541977823037, + "grad_norm": 0.6720690671844922, + "learning_rate": 6.366863536420903e-06, + "loss": 0.3493, + "step": 9491 + }, + { + "epoch": 0.4295994568906992, + "grad_norm": 0.9106057929009909, + "learning_rate": 6.3661585190094555e-06, + "loss": 0.3756, + "step": 9492 + }, + { + "epoch": 0.4296447159990948, + "grad_norm": 0.6571383440927613, + "learning_rate": 6.365453472243458e-06, + "loss": 0.3645, + "step": 9493 + }, + { + "epoch": 0.4296899751074904, + "grad_norm": 0.6064326061612422, + "learning_rate": 6.36474839613806e-06, + "loss": 0.3029, + "step": 9494 + }, + { + "epoch": 0.42973523421588594, + "grad_norm": 0.691674373407346, + "learning_rate": 6.364043290708409e-06, + "loss": 0.3526, + "step": 9495 + }, + { + "epoch": 0.42978049332428153, + "grad_norm": 0.6137817520380174, + "learning_rate": 6.363338155969658e-06, + "loss": 0.31, + "step": 9496 + }, + { + "epoch": 0.42982575243267707, + "grad_norm": 0.6353400075810914, + "learning_rate": 6.362632991936956e-06, + "loss": 0.3724, + "step": 9497 + }, + { + "epoch": 0.42987101154107266, + "grad_norm": 0.6286136029257747, + "learning_rate": 6.361927798625458e-06, + "loss": 0.3388, + "step": 9498 + }, + { + "epoch": 0.4299162706494682, + "grad_norm": 0.6081121318544058, + "learning_rate": 6.361222576050312e-06, + "loss": 0.3398, + "step": 9499 + }, + { + "epoch": 0.4299615297578638, + "grad_norm": 0.6939297880857666, + "learning_rate": 6.360517324226676e-06, + "loss": 0.3425, + "step": 9500 + }, + { + "epoch": 0.4300067888662593, + "grad_norm": 0.6198614495621046, + "learning_rate": 6.3598120431697e-06, + "loss": 0.3216, + "step": 9501 + }, + { + "epoch": 0.4300520479746549, + "grad_norm": 0.6307838569841074, + "learning_rate": 6.35910673289454e-06, + "loss": 0.3396, + "step": 9502 + }, + { + "epoch": 0.43009730708305044, + "grad_norm": 0.6183345620328679, + "learning_rate": 6.358401393416349e-06, + "loss": 0.3387, + "step": 9503 + }, + { + "epoch": 0.43014256619144603, + "grad_norm": 0.4830405915588279, + "learning_rate": 6.357696024750286e-06, + "loss": 0.4909, + "step": 9504 + }, + { + "epoch": 0.4301878252998416, + "grad_norm": 0.6336024479610478, + "learning_rate": 6.356990626911503e-06, + "loss": 0.3208, + "step": 9505 + }, + { + "epoch": 0.43023308440823715, + "grad_norm": 0.6690021288856202, + "learning_rate": 6.356285199915162e-06, + "loss": 0.364, + "step": 9506 + }, + { + "epoch": 0.43027834351663274, + "grad_norm": 0.6478736001609527, + "learning_rate": 6.355579743776415e-06, + "loss": 0.3436, + "step": 9507 + }, + { + "epoch": 0.4303236026250283, + "grad_norm": 0.7058901153166096, + "learning_rate": 6.354874258510425e-06, + "loss": 0.3633, + "step": 9508 + }, + { + "epoch": 0.43036886173342387, + "grad_norm": 0.646687768947303, + "learning_rate": 6.3541687441323466e-06, + "loss": 0.3173, + "step": 9509 + }, + { + "epoch": 0.4304141208418194, + "grad_norm": 0.6384243764796345, + "learning_rate": 6.353463200657341e-06, + "loss": 0.3546, + "step": 9510 + }, + { + "epoch": 0.430459379950215, + "grad_norm": 0.5782346792836871, + "learning_rate": 6.352757628100569e-06, + "loss": 0.3214, + "step": 9511 + }, + { + "epoch": 0.4305046390586105, + "grad_norm": 0.6410978720387365, + "learning_rate": 6.352052026477189e-06, + "loss": 0.3601, + "step": 9512 + }, + { + "epoch": 0.4305498981670061, + "grad_norm": 0.6338795047306764, + "learning_rate": 6.351346395802365e-06, + "loss": 0.2959, + "step": 9513 + }, + { + "epoch": 0.43059515727540165, + "grad_norm": 0.6219979703602239, + "learning_rate": 6.350640736091256e-06, + "loss": 0.3601, + "step": 9514 + }, + { + "epoch": 0.43064041638379724, + "grad_norm": 0.603935883727528, + "learning_rate": 6.349935047359026e-06, + "loss": 0.3235, + "step": 9515 + }, + { + "epoch": 0.4306856754921928, + "grad_norm": 0.6304081216699563, + "learning_rate": 6.349229329620839e-06, + "loss": 0.3183, + "step": 9516 + }, + { + "epoch": 0.43073093460058837, + "grad_norm": 0.6260900712445192, + "learning_rate": 6.348523582891857e-06, + "loss": 0.3703, + "step": 9517 + }, + { + "epoch": 0.43077619370898396, + "grad_norm": 0.7608811482008814, + "learning_rate": 6.347817807187242e-06, + "loss": 0.3287, + "step": 9518 + }, + { + "epoch": 0.4308214528173795, + "grad_norm": 0.6650272533873631, + "learning_rate": 6.347112002522167e-06, + "loss": 0.32, + "step": 9519 + }, + { + "epoch": 0.4308667119257751, + "grad_norm": 0.6347777593589885, + "learning_rate": 6.346406168911787e-06, + "loss": 0.3253, + "step": 9520 + }, + { + "epoch": 0.4309119710341706, + "grad_norm": 0.6417946074599703, + "learning_rate": 6.3457003063712775e-06, + "loss": 0.3733, + "step": 9521 + }, + { + "epoch": 0.4309572301425662, + "grad_norm": 0.7687534975018414, + "learning_rate": 6.344994414915801e-06, + "loss": 0.322, + "step": 9522 + }, + { + "epoch": 0.43100248925096174, + "grad_norm": 0.7554017704102255, + "learning_rate": 6.3442884945605244e-06, + "loss": 0.3365, + "step": 9523 + }, + { + "epoch": 0.43104774835935733, + "grad_norm": 0.6077760535588105, + "learning_rate": 6.343582545320617e-06, + "loss": 0.3264, + "step": 9524 + }, + { + "epoch": 0.43109300746775286, + "grad_norm": 0.7121399654195334, + "learning_rate": 6.342876567211247e-06, + "loss": 0.3541, + "step": 9525 + }, + { + "epoch": 0.43113826657614845, + "grad_norm": 0.5372455333369079, + "learning_rate": 6.3421705602475835e-06, + "loss": 0.4888, + "step": 9526 + }, + { + "epoch": 0.431183525684544, + "grad_norm": 0.4297497874863686, + "learning_rate": 6.341464524444798e-06, + "loss": 0.5041, + "step": 9527 + }, + { + "epoch": 0.4312287847929396, + "grad_norm": 0.592031368327786, + "learning_rate": 6.340758459818058e-06, + "loss": 0.3267, + "step": 9528 + }, + { + "epoch": 0.43127404390133517, + "grad_norm": 0.6250388602448749, + "learning_rate": 6.340052366382539e-06, + "loss": 0.3383, + "step": 9529 + }, + { + "epoch": 0.4313193030097307, + "grad_norm": 0.6089617843367738, + "learning_rate": 6.339346244153408e-06, + "loss": 0.3561, + "step": 9530 + }, + { + "epoch": 0.4313645621181263, + "grad_norm": 0.6268140034238477, + "learning_rate": 6.3386400931458415e-06, + "loss": 0.3427, + "step": 9531 + }, + { + "epoch": 0.43140982122652183, + "grad_norm": 0.6129149633673354, + "learning_rate": 6.33793391337501e-06, + "loss": 0.3338, + "step": 9532 + }, + { + "epoch": 0.4314550803349174, + "grad_norm": 0.6096958155296873, + "learning_rate": 6.337227704856088e-06, + "loss": 0.3824, + "step": 9533 + }, + { + "epoch": 0.43150033944331295, + "grad_norm": 0.7077925181477205, + "learning_rate": 6.336521467604248e-06, + "loss": 0.4856, + "step": 9534 + }, + { + "epoch": 0.43154559855170854, + "grad_norm": 0.698918442674996, + "learning_rate": 6.33581520163467e-06, + "loss": 0.3558, + "step": 9535 + }, + { + "epoch": 0.4315908576601041, + "grad_norm": 0.6263258929760982, + "learning_rate": 6.335108906962523e-06, + "loss": 0.3547, + "step": 9536 + }, + { + "epoch": 0.43163611676849967, + "grad_norm": 0.6214809292754883, + "learning_rate": 6.334402583602988e-06, + "loss": 0.3516, + "step": 9537 + }, + { + "epoch": 0.4316813758768952, + "grad_norm": 0.6941423915887114, + "learning_rate": 6.333696231571238e-06, + "loss": 0.3184, + "step": 9538 + }, + { + "epoch": 0.4317266349852908, + "grad_norm": 0.805609294731211, + "learning_rate": 6.332989850882453e-06, + "loss": 0.3666, + "step": 9539 + }, + { + "epoch": 0.4317718940936864, + "grad_norm": 0.6618733159358412, + "learning_rate": 6.33228344155181e-06, + "loss": 0.3617, + "step": 9540 + }, + { + "epoch": 0.4318171532020819, + "grad_norm": 0.34824166171139703, + "learning_rate": 6.331577003594487e-06, + "loss": 0.4706, + "step": 9541 + }, + { + "epoch": 0.4318624123104775, + "grad_norm": 0.7031531376323451, + "learning_rate": 6.330870537025664e-06, + "loss": 0.3587, + "step": 9542 + }, + { + "epoch": 0.43190767141887304, + "grad_norm": 0.629939383500528, + "learning_rate": 6.3301640418605205e-06, + "loss": 0.3356, + "step": 9543 + }, + { + "epoch": 0.43195293052726863, + "grad_norm": 0.7383560966237717, + "learning_rate": 6.329457518114237e-06, + "loss": 0.3183, + "step": 9544 + }, + { + "epoch": 0.43199818963566416, + "grad_norm": 0.6529920903315911, + "learning_rate": 6.3287509658019955e-06, + "loss": 0.3452, + "step": 9545 + }, + { + "epoch": 0.43204344874405975, + "grad_norm": 0.6080741176068959, + "learning_rate": 6.328044384938977e-06, + "loss": 0.3728, + "step": 9546 + }, + { + "epoch": 0.4320887078524553, + "grad_norm": 0.6343854373387572, + "learning_rate": 6.327337775540362e-06, + "loss": 0.3624, + "step": 9547 + }, + { + "epoch": 0.4321339669608509, + "grad_norm": 0.6243543241577083, + "learning_rate": 6.326631137621336e-06, + "loss": 0.3479, + "step": 9548 + }, + { + "epoch": 0.4321792260692464, + "grad_norm": 0.6233388741859107, + "learning_rate": 6.32592447119708e-06, + "loss": 0.3652, + "step": 9549 + }, + { + "epoch": 0.432224485177642, + "grad_norm": 0.6586508655334762, + "learning_rate": 6.32521777628278e-06, + "loss": 0.3699, + "step": 9550 + }, + { + "epoch": 0.43226974428603754, + "grad_norm": 0.5813175230678054, + "learning_rate": 6.324511052893621e-06, + "loss": 0.2788, + "step": 9551 + }, + { + "epoch": 0.43231500339443313, + "grad_norm": 0.6953927867608366, + "learning_rate": 6.323804301044787e-06, + "loss": 0.3374, + "step": 9552 + }, + { + "epoch": 0.4323602625028287, + "grad_norm": 0.5648584961368391, + "learning_rate": 6.323097520751463e-06, + "loss": 0.3052, + "step": 9553 + }, + { + "epoch": 0.43240552161122425, + "grad_norm": 0.4741493747837489, + "learning_rate": 6.322390712028839e-06, + "loss": 0.5073, + "step": 9554 + }, + { + "epoch": 0.43245078071961984, + "grad_norm": 0.6282792193419438, + "learning_rate": 6.321683874892097e-06, + "loss": 0.377, + "step": 9555 + }, + { + "epoch": 0.4324960398280154, + "grad_norm": 0.6002379549916733, + "learning_rate": 6.3209770093564315e-06, + "loss": 0.317, + "step": 9556 + }, + { + "epoch": 0.43254129893641097, + "grad_norm": 0.6804592964508102, + "learning_rate": 6.320270115437024e-06, + "loss": 0.3424, + "step": 9557 + }, + { + "epoch": 0.4325865580448065, + "grad_norm": 0.6396321643958531, + "learning_rate": 6.319563193149069e-06, + "loss": 0.3416, + "step": 9558 + }, + { + "epoch": 0.4326318171532021, + "grad_norm": 0.6131211335825673, + "learning_rate": 6.318856242507751e-06, + "loss": 0.3518, + "step": 9559 + }, + { + "epoch": 0.4326770762615976, + "grad_norm": 0.6539758858362998, + "learning_rate": 6.318149263528266e-06, + "loss": 0.3267, + "step": 9560 + }, + { + "epoch": 0.4327223353699932, + "grad_norm": 0.6427223122736058, + "learning_rate": 6.3174422562258e-06, + "loss": 0.3866, + "step": 9561 + }, + { + "epoch": 0.43276759447838875, + "grad_norm": 0.620529057754623, + "learning_rate": 6.316735220615546e-06, + "loss": 0.3473, + "step": 9562 + }, + { + "epoch": 0.43281285358678434, + "grad_norm": 0.5354036421937424, + "learning_rate": 6.316028156712697e-06, + "loss": 0.3013, + "step": 9563 + }, + { + "epoch": 0.43285811269517993, + "grad_norm": 0.38415502626453674, + "learning_rate": 6.315321064532444e-06, + "loss": 0.4936, + "step": 9564 + }, + { + "epoch": 0.43290337180357547, + "grad_norm": 0.6673503544936131, + "learning_rate": 6.31461394408998e-06, + "loss": 0.3507, + "step": 9565 + }, + { + "epoch": 0.43294863091197106, + "grad_norm": 0.6086413226302155, + "learning_rate": 6.313906795400503e-06, + "loss": 0.3543, + "step": 9566 + }, + { + "epoch": 0.4329938900203666, + "grad_norm": 0.613894175157072, + "learning_rate": 6.313199618479202e-06, + "loss": 0.3341, + "step": 9567 + }, + { + "epoch": 0.4330391491287622, + "grad_norm": 0.27093995914272184, + "learning_rate": 6.312492413341274e-06, + "loss": 0.476, + "step": 9568 + }, + { + "epoch": 0.4330844082371577, + "grad_norm": 0.6441830810283002, + "learning_rate": 6.311785180001917e-06, + "loss": 0.3675, + "step": 9569 + }, + { + "epoch": 0.4331296673455533, + "grad_norm": 0.2881058366572796, + "learning_rate": 6.311077918476324e-06, + "loss": 0.5011, + "step": 9570 + }, + { + "epoch": 0.43317492645394884, + "grad_norm": 0.3016593831565181, + "learning_rate": 6.3103706287796925e-06, + "loss": 0.4897, + "step": 9571 + }, + { + "epoch": 0.43322018556234443, + "grad_norm": 0.6379247355962797, + "learning_rate": 6.309663310927222e-06, + "loss": 0.3611, + "step": 9572 + }, + { + "epoch": 0.43326544467073996, + "grad_norm": 0.6403026737285819, + "learning_rate": 6.30895596493411e-06, + "loss": 0.3237, + "step": 9573 + }, + { + "epoch": 0.43331070377913555, + "grad_norm": 0.29694733852073946, + "learning_rate": 6.308248590815552e-06, + "loss": 0.4911, + "step": 9574 + }, + { + "epoch": 0.43335596288753114, + "grad_norm": 0.6408292536123505, + "learning_rate": 6.3075411885867525e-06, + "loss": 0.3418, + "step": 9575 + }, + { + "epoch": 0.4334012219959267, + "grad_norm": 0.6038428892113239, + "learning_rate": 6.306833758262906e-06, + "loss": 0.3343, + "step": 9576 + }, + { + "epoch": 0.43344648110432227, + "grad_norm": 0.6628742329075414, + "learning_rate": 6.306126299859218e-06, + "loss": 0.3618, + "step": 9577 + }, + { + "epoch": 0.4334917402127178, + "grad_norm": 0.3045993291323623, + "learning_rate": 6.305418813390885e-06, + "loss": 0.5013, + "step": 9578 + }, + { + "epoch": 0.4335369993211134, + "grad_norm": 0.6780581289844321, + "learning_rate": 6.304711298873113e-06, + "loss": 0.3365, + "step": 9579 + }, + { + "epoch": 0.4335822584295089, + "grad_norm": 0.6211410836718435, + "learning_rate": 6.304003756321101e-06, + "loss": 0.3188, + "step": 9580 + }, + { + "epoch": 0.4336275175379045, + "grad_norm": 0.7669868311980905, + "learning_rate": 6.303296185750054e-06, + "loss": 0.3625, + "step": 9581 + }, + { + "epoch": 0.43367277664630005, + "grad_norm": 0.7573626626127393, + "learning_rate": 6.302588587175175e-06, + "loss": 0.3708, + "step": 9582 + }, + { + "epoch": 0.43371803575469564, + "grad_norm": 0.6855161490216711, + "learning_rate": 6.301880960611668e-06, + "loss": 0.3345, + "step": 9583 + }, + { + "epoch": 0.4337632948630912, + "grad_norm": 0.6437440624954825, + "learning_rate": 6.301173306074735e-06, + "loss": 0.323, + "step": 9584 + }, + { + "epoch": 0.43380855397148677, + "grad_norm": 0.6232157340832585, + "learning_rate": 6.300465623579587e-06, + "loss": 0.3519, + "step": 9585 + }, + { + "epoch": 0.4338538130798823, + "grad_norm": 0.6634296907842349, + "learning_rate": 6.299757913141424e-06, + "loss": 0.3498, + "step": 9586 + }, + { + "epoch": 0.4338990721882779, + "grad_norm": 1.2642722267366815, + "learning_rate": 6.299050174775458e-06, + "loss": 0.3335, + "step": 9587 + }, + { + "epoch": 0.4339443312966735, + "grad_norm": 0.651210690916979, + "learning_rate": 6.298342408496892e-06, + "loss": 0.3338, + "step": 9588 + }, + { + "epoch": 0.433989590405069, + "grad_norm": 0.8266010855192732, + "learning_rate": 6.297634614320937e-06, + "loss": 0.3365, + "step": 9589 + }, + { + "epoch": 0.4340348495134646, + "grad_norm": 0.777980841927406, + "learning_rate": 6.2969267922627975e-06, + "loss": 0.3787, + "step": 9590 + }, + { + "epoch": 0.43408010862186014, + "grad_norm": 0.6771848978264605, + "learning_rate": 6.296218942337685e-06, + "loss": 0.3893, + "step": 9591 + }, + { + "epoch": 0.43412536773025573, + "grad_norm": 0.6163189995087976, + "learning_rate": 6.295511064560808e-06, + "loss": 0.3225, + "step": 9592 + }, + { + "epoch": 0.43417062683865126, + "grad_norm": 0.657764399625542, + "learning_rate": 6.294803158947378e-06, + "loss": 0.3155, + "step": 9593 + }, + { + "epoch": 0.43421588594704685, + "grad_norm": 0.6305694061235845, + "learning_rate": 6.294095225512604e-06, + "loss": 0.3458, + "step": 9594 + }, + { + "epoch": 0.4342611450554424, + "grad_norm": 0.35629379362146724, + "learning_rate": 6.293387264271699e-06, + "loss": 0.4632, + "step": 9595 + }, + { + "epoch": 0.434306404163838, + "grad_norm": 0.6351505528430289, + "learning_rate": 6.292679275239875e-06, + "loss": 0.3321, + "step": 9596 + }, + { + "epoch": 0.4343516632722335, + "grad_norm": 0.6206739807132818, + "learning_rate": 6.29197125843234e-06, + "loss": 0.2933, + "step": 9597 + }, + { + "epoch": 0.4343969223806291, + "grad_norm": 0.6170676077633372, + "learning_rate": 6.291263213864314e-06, + "loss": 0.335, + "step": 9598 + }, + { + "epoch": 0.4344421814890247, + "grad_norm": 0.29918808007546976, + "learning_rate": 6.290555141551006e-06, + "loss": 0.5105, + "step": 9599 + }, + { + "epoch": 0.4344874405974202, + "grad_norm": 0.6466778800542244, + "learning_rate": 6.289847041507632e-06, + "loss": 0.318, + "step": 9600 + }, + { + "epoch": 0.4345326997058158, + "grad_norm": 0.6644688745721504, + "learning_rate": 6.289138913749406e-06, + "loss": 0.3263, + "step": 9601 + }, + { + "epoch": 0.43457795881421135, + "grad_norm": 0.5956829179386198, + "learning_rate": 6.2884307582915434e-06, + "loss": 0.3247, + "step": 9602 + }, + { + "epoch": 0.43462321792260694, + "grad_norm": 0.6384688707836155, + "learning_rate": 6.287722575149262e-06, + "loss": 0.3776, + "step": 9603 + }, + { + "epoch": 0.4346684770310025, + "grad_norm": 0.6210441993230379, + "learning_rate": 6.287014364337778e-06, + "loss": 0.3438, + "step": 9604 + }, + { + "epoch": 0.43471373613939807, + "grad_norm": 0.6515747518311703, + "learning_rate": 6.286306125872307e-06, + "loss": 0.3044, + "step": 9605 + }, + { + "epoch": 0.4347589952477936, + "grad_norm": 0.6065693284092167, + "learning_rate": 6.285597859768069e-06, + "loss": 0.3433, + "step": 9606 + }, + { + "epoch": 0.4348042543561892, + "grad_norm": 0.649773066821743, + "learning_rate": 6.28488956604028e-06, + "loss": 0.3329, + "step": 9607 + }, + { + "epoch": 0.4348495134645847, + "grad_norm": 0.6402533582637937, + "learning_rate": 6.284181244704161e-06, + "loss": 0.3311, + "step": 9608 + }, + { + "epoch": 0.4348947725729803, + "grad_norm": 0.34486747340777774, + "learning_rate": 6.2834728957749315e-06, + "loss": 0.4732, + "step": 9609 + }, + { + "epoch": 0.43494003168137585, + "grad_norm": 0.6517980038717447, + "learning_rate": 6.2827645192678114e-06, + "loss": 0.352, + "step": 9610 + }, + { + "epoch": 0.43498529078977144, + "grad_norm": 0.6361557329684554, + "learning_rate": 6.282056115198021e-06, + "loss": 0.3426, + "step": 9611 + }, + { + "epoch": 0.43503054989816703, + "grad_norm": 0.29829048121652124, + "learning_rate": 6.2813476835807814e-06, + "loss": 0.4682, + "step": 9612 + }, + { + "epoch": 0.43507580900656256, + "grad_norm": 0.6764980210107786, + "learning_rate": 6.280639224431317e-06, + "loss": 0.3527, + "step": 9613 + }, + { + "epoch": 0.43512106811495815, + "grad_norm": 0.6549779058980503, + "learning_rate": 6.27993073776485e-06, + "loss": 0.3193, + "step": 9614 + }, + { + "epoch": 0.4351663272233537, + "grad_norm": 0.6323811410845982, + "learning_rate": 6.279222223596599e-06, + "loss": 0.3335, + "step": 9615 + }, + { + "epoch": 0.4352115863317493, + "grad_norm": 0.5938579736378444, + "learning_rate": 6.278513681941793e-06, + "loss": 0.3349, + "step": 9616 + }, + { + "epoch": 0.4352568454401448, + "grad_norm": 0.2802435542448092, + "learning_rate": 6.277805112815656e-06, + "loss": 0.4687, + "step": 9617 + }, + { + "epoch": 0.4353021045485404, + "grad_norm": 0.6716376377922707, + "learning_rate": 6.277096516233409e-06, + "loss": 0.3541, + "step": 9618 + }, + { + "epoch": 0.43534736365693594, + "grad_norm": 0.29189397078435236, + "learning_rate": 6.276387892210281e-06, + "loss": 0.4743, + "step": 9619 + }, + { + "epoch": 0.4353926227653315, + "grad_norm": 0.7074551039812558, + "learning_rate": 6.275679240761499e-06, + "loss": 0.3602, + "step": 9620 + }, + { + "epoch": 0.43543788187372706, + "grad_norm": 0.5986286546331557, + "learning_rate": 6.274970561902286e-06, + "loss": 0.319, + "step": 9621 + }, + { + "epoch": 0.43548314098212265, + "grad_norm": 0.2924588447120755, + "learning_rate": 6.274261855647872e-06, + "loss": 0.4556, + "step": 9622 + }, + { + "epoch": 0.43552840009051824, + "grad_norm": 0.6350275796798999, + "learning_rate": 6.273553122013485e-06, + "loss": 0.3189, + "step": 9623 + }, + { + "epoch": 0.4355736591989138, + "grad_norm": 0.6569204309990117, + "learning_rate": 6.272844361014352e-06, + "loss": 0.3469, + "step": 9624 + }, + { + "epoch": 0.43561891830730937, + "grad_norm": 0.28160925038059104, + "learning_rate": 6.272135572665704e-06, + "loss": 0.475, + "step": 9625 + }, + { + "epoch": 0.4356641774157049, + "grad_norm": 0.6936511520119449, + "learning_rate": 6.271426756982768e-06, + "loss": 0.3391, + "step": 9626 + }, + { + "epoch": 0.4357094365241005, + "grad_norm": 0.6334270977412247, + "learning_rate": 6.270717913980777e-06, + "loss": 0.3363, + "step": 9627 + }, + { + "epoch": 0.435754695632496, + "grad_norm": 0.7076211424429047, + "learning_rate": 6.270009043674959e-06, + "loss": 0.3338, + "step": 9628 + }, + { + "epoch": 0.4357999547408916, + "grad_norm": 0.2949010307384991, + "learning_rate": 6.26930014608055e-06, + "loss": 0.4749, + "step": 9629 + }, + { + "epoch": 0.43584521384928715, + "grad_norm": 0.2991584601582421, + "learning_rate": 6.268591221212779e-06, + "loss": 0.5113, + "step": 9630 + }, + { + "epoch": 0.43589047295768274, + "grad_norm": 0.28826042168950156, + "learning_rate": 6.2678822690868765e-06, + "loss": 0.4618, + "step": 9631 + }, + { + "epoch": 0.4359357320660783, + "grad_norm": 0.27923258726620354, + "learning_rate": 6.267173289718079e-06, + "loss": 0.4611, + "step": 9632 + }, + { + "epoch": 0.43598099117447386, + "grad_norm": 0.6535515640602259, + "learning_rate": 6.2664642831216206e-06, + "loss": 0.3311, + "step": 9633 + }, + { + "epoch": 0.43602625028286945, + "grad_norm": 0.658058957760891, + "learning_rate": 6.265755249312733e-06, + "loss": 0.3343, + "step": 9634 + }, + { + "epoch": 0.436071509391265, + "grad_norm": 0.7420584561566588, + "learning_rate": 6.2650461883066534e-06, + "loss": 0.2928, + "step": 9635 + }, + { + "epoch": 0.4361167684996606, + "grad_norm": 0.679580769956985, + "learning_rate": 6.264337100118615e-06, + "loss": 0.3421, + "step": 9636 + }, + { + "epoch": 0.4361620276080561, + "grad_norm": 0.6600756328928645, + "learning_rate": 6.263627984763858e-06, + "loss": 0.4015, + "step": 9637 + }, + { + "epoch": 0.4362072867164517, + "grad_norm": 0.6217618393021392, + "learning_rate": 6.262918842257615e-06, + "loss": 0.3845, + "step": 9638 + }, + { + "epoch": 0.43625254582484724, + "grad_norm": 0.42654541926319295, + "learning_rate": 6.262209672615125e-06, + "loss": 0.4553, + "step": 9639 + }, + { + "epoch": 0.4362978049332428, + "grad_norm": 0.5991685886740397, + "learning_rate": 6.261500475851625e-06, + "loss": 0.3663, + "step": 9640 + }, + { + "epoch": 0.43634306404163836, + "grad_norm": 0.6526326165159332, + "learning_rate": 6.260791251982354e-06, + "loss": 0.3534, + "step": 9641 + }, + { + "epoch": 0.43638832315003395, + "grad_norm": 0.33023414793782335, + "learning_rate": 6.260082001022553e-06, + "loss": 0.4822, + "step": 9642 + }, + { + "epoch": 0.4364335822584295, + "grad_norm": 0.614008987239421, + "learning_rate": 6.259372722987459e-06, + "loss": 0.3539, + "step": 9643 + }, + { + "epoch": 0.4364788413668251, + "grad_norm": 0.6193780654351428, + "learning_rate": 6.2586634178923124e-06, + "loss": 0.3327, + "step": 9644 + }, + { + "epoch": 0.4365241004752206, + "grad_norm": 0.678929067511659, + "learning_rate": 6.257954085752356e-06, + "loss": 0.3385, + "step": 9645 + }, + { + "epoch": 0.4365693595836162, + "grad_norm": 0.6455644243579621, + "learning_rate": 6.257244726582829e-06, + "loss": 0.3944, + "step": 9646 + }, + { + "epoch": 0.4366146186920118, + "grad_norm": 0.6332816139581993, + "learning_rate": 6.256535340398974e-06, + "loss": 0.3395, + "step": 9647 + }, + { + "epoch": 0.4366598778004073, + "grad_norm": 0.6241992920208398, + "learning_rate": 6.255825927216032e-06, + "loss": 0.3107, + "step": 9648 + }, + { + "epoch": 0.4367051369088029, + "grad_norm": 0.6499774891475161, + "learning_rate": 6.2551164870492506e-06, + "loss": 0.3049, + "step": 9649 + }, + { + "epoch": 0.43675039601719845, + "grad_norm": 0.5852926834843499, + "learning_rate": 6.25440701991387e-06, + "loss": 0.3219, + "step": 9650 + }, + { + "epoch": 0.43679565512559404, + "grad_norm": 0.682648927831333, + "learning_rate": 6.253697525825134e-06, + "loss": 0.3546, + "step": 9651 + }, + { + "epoch": 0.4368409142339896, + "grad_norm": 0.6755898462967815, + "learning_rate": 6.25298800479829e-06, + "loss": 0.3562, + "step": 9652 + }, + { + "epoch": 0.43688617334238516, + "grad_norm": 0.6553521064520352, + "learning_rate": 6.252278456848581e-06, + "loss": 0.342, + "step": 9653 + }, + { + "epoch": 0.4369314324507807, + "grad_norm": 0.6414939440118836, + "learning_rate": 6.251568881991256e-06, + "loss": 0.3483, + "step": 9654 + }, + { + "epoch": 0.4369766915591763, + "grad_norm": 0.6554899417647392, + "learning_rate": 6.250859280241557e-06, + "loss": 0.3432, + "step": 9655 + }, + { + "epoch": 0.4370219506675718, + "grad_norm": 0.39347989973742065, + "learning_rate": 6.250149651614735e-06, + "loss": 0.4453, + "step": 9656 + }, + { + "epoch": 0.4370672097759674, + "grad_norm": 0.616710306543043, + "learning_rate": 6.249439996126036e-06, + "loss": 0.33, + "step": 9657 + }, + { + "epoch": 0.437112468884363, + "grad_norm": 0.6916743798248338, + "learning_rate": 6.24873031379071e-06, + "loss": 0.3539, + "step": 9658 + }, + { + "epoch": 0.43715772799275854, + "grad_norm": 0.7770154644576485, + "learning_rate": 6.248020604624004e-06, + "loss": 0.3506, + "step": 9659 + }, + { + "epoch": 0.43720298710115413, + "grad_norm": 0.6583445912764695, + "learning_rate": 6.247310868641168e-06, + "loss": 0.3122, + "step": 9660 + }, + { + "epoch": 0.43724824620954966, + "grad_norm": 0.8566787002605626, + "learning_rate": 6.246601105857453e-06, + "loss": 0.328, + "step": 9661 + }, + { + "epoch": 0.43729350531794525, + "grad_norm": 0.5977562191253866, + "learning_rate": 6.245891316288108e-06, + "loss": 0.3277, + "step": 9662 + }, + { + "epoch": 0.4373387644263408, + "grad_norm": 0.5231617692788478, + "learning_rate": 6.245181499948385e-06, + "loss": 0.4739, + "step": 9663 + }, + { + "epoch": 0.4373840235347364, + "grad_norm": 0.6619926024483229, + "learning_rate": 6.244471656853538e-06, + "loss": 0.3472, + "step": 9664 + }, + { + "epoch": 0.4374292826431319, + "grad_norm": 0.6386867229353993, + "learning_rate": 6.243761787018814e-06, + "loss": 0.3355, + "step": 9665 + }, + { + "epoch": 0.4374745417515275, + "grad_norm": 0.6153655896855599, + "learning_rate": 6.2430518904594715e-06, + "loss": 0.3314, + "step": 9666 + }, + { + "epoch": 0.43751980085992304, + "grad_norm": 0.6500289470819034, + "learning_rate": 6.24234196719076e-06, + "loss": 0.3335, + "step": 9667 + }, + { + "epoch": 0.4375650599683186, + "grad_norm": 0.6105016241224432, + "learning_rate": 6.241632017227937e-06, + "loss": 0.3209, + "step": 9668 + }, + { + "epoch": 0.4376103190767142, + "grad_norm": 0.33115584574894136, + "learning_rate": 6.240922040586254e-06, + "loss": 0.4953, + "step": 9669 + }, + { + "epoch": 0.43765557818510975, + "grad_norm": 0.33989481089576024, + "learning_rate": 6.240212037280967e-06, + "loss": 0.4787, + "step": 9670 + }, + { + "epoch": 0.43770083729350534, + "grad_norm": 0.6816818191990349, + "learning_rate": 6.239502007327334e-06, + "loss": 0.3285, + "step": 9671 + }, + { + "epoch": 0.4377460964019009, + "grad_norm": 0.6037031811269876, + "learning_rate": 6.2387919507406085e-06, + "loss": 0.308, + "step": 9672 + }, + { + "epoch": 0.43779135551029646, + "grad_norm": 0.6195194185977877, + "learning_rate": 6.238081867536049e-06, + "loss": 0.3443, + "step": 9673 + }, + { + "epoch": 0.437836614618692, + "grad_norm": 0.5802615024132223, + "learning_rate": 6.237371757728914e-06, + "loss": 0.3311, + "step": 9674 + }, + { + "epoch": 0.4378818737270876, + "grad_norm": 0.6486256013867389, + "learning_rate": 6.236661621334458e-06, + "loss": 0.332, + "step": 9675 + }, + { + "epoch": 0.4379271328354831, + "grad_norm": 0.5983038373745534, + "learning_rate": 6.235951458367943e-06, + "loss": 0.3169, + "step": 9676 + }, + { + "epoch": 0.4379723919438787, + "grad_norm": 0.3263293080204479, + "learning_rate": 6.235241268844626e-06, + "loss": 0.4952, + "step": 9677 + }, + { + "epoch": 0.43801765105227425, + "grad_norm": 0.6615016165723331, + "learning_rate": 6.234531052779769e-06, + "loss": 0.3285, + "step": 9678 + }, + { + "epoch": 0.43806291016066984, + "grad_norm": 0.2958982133149087, + "learning_rate": 6.233820810188631e-06, + "loss": 0.4683, + "step": 9679 + }, + { + "epoch": 0.4381081692690654, + "grad_norm": 0.597163484202998, + "learning_rate": 6.233110541086473e-06, + "loss": 0.349, + "step": 9680 + }, + { + "epoch": 0.43815342837746096, + "grad_norm": 0.6319411261177034, + "learning_rate": 6.2324002454885565e-06, + "loss": 0.3328, + "step": 9681 + }, + { + "epoch": 0.43819868748585655, + "grad_norm": 0.28556124909790315, + "learning_rate": 6.231689923410144e-06, + "loss": 0.4843, + "step": 9682 + }, + { + "epoch": 0.4382439465942521, + "grad_norm": 0.6600743868567222, + "learning_rate": 6.230979574866498e-06, + "loss": 0.3118, + "step": 9683 + }, + { + "epoch": 0.4382892057026477, + "grad_norm": 0.7072446001623893, + "learning_rate": 6.230269199872881e-06, + "loss": 0.3368, + "step": 9684 + }, + { + "epoch": 0.4383344648110432, + "grad_norm": 0.6703396770777446, + "learning_rate": 6.22955879844456e-06, + "loss": 0.3548, + "step": 9685 + }, + { + "epoch": 0.4383797239194388, + "grad_norm": 0.39158306035806945, + "learning_rate": 6.228848370596793e-06, + "loss": 0.4889, + "step": 9686 + }, + { + "epoch": 0.43842498302783434, + "grad_norm": 0.6576588402412944, + "learning_rate": 6.228137916344852e-06, + "loss": 0.3602, + "step": 9687 + }, + { + "epoch": 0.4384702421362299, + "grad_norm": 0.638629956214423, + "learning_rate": 6.227427435703997e-06, + "loss": 0.3318, + "step": 9688 + }, + { + "epoch": 0.43851550124462546, + "grad_norm": 0.633776454109039, + "learning_rate": 6.2267169286894954e-06, + "loss": 0.3348, + "step": 9689 + }, + { + "epoch": 0.43856076035302105, + "grad_norm": 0.3955281000545189, + "learning_rate": 6.2260063953166165e-06, + "loss": 0.4711, + "step": 9690 + }, + { + "epoch": 0.4386060194614166, + "grad_norm": 0.2967833814586568, + "learning_rate": 6.225295835600624e-06, + "loss": 0.5086, + "step": 9691 + }, + { + "epoch": 0.4386512785698122, + "grad_norm": 0.6881500244083311, + "learning_rate": 6.2245852495567885e-06, + "loss": 0.3629, + "step": 9692 + }, + { + "epoch": 0.43869653767820777, + "grad_norm": 0.2702582686060595, + "learning_rate": 6.2238746372003775e-06, + "loss": 0.4664, + "step": 9693 + }, + { + "epoch": 0.4387417967866033, + "grad_norm": 0.2826358246260504, + "learning_rate": 6.223163998546657e-06, + "loss": 0.4739, + "step": 9694 + }, + { + "epoch": 0.4387870558949989, + "grad_norm": 0.6637887438021715, + "learning_rate": 6.2224533336109015e-06, + "loss": 0.3642, + "step": 9695 + }, + { + "epoch": 0.4388323150033944, + "grad_norm": 0.612207286684882, + "learning_rate": 6.221742642408377e-06, + "loss": 0.3628, + "step": 9696 + }, + { + "epoch": 0.43887757411179, + "grad_norm": 0.6165039348405416, + "learning_rate": 6.221031924954356e-06, + "loss": 0.3216, + "step": 9697 + }, + { + "epoch": 0.43892283322018555, + "grad_norm": 0.696379443461046, + "learning_rate": 6.220321181264108e-06, + "loss": 0.3381, + "step": 9698 + }, + { + "epoch": 0.43896809232858114, + "grad_norm": 0.5904740059119723, + "learning_rate": 6.2196104113529064e-06, + "loss": 0.3482, + "step": 9699 + }, + { + "epoch": 0.4390133514369767, + "grad_norm": 0.7721270978948145, + "learning_rate": 6.218899615236022e-06, + "loss": 0.3034, + "step": 9700 + }, + { + "epoch": 0.43905861054537226, + "grad_norm": 0.7227083202010125, + "learning_rate": 6.21818879292873e-06, + "loss": 0.3402, + "step": 9701 + }, + { + "epoch": 0.4391038696537678, + "grad_norm": 0.6098500040445197, + "learning_rate": 6.217477944446301e-06, + "loss": 0.2952, + "step": 9702 + }, + { + "epoch": 0.4391491287621634, + "grad_norm": 0.6829152157415639, + "learning_rate": 6.216767069804011e-06, + "loss": 0.357, + "step": 9703 + }, + { + "epoch": 0.439194387870559, + "grad_norm": 0.7364773799141779, + "learning_rate": 6.216056169017133e-06, + "loss": 0.375, + "step": 9704 + }, + { + "epoch": 0.4392396469789545, + "grad_norm": 0.6548002280902314, + "learning_rate": 6.215345242100942e-06, + "loss": 0.3248, + "step": 9705 + }, + { + "epoch": 0.4392849060873501, + "grad_norm": 0.6254654159755485, + "learning_rate": 6.214634289070717e-06, + "loss": 0.3308, + "step": 9706 + }, + { + "epoch": 0.43933016519574564, + "grad_norm": 0.634866839571993, + "learning_rate": 6.213923309941728e-06, + "loss": 0.3062, + "step": 9707 + }, + { + "epoch": 0.4393754243041412, + "grad_norm": 0.40657094828930934, + "learning_rate": 6.213212304729259e-06, + "loss": 0.5051, + "step": 9708 + }, + { + "epoch": 0.43942068341253676, + "grad_norm": 0.6105111080738309, + "learning_rate": 6.212501273448581e-06, + "loss": 0.3353, + "step": 9709 + }, + { + "epoch": 0.43946594252093235, + "grad_norm": 0.662434514457218, + "learning_rate": 6.211790216114976e-06, + "loss": 0.3619, + "step": 9710 + }, + { + "epoch": 0.4395112016293279, + "grad_norm": 0.6727588402946885, + "learning_rate": 6.21107913274372e-06, + "loss": 0.3231, + "step": 9711 + }, + { + "epoch": 0.4395564607377235, + "grad_norm": 0.6425714546278731, + "learning_rate": 6.210368023350094e-06, + "loss": 0.3426, + "step": 9712 + }, + { + "epoch": 0.439601719846119, + "grad_norm": 0.61234101605595, + "learning_rate": 6.209656887949376e-06, + "loss": 0.3355, + "step": 9713 + }, + { + "epoch": 0.4396469789545146, + "grad_norm": 0.634668015159857, + "learning_rate": 6.208945726556848e-06, + "loss": 0.3608, + "step": 9714 + }, + { + "epoch": 0.43969223806291013, + "grad_norm": 0.29388674268361786, + "learning_rate": 6.2082345391877865e-06, + "loss": 0.4741, + "step": 9715 + }, + { + "epoch": 0.4397374971713057, + "grad_norm": 0.6535017456665534, + "learning_rate": 6.207523325857479e-06, + "loss": 0.3334, + "step": 9716 + }, + { + "epoch": 0.4397827562797013, + "grad_norm": 0.6852149829343164, + "learning_rate": 6.206812086581201e-06, + "loss": 0.3798, + "step": 9717 + }, + { + "epoch": 0.43982801538809685, + "grad_norm": 0.6729182496858611, + "learning_rate": 6.206100821374238e-06, + "loss": 0.3105, + "step": 9718 + }, + { + "epoch": 0.43987327449649244, + "grad_norm": 0.6320681877620283, + "learning_rate": 6.205389530251873e-06, + "loss": 0.3502, + "step": 9719 + }, + { + "epoch": 0.439918533604888, + "grad_norm": 0.6497975622661094, + "learning_rate": 6.204678213229389e-06, + "loss": 0.3513, + "step": 9720 + }, + { + "epoch": 0.43996379271328356, + "grad_norm": 0.6672429135341902, + "learning_rate": 6.203966870322071e-06, + "loss": 0.3292, + "step": 9721 + }, + { + "epoch": 0.4400090518216791, + "grad_norm": 0.31104056891057263, + "learning_rate": 6.2032555015452036e-06, + "loss": 0.4851, + "step": 9722 + }, + { + "epoch": 0.4400543109300747, + "grad_norm": 0.6751221659802483, + "learning_rate": 6.202544106914068e-06, + "loss": 0.3552, + "step": 9723 + }, + { + "epoch": 0.4400995700384702, + "grad_norm": 0.2951432521217396, + "learning_rate": 6.201832686443955e-06, + "loss": 0.4623, + "step": 9724 + }, + { + "epoch": 0.4401448291468658, + "grad_norm": 0.6698341616356065, + "learning_rate": 6.201121240150147e-06, + "loss": 0.3288, + "step": 9725 + }, + { + "epoch": 0.44019008825526135, + "grad_norm": 0.6358296298524535, + "learning_rate": 6.200409768047935e-06, + "loss": 0.32, + "step": 9726 + }, + { + "epoch": 0.44023534736365694, + "grad_norm": 0.6169335416900048, + "learning_rate": 6.199698270152602e-06, + "loss": 0.333, + "step": 9727 + }, + { + "epoch": 0.4402806064720525, + "grad_norm": 0.6492481963366252, + "learning_rate": 6.198986746479439e-06, + "loss": 0.3533, + "step": 9728 + }, + { + "epoch": 0.44032586558044806, + "grad_norm": 0.6320510462358143, + "learning_rate": 6.198275197043732e-06, + "loss": 0.3324, + "step": 9729 + }, + { + "epoch": 0.44037112468884365, + "grad_norm": 0.6742375310083387, + "learning_rate": 6.197563621860771e-06, + "loss": 0.3547, + "step": 9730 + }, + { + "epoch": 0.4404163837972392, + "grad_norm": 0.6290118966322753, + "learning_rate": 6.196852020945846e-06, + "loss": 0.362, + "step": 9731 + }, + { + "epoch": 0.4404616429056348, + "grad_norm": 0.6904537020386137, + "learning_rate": 6.196140394314247e-06, + "loss": 0.3841, + "step": 9732 + }, + { + "epoch": 0.4405069020140303, + "grad_norm": 0.581190785112767, + "learning_rate": 6.195428741981266e-06, + "loss": 0.3469, + "step": 9733 + }, + { + "epoch": 0.4405521611224259, + "grad_norm": 0.6780527641923901, + "learning_rate": 6.194717063962191e-06, + "loss": 0.3413, + "step": 9734 + }, + { + "epoch": 0.44059742023082143, + "grad_norm": 0.32026258131864827, + "learning_rate": 6.194005360272317e-06, + "loss": 0.474, + "step": 9735 + }, + { + "epoch": 0.440642679339217, + "grad_norm": 0.7847063668187274, + "learning_rate": 6.193293630926933e-06, + "loss": 0.3376, + "step": 9736 + }, + { + "epoch": 0.44068793844761256, + "grad_norm": 0.6635025468514317, + "learning_rate": 6.192581875941336e-06, + "loss": 0.3821, + "step": 9737 + }, + { + "epoch": 0.44073319755600815, + "grad_norm": 0.649767023666101, + "learning_rate": 6.191870095330817e-06, + "loss": 0.3396, + "step": 9738 + }, + { + "epoch": 0.4407784566644037, + "grad_norm": 0.6212240638480885, + "learning_rate": 6.191158289110669e-06, + "loss": 0.3332, + "step": 9739 + }, + { + "epoch": 0.4408237157727993, + "grad_norm": 0.6820651677451741, + "learning_rate": 6.1904464572961874e-06, + "loss": 0.3473, + "step": 9740 + }, + { + "epoch": 0.44086897488119486, + "grad_norm": 0.6214271892311886, + "learning_rate": 6.1897345999026695e-06, + "loss": 0.3568, + "step": 9741 + }, + { + "epoch": 0.4409142339895904, + "grad_norm": 0.6027011619205818, + "learning_rate": 6.1890227169454075e-06, + "loss": 0.3334, + "step": 9742 + }, + { + "epoch": 0.440959493097986, + "grad_norm": 0.6328640836910864, + "learning_rate": 6.188310808439701e-06, + "loss": 0.3459, + "step": 9743 + }, + { + "epoch": 0.4410047522063815, + "grad_norm": 0.6179245657168015, + "learning_rate": 6.187598874400842e-06, + "loss": 0.3734, + "step": 9744 + }, + { + "epoch": 0.4410500113147771, + "grad_norm": 0.7759051603585791, + "learning_rate": 6.1868869148441325e-06, + "loss": 0.342, + "step": 9745 + }, + { + "epoch": 0.44109527042317265, + "grad_norm": 0.606907011740406, + "learning_rate": 6.1861749297848685e-06, + "loss": 0.3105, + "step": 9746 + }, + { + "epoch": 0.44114052953156824, + "grad_norm": 0.6237538836904465, + "learning_rate": 6.185462919238348e-06, + "loss": 0.3557, + "step": 9747 + }, + { + "epoch": 0.44118578863996377, + "grad_norm": 0.3348760559457101, + "learning_rate": 6.184750883219869e-06, + "loss": 0.4823, + "step": 9748 + }, + { + "epoch": 0.44123104774835936, + "grad_norm": 0.6390519401669638, + "learning_rate": 6.184038821744733e-06, + "loss": 0.3532, + "step": 9749 + }, + { + "epoch": 0.4412763068567549, + "grad_norm": 0.657523255701622, + "learning_rate": 6.18332673482824e-06, + "loss": 0.2972, + "step": 9750 + }, + { + "epoch": 0.4413215659651505, + "grad_norm": 0.6160734718098636, + "learning_rate": 6.18261462248569e-06, + "loss": 0.3295, + "step": 9751 + }, + { + "epoch": 0.4413668250735461, + "grad_norm": 0.2923393424606555, + "learning_rate": 6.181902484732381e-06, + "loss": 0.4782, + "step": 9752 + }, + { + "epoch": 0.4414120841819416, + "grad_norm": 0.6990437189078308, + "learning_rate": 6.181190321583621e-06, + "loss": 0.3, + "step": 9753 + }, + { + "epoch": 0.4414573432903372, + "grad_norm": 0.6699717353002903, + "learning_rate": 6.180478133054707e-06, + "loss": 0.3256, + "step": 9754 + }, + { + "epoch": 0.44150260239873274, + "grad_norm": 0.6438335030507906, + "learning_rate": 6.179765919160945e-06, + "loss": 0.2922, + "step": 9755 + }, + { + "epoch": 0.4415478615071283, + "grad_norm": 0.6392233623323376, + "learning_rate": 6.179053679917635e-06, + "loss": 0.349, + "step": 9756 + }, + { + "epoch": 0.44159312061552386, + "grad_norm": 0.7026479472661328, + "learning_rate": 6.1783414153400835e-06, + "loss": 0.3244, + "step": 9757 + }, + { + "epoch": 0.44163837972391945, + "grad_norm": 0.6389687826947396, + "learning_rate": 6.177629125443594e-06, + "loss": 0.3285, + "step": 9758 + }, + { + "epoch": 0.441683638832315, + "grad_norm": 0.6566514186922977, + "learning_rate": 6.176916810243471e-06, + "loss": 0.354, + "step": 9759 + }, + { + "epoch": 0.4417288979407106, + "grad_norm": 0.43650505652041166, + "learning_rate": 6.176204469755021e-06, + "loss": 0.4714, + "step": 9760 + }, + { + "epoch": 0.4417741570491061, + "grad_norm": 0.7489027344347635, + "learning_rate": 6.175492103993548e-06, + "loss": 0.3237, + "step": 9761 + }, + { + "epoch": 0.4418194161575017, + "grad_norm": 0.7357275635937931, + "learning_rate": 6.1747797129743605e-06, + "loss": 0.3311, + "step": 9762 + }, + { + "epoch": 0.4418646752658973, + "grad_norm": 0.31040027336716486, + "learning_rate": 6.174067296712765e-06, + "loss": 0.4697, + "step": 9763 + }, + { + "epoch": 0.4419099343742928, + "grad_norm": 0.2952355102718732, + "learning_rate": 6.173354855224071e-06, + "loss": 0.5069, + "step": 9764 + }, + { + "epoch": 0.4419551934826884, + "grad_norm": 0.6238306699955422, + "learning_rate": 6.1726423885235816e-06, + "loss": 0.2843, + "step": 9765 + }, + { + "epoch": 0.44200045259108395, + "grad_norm": 0.6155755513933405, + "learning_rate": 6.1719298966266114e-06, + "loss": 0.3352, + "step": 9766 + }, + { + "epoch": 0.44204571169947954, + "grad_norm": 0.5998973251292518, + "learning_rate": 6.1712173795484665e-06, + "loss": 0.3683, + "step": 9767 + }, + { + "epoch": 0.44209097080787507, + "grad_norm": 0.6325835388706952, + "learning_rate": 6.170504837304458e-06, + "loss": 0.3218, + "step": 9768 + }, + { + "epoch": 0.44213622991627066, + "grad_norm": 0.6759271954619623, + "learning_rate": 6.169792269909893e-06, + "loss": 0.2931, + "step": 9769 + }, + { + "epoch": 0.4421814890246662, + "grad_norm": 0.7109614631678387, + "learning_rate": 6.169079677380086e-06, + "loss": 0.3695, + "step": 9770 + }, + { + "epoch": 0.4422267481330618, + "grad_norm": 0.6078522568876246, + "learning_rate": 6.168367059730348e-06, + "loss": 0.3557, + "step": 9771 + }, + { + "epoch": 0.4422720072414573, + "grad_norm": 0.6527485169924666, + "learning_rate": 6.167654416975991e-06, + "loss": 0.3428, + "step": 9772 + }, + { + "epoch": 0.4423172663498529, + "grad_norm": 0.5826122835066895, + "learning_rate": 6.166941749132325e-06, + "loss": 0.3384, + "step": 9773 + }, + { + "epoch": 0.44236252545824845, + "grad_norm": 0.5247511503156523, + "learning_rate": 6.166229056214665e-06, + "loss": 0.4777, + "step": 9774 + }, + { + "epoch": 0.44240778456664404, + "grad_norm": 0.6002895767581452, + "learning_rate": 6.165516338238324e-06, + "loss": 0.3281, + "step": 9775 + }, + { + "epoch": 0.4424530436750396, + "grad_norm": 0.6204952381918223, + "learning_rate": 6.164803595218618e-06, + "loss": 0.3679, + "step": 9776 + }, + { + "epoch": 0.44249830278343516, + "grad_norm": 0.5812317480029556, + "learning_rate": 6.16409082717086e-06, + "loss": 0.288, + "step": 9777 + }, + { + "epoch": 0.44254356189183075, + "grad_norm": 0.3027247180783634, + "learning_rate": 6.163378034110364e-06, + "loss": 0.4954, + "step": 9778 + }, + { + "epoch": 0.4425888210002263, + "grad_norm": 0.5976073792363202, + "learning_rate": 6.162665216052448e-06, + "loss": 0.3415, + "step": 9779 + }, + { + "epoch": 0.4426340801086219, + "grad_norm": 0.6213027447734806, + "learning_rate": 6.161952373012427e-06, + "loss": 0.3267, + "step": 9780 + }, + { + "epoch": 0.4426793392170174, + "grad_norm": 0.6648348362802524, + "learning_rate": 6.161239505005618e-06, + "loss": 0.3131, + "step": 9781 + }, + { + "epoch": 0.442724598325413, + "grad_norm": 0.6201774829422037, + "learning_rate": 6.160526612047339e-06, + "loss": 0.3248, + "step": 9782 + }, + { + "epoch": 0.44276985743380853, + "grad_norm": 0.5839515753747767, + "learning_rate": 6.159813694152907e-06, + "loss": 0.3755, + "step": 9783 + }, + { + "epoch": 0.4428151165422041, + "grad_norm": 0.6353528762384202, + "learning_rate": 6.1591007513376425e-06, + "loss": 0.36, + "step": 9784 + }, + { + "epoch": 0.44286037565059966, + "grad_norm": 0.406392908771862, + "learning_rate": 6.1583877836168615e-06, + "loss": 0.4675, + "step": 9785 + }, + { + "epoch": 0.44290563475899525, + "grad_norm": 0.7766715682857842, + "learning_rate": 6.157674791005884e-06, + "loss": 0.3431, + "step": 9786 + }, + { + "epoch": 0.44295089386739084, + "grad_norm": 0.6466693556842529, + "learning_rate": 6.1569617735200314e-06, + "loss": 0.3392, + "step": 9787 + }, + { + "epoch": 0.4429961529757864, + "grad_norm": 0.6287656784939121, + "learning_rate": 6.156248731174623e-06, + "loss": 0.3699, + "step": 9788 + }, + { + "epoch": 0.44304141208418196, + "grad_norm": 0.6648703914194296, + "learning_rate": 6.155535663984982e-06, + "loss": 0.3456, + "step": 9789 + }, + { + "epoch": 0.4430866711925775, + "grad_norm": 0.6262050918375093, + "learning_rate": 6.154822571966428e-06, + "loss": 0.3244, + "step": 9790 + }, + { + "epoch": 0.4431319303009731, + "grad_norm": 0.5743181017272367, + "learning_rate": 6.154109455134283e-06, + "loss": 0.3541, + "step": 9791 + }, + { + "epoch": 0.4431771894093686, + "grad_norm": 0.6474090918108056, + "learning_rate": 6.15339631350387e-06, + "loss": 0.3589, + "step": 9792 + }, + { + "epoch": 0.4432224485177642, + "grad_norm": 0.2949861466946327, + "learning_rate": 6.152683147090514e-06, + "loss": 0.4708, + "step": 9793 + }, + { + "epoch": 0.44326770762615975, + "grad_norm": 0.3234561818756022, + "learning_rate": 6.151969955909536e-06, + "loss": 0.4968, + "step": 9794 + }, + { + "epoch": 0.44331296673455534, + "grad_norm": 0.6340148275150607, + "learning_rate": 6.151256739976264e-06, + "loss": 0.4069, + "step": 9795 + }, + { + "epoch": 0.44335822584295087, + "grad_norm": 0.6197326211786413, + "learning_rate": 6.150543499306016e-06, + "loss": 0.3435, + "step": 9796 + }, + { + "epoch": 0.44340348495134646, + "grad_norm": 0.5949256976740551, + "learning_rate": 6.149830233914127e-06, + "loss": 0.3038, + "step": 9797 + }, + { + "epoch": 0.44344874405974205, + "grad_norm": 0.6661137701878944, + "learning_rate": 6.149116943815915e-06, + "loss": 0.3539, + "step": 9798 + }, + { + "epoch": 0.4434940031681376, + "grad_norm": 0.6438231289467011, + "learning_rate": 6.148403629026709e-06, + "loss": 0.3425, + "step": 9799 + }, + { + "epoch": 0.4435392622765332, + "grad_norm": 0.585216600771846, + "learning_rate": 6.147690289561836e-06, + "loss": 0.3322, + "step": 9800 + }, + { + "epoch": 0.4435845213849287, + "grad_norm": 0.3512680335430706, + "learning_rate": 6.146976925436625e-06, + "loss": 0.4644, + "step": 9801 + }, + { + "epoch": 0.4436297804933243, + "grad_norm": 0.6362038853976834, + "learning_rate": 6.146263536666401e-06, + "loss": 0.2967, + "step": 9802 + }, + { + "epoch": 0.44367503960171983, + "grad_norm": 0.33783993682468194, + "learning_rate": 6.145550123266496e-06, + "loss": 0.4783, + "step": 9803 + }, + { + "epoch": 0.4437202987101154, + "grad_norm": 0.3090639197217425, + "learning_rate": 6.1448366852522346e-06, + "loss": 0.4825, + "step": 9804 + }, + { + "epoch": 0.44376555781851096, + "grad_norm": 0.583863872091518, + "learning_rate": 6.144123222638952e-06, + "loss": 0.3328, + "step": 9805 + }, + { + "epoch": 0.44381081692690655, + "grad_norm": 0.6303943405691291, + "learning_rate": 6.143409735441972e-06, + "loss": 0.3285, + "step": 9806 + }, + { + "epoch": 0.4438560760353021, + "grad_norm": 0.6284380418815342, + "learning_rate": 6.1426962236766294e-06, + "loss": 0.3571, + "step": 9807 + }, + { + "epoch": 0.4439013351436977, + "grad_norm": 0.6136933151196827, + "learning_rate": 6.141982687358255e-06, + "loss": 0.3188, + "step": 9808 + }, + { + "epoch": 0.4439465942520932, + "grad_norm": 0.6394782780168193, + "learning_rate": 6.14126912650218e-06, + "loss": 0.346, + "step": 9809 + }, + { + "epoch": 0.4439918533604888, + "grad_norm": 0.3835124101312275, + "learning_rate": 6.140555541123737e-06, + "loss": 0.494, + "step": 9810 + }, + { + "epoch": 0.4440371124688844, + "grad_norm": 0.6657903827089986, + "learning_rate": 6.1398419312382575e-06, + "loss": 0.3775, + "step": 9811 + }, + { + "epoch": 0.4440823715772799, + "grad_norm": 0.6194909844891778, + "learning_rate": 6.139128296861076e-06, + "loss": 0.3312, + "step": 9812 + }, + { + "epoch": 0.4441276306856755, + "grad_norm": 0.6453686197003785, + "learning_rate": 6.138414638007526e-06, + "loss": 0.3239, + "step": 9813 + }, + { + "epoch": 0.44417288979407105, + "grad_norm": 0.6254271915732517, + "learning_rate": 6.137700954692944e-06, + "loss": 0.3028, + "step": 9814 + }, + { + "epoch": 0.44421814890246664, + "grad_norm": 0.3586269129410446, + "learning_rate": 6.136987246932658e-06, + "loss": 0.4782, + "step": 9815 + }, + { + "epoch": 0.44426340801086217, + "grad_norm": 0.6018532392814256, + "learning_rate": 6.136273514742013e-06, + "loss": 0.3272, + "step": 9816 + }, + { + "epoch": 0.44430866711925776, + "grad_norm": 0.6227910130275708, + "learning_rate": 6.135559758136337e-06, + "loss": 0.3353, + "step": 9817 + }, + { + "epoch": 0.4443539262276533, + "grad_norm": 0.6510502346855318, + "learning_rate": 6.13484597713097e-06, + "loss": 0.3647, + "step": 9818 + }, + { + "epoch": 0.4443991853360489, + "grad_norm": 0.5901502996575975, + "learning_rate": 6.134132171741247e-06, + "loss": 0.3081, + "step": 9819 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.6095995854076501, + "learning_rate": 6.133418341982509e-06, + "loss": 0.369, + "step": 9820 + }, + { + "epoch": 0.44448970355284, + "grad_norm": 0.30644324654506866, + "learning_rate": 6.132704487870091e-06, + "loss": 0.4773, + "step": 9821 + }, + { + "epoch": 0.4445349626612356, + "grad_norm": 0.5812312860977836, + "learning_rate": 6.131990609419334e-06, + "loss": 0.343, + "step": 9822 + }, + { + "epoch": 0.44458022176963113, + "grad_norm": 0.6686231801887372, + "learning_rate": 6.131276706645572e-06, + "loss": 0.3077, + "step": 9823 + }, + { + "epoch": 0.4446254808780267, + "grad_norm": 0.5833261560869663, + "learning_rate": 6.130562779564151e-06, + "loss": 0.3347, + "step": 9824 + }, + { + "epoch": 0.44467073998642226, + "grad_norm": 0.7014683950261718, + "learning_rate": 6.129848828190405e-06, + "loss": 0.3275, + "step": 9825 + }, + { + "epoch": 0.44471599909481785, + "grad_norm": 0.6994318413430156, + "learning_rate": 6.129134852539682e-06, + "loss": 0.3752, + "step": 9826 + }, + { + "epoch": 0.4447612582032134, + "grad_norm": 0.6038864527395649, + "learning_rate": 6.128420852627316e-06, + "loss": 0.3189, + "step": 9827 + }, + { + "epoch": 0.444806517311609, + "grad_norm": 0.6531328968033617, + "learning_rate": 6.127706828468653e-06, + "loss": 0.3217, + "step": 9828 + }, + { + "epoch": 0.4448517764200045, + "grad_norm": 0.6903825925649602, + "learning_rate": 6.126992780079032e-06, + "loss": 0.3516, + "step": 9829 + }, + { + "epoch": 0.4448970355284001, + "grad_norm": 0.6003467816513118, + "learning_rate": 6.1262787074738e-06, + "loss": 0.3222, + "step": 9830 + }, + { + "epoch": 0.44494229463679563, + "grad_norm": 0.5740307584921875, + "learning_rate": 6.125564610668294e-06, + "loss": 0.3023, + "step": 9831 + }, + { + "epoch": 0.4449875537451912, + "grad_norm": 0.33965468133552557, + "learning_rate": 6.124850489677865e-06, + "loss": 0.4725, + "step": 9832 + }, + { + "epoch": 0.44503281285358676, + "grad_norm": 0.2948191948780353, + "learning_rate": 6.1241363445178515e-06, + "loss": 0.4507, + "step": 9833 + }, + { + "epoch": 0.44507807196198235, + "grad_norm": 0.6054233565022529, + "learning_rate": 6.1234221752036015e-06, + "loss": 0.3551, + "step": 9834 + }, + { + "epoch": 0.44512333107037794, + "grad_norm": 0.5959582579867868, + "learning_rate": 6.122707981750458e-06, + "loss": 0.3295, + "step": 9835 + }, + { + "epoch": 0.44516859017877347, + "grad_norm": 0.6347395989261895, + "learning_rate": 6.12199376417377e-06, + "loss": 0.342, + "step": 9836 + }, + { + "epoch": 0.44521384928716906, + "grad_norm": 0.6944384128632148, + "learning_rate": 6.121279522488881e-06, + "loss": 0.399, + "step": 9837 + }, + { + "epoch": 0.4452591083955646, + "grad_norm": 1.8592269415278064, + "learning_rate": 6.120565256711138e-06, + "loss": 0.3245, + "step": 9838 + }, + { + "epoch": 0.4453043675039602, + "grad_norm": 0.5513116467297371, + "learning_rate": 6.11985096685589e-06, + "loss": 0.4517, + "step": 9839 + }, + { + "epoch": 0.4453496266123557, + "grad_norm": 0.5414042070571672, + "learning_rate": 6.1191366529384845e-06, + "loss": 0.4674, + "step": 9840 + }, + { + "epoch": 0.4453948857207513, + "grad_norm": 0.6531904168417739, + "learning_rate": 6.118422314974269e-06, + "loss": 0.3548, + "step": 9841 + }, + { + "epoch": 0.44544014482914684, + "grad_norm": 0.7155330778152321, + "learning_rate": 6.117707952978593e-06, + "loss": 0.3676, + "step": 9842 + }, + { + "epoch": 0.44548540393754243, + "grad_norm": 0.820859464855041, + "learning_rate": 6.116993566966807e-06, + "loss": 0.3315, + "step": 9843 + }, + { + "epoch": 0.44553066304593797, + "grad_norm": 0.33759141723286856, + "learning_rate": 6.1162791569542576e-06, + "loss": 0.4529, + "step": 9844 + }, + { + "epoch": 0.44557592215433356, + "grad_norm": 0.6551364903781752, + "learning_rate": 6.1155647229562994e-06, + "loss": 0.3414, + "step": 9845 + }, + { + "epoch": 0.44562118126272915, + "grad_norm": 0.7403036860239424, + "learning_rate": 6.1148502649882805e-06, + "loss": 0.3875, + "step": 9846 + }, + { + "epoch": 0.4456664403711247, + "grad_norm": 0.7837013726439063, + "learning_rate": 6.114135783065553e-06, + "loss": 0.3745, + "step": 9847 + }, + { + "epoch": 0.4457116994795203, + "grad_norm": 0.6477213100740911, + "learning_rate": 6.113421277203471e-06, + "loss": 0.407, + "step": 9848 + }, + { + "epoch": 0.4457569585879158, + "grad_norm": 0.614216462619982, + "learning_rate": 6.112706747417384e-06, + "loss": 0.3495, + "step": 9849 + }, + { + "epoch": 0.4458022176963114, + "grad_norm": 0.325186835765987, + "learning_rate": 6.111992193722647e-06, + "loss": 0.4765, + "step": 9850 + }, + { + "epoch": 0.44584747680470693, + "grad_norm": 0.71102109767876, + "learning_rate": 6.111277616134613e-06, + "loss": 0.3754, + "step": 9851 + }, + { + "epoch": 0.4458927359131025, + "grad_norm": 0.6230716349605854, + "learning_rate": 6.1105630146686345e-06, + "loss": 0.3431, + "step": 9852 + }, + { + "epoch": 0.44593799502149806, + "grad_norm": 0.6124749160422528, + "learning_rate": 6.109848389340071e-06, + "loss": 0.2981, + "step": 9853 + }, + { + "epoch": 0.44598325412989365, + "grad_norm": 0.6262606678189672, + "learning_rate": 6.109133740164271e-06, + "loss": 0.3268, + "step": 9854 + }, + { + "epoch": 0.4460285132382892, + "grad_norm": 0.6227322143402623, + "learning_rate": 6.108419067156595e-06, + "loss": 0.3142, + "step": 9855 + }, + { + "epoch": 0.44607377234668477, + "grad_norm": 0.651331240442703, + "learning_rate": 6.1077043703323964e-06, + "loss": 0.3388, + "step": 9856 + }, + { + "epoch": 0.44611903145508036, + "grad_norm": 0.6447017477504052, + "learning_rate": 6.106989649707034e-06, + "loss": 0.3347, + "step": 9857 + }, + { + "epoch": 0.4461642905634759, + "grad_norm": 1.0241416300529436, + "learning_rate": 6.106274905295864e-06, + "loss": 0.3285, + "step": 9858 + }, + { + "epoch": 0.4462095496718715, + "grad_norm": 0.3596739071402451, + "learning_rate": 6.105560137114244e-06, + "loss": 0.4933, + "step": 9859 + }, + { + "epoch": 0.446254808780267, + "grad_norm": 0.34139558368903, + "learning_rate": 6.1048453451775305e-06, + "loss": 0.4793, + "step": 9860 + }, + { + "epoch": 0.4463000678886626, + "grad_norm": 0.6831938419121877, + "learning_rate": 6.104130529501086e-06, + "loss": 0.3348, + "step": 9861 + }, + { + "epoch": 0.44634532699705814, + "grad_norm": 0.6882346739189739, + "learning_rate": 6.103415690100265e-06, + "loss": 0.3799, + "step": 9862 + }, + { + "epoch": 0.44639058610545373, + "grad_norm": 0.3176810590962061, + "learning_rate": 6.102700826990432e-06, + "loss": 0.459, + "step": 9863 + }, + { + "epoch": 0.44643584521384927, + "grad_norm": 0.6221193294243147, + "learning_rate": 6.101985940186943e-06, + "loss": 0.3489, + "step": 9864 + }, + { + "epoch": 0.44648110432224486, + "grad_norm": 0.6180950453337573, + "learning_rate": 6.101271029705163e-06, + "loss": 0.3321, + "step": 9865 + }, + { + "epoch": 0.4465263634306404, + "grad_norm": 0.6067883624713181, + "learning_rate": 6.100556095560448e-06, + "loss": 0.3362, + "step": 9866 + }, + { + "epoch": 0.446571622539036, + "grad_norm": 0.6901054349850831, + "learning_rate": 6.099841137768164e-06, + "loss": 0.3529, + "step": 9867 + }, + { + "epoch": 0.4466168816474315, + "grad_norm": 0.7772309288047123, + "learning_rate": 6.099126156343672e-06, + "loss": 0.3405, + "step": 9868 + }, + { + "epoch": 0.4466621407558271, + "grad_norm": 0.6338037509155247, + "learning_rate": 6.098411151302335e-06, + "loss": 0.3095, + "step": 9869 + }, + { + "epoch": 0.4467073998642227, + "grad_norm": 0.6478361223504291, + "learning_rate": 6.097696122659515e-06, + "loss": 0.365, + "step": 9870 + }, + { + "epoch": 0.44675265897261823, + "grad_norm": 0.3435502929420577, + "learning_rate": 6.096981070430577e-06, + "loss": 0.5074, + "step": 9871 + }, + { + "epoch": 0.4467979180810138, + "grad_norm": 0.5684308557539682, + "learning_rate": 6.096265994630886e-06, + "loss": 0.3604, + "step": 9872 + }, + { + "epoch": 0.44684317718940936, + "grad_norm": 0.6546593740824457, + "learning_rate": 6.095550895275803e-06, + "loss": 0.3511, + "step": 9873 + }, + { + "epoch": 0.44688843629780495, + "grad_norm": 0.6351718584179072, + "learning_rate": 6.094835772380699e-06, + "loss": 0.3389, + "step": 9874 + }, + { + "epoch": 0.4469336954062005, + "grad_norm": 0.6333198074375345, + "learning_rate": 6.094120625960934e-06, + "loss": 0.3783, + "step": 9875 + }, + { + "epoch": 0.44697895451459607, + "grad_norm": 0.28208508094965357, + "learning_rate": 6.09340545603188e-06, + "loss": 0.4683, + "step": 9876 + }, + { + "epoch": 0.4470242136229916, + "grad_norm": 0.27034783904859405, + "learning_rate": 6.092690262608899e-06, + "loss": 0.463, + "step": 9877 + }, + { + "epoch": 0.4470694727313872, + "grad_norm": 0.6585479271201585, + "learning_rate": 6.091975045707361e-06, + "loss": 0.3384, + "step": 9878 + }, + { + "epoch": 0.44711473183978273, + "grad_norm": 0.3012915983187069, + "learning_rate": 6.091259805342632e-06, + "loss": 0.4755, + "step": 9879 + }, + { + "epoch": 0.4471599909481783, + "grad_norm": 0.30539675156875673, + "learning_rate": 6.0905445415300835e-06, + "loss": 0.4655, + "step": 9880 + }, + { + "epoch": 0.4472052500565739, + "grad_norm": 0.5853264373929327, + "learning_rate": 6.089829254285079e-06, + "loss": 0.3302, + "step": 9881 + }, + { + "epoch": 0.44725050916496945, + "grad_norm": 0.27688497076430446, + "learning_rate": 6.089113943622994e-06, + "loss": 0.4778, + "step": 9882 + }, + { + "epoch": 0.44729576827336504, + "grad_norm": 0.6386649390187925, + "learning_rate": 6.088398609559193e-06, + "loss": 0.3475, + "step": 9883 + }, + { + "epoch": 0.44734102738176057, + "grad_norm": 0.67766272206694, + "learning_rate": 6.08768325210905e-06, + "loss": 0.3465, + "step": 9884 + }, + { + "epoch": 0.44738628649015616, + "grad_norm": 0.5990159683409747, + "learning_rate": 6.086967871287934e-06, + "loss": 0.3466, + "step": 9885 + }, + { + "epoch": 0.4474315455985517, + "grad_norm": 0.617226860730486, + "learning_rate": 6.086252467111216e-06, + "loss": 0.3293, + "step": 9886 + }, + { + "epoch": 0.4474768047069473, + "grad_norm": 0.32740738856566126, + "learning_rate": 6.0855370395942705e-06, + "loss": 0.4634, + "step": 9887 + }, + { + "epoch": 0.4475220638153428, + "grad_norm": 0.33128541122915056, + "learning_rate": 6.0848215887524665e-06, + "loss": 0.4757, + "step": 9888 + }, + { + "epoch": 0.4475673229237384, + "grad_norm": 0.6868043705289181, + "learning_rate": 6.084106114601178e-06, + "loss": 0.3073, + "step": 9889 + }, + { + "epoch": 0.44761258203213394, + "grad_norm": 0.6463951687803111, + "learning_rate": 6.08339061715578e-06, + "loss": 0.3492, + "step": 9890 + }, + { + "epoch": 0.44765784114052953, + "grad_norm": 0.5876814576312829, + "learning_rate": 6.082675096431645e-06, + "loss": 0.2924, + "step": 9891 + }, + { + "epoch": 0.4477031002489251, + "grad_norm": 0.6016861692698359, + "learning_rate": 6.081959552444147e-06, + "loss": 0.3545, + "step": 9892 + }, + { + "epoch": 0.44774835935732066, + "grad_norm": 0.607095977406509, + "learning_rate": 6.081243985208662e-06, + "loss": 0.348, + "step": 9893 + }, + { + "epoch": 0.44779361846571625, + "grad_norm": 0.6987791654284069, + "learning_rate": 6.0805283947405625e-06, + "loss": 0.3336, + "step": 9894 + }, + { + "epoch": 0.4478388775741118, + "grad_norm": 0.33277908746130674, + "learning_rate": 6.079812781055228e-06, + "loss": 0.4934, + "step": 9895 + }, + { + "epoch": 0.44788413668250737, + "grad_norm": 0.7554432633335122, + "learning_rate": 6.0790971441680325e-06, + "loss": 0.3706, + "step": 9896 + }, + { + "epoch": 0.4479293957909029, + "grad_norm": 0.6070321705846707, + "learning_rate": 6.078381484094353e-06, + "loss": 0.3155, + "step": 9897 + }, + { + "epoch": 0.4479746548992985, + "grad_norm": 0.2850215988806868, + "learning_rate": 6.077665800849568e-06, + "loss": 0.4661, + "step": 9898 + }, + { + "epoch": 0.44801991400769403, + "grad_norm": 0.27157890571522925, + "learning_rate": 6.076950094449055e-06, + "loss": 0.4477, + "step": 9899 + }, + { + "epoch": 0.4480651731160896, + "grad_norm": 0.637999409504055, + "learning_rate": 6.076234364908192e-06, + "loss": 0.3479, + "step": 9900 + }, + { + "epoch": 0.44811043222448516, + "grad_norm": 0.7043538513070119, + "learning_rate": 6.07551861224236e-06, + "loss": 0.3614, + "step": 9901 + }, + { + "epoch": 0.44815569133288075, + "grad_norm": 0.6120136354672181, + "learning_rate": 6.074802836466932e-06, + "loss": 0.3262, + "step": 9902 + }, + { + "epoch": 0.4482009504412763, + "grad_norm": 0.6531903882646292, + "learning_rate": 6.074087037597296e-06, + "loss": 0.3485, + "step": 9903 + }, + { + "epoch": 0.44824620954967187, + "grad_norm": 0.35229527558395135, + "learning_rate": 6.073371215648824e-06, + "loss": 0.4707, + "step": 9904 + }, + { + "epoch": 0.44829146865806746, + "grad_norm": 0.6613695716254754, + "learning_rate": 6.072655370636905e-06, + "loss": 0.3817, + "step": 9905 + }, + { + "epoch": 0.448336727766463, + "grad_norm": 0.6818675839553595, + "learning_rate": 6.071939502576916e-06, + "loss": 0.3511, + "step": 9906 + }, + { + "epoch": 0.4483819868748586, + "grad_norm": 0.6641385200315042, + "learning_rate": 6.071223611484238e-06, + "loss": 0.3688, + "step": 9907 + }, + { + "epoch": 0.4484272459832541, + "grad_norm": 0.6913005755156559, + "learning_rate": 6.070507697374255e-06, + "loss": 0.3579, + "step": 9908 + }, + { + "epoch": 0.4484725050916497, + "grad_norm": 0.6397984850762364, + "learning_rate": 6.06979176026235e-06, + "loss": 0.3429, + "step": 9909 + }, + { + "epoch": 0.44851776420004524, + "grad_norm": 0.5996746250995699, + "learning_rate": 6.069075800163905e-06, + "loss": 0.3481, + "step": 9910 + }, + { + "epoch": 0.44856302330844083, + "grad_norm": 0.6204333710521447, + "learning_rate": 6.068359817094305e-06, + "loss": 0.3566, + "step": 9911 + }, + { + "epoch": 0.44860828241683637, + "grad_norm": 0.68970625469837, + "learning_rate": 6.067643811068933e-06, + "loss": 0.3729, + "step": 9912 + }, + { + "epoch": 0.44865354152523196, + "grad_norm": 0.3392200840779211, + "learning_rate": 6.066927782103176e-06, + "loss": 0.5064, + "step": 9913 + }, + { + "epoch": 0.4486988006336275, + "grad_norm": 0.30088581321615016, + "learning_rate": 6.066211730212416e-06, + "loss": 0.5039, + "step": 9914 + }, + { + "epoch": 0.4487440597420231, + "grad_norm": 0.28778244578525586, + "learning_rate": 6.0654956554120415e-06, + "loss": 0.4639, + "step": 9915 + }, + { + "epoch": 0.4487893188504187, + "grad_norm": 0.6963911431712173, + "learning_rate": 6.064779557717437e-06, + "loss": 0.339, + "step": 9916 + }, + { + "epoch": 0.4488345779588142, + "grad_norm": 0.6489724625248927, + "learning_rate": 6.064063437143991e-06, + "loss": 0.3543, + "step": 9917 + }, + { + "epoch": 0.4488798370672098, + "grad_norm": 0.6240808389505063, + "learning_rate": 6.063347293707089e-06, + "loss": 0.3149, + "step": 9918 + }, + { + "epoch": 0.44892509617560533, + "grad_norm": 0.622898892449197, + "learning_rate": 6.06263112742212e-06, + "loss": 0.3497, + "step": 9919 + }, + { + "epoch": 0.4489703552840009, + "grad_norm": 0.6727813964875871, + "learning_rate": 6.06191493830447e-06, + "loss": 0.357, + "step": 9920 + }, + { + "epoch": 0.44901561439239646, + "grad_norm": 0.6661519934507191, + "learning_rate": 6.061198726369531e-06, + "loss": 0.3959, + "step": 9921 + }, + { + "epoch": 0.44906087350079205, + "grad_norm": 0.4142201447101892, + "learning_rate": 6.060482491632692e-06, + "loss": 0.4497, + "step": 9922 + }, + { + "epoch": 0.4491061326091876, + "grad_norm": 0.393005672498883, + "learning_rate": 6.0597662341093385e-06, + "loss": 0.4966, + "step": 9923 + }, + { + "epoch": 0.44915139171758317, + "grad_norm": 0.6824675162900966, + "learning_rate": 6.059049953814866e-06, + "loss": 0.3212, + "step": 9924 + }, + { + "epoch": 0.4491966508259787, + "grad_norm": 0.6373071535052531, + "learning_rate": 6.058333650764661e-06, + "loss": 0.374, + "step": 9925 + }, + { + "epoch": 0.4492419099343743, + "grad_norm": 0.3104536489191795, + "learning_rate": 6.057617324974117e-06, + "loss": 0.5098, + "step": 9926 + }, + { + "epoch": 0.4492871690427699, + "grad_norm": 0.3229491305240693, + "learning_rate": 6.056900976458624e-06, + "loss": 0.4813, + "step": 9927 + }, + { + "epoch": 0.4493324281511654, + "grad_norm": 0.30806458088232463, + "learning_rate": 6.056184605233576e-06, + "loss": 0.4662, + "step": 9928 + }, + { + "epoch": 0.449377687259561, + "grad_norm": 0.28375591384829485, + "learning_rate": 6.0554682113143634e-06, + "loss": 0.473, + "step": 9929 + }, + { + "epoch": 0.44942294636795654, + "grad_norm": 0.7300146494047709, + "learning_rate": 6.054751794716383e-06, + "loss": 0.3145, + "step": 9930 + }, + { + "epoch": 0.44946820547635213, + "grad_norm": 0.6897056093316484, + "learning_rate": 6.054035355455023e-06, + "loss": 0.3321, + "step": 9931 + }, + { + "epoch": 0.44951346458474767, + "grad_norm": 0.36312836939211174, + "learning_rate": 6.053318893545683e-06, + "loss": 0.47, + "step": 9932 + }, + { + "epoch": 0.44955872369314326, + "grad_norm": 0.6457343812310226, + "learning_rate": 6.052602409003752e-06, + "loss": 0.336, + "step": 9933 + }, + { + "epoch": 0.4496039828015388, + "grad_norm": 0.34105938676114667, + "learning_rate": 6.051885901844631e-06, + "loss": 0.4805, + "step": 9934 + }, + { + "epoch": 0.4496492419099344, + "grad_norm": 0.675213898440973, + "learning_rate": 6.0511693720837115e-06, + "loss": 0.3401, + "step": 9935 + }, + { + "epoch": 0.4496945010183299, + "grad_norm": 0.6437818053252721, + "learning_rate": 6.05045281973639e-06, + "loss": 0.3298, + "step": 9936 + }, + { + "epoch": 0.4497397601267255, + "grad_norm": 0.28992887781932325, + "learning_rate": 6.049736244818064e-06, + "loss": 0.4943, + "step": 9937 + }, + { + "epoch": 0.44978501923512104, + "grad_norm": 0.6648857665223082, + "learning_rate": 6.049019647344131e-06, + "loss": 0.3324, + "step": 9938 + }, + { + "epoch": 0.44983027834351663, + "grad_norm": 0.5901260207845679, + "learning_rate": 6.048303027329987e-06, + "loss": 0.3156, + "step": 9939 + }, + { + "epoch": 0.4498755374519122, + "grad_norm": 0.6111094970963682, + "learning_rate": 6.047586384791031e-06, + "loss": 0.3203, + "step": 9940 + }, + { + "epoch": 0.44992079656030776, + "grad_norm": 0.6613587091807451, + "learning_rate": 6.0468697197426595e-06, + "loss": 0.3222, + "step": 9941 + }, + { + "epoch": 0.44996605566870335, + "grad_norm": 0.6615717557607383, + "learning_rate": 6.046153032200275e-06, + "loss": 0.3667, + "step": 9942 + }, + { + "epoch": 0.4500113147770989, + "grad_norm": 0.6185763797191234, + "learning_rate": 6.045436322179274e-06, + "loss": 0.2944, + "step": 9943 + }, + { + "epoch": 0.45005657388549447, + "grad_norm": 0.5794541395057855, + "learning_rate": 6.044719589695056e-06, + "loss": 0.3209, + "step": 9944 + }, + { + "epoch": 0.45010183299389, + "grad_norm": 0.6453103902286667, + "learning_rate": 6.044002834763023e-06, + "loss": 0.3123, + "step": 9945 + }, + { + "epoch": 0.4501470921022856, + "grad_norm": 0.3764726932362269, + "learning_rate": 6.043286057398576e-06, + "loss": 0.4973, + "step": 9946 + }, + { + "epoch": 0.45019235121068113, + "grad_norm": 0.5987607755953244, + "learning_rate": 6.042569257617117e-06, + "loss": 0.2867, + "step": 9947 + }, + { + "epoch": 0.4502376103190767, + "grad_norm": 0.6875835265469559, + "learning_rate": 6.041852435434044e-06, + "loss": 0.3281, + "step": 9948 + }, + { + "epoch": 0.45028286942747225, + "grad_norm": 0.31049192884635557, + "learning_rate": 6.041135590864764e-06, + "loss": 0.4826, + "step": 9949 + }, + { + "epoch": 0.45032812853586784, + "grad_norm": 0.6758650923785007, + "learning_rate": 6.040418723924677e-06, + "loss": 0.3443, + "step": 9950 + }, + { + "epoch": 0.45037338764426343, + "grad_norm": 0.28328861483255147, + "learning_rate": 6.039701834629189e-06, + "loss": 0.452, + "step": 9951 + }, + { + "epoch": 0.45041864675265897, + "grad_norm": 0.6284360416631046, + "learning_rate": 6.0389849229936995e-06, + "loss": 0.3517, + "step": 9952 + }, + { + "epoch": 0.45046390586105456, + "grad_norm": 0.654522012389242, + "learning_rate": 6.038267989033616e-06, + "loss": 0.3349, + "step": 9953 + }, + { + "epoch": 0.4505091649694501, + "grad_norm": 0.6509169037023864, + "learning_rate": 6.03755103276434e-06, + "loss": 0.36, + "step": 9954 + }, + { + "epoch": 0.4505544240778457, + "grad_norm": 0.613634808440209, + "learning_rate": 6.036834054201283e-06, + "loss": 0.3259, + "step": 9955 + }, + { + "epoch": 0.4505996831862412, + "grad_norm": 0.6591701614555701, + "learning_rate": 6.036117053359844e-06, + "loss": 0.3353, + "step": 9956 + }, + { + "epoch": 0.4506449422946368, + "grad_norm": 0.6706779719525753, + "learning_rate": 6.035400030255431e-06, + "loss": 0.3627, + "step": 9957 + }, + { + "epoch": 0.45069020140303234, + "grad_norm": 0.5676917733584951, + "learning_rate": 6.034682984903453e-06, + "loss": 0.3374, + "step": 9958 + }, + { + "epoch": 0.45073546051142793, + "grad_norm": 0.6145185773045998, + "learning_rate": 6.0339659173193146e-06, + "loss": 0.3101, + "step": 9959 + }, + { + "epoch": 0.45078071961982347, + "grad_norm": 0.6095296510631012, + "learning_rate": 6.033248827518424e-06, + "loss": 0.3428, + "step": 9960 + }, + { + "epoch": 0.45082597872821906, + "grad_norm": 0.9770970068781155, + "learning_rate": 6.032531715516191e-06, + "loss": 0.3494, + "step": 9961 + }, + { + "epoch": 0.4508712378366146, + "grad_norm": 0.6038847607884669, + "learning_rate": 6.03181458132802e-06, + "loss": 0.3174, + "step": 9962 + }, + { + "epoch": 0.4509164969450102, + "grad_norm": 0.6458052103580482, + "learning_rate": 6.031097424969326e-06, + "loss": 0.3378, + "step": 9963 + }, + { + "epoch": 0.45096175605340577, + "grad_norm": 0.6262492290191575, + "learning_rate": 6.030380246455513e-06, + "loss": 0.3476, + "step": 9964 + }, + { + "epoch": 0.4510070151618013, + "grad_norm": 0.6787994342237069, + "learning_rate": 6.0296630458019925e-06, + "loss": 0.3662, + "step": 9965 + }, + { + "epoch": 0.4510522742701969, + "grad_norm": 0.47006450266382405, + "learning_rate": 6.028945823024176e-06, + "loss": 0.4631, + "step": 9966 + }, + { + "epoch": 0.45109753337859243, + "grad_norm": 0.6496704316985095, + "learning_rate": 6.0282285781374746e-06, + "loss": 0.3742, + "step": 9967 + }, + { + "epoch": 0.451142792486988, + "grad_norm": 0.3344543499700861, + "learning_rate": 6.027511311157298e-06, + "loss": 0.4825, + "step": 9968 + }, + { + "epoch": 0.45118805159538355, + "grad_norm": 0.6464649385526088, + "learning_rate": 6.026794022099061e-06, + "loss": 0.3752, + "step": 9969 + }, + { + "epoch": 0.45123331070377914, + "grad_norm": 0.6656227654498554, + "learning_rate": 6.026076710978172e-06, + "loss": 0.34, + "step": 9970 + }, + { + "epoch": 0.4512785698121747, + "grad_norm": 0.3244651461172815, + "learning_rate": 6.0253593778100475e-06, + "loss": 0.4541, + "step": 9971 + }, + { + "epoch": 0.45132382892057027, + "grad_norm": 0.631254794194664, + "learning_rate": 6.0246420226100976e-06, + "loss": 0.3385, + "step": 9972 + }, + { + "epoch": 0.4513690880289658, + "grad_norm": 0.6291399810343311, + "learning_rate": 6.023924645393739e-06, + "loss": 0.3506, + "step": 9973 + }, + { + "epoch": 0.4514143471373614, + "grad_norm": 0.6180885336147697, + "learning_rate": 6.023207246176383e-06, + "loss": 0.3311, + "step": 9974 + }, + { + "epoch": 0.451459606245757, + "grad_norm": 0.6788040399146853, + "learning_rate": 6.0224898249734466e-06, + "loss": 0.3407, + "step": 9975 + }, + { + "epoch": 0.4515048653541525, + "grad_norm": 0.7289625570092265, + "learning_rate": 6.021772381800344e-06, + "loss": 0.3521, + "step": 9976 + }, + { + "epoch": 0.4515501244625481, + "grad_norm": 0.6269926510587083, + "learning_rate": 6.021054916672491e-06, + "loss": 0.3814, + "step": 9977 + }, + { + "epoch": 0.45159538357094364, + "grad_norm": 0.6911981045719757, + "learning_rate": 6.020337429605304e-06, + "loss": 0.3716, + "step": 9978 + }, + { + "epoch": 0.45164064267933923, + "grad_norm": 0.6034806566182572, + "learning_rate": 6.019619920614199e-06, + "loss": 0.2915, + "step": 9979 + }, + { + "epoch": 0.45168590178773477, + "grad_norm": 0.62852179758487, + "learning_rate": 6.0189023897145944e-06, + "loss": 0.3452, + "step": 9980 + }, + { + "epoch": 0.45173116089613036, + "grad_norm": 0.647496318788268, + "learning_rate": 6.0181848369219055e-06, + "loss": 0.3315, + "step": 9981 + }, + { + "epoch": 0.4517764200045259, + "grad_norm": 0.6219216916623668, + "learning_rate": 6.017467262251553e-06, + "loss": 0.3492, + "step": 9982 + }, + { + "epoch": 0.4518216791129215, + "grad_norm": 0.5964205467527914, + "learning_rate": 6.016749665718953e-06, + "loss": 0.284, + "step": 9983 + }, + { + "epoch": 0.451866938221317, + "grad_norm": 0.6214559322845244, + "learning_rate": 6.016032047339526e-06, + "loss": 0.3743, + "step": 9984 + }, + { + "epoch": 0.4519121973297126, + "grad_norm": 0.5930174872567172, + "learning_rate": 6.01531440712869e-06, + "loss": 0.3482, + "step": 9985 + }, + { + "epoch": 0.4519574564381082, + "grad_norm": 0.6644054531840476, + "learning_rate": 6.014596745101866e-06, + "loss": 0.3815, + "step": 9986 + }, + { + "epoch": 0.45200271554650373, + "grad_norm": 0.6062908315932274, + "learning_rate": 6.0138790612744746e-06, + "loss": 0.3439, + "step": 9987 + }, + { + "epoch": 0.4520479746548993, + "grad_norm": 0.6729276946757002, + "learning_rate": 6.013161355661935e-06, + "loss": 0.3537, + "step": 9988 + }, + { + "epoch": 0.45209323376329485, + "grad_norm": 0.7996378503409862, + "learning_rate": 6.01244362827967e-06, + "loss": 0.3689, + "step": 9989 + }, + { + "epoch": 0.45213849287169044, + "grad_norm": 0.6228845676127899, + "learning_rate": 6.011725879143102e-06, + "loss": 0.3175, + "step": 9990 + }, + { + "epoch": 0.452183751980086, + "grad_norm": 0.6135257766245439, + "learning_rate": 6.01100810826765e-06, + "loss": 0.3412, + "step": 9991 + }, + { + "epoch": 0.45222901108848157, + "grad_norm": 0.6441029814830849, + "learning_rate": 6.0102903156687406e-06, + "loss": 0.328, + "step": 9992 + }, + { + "epoch": 0.4522742701968771, + "grad_norm": 0.6892365356820823, + "learning_rate": 6.009572501361794e-06, + "loss": 0.3718, + "step": 9993 + }, + { + "epoch": 0.4523195293052727, + "grad_norm": 0.6175618766242349, + "learning_rate": 6.008854665362236e-06, + "loss": 0.346, + "step": 9994 + }, + { + "epoch": 0.45236478841366823, + "grad_norm": 0.6940736989878789, + "learning_rate": 6.00813680768549e-06, + "loss": 0.3737, + "step": 9995 + }, + { + "epoch": 0.4524100475220638, + "grad_norm": 0.6413699942659871, + "learning_rate": 6.007418928346979e-06, + "loss": 0.3341, + "step": 9996 + }, + { + "epoch": 0.45245530663045935, + "grad_norm": 0.46958215559153266, + "learning_rate": 6.0067010273621295e-06, + "loss": 0.5034, + "step": 9997 + }, + { + "epoch": 0.45250056573885494, + "grad_norm": 0.6344552552649131, + "learning_rate": 6.005983104746367e-06, + "loss": 0.3008, + "step": 9998 + }, + { + "epoch": 0.45254582484725053, + "grad_norm": 0.6182104080609797, + "learning_rate": 6.005265160515117e-06, + "loss": 0.3428, + "step": 9999 + }, + { + "epoch": 0.45259108395564607, + "grad_norm": 0.6217252706723825, + "learning_rate": 6.004547194683806e-06, + "loss": 0.3569, + "step": 10000 + }, + { + "epoch": 0.45263634306404166, + "grad_norm": 0.6887992737610831, + "learning_rate": 6.003829207267863e-06, + "loss": 0.3729, + "step": 10001 + }, + { + "epoch": 0.4526816021724372, + "grad_norm": 0.6410925267438082, + "learning_rate": 6.00311119828271e-06, + "loss": 0.3274, + "step": 10002 + }, + { + "epoch": 0.4527268612808328, + "grad_norm": 0.6721644066136669, + "learning_rate": 6.002393167743782e-06, + "loss": 0.359, + "step": 10003 + }, + { + "epoch": 0.4527721203892283, + "grad_norm": 0.6646472727003765, + "learning_rate": 6.001675115666501e-06, + "loss": 0.3413, + "step": 10004 + }, + { + "epoch": 0.4528173794976239, + "grad_norm": 0.6296735501659243, + "learning_rate": 6.000957042066299e-06, + "loss": 0.3476, + "step": 10005 + }, + { + "epoch": 0.45286263860601944, + "grad_norm": 0.6067665919217092, + "learning_rate": 6.0002389469586035e-06, + "loss": 0.3066, + "step": 10006 + }, + { + "epoch": 0.45290789771441503, + "grad_norm": 0.39061303340342207, + "learning_rate": 5.999520830358845e-06, + "loss": 0.493, + "step": 10007 + }, + { + "epoch": 0.45295315682281057, + "grad_norm": 0.7134542623683294, + "learning_rate": 5.998802692282454e-06, + "loss": 0.3357, + "step": 10008 + }, + { + "epoch": 0.45299841593120616, + "grad_norm": 0.7105305038574864, + "learning_rate": 5.998084532744861e-06, + "loss": 0.3566, + "step": 10009 + }, + { + "epoch": 0.45304367503960175, + "grad_norm": 0.6754264853847363, + "learning_rate": 5.997366351761497e-06, + "loss": 0.3439, + "step": 10010 + }, + { + "epoch": 0.4530889341479973, + "grad_norm": 0.6172224892039797, + "learning_rate": 5.996648149347794e-06, + "loss": 0.3478, + "step": 10011 + }, + { + "epoch": 0.45313419325639287, + "grad_norm": 0.6040767266365203, + "learning_rate": 5.995929925519181e-06, + "loss": 0.359, + "step": 10012 + }, + { + "epoch": 0.4531794523647884, + "grad_norm": 0.6136311395265972, + "learning_rate": 5.9952116802910945e-06, + "loss": 0.3322, + "step": 10013 + }, + { + "epoch": 0.453224711473184, + "grad_norm": 0.6227029274185454, + "learning_rate": 5.994493413678964e-06, + "loss": 0.3624, + "step": 10014 + }, + { + "epoch": 0.45326997058157953, + "grad_norm": 0.3313337340020232, + "learning_rate": 5.993775125698226e-06, + "loss": 0.4818, + "step": 10015 + }, + { + "epoch": 0.4533152296899751, + "grad_norm": 0.5690430086005838, + "learning_rate": 5.993056816364312e-06, + "loss": 0.3081, + "step": 10016 + }, + { + "epoch": 0.45336048879837065, + "grad_norm": 0.6301972050072376, + "learning_rate": 5.992338485692657e-06, + "loss": 0.4077, + "step": 10017 + }, + { + "epoch": 0.45340574790676624, + "grad_norm": 0.28937216440665076, + "learning_rate": 5.991620133698694e-06, + "loss": 0.4723, + "step": 10018 + }, + { + "epoch": 0.4534510070151618, + "grad_norm": 0.4170841645776839, + "learning_rate": 5.990901760397863e-06, + "loss": 0.4923, + "step": 10019 + }, + { + "epoch": 0.45349626612355737, + "grad_norm": 0.626958759180911, + "learning_rate": 5.990183365805594e-06, + "loss": 0.3438, + "step": 10020 + }, + { + "epoch": 0.45354152523195296, + "grad_norm": 0.7145696132172378, + "learning_rate": 5.989464949937328e-06, + "loss": 0.3539, + "step": 10021 + }, + { + "epoch": 0.4535867843403485, + "grad_norm": 0.28916337801422026, + "learning_rate": 5.988746512808497e-06, + "loss": 0.4736, + "step": 10022 + }, + { + "epoch": 0.4536320434487441, + "grad_norm": 0.8029091456258143, + "learning_rate": 5.988028054434542e-06, + "loss": 0.3466, + "step": 10023 + }, + { + "epoch": 0.4536773025571396, + "grad_norm": 0.660620246553375, + "learning_rate": 5.987309574830897e-06, + "loss": 0.3498, + "step": 10024 + }, + { + "epoch": 0.4537225616655352, + "grad_norm": 0.6306767920116505, + "learning_rate": 5.986591074013002e-06, + "loss": 0.3371, + "step": 10025 + }, + { + "epoch": 0.45376782077393074, + "grad_norm": 0.6301389145434231, + "learning_rate": 5.985872551996294e-06, + "loss": 0.3572, + "step": 10026 + }, + { + "epoch": 0.45381307988232633, + "grad_norm": 0.653586722565106, + "learning_rate": 5.9851540087962134e-06, + "loss": 0.3909, + "step": 10027 + }, + { + "epoch": 0.45385833899072187, + "grad_norm": 0.7102396128575281, + "learning_rate": 5.984435444428199e-06, + "loss": 0.3169, + "step": 10028 + }, + { + "epoch": 0.45390359809911746, + "grad_norm": 0.31410983897179384, + "learning_rate": 5.9837168589076915e-06, + "loss": 0.4836, + "step": 10029 + }, + { + "epoch": 0.453948857207513, + "grad_norm": 0.6940641462750593, + "learning_rate": 5.982998252250127e-06, + "loss": 0.3427, + "step": 10030 + }, + { + "epoch": 0.4539941163159086, + "grad_norm": 0.829965545364354, + "learning_rate": 5.982279624470951e-06, + "loss": 0.3049, + "step": 10031 + }, + { + "epoch": 0.4540393754243041, + "grad_norm": 0.32688203077652456, + "learning_rate": 5.981560975585604e-06, + "loss": 0.4807, + "step": 10032 + }, + { + "epoch": 0.4540846345326997, + "grad_norm": 0.6398806724095104, + "learning_rate": 5.980842305609524e-06, + "loss": 0.3039, + "step": 10033 + }, + { + "epoch": 0.4541298936410953, + "grad_norm": 3.293531905029241, + "learning_rate": 5.9801236145581575e-06, + "loss": 0.369, + "step": 10034 + }, + { + "epoch": 0.45417515274949083, + "grad_norm": 0.5979734329060953, + "learning_rate": 5.979404902446944e-06, + "loss": 0.3643, + "step": 10035 + }, + { + "epoch": 0.4542204118578864, + "grad_norm": 0.6247016742930394, + "learning_rate": 5.978686169291325e-06, + "loss": 0.3453, + "step": 10036 + }, + { + "epoch": 0.45426567096628195, + "grad_norm": 0.8567027597175304, + "learning_rate": 5.977967415106748e-06, + "loss": 0.3212, + "step": 10037 + }, + { + "epoch": 0.45431093007467754, + "grad_norm": 0.6229950767660342, + "learning_rate": 5.977248639908655e-06, + "loss": 0.3437, + "step": 10038 + }, + { + "epoch": 0.4543561891830731, + "grad_norm": 0.3089428583891721, + "learning_rate": 5.976529843712489e-06, + "loss": 0.4906, + "step": 10039 + }, + { + "epoch": 0.45440144829146867, + "grad_norm": 0.6260978746591849, + "learning_rate": 5.975811026533698e-06, + "loss": 0.3332, + "step": 10040 + }, + { + "epoch": 0.4544467073998642, + "grad_norm": 0.838058394573743, + "learning_rate": 5.975092188387722e-06, + "loss": 0.3336, + "step": 10041 + }, + { + "epoch": 0.4544919665082598, + "grad_norm": 0.7191431229815619, + "learning_rate": 5.974373329290012e-06, + "loss": 0.3577, + "step": 10042 + }, + { + "epoch": 0.4545372256166553, + "grad_norm": 1.3640864595801048, + "learning_rate": 5.97365444925601e-06, + "loss": 0.314, + "step": 10043 + }, + { + "epoch": 0.4545824847250509, + "grad_norm": 0.6553645603471241, + "learning_rate": 5.972935548301165e-06, + "loss": 0.3784, + "step": 10044 + }, + { + "epoch": 0.4546277438334465, + "grad_norm": 0.6919586894797458, + "learning_rate": 5.972216626440923e-06, + "loss": 0.3119, + "step": 10045 + }, + { + "epoch": 0.45467300294184204, + "grad_norm": 0.6058196014582589, + "learning_rate": 5.971497683690732e-06, + "loss": 0.3095, + "step": 10046 + }, + { + "epoch": 0.45471826205023763, + "grad_norm": 0.5859664888367128, + "learning_rate": 5.970778720066039e-06, + "loss": 0.3469, + "step": 10047 + }, + { + "epoch": 0.45476352115863317, + "grad_norm": 0.6679169505616824, + "learning_rate": 5.970059735582295e-06, + "loss": 0.3582, + "step": 10048 + }, + { + "epoch": 0.45480878026702876, + "grad_norm": 0.6748470447738677, + "learning_rate": 5.969340730254943e-06, + "loss": 0.3352, + "step": 10049 + }, + { + "epoch": 0.4548540393754243, + "grad_norm": 0.6253274352764578, + "learning_rate": 5.96862170409944e-06, + "loss": 0.3526, + "step": 10050 + }, + { + "epoch": 0.4548992984838199, + "grad_norm": 0.3514815605156929, + "learning_rate": 5.967902657131228e-06, + "loss": 0.5057, + "step": 10051 + }, + { + "epoch": 0.4549445575922154, + "grad_norm": 0.6386680369414963, + "learning_rate": 5.967183589365761e-06, + "loss": 0.3268, + "step": 10052 + }, + { + "epoch": 0.454989816700611, + "grad_norm": 0.6423595813623103, + "learning_rate": 5.96646450081849e-06, + "loss": 0.3392, + "step": 10053 + }, + { + "epoch": 0.45503507580900654, + "grad_norm": 0.5911899645875737, + "learning_rate": 5.965745391504866e-06, + "loss": 0.3584, + "step": 10054 + }, + { + "epoch": 0.45508033491740213, + "grad_norm": 0.6756603866248126, + "learning_rate": 5.965026261440338e-06, + "loss": 0.3638, + "step": 10055 + }, + { + "epoch": 0.4551255940257977, + "grad_norm": 0.5730749821872515, + "learning_rate": 5.964307110640359e-06, + "loss": 0.2995, + "step": 10056 + }, + { + "epoch": 0.45517085313419325, + "grad_norm": 0.647423316803012, + "learning_rate": 5.963587939120383e-06, + "loss": 0.3, + "step": 10057 + }, + { + "epoch": 0.45521611224258884, + "grad_norm": 0.43464516641582995, + "learning_rate": 5.962868746895863e-06, + "loss": 0.5188, + "step": 10058 + }, + { + "epoch": 0.4552613713509844, + "grad_norm": 0.6385870718981455, + "learning_rate": 5.962149533982249e-06, + "loss": 0.3265, + "step": 10059 + }, + { + "epoch": 0.45530663045937997, + "grad_norm": 0.586098357918665, + "learning_rate": 5.961430300394996e-06, + "loss": 0.3227, + "step": 10060 + }, + { + "epoch": 0.4553518895677755, + "grad_norm": 0.6571832827328082, + "learning_rate": 5.960711046149561e-06, + "loss": 0.324, + "step": 10061 + }, + { + "epoch": 0.4553971486761711, + "grad_norm": 0.6963240291123793, + "learning_rate": 5.959991771261393e-06, + "loss": 0.3186, + "step": 10062 + }, + { + "epoch": 0.4554424077845666, + "grad_norm": 0.675978472481428, + "learning_rate": 5.959272475745953e-06, + "loss": 0.3183, + "step": 10063 + }, + { + "epoch": 0.4554876668929622, + "grad_norm": 0.6799292075558153, + "learning_rate": 5.958553159618693e-06, + "loss": 0.3561, + "step": 10064 + }, + { + "epoch": 0.45553292600135775, + "grad_norm": 0.6488099183480327, + "learning_rate": 5.957833822895069e-06, + "loss": 0.327, + "step": 10065 + }, + { + "epoch": 0.45557818510975334, + "grad_norm": 0.6392367712514024, + "learning_rate": 5.957114465590537e-06, + "loss": 0.3028, + "step": 10066 + }, + { + "epoch": 0.4556234442181489, + "grad_norm": 0.650807133681978, + "learning_rate": 5.9563950877205564e-06, + "loss": 0.3145, + "step": 10067 + }, + { + "epoch": 0.45566870332654447, + "grad_norm": 0.35525035348306405, + "learning_rate": 5.955675689300583e-06, + "loss": 0.4869, + "step": 10068 + }, + { + "epoch": 0.45571396243494006, + "grad_norm": 0.3131661760499863, + "learning_rate": 5.954956270346074e-06, + "loss": 0.4686, + "step": 10069 + }, + { + "epoch": 0.4557592215433356, + "grad_norm": 0.28290393614239373, + "learning_rate": 5.954236830872486e-06, + "loss": 0.4722, + "step": 10070 + }, + { + "epoch": 0.4558044806517312, + "grad_norm": 0.6734508988009212, + "learning_rate": 5.953517370895281e-06, + "loss": 0.3424, + "step": 10071 + }, + { + "epoch": 0.4558497397601267, + "grad_norm": 0.6159152425039114, + "learning_rate": 5.9527978904299156e-06, + "loss": 0.3573, + "step": 10072 + }, + { + "epoch": 0.4558949988685223, + "grad_norm": 0.40209035644627683, + "learning_rate": 5.952078389491849e-06, + "loss": 0.4681, + "step": 10073 + }, + { + "epoch": 0.45594025797691784, + "grad_norm": 0.3838006111097636, + "learning_rate": 5.951358868096543e-06, + "loss": 0.4741, + "step": 10074 + }, + { + "epoch": 0.45598551708531343, + "grad_norm": 0.5806324459925772, + "learning_rate": 5.950639326259456e-06, + "loss": 0.3061, + "step": 10075 + }, + { + "epoch": 0.45603077619370896, + "grad_norm": 0.2915926212204432, + "learning_rate": 5.949919763996049e-06, + "loss": 0.4833, + "step": 10076 + }, + { + "epoch": 0.45607603530210455, + "grad_norm": 0.6698235355513487, + "learning_rate": 5.949200181321785e-06, + "loss": 0.2768, + "step": 10077 + }, + { + "epoch": 0.4561212944105001, + "grad_norm": 0.661875473907528, + "learning_rate": 5.948480578252124e-06, + "loss": 0.3365, + "step": 10078 + }, + { + "epoch": 0.4561665535188957, + "grad_norm": 0.7092775293804304, + "learning_rate": 5.9477609548025295e-06, + "loss": 0.334, + "step": 10079 + }, + { + "epoch": 0.45621181262729127, + "grad_norm": 0.6381995964901996, + "learning_rate": 5.9470413109884605e-06, + "loss": 0.3144, + "step": 10080 + }, + { + "epoch": 0.4562570717356868, + "grad_norm": 0.7031847456684301, + "learning_rate": 5.946321646825385e-06, + "loss": 0.3292, + "step": 10081 + }, + { + "epoch": 0.4563023308440824, + "grad_norm": 0.6255334778708324, + "learning_rate": 5.945601962328762e-06, + "loss": 0.3261, + "step": 10082 + }, + { + "epoch": 0.45634758995247793, + "grad_norm": 0.680176425307315, + "learning_rate": 5.9448822575140575e-06, + "loss": 0.3444, + "step": 10083 + }, + { + "epoch": 0.4563928490608735, + "grad_norm": 0.5395328200815813, + "learning_rate": 5.944162532396735e-06, + "loss": 0.4819, + "step": 10084 + }, + { + "epoch": 0.45643810816926905, + "grad_norm": 0.3849787355614753, + "learning_rate": 5.94344278699226e-06, + "loss": 0.5027, + "step": 10085 + }, + { + "epoch": 0.45648336727766464, + "grad_norm": 0.6424042333189114, + "learning_rate": 5.942723021316096e-06, + "loss": 0.3238, + "step": 10086 + }, + { + "epoch": 0.4565286263860602, + "grad_norm": 0.31396481615912764, + "learning_rate": 5.94200323538371e-06, + "loss": 0.4707, + "step": 10087 + }, + { + "epoch": 0.45657388549445577, + "grad_norm": 0.6379754659076351, + "learning_rate": 5.941283429210568e-06, + "loss": 0.3282, + "step": 10088 + }, + { + "epoch": 0.4566191446028513, + "grad_norm": 0.80606623399743, + "learning_rate": 5.940563602812136e-06, + "loss": 0.3359, + "step": 10089 + }, + { + "epoch": 0.4566644037112469, + "grad_norm": 0.5049334906739511, + "learning_rate": 5.939843756203881e-06, + "loss": 0.5133, + "step": 10090 + }, + { + "epoch": 0.4567096628196424, + "grad_norm": 0.4405229130785534, + "learning_rate": 5.939123889401269e-06, + "loss": 0.4887, + "step": 10091 + }, + { + "epoch": 0.456754921928038, + "grad_norm": 0.6133949372009695, + "learning_rate": 5.9384040024197706e-06, + "loss": 0.3418, + "step": 10092 + }, + { + "epoch": 0.4568001810364336, + "grad_norm": 0.6528224932137077, + "learning_rate": 5.937684095274852e-06, + "loss": 0.2896, + "step": 10093 + }, + { + "epoch": 0.45684544014482914, + "grad_norm": 0.6773183075316381, + "learning_rate": 5.9369641679819825e-06, + "loss": 0.3126, + "step": 10094 + }, + { + "epoch": 0.45689069925322473, + "grad_norm": 0.6173430889617483, + "learning_rate": 5.936244220556629e-06, + "loss": 0.2843, + "step": 10095 + }, + { + "epoch": 0.45693595836162026, + "grad_norm": 0.6545161701377518, + "learning_rate": 5.935524253014263e-06, + "loss": 0.3498, + "step": 10096 + }, + { + "epoch": 0.45698121747001585, + "grad_norm": 0.6926825859793615, + "learning_rate": 5.934804265370355e-06, + "loss": 0.3623, + "step": 10097 + }, + { + "epoch": 0.4570264765784114, + "grad_norm": 0.6709432458048451, + "learning_rate": 5.934084257640374e-06, + "loss": 0.3769, + "step": 10098 + }, + { + "epoch": 0.457071735686807, + "grad_norm": 0.7025990623404197, + "learning_rate": 5.933364229839791e-06, + "loss": 0.4754, + "step": 10099 + }, + { + "epoch": 0.4571169947952025, + "grad_norm": 0.6522116942364439, + "learning_rate": 5.9326441819840785e-06, + "loss": 0.3262, + "step": 10100 + }, + { + "epoch": 0.4571622539035981, + "grad_norm": 0.7262529789467604, + "learning_rate": 5.931924114088704e-06, + "loss": 0.3285, + "step": 10101 + }, + { + "epoch": 0.45720751301199364, + "grad_norm": 0.6095318022692825, + "learning_rate": 5.931204026169146e-06, + "loss": 0.2971, + "step": 10102 + }, + { + "epoch": 0.45725277212038923, + "grad_norm": 0.6440862160573638, + "learning_rate": 5.930483918240871e-06, + "loss": 0.3, + "step": 10103 + }, + { + "epoch": 0.4572980312287848, + "grad_norm": 0.3511639562008092, + "learning_rate": 5.929763790319355e-06, + "loss": 0.4633, + "step": 10104 + }, + { + "epoch": 0.45734329033718035, + "grad_norm": 0.6597290674517585, + "learning_rate": 5.929043642420072e-06, + "loss": 0.3472, + "step": 10105 + }, + { + "epoch": 0.45738854944557594, + "grad_norm": 0.6926020451172388, + "learning_rate": 5.928323474558492e-06, + "loss": 0.3625, + "step": 10106 + }, + { + "epoch": 0.4574338085539715, + "grad_norm": 0.6236702207782863, + "learning_rate": 5.9276032867500935e-06, + "loss": 0.3357, + "step": 10107 + }, + { + "epoch": 0.45747906766236707, + "grad_norm": 0.6585100389290004, + "learning_rate": 5.926883079010348e-06, + "loss": 0.3405, + "step": 10108 + }, + { + "epoch": 0.4575243267707626, + "grad_norm": 0.6536198954693773, + "learning_rate": 5.926162851354733e-06, + "loss": 0.3067, + "step": 10109 + }, + { + "epoch": 0.4575695858791582, + "grad_norm": 0.6709630553993108, + "learning_rate": 5.925442603798721e-06, + "loss": 0.3512, + "step": 10110 + }, + { + "epoch": 0.4576148449875537, + "grad_norm": 0.6357049472328576, + "learning_rate": 5.924722336357793e-06, + "loss": 0.3776, + "step": 10111 + }, + { + "epoch": 0.4576601040959493, + "grad_norm": 0.6401419572145257, + "learning_rate": 5.924002049047419e-06, + "loss": 0.3589, + "step": 10112 + }, + { + "epoch": 0.45770536320434485, + "grad_norm": 0.6412345227285043, + "learning_rate": 5.92328174188308e-06, + "loss": 0.3536, + "step": 10113 + }, + { + "epoch": 0.45775062231274044, + "grad_norm": 0.35092114987906203, + "learning_rate": 5.922561414880253e-06, + "loss": 0.4642, + "step": 10114 + }, + { + "epoch": 0.45779588142113603, + "grad_norm": 0.3163893146299063, + "learning_rate": 5.9218410680544135e-06, + "loss": 0.467, + "step": 10115 + }, + { + "epoch": 0.45784114052953157, + "grad_norm": 0.6642074094805996, + "learning_rate": 5.92112070142104e-06, + "loss": 0.3683, + "step": 10116 + }, + { + "epoch": 0.45788639963792716, + "grad_norm": 0.6728589957006714, + "learning_rate": 5.920400314995612e-06, + "loss": 0.3682, + "step": 10117 + }, + { + "epoch": 0.4579316587463227, + "grad_norm": 0.6188594622706161, + "learning_rate": 5.919679908793609e-06, + "loss": 0.353, + "step": 10118 + }, + { + "epoch": 0.4579769178547183, + "grad_norm": 0.6152141100731404, + "learning_rate": 5.91895948283051e-06, + "loss": 0.3518, + "step": 10119 + }, + { + "epoch": 0.4580221769631138, + "grad_norm": 1.3605112239088757, + "learning_rate": 5.918239037121791e-06, + "loss": 0.3551, + "step": 10120 + }, + { + "epoch": 0.4580674360715094, + "grad_norm": 0.6493685868114359, + "learning_rate": 5.917518571682938e-06, + "loss": 0.3611, + "step": 10121 + }, + { + "epoch": 0.45811269517990494, + "grad_norm": 0.6374143410362819, + "learning_rate": 5.9167980865294285e-06, + "loss": 0.3527, + "step": 10122 + }, + { + "epoch": 0.45815795428830053, + "grad_norm": 0.6177615252904042, + "learning_rate": 5.916077581676743e-06, + "loss": 0.3147, + "step": 10123 + }, + { + "epoch": 0.45820321339669606, + "grad_norm": 0.6017872906281839, + "learning_rate": 5.915357057140364e-06, + "loss": 0.33, + "step": 10124 + }, + { + "epoch": 0.45824847250509165, + "grad_norm": 2.520985279055733, + "learning_rate": 5.914636512935773e-06, + "loss": 0.2914, + "step": 10125 + }, + { + "epoch": 0.4582937316134872, + "grad_norm": 0.6258472550396663, + "learning_rate": 5.913915949078453e-06, + "loss": 0.3159, + "step": 10126 + }, + { + "epoch": 0.4583389907218828, + "grad_norm": 0.6200775670641495, + "learning_rate": 5.913195365583886e-06, + "loss": 0.3225, + "step": 10127 + }, + { + "epoch": 0.45838424983027837, + "grad_norm": 0.6198459344095034, + "learning_rate": 5.912474762467554e-06, + "loss": 0.3223, + "step": 10128 + }, + { + "epoch": 0.4584295089386739, + "grad_norm": 0.663417256125671, + "learning_rate": 5.911754139744944e-06, + "loss": 0.3457, + "step": 10129 + }, + { + "epoch": 0.4584747680470695, + "grad_norm": 0.6357687156435802, + "learning_rate": 5.911033497431535e-06, + "loss": 0.3483, + "step": 10130 + }, + { + "epoch": 0.458520027155465, + "grad_norm": 0.6808329048633953, + "learning_rate": 5.910312835542818e-06, + "loss": 0.3519, + "step": 10131 + }, + { + "epoch": 0.4585652862638606, + "grad_norm": 0.6184081392584136, + "learning_rate": 5.909592154094272e-06, + "loss": 0.3417, + "step": 10132 + }, + { + "epoch": 0.45861054537225615, + "grad_norm": 0.8011839566403799, + "learning_rate": 5.908871453101382e-06, + "loss": 0.3706, + "step": 10133 + }, + { + "epoch": 0.45865580448065174, + "grad_norm": 0.6182329034996796, + "learning_rate": 5.908150732579638e-06, + "loss": 0.3616, + "step": 10134 + }, + { + "epoch": 0.4587010635890473, + "grad_norm": 0.5550657593697929, + "learning_rate": 5.907429992544524e-06, + "loss": 0.4797, + "step": 10135 + }, + { + "epoch": 0.45874632269744287, + "grad_norm": 0.6363540464766801, + "learning_rate": 5.906709233011526e-06, + "loss": 0.2912, + "step": 10136 + }, + { + "epoch": 0.4587915818058384, + "grad_norm": 0.6775860869864486, + "learning_rate": 5.905988453996132e-06, + "loss": 0.3403, + "step": 10137 + }, + { + "epoch": 0.458836840914234, + "grad_norm": 0.677654388601567, + "learning_rate": 5.905267655513828e-06, + "loss": 0.3386, + "step": 10138 + }, + { + "epoch": 0.4588821000226296, + "grad_norm": 0.6242789450230746, + "learning_rate": 5.904546837580102e-06, + "loss": 0.3217, + "step": 10139 + }, + { + "epoch": 0.4589273591310251, + "grad_norm": 0.3399240313310149, + "learning_rate": 5.903826000210444e-06, + "loss": 0.5025, + "step": 10140 + }, + { + "epoch": 0.4589726182394207, + "grad_norm": 0.7333455939094149, + "learning_rate": 5.903105143420339e-06, + "loss": 0.3618, + "step": 10141 + }, + { + "epoch": 0.45901787734781624, + "grad_norm": 0.32216027105417944, + "learning_rate": 5.9023842672252805e-06, + "loss": 0.4924, + "step": 10142 + }, + { + "epoch": 0.45906313645621183, + "grad_norm": 0.3183411706250615, + "learning_rate": 5.901663371640754e-06, + "loss": 0.4918, + "step": 10143 + }, + { + "epoch": 0.45910839556460736, + "grad_norm": 0.6769035244721872, + "learning_rate": 5.9009424566822515e-06, + "loss": 0.3371, + "step": 10144 + }, + { + "epoch": 0.45915365467300295, + "grad_norm": 0.6855526785778393, + "learning_rate": 5.900221522365262e-06, + "loss": 0.3708, + "step": 10145 + }, + { + "epoch": 0.4591989137813985, + "grad_norm": 0.30930897707540267, + "learning_rate": 5.899500568705279e-06, + "loss": 0.5104, + "step": 10146 + }, + { + "epoch": 0.4592441728897941, + "grad_norm": 0.6335489140165369, + "learning_rate": 5.898779595717788e-06, + "loss": 0.3168, + "step": 10147 + }, + { + "epoch": 0.4592894319981896, + "grad_norm": 0.6643824401837953, + "learning_rate": 5.898058603418287e-06, + "loss": 0.3341, + "step": 10148 + }, + { + "epoch": 0.4593346911065852, + "grad_norm": 0.6753211229313297, + "learning_rate": 5.897337591822262e-06, + "loss": 0.3671, + "step": 10149 + }, + { + "epoch": 0.4593799502149808, + "grad_norm": 0.6164101604781804, + "learning_rate": 5.896616560945211e-06, + "loss": 0.337, + "step": 10150 + }, + { + "epoch": 0.4594252093233763, + "grad_norm": 0.35054630893544075, + "learning_rate": 5.89589551080262e-06, + "loss": 0.4682, + "step": 10151 + }, + { + "epoch": 0.4594704684317719, + "grad_norm": 0.6409338224630452, + "learning_rate": 5.89517444140999e-06, + "loss": 0.358, + "step": 10152 + }, + { + "epoch": 0.45951572754016745, + "grad_norm": 0.6369081943211347, + "learning_rate": 5.8944533527828095e-06, + "loss": 0.3228, + "step": 10153 + }, + { + "epoch": 0.45956098664856304, + "grad_norm": 0.6745637529016413, + "learning_rate": 5.893732244936572e-06, + "loss": 0.321, + "step": 10154 + }, + { + "epoch": 0.4596062457569586, + "grad_norm": 0.33008809593587707, + "learning_rate": 5.893011117886775e-06, + "loss": 0.4715, + "step": 10155 + }, + { + "epoch": 0.45965150486535417, + "grad_norm": 0.7145102204647745, + "learning_rate": 5.892289971648912e-06, + "loss": 0.3288, + "step": 10156 + }, + { + "epoch": 0.4596967639737497, + "grad_norm": 0.6899872862577504, + "learning_rate": 5.8915688062384755e-06, + "loss": 0.3219, + "step": 10157 + }, + { + "epoch": 0.4597420230821453, + "grad_norm": 0.5941872763439713, + "learning_rate": 5.890847621670966e-06, + "loss": 0.358, + "step": 10158 + }, + { + "epoch": 0.4597872821905408, + "grad_norm": 0.6521882399774729, + "learning_rate": 5.8901264179618755e-06, + "loss": 0.3593, + "step": 10159 + }, + { + "epoch": 0.4598325412989364, + "grad_norm": 0.41761082565214547, + "learning_rate": 5.889405195126704e-06, + "loss": 0.4555, + "step": 10160 + }, + { + "epoch": 0.45987780040733195, + "grad_norm": 0.5930093261474315, + "learning_rate": 5.8886839531809455e-06, + "loss": 0.3178, + "step": 10161 + }, + { + "epoch": 0.45992305951572754, + "grad_norm": 0.6120930429914294, + "learning_rate": 5.8879626921400975e-06, + "loss": 0.3226, + "step": 10162 + }, + { + "epoch": 0.45996831862412313, + "grad_norm": 0.30768656389224336, + "learning_rate": 5.88724141201966e-06, + "loss": 0.4654, + "step": 10163 + }, + { + "epoch": 0.46001357773251866, + "grad_norm": 0.29084001888272587, + "learning_rate": 5.886520112835128e-06, + "loss": 0.4662, + "step": 10164 + }, + { + "epoch": 0.46005883684091425, + "grad_norm": 0.28239681672321787, + "learning_rate": 5.8857987946020025e-06, + "loss": 0.4972, + "step": 10165 + }, + { + "epoch": 0.4601040959493098, + "grad_norm": 0.6171878946319844, + "learning_rate": 5.8850774573357804e-06, + "loss": 0.3313, + "step": 10166 + }, + { + "epoch": 0.4601493550577054, + "grad_norm": 0.6448552071058545, + "learning_rate": 5.884356101051962e-06, + "loss": 0.3886, + "step": 10167 + }, + { + "epoch": 0.4601946141661009, + "grad_norm": 0.6837662949759267, + "learning_rate": 5.8836347257660485e-06, + "loss": 0.3452, + "step": 10168 + }, + { + "epoch": 0.4602398732744965, + "grad_norm": 0.6473672470153388, + "learning_rate": 5.882913331493538e-06, + "loss": 0.3138, + "step": 10169 + }, + { + "epoch": 0.46028513238289204, + "grad_norm": 0.601651739774847, + "learning_rate": 5.882191918249931e-06, + "loss": 0.3047, + "step": 10170 + }, + { + "epoch": 0.4603303914912876, + "grad_norm": 0.35601805678176673, + "learning_rate": 5.881470486050731e-06, + "loss": 0.4877, + "step": 10171 + }, + { + "epoch": 0.46037565059968316, + "grad_norm": 0.6413091667459374, + "learning_rate": 5.880749034911435e-06, + "loss": 0.3386, + "step": 10172 + }, + { + "epoch": 0.46042090970807875, + "grad_norm": 0.5842090741092955, + "learning_rate": 5.880027564847549e-06, + "loss": 0.3215, + "step": 10173 + }, + { + "epoch": 0.46046616881647434, + "grad_norm": 0.7689309377557272, + "learning_rate": 5.879306075874572e-06, + "loss": 0.3427, + "step": 10174 + }, + { + "epoch": 0.4605114279248699, + "grad_norm": 0.3315562112584059, + "learning_rate": 5.8785845680080085e-06, + "loss": 0.4716, + "step": 10175 + }, + { + "epoch": 0.46055668703326547, + "grad_norm": 0.6272824790835605, + "learning_rate": 5.877863041263362e-06, + "loss": 0.3191, + "step": 10176 + }, + { + "epoch": 0.460601946141661, + "grad_norm": 0.6484198612109718, + "learning_rate": 5.877141495656136e-06, + "loss": 0.3509, + "step": 10177 + }, + { + "epoch": 0.4606472052500566, + "grad_norm": 0.5869832697863312, + "learning_rate": 5.876419931201829e-06, + "loss": 0.2981, + "step": 10178 + }, + { + "epoch": 0.4606924643584521, + "grad_norm": 0.6531080746471695, + "learning_rate": 5.875698347915954e-06, + "loss": 0.3253, + "step": 10179 + }, + { + "epoch": 0.4607377234668477, + "grad_norm": 0.6298903489672832, + "learning_rate": 5.8749767458140075e-06, + "loss": 0.3144, + "step": 10180 + }, + { + "epoch": 0.46078298257524325, + "grad_norm": 0.5973823819739114, + "learning_rate": 5.8742551249115e-06, + "loss": 0.328, + "step": 10181 + }, + { + "epoch": 0.46082824168363884, + "grad_norm": 0.39421276793144844, + "learning_rate": 5.873533485223934e-06, + "loss": 0.4677, + "step": 10182 + }, + { + "epoch": 0.4608735007920344, + "grad_norm": 0.33173025820647933, + "learning_rate": 5.872811826766817e-06, + "loss": 0.4668, + "step": 10183 + }, + { + "epoch": 0.46091875990042996, + "grad_norm": 0.2915282231759761, + "learning_rate": 5.872090149555653e-06, + "loss": 0.4817, + "step": 10184 + }, + { + "epoch": 0.4609640190088255, + "grad_norm": 1.2720611030473974, + "learning_rate": 5.871368453605951e-06, + "loss": 0.3945, + "step": 10185 + }, + { + "epoch": 0.4610092781172211, + "grad_norm": 0.429056553504757, + "learning_rate": 5.870646738933218e-06, + "loss": 0.4937, + "step": 10186 + }, + { + "epoch": 0.4610545372256167, + "grad_norm": 0.4424597400168609, + "learning_rate": 5.869925005552959e-06, + "loss": 0.4667, + "step": 10187 + }, + { + "epoch": 0.4610997963340122, + "grad_norm": 0.6422267533943191, + "learning_rate": 5.869203253480684e-06, + "loss": 0.334, + "step": 10188 + }, + { + "epoch": 0.4611450554424078, + "grad_norm": 0.624955624943747, + "learning_rate": 5.868481482731903e-06, + "loss": 0.3131, + "step": 10189 + }, + { + "epoch": 0.46119031455080334, + "grad_norm": 0.6120821241352987, + "learning_rate": 5.867759693322119e-06, + "loss": 0.3318, + "step": 10190 + }, + { + "epoch": 0.4612355736591989, + "grad_norm": 0.30119929184566724, + "learning_rate": 5.867037885266845e-06, + "loss": 0.5031, + "step": 10191 + }, + { + "epoch": 0.46128083276759446, + "grad_norm": 0.6428062728626699, + "learning_rate": 5.86631605858159e-06, + "loss": 0.3349, + "step": 10192 + }, + { + "epoch": 0.46132609187599005, + "grad_norm": 0.28165607627203954, + "learning_rate": 5.865594213281864e-06, + "loss": 0.4707, + "step": 10193 + }, + { + "epoch": 0.4613713509843856, + "grad_norm": 0.6336775479151228, + "learning_rate": 5.864872349383177e-06, + "loss": 0.356, + "step": 10194 + }, + { + "epoch": 0.4614166100927812, + "grad_norm": 0.2913933081534654, + "learning_rate": 5.864150466901038e-06, + "loss": 0.4849, + "step": 10195 + }, + { + "epoch": 0.4614618692011767, + "grad_norm": 0.598970252669126, + "learning_rate": 5.863428565850961e-06, + "loss": 0.3303, + "step": 10196 + }, + { + "epoch": 0.4615071283095723, + "grad_norm": 0.608507335922516, + "learning_rate": 5.862706646248455e-06, + "loss": 0.3021, + "step": 10197 + }, + { + "epoch": 0.4615523874179679, + "grad_norm": 0.6200924088031894, + "learning_rate": 5.861984708109035e-06, + "loss": 0.3543, + "step": 10198 + }, + { + "epoch": 0.4615976465263634, + "grad_norm": 0.5752767246422879, + "learning_rate": 5.861262751448208e-06, + "loss": 0.3112, + "step": 10199 + }, + { + "epoch": 0.461642905634759, + "grad_norm": 0.6281904087924641, + "learning_rate": 5.860540776281492e-06, + "loss": 0.3551, + "step": 10200 + }, + { + "epoch": 0.46168816474315455, + "grad_norm": 0.6328412769197119, + "learning_rate": 5.859818782624395e-06, + "loss": 0.341, + "step": 10201 + }, + { + "epoch": 0.46173342385155014, + "grad_norm": 0.6876693828114777, + "learning_rate": 5.8590967704924365e-06, + "loss": 0.3139, + "step": 10202 + }, + { + "epoch": 0.4617786829599457, + "grad_norm": 0.31990046259109833, + "learning_rate": 5.858374739901125e-06, + "loss": 0.4687, + "step": 10203 + }, + { + "epoch": 0.46182394206834126, + "grad_norm": 0.6150479228587775, + "learning_rate": 5.857652690865976e-06, + "loss": 0.3444, + "step": 10204 + }, + { + "epoch": 0.4618692011767368, + "grad_norm": 0.2747063505928363, + "learning_rate": 5.856930623402506e-06, + "loss": 0.4619, + "step": 10205 + }, + { + "epoch": 0.4619144602851324, + "grad_norm": 0.6457097815772058, + "learning_rate": 5.856208537526229e-06, + "loss": 0.3926, + "step": 10206 + }, + { + "epoch": 0.4619597193935279, + "grad_norm": 0.6171431621945438, + "learning_rate": 5.855486433252658e-06, + "loss": 0.3503, + "step": 10207 + }, + { + "epoch": 0.4620049785019235, + "grad_norm": 0.6364217330608023, + "learning_rate": 5.854764310597314e-06, + "loss": 0.3435, + "step": 10208 + }, + { + "epoch": 0.4620502376103191, + "grad_norm": 0.6389061843500499, + "learning_rate": 5.8540421695757064e-06, + "loss": 0.3563, + "step": 10209 + }, + { + "epoch": 0.46209549671871464, + "grad_norm": 0.6891372948551845, + "learning_rate": 5.85332001020336e-06, + "loss": 0.3703, + "step": 10210 + }, + { + "epoch": 0.46214075582711023, + "grad_norm": 0.6416000376271216, + "learning_rate": 5.852597832495785e-06, + "loss": 0.3664, + "step": 10211 + }, + { + "epoch": 0.46218601493550576, + "grad_norm": 0.6537246700366226, + "learning_rate": 5.851875636468501e-06, + "loss": 0.3413, + "step": 10212 + }, + { + "epoch": 0.46223127404390135, + "grad_norm": 0.3856112248677831, + "learning_rate": 5.851153422137026e-06, + "loss": 0.4696, + "step": 10213 + }, + { + "epoch": 0.4622765331522969, + "grad_norm": 0.6338681957306734, + "learning_rate": 5.850431189516878e-06, + "loss": 0.377, + "step": 10214 + }, + { + "epoch": 0.4623217922606925, + "grad_norm": 0.5774558126226176, + "learning_rate": 5.849708938623575e-06, + "loss": 0.3399, + "step": 10215 + }, + { + "epoch": 0.462367051369088, + "grad_norm": 0.6104011755295417, + "learning_rate": 5.848986669472637e-06, + "loss": 0.3139, + "step": 10216 + }, + { + "epoch": 0.4624123104774836, + "grad_norm": 1.0495709555357173, + "learning_rate": 5.848264382079584e-06, + "loss": 0.3121, + "step": 10217 + }, + { + "epoch": 0.46245756958587914, + "grad_norm": 0.7581803532503051, + "learning_rate": 5.847542076459933e-06, + "loss": 0.3383, + "step": 10218 + }, + { + "epoch": 0.4625028286942747, + "grad_norm": 0.830956095614911, + "learning_rate": 5.846819752629208e-06, + "loss": 0.2785, + "step": 10219 + }, + { + "epoch": 0.46254808780267026, + "grad_norm": 0.3575301386080226, + "learning_rate": 5.846097410602925e-06, + "loss": 0.4788, + "step": 10220 + }, + { + "epoch": 0.46259334691106585, + "grad_norm": 0.6153185689546572, + "learning_rate": 5.84537505039661e-06, + "loss": 0.3295, + "step": 10221 + }, + { + "epoch": 0.46263860601946144, + "grad_norm": 0.6523731086184149, + "learning_rate": 5.844652672025779e-06, + "loss": 0.3411, + "step": 10222 + }, + { + "epoch": 0.462683865127857, + "grad_norm": 0.3059244969304898, + "learning_rate": 5.843930275505958e-06, + "loss": 0.465, + "step": 10223 + }, + { + "epoch": 0.46272912423625256, + "grad_norm": 0.6114200072668081, + "learning_rate": 5.843207860852667e-06, + "loss": 0.3175, + "step": 10224 + }, + { + "epoch": 0.4627743833446481, + "grad_norm": 0.6347751736428424, + "learning_rate": 5.842485428081428e-06, + "loss": 0.3491, + "step": 10225 + }, + { + "epoch": 0.4628196424530437, + "grad_norm": 0.2788723118165968, + "learning_rate": 5.841762977207764e-06, + "loss": 0.4776, + "step": 10226 + }, + { + "epoch": 0.4628649015614392, + "grad_norm": 0.2938092295138972, + "learning_rate": 5.841040508247201e-06, + "loss": 0.4959, + "step": 10227 + }, + { + "epoch": 0.4629101606698348, + "grad_norm": 0.2843870448715732, + "learning_rate": 5.840318021215259e-06, + "loss": 0.4595, + "step": 10228 + }, + { + "epoch": 0.46295541977823035, + "grad_norm": 0.778637112356431, + "learning_rate": 5.839595516127464e-06, + "loss": 0.3508, + "step": 10229 + }, + { + "epoch": 0.46300067888662594, + "grad_norm": 0.6174599810697912, + "learning_rate": 5.838872992999339e-06, + "loss": 0.3166, + "step": 10230 + }, + { + "epoch": 0.4630459379950215, + "grad_norm": 0.7235202774561535, + "learning_rate": 5.8381504518464114e-06, + "loss": 0.3472, + "step": 10231 + }, + { + "epoch": 0.46309119710341706, + "grad_norm": 0.6361329502755545, + "learning_rate": 5.837427892684205e-06, + "loss": 0.3, + "step": 10232 + }, + { + "epoch": 0.46313645621181265, + "grad_norm": 0.6060017986409006, + "learning_rate": 5.836705315528244e-06, + "loss": 0.333, + "step": 10233 + }, + { + "epoch": 0.4631817153202082, + "grad_norm": 0.6487855691336682, + "learning_rate": 5.8359827203940555e-06, + "loss": 0.3558, + "step": 10234 + }, + { + "epoch": 0.4632269744286038, + "grad_norm": 0.6297909598410385, + "learning_rate": 5.835260107297167e-06, + "loss": 0.3884, + "step": 10235 + }, + { + "epoch": 0.4632722335369993, + "grad_norm": 0.40330348336805316, + "learning_rate": 5.834537476253102e-06, + "loss": 0.4537, + "step": 10236 + }, + { + "epoch": 0.4633174926453949, + "grad_norm": 0.609177172025508, + "learning_rate": 5.833814827277391e-06, + "loss": 0.3219, + "step": 10237 + }, + { + "epoch": 0.46336275175379044, + "grad_norm": 0.747417185536518, + "learning_rate": 5.83309216038556e-06, + "loss": 0.3323, + "step": 10238 + }, + { + "epoch": 0.463408010862186, + "grad_norm": 0.2943224458498767, + "learning_rate": 5.832369475593138e-06, + "loss": 0.4879, + "step": 10239 + }, + { + "epoch": 0.46345326997058156, + "grad_norm": 0.6302865670246146, + "learning_rate": 5.831646772915651e-06, + "loss": 0.3296, + "step": 10240 + }, + { + "epoch": 0.46349852907897715, + "grad_norm": 0.33260995177021796, + "learning_rate": 5.8309240523686295e-06, + "loss": 0.4706, + "step": 10241 + }, + { + "epoch": 0.4635437881873727, + "grad_norm": 0.6227230630505952, + "learning_rate": 5.830201313967603e-06, + "loss": 0.3561, + "step": 10242 + }, + { + "epoch": 0.4635890472957683, + "grad_norm": 0.5977630892372184, + "learning_rate": 5.829478557728098e-06, + "loss": 0.2956, + "step": 10243 + }, + { + "epoch": 0.46363430640416387, + "grad_norm": 0.3112423505124785, + "learning_rate": 5.828755783665649e-06, + "loss": 0.4798, + "step": 10244 + }, + { + "epoch": 0.4636795655125594, + "grad_norm": 0.7156177789007848, + "learning_rate": 5.828032991795781e-06, + "loss": 0.3492, + "step": 10245 + }, + { + "epoch": 0.463724824620955, + "grad_norm": 0.6311863336708481, + "learning_rate": 5.827310182134029e-06, + "loss": 0.3904, + "step": 10246 + }, + { + "epoch": 0.4637700837293505, + "grad_norm": 0.6156550034528948, + "learning_rate": 5.8265873546959205e-06, + "loss": 0.2968, + "step": 10247 + }, + { + "epoch": 0.4638153428377461, + "grad_norm": 0.6030965572245929, + "learning_rate": 5.825864509496991e-06, + "loss": 0.3233, + "step": 10248 + }, + { + "epoch": 0.46386060194614165, + "grad_norm": 0.6954897934939829, + "learning_rate": 5.825141646552767e-06, + "loss": 0.3911, + "step": 10249 + }, + { + "epoch": 0.46390586105453724, + "grad_norm": 0.6408441042598859, + "learning_rate": 5.8244187658787855e-06, + "loss": 0.3344, + "step": 10250 + }, + { + "epoch": 0.4639511201629328, + "grad_norm": 0.6074890088107604, + "learning_rate": 5.8236958674905746e-06, + "loss": 0.3723, + "step": 10251 + }, + { + "epoch": 0.46399637927132836, + "grad_norm": 0.6334622958231805, + "learning_rate": 5.82297295140367e-06, + "loss": 0.3434, + "step": 10252 + }, + { + "epoch": 0.4640416383797239, + "grad_norm": 0.5730384902807221, + "learning_rate": 5.822250017633605e-06, + "loss": 0.3199, + "step": 10253 + }, + { + "epoch": 0.4640868974881195, + "grad_norm": 0.6189461609429648, + "learning_rate": 5.821527066195911e-06, + "loss": 0.3377, + "step": 10254 + }, + { + "epoch": 0.464132156596515, + "grad_norm": 0.6371926272056898, + "learning_rate": 5.820804097106125e-06, + "loss": 0.3073, + "step": 10255 + }, + { + "epoch": 0.4641774157049106, + "grad_norm": 0.36073886007825423, + "learning_rate": 5.82008111037978e-06, + "loss": 0.489, + "step": 10256 + }, + { + "epoch": 0.4642226748133062, + "grad_norm": 0.6265469379681541, + "learning_rate": 5.819358106032409e-06, + "loss": 0.3613, + "step": 10257 + }, + { + "epoch": 0.46426793392170174, + "grad_norm": 0.6752147741747988, + "learning_rate": 5.81863508407955e-06, + "loss": 0.3313, + "step": 10258 + }, + { + "epoch": 0.4643131930300973, + "grad_norm": 0.6333988101454193, + "learning_rate": 5.817912044536735e-06, + "loss": 0.3466, + "step": 10259 + }, + { + "epoch": 0.46435845213849286, + "grad_norm": 0.7462015858385178, + "learning_rate": 5.8171889874195066e-06, + "loss": 0.3298, + "step": 10260 + }, + { + "epoch": 0.46440371124688845, + "grad_norm": 0.6957970518827933, + "learning_rate": 5.8164659127433935e-06, + "loss": 0.3552, + "step": 10261 + }, + { + "epoch": 0.464448970355284, + "grad_norm": 0.6435510261970632, + "learning_rate": 5.815742820523936e-06, + "loss": 0.3092, + "step": 10262 + }, + { + "epoch": 0.4644942294636796, + "grad_norm": 0.3325824347052253, + "learning_rate": 5.815019710776671e-06, + "loss": 0.4796, + "step": 10263 + }, + { + "epoch": 0.4645394885720751, + "grad_norm": 0.7540769034192851, + "learning_rate": 5.814296583517135e-06, + "loss": 0.3059, + "step": 10264 + }, + { + "epoch": 0.4645847476804707, + "grad_norm": 0.623352839941175, + "learning_rate": 5.813573438760867e-06, + "loss": 0.365, + "step": 10265 + }, + { + "epoch": 0.46463000678886623, + "grad_norm": 0.626453682575965, + "learning_rate": 5.812850276523405e-06, + "loss": 0.3726, + "step": 10266 + }, + { + "epoch": 0.4646752658972618, + "grad_norm": 0.29882319766048127, + "learning_rate": 5.812127096820285e-06, + "loss": 0.4693, + "step": 10267 + }, + { + "epoch": 0.4647205250056574, + "grad_norm": 0.5995073751713392, + "learning_rate": 5.811403899667049e-06, + "loss": 0.329, + "step": 10268 + }, + { + "epoch": 0.46476578411405295, + "grad_norm": 0.5822564001987135, + "learning_rate": 5.810680685079236e-06, + "loss": 0.2995, + "step": 10269 + }, + { + "epoch": 0.46481104322244854, + "grad_norm": 0.6120648327385629, + "learning_rate": 5.809957453072385e-06, + "loss": 0.327, + "step": 10270 + }, + { + "epoch": 0.4648563023308441, + "grad_norm": 0.5772405836572486, + "learning_rate": 5.809234203662034e-06, + "loss": 0.3188, + "step": 10271 + }, + { + "epoch": 0.46490156143923966, + "grad_norm": 0.6548826917793775, + "learning_rate": 5.808510936863727e-06, + "loss": 0.3568, + "step": 10272 + }, + { + "epoch": 0.4649468205476352, + "grad_norm": 0.6218802174741334, + "learning_rate": 5.807787652693002e-06, + "loss": 0.3347, + "step": 10273 + }, + { + "epoch": 0.4649920796560308, + "grad_norm": 0.3925733151859355, + "learning_rate": 5.8070643511654025e-06, + "loss": 0.4809, + "step": 10274 + }, + { + "epoch": 0.4650373387644263, + "grad_norm": 0.6150511972804596, + "learning_rate": 5.806341032296468e-06, + "loss": 0.313, + "step": 10275 + }, + { + "epoch": 0.4650825978728219, + "grad_norm": 0.6160171370967106, + "learning_rate": 5.805617696101742e-06, + "loss": 0.3701, + "step": 10276 + }, + { + "epoch": 0.46512785698121745, + "grad_norm": 0.5990757050539252, + "learning_rate": 5.804894342596766e-06, + "loss": 0.3755, + "step": 10277 + }, + { + "epoch": 0.46517311608961304, + "grad_norm": 0.5648945834265401, + "learning_rate": 5.804170971797081e-06, + "loss": 0.2972, + "step": 10278 + }, + { + "epoch": 0.4652183751980086, + "grad_norm": 0.6026134317216376, + "learning_rate": 5.803447583718234e-06, + "loss": 0.368, + "step": 10279 + }, + { + "epoch": 0.46526363430640416, + "grad_norm": 0.5500918012924714, + "learning_rate": 5.802724178375762e-06, + "loss": 0.3353, + "step": 10280 + }, + { + "epoch": 0.46530889341479975, + "grad_norm": 0.6222704125021451, + "learning_rate": 5.802000755785217e-06, + "loss": 0.2981, + "step": 10281 + }, + { + "epoch": 0.4653541525231953, + "grad_norm": 0.3463249038069997, + "learning_rate": 5.801277315962139e-06, + "loss": 0.49, + "step": 10282 + }, + { + "epoch": 0.4653994116315909, + "grad_norm": 0.6643901305295368, + "learning_rate": 5.80055385892207e-06, + "loss": 0.3667, + "step": 10283 + }, + { + "epoch": 0.4654446707399864, + "grad_norm": 0.6112542275471315, + "learning_rate": 5.799830384680558e-06, + "loss": 0.3516, + "step": 10284 + }, + { + "epoch": 0.465489929848382, + "grad_norm": 0.6482440782183497, + "learning_rate": 5.799106893253148e-06, + "loss": 0.3737, + "step": 10285 + }, + { + "epoch": 0.46553518895677753, + "grad_norm": 0.5779577042672598, + "learning_rate": 5.798383384655384e-06, + "loss": 0.3306, + "step": 10286 + }, + { + "epoch": 0.4655804480651731, + "grad_norm": 0.2977800126206191, + "learning_rate": 5.7976598589028154e-06, + "loss": 0.4888, + "step": 10287 + }, + { + "epoch": 0.46562570717356866, + "grad_norm": 0.29912546170849535, + "learning_rate": 5.796936316010984e-06, + "loss": 0.4909, + "step": 10288 + }, + { + "epoch": 0.46567096628196425, + "grad_norm": 0.5550307674716896, + "learning_rate": 5.796212755995439e-06, + "loss": 0.3606, + "step": 10289 + }, + { + "epoch": 0.4657162253903598, + "grad_norm": 0.6016564912884554, + "learning_rate": 5.795489178871728e-06, + "loss": 0.32, + "step": 10290 + }, + { + "epoch": 0.4657614844987554, + "grad_norm": 0.6441104882214519, + "learning_rate": 5.794765584655397e-06, + "loss": 0.3154, + "step": 10291 + }, + { + "epoch": 0.46580674360715096, + "grad_norm": 0.6519116239052571, + "learning_rate": 5.794041973361996e-06, + "loss": 0.3459, + "step": 10292 + }, + { + "epoch": 0.4658520027155465, + "grad_norm": 0.6734882207659202, + "learning_rate": 5.793318345007071e-06, + "loss": 0.3281, + "step": 10293 + }, + { + "epoch": 0.4658972618239421, + "grad_norm": 0.657001665342723, + "learning_rate": 5.7925946996061696e-06, + "loss": 0.303, + "step": 10294 + }, + { + "epoch": 0.4659425209323376, + "grad_norm": 0.784940555608557, + "learning_rate": 5.791871037174844e-06, + "loss": 0.3287, + "step": 10295 + }, + { + "epoch": 0.4659877800407332, + "grad_norm": 1.2622160018753232, + "learning_rate": 5.7911473577286415e-06, + "loss": 0.3177, + "step": 10296 + }, + { + "epoch": 0.46603303914912875, + "grad_norm": 0.7490174556923634, + "learning_rate": 5.790423661283112e-06, + "loss": 0.3239, + "step": 10297 + }, + { + "epoch": 0.46607829825752434, + "grad_norm": 0.676624989660703, + "learning_rate": 5.789699947853807e-06, + "loss": 0.3614, + "step": 10298 + }, + { + "epoch": 0.46612355736591987, + "grad_norm": 0.652027534931178, + "learning_rate": 5.788976217456275e-06, + "loss": 0.3225, + "step": 10299 + }, + { + "epoch": 0.46616881647431546, + "grad_norm": 0.5830828483232589, + "learning_rate": 5.788252470106066e-06, + "loss": 0.3683, + "step": 10300 + }, + { + "epoch": 0.466214075582711, + "grad_norm": 0.5600393977412688, + "learning_rate": 5.787528705818732e-06, + "loss": 0.3223, + "step": 10301 + }, + { + "epoch": 0.4662593346911066, + "grad_norm": 0.6438021708441337, + "learning_rate": 5.786804924609827e-06, + "loss": 0.3012, + "step": 10302 + }, + { + "epoch": 0.4663045937995022, + "grad_norm": 0.6132114991086414, + "learning_rate": 5.786081126494899e-06, + "loss": 0.3149, + "step": 10303 + }, + { + "epoch": 0.4663498529078977, + "grad_norm": 0.6359022819021914, + "learning_rate": 5.785357311489502e-06, + "loss": 0.3078, + "step": 10304 + }, + { + "epoch": 0.4663951120162933, + "grad_norm": 0.652813458711096, + "learning_rate": 5.784633479609188e-06, + "loss": 0.3595, + "step": 10305 + }, + { + "epoch": 0.46644037112468884, + "grad_norm": 0.6192102296622036, + "learning_rate": 5.783909630869513e-06, + "loss": 0.3429, + "step": 10306 + }, + { + "epoch": 0.4664856302330844, + "grad_norm": 0.5587784962323307, + "learning_rate": 5.7831857652860234e-06, + "loss": 0.28, + "step": 10307 + }, + { + "epoch": 0.46653088934147996, + "grad_norm": 0.4213056610694686, + "learning_rate": 5.782461882874281e-06, + "loss": 0.4761, + "step": 10308 + }, + { + "epoch": 0.46657614844987555, + "grad_norm": 0.3523271291831369, + "learning_rate": 5.781737983649833e-06, + "loss": 0.4666, + "step": 10309 + }, + { + "epoch": 0.4666214075582711, + "grad_norm": 0.6272547002980707, + "learning_rate": 5.781014067628239e-06, + "loss": 0.3487, + "step": 10310 + }, + { + "epoch": 0.4666666666666667, + "grad_norm": 0.6324788202023445, + "learning_rate": 5.78029013482505e-06, + "loss": 0.3387, + "step": 10311 + }, + { + "epoch": 0.4667119257750622, + "grad_norm": 0.6166975355394055, + "learning_rate": 5.779566185255823e-06, + "loss": 0.373, + "step": 10312 + }, + { + "epoch": 0.4667571848834578, + "grad_norm": 0.646302923312705, + "learning_rate": 5.778842218936113e-06, + "loss": 0.3311, + "step": 10313 + }, + { + "epoch": 0.46680244399185333, + "grad_norm": 0.6577866467883527, + "learning_rate": 5.778118235881475e-06, + "loss": 0.3905, + "step": 10314 + }, + { + "epoch": 0.4668477031002489, + "grad_norm": 0.645231514488921, + "learning_rate": 5.777394236107465e-06, + "loss": 0.3354, + "step": 10315 + }, + { + "epoch": 0.4668929622086445, + "grad_norm": 0.6187727317439589, + "learning_rate": 5.776670219629643e-06, + "loss": 0.3309, + "step": 10316 + }, + { + "epoch": 0.46693822131704005, + "grad_norm": 0.6915513013065251, + "learning_rate": 5.775946186463561e-06, + "loss": 0.3635, + "step": 10317 + }, + { + "epoch": 0.46698348042543564, + "grad_norm": 0.6626864260683646, + "learning_rate": 5.775222136624781e-06, + "loss": 0.3722, + "step": 10318 + }, + { + "epoch": 0.46702873953383117, + "grad_norm": 0.6645297758465186, + "learning_rate": 5.774498070128857e-06, + "loss": 0.3243, + "step": 10319 + }, + { + "epoch": 0.46707399864222676, + "grad_norm": 0.6766053400804806, + "learning_rate": 5.773773986991348e-06, + "loss": 0.2781, + "step": 10320 + }, + { + "epoch": 0.4671192577506223, + "grad_norm": 0.6428071642509061, + "learning_rate": 5.773049887227813e-06, + "loss": 0.3932, + "step": 10321 + }, + { + "epoch": 0.4671645168590179, + "grad_norm": 0.6598168419273106, + "learning_rate": 5.772325770853809e-06, + "loss": 0.3251, + "step": 10322 + }, + { + "epoch": 0.4672097759674134, + "grad_norm": 0.6502389888661539, + "learning_rate": 5.771601637884897e-06, + "loss": 0.3507, + "step": 10323 + }, + { + "epoch": 0.467255035075809, + "grad_norm": 0.7030154069910686, + "learning_rate": 5.770877488336636e-06, + "loss": 0.2911, + "step": 10324 + }, + { + "epoch": 0.46730029418420455, + "grad_norm": 0.6618412096184401, + "learning_rate": 5.770153322224584e-06, + "loss": 0.3697, + "step": 10325 + }, + { + "epoch": 0.46734555329260014, + "grad_norm": 0.592769704678834, + "learning_rate": 5.769429139564303e-06, + "loss": 0.2987, + "step": 10326 + }, + { + "epoch": 0.4673908124009957, + "grad_norm": 0.6440489207513519, + "learning_rate": 5.7687049403713545e-06, + "loss": 0.3575, + "step": 10327 + }, + { + "epoch": 0.46743607150939126, + "grad_norm": 0.794780089394584, + "learning_rate": 5.767980724661295e-06, + "loss": 0.3235, + "step": 10328 + }, + { + "epoch": 0.46748133061778685, + "grad_norm": 0.6140401917762476, + "learning_rate": 5.767256492449691e-06, + "loss": 0.3428, + "step": 10329 + }, + { + "epoch": 0.4675265897261824, + "grad_norm": 0.6446457210170683, + "learning_rate": 5.7665322437521e-06, + "loss": 0.3425, + "step": 10330 + }, + { + "epoch": 0.467571848834578, + "grad_norm": 0.6975506143904119, + "learning_rate": 5.765807978584086e-06, + "loss": 0.3229, + "step": 10331 + }, + { + "epoch": 0.4676171079429735, + "grad_norm": 0.6558090303313163, + "learning_rate": 5.76508369696121e-06, + "loss": 0.4884, + "step": 10332 + }, + { + "epoch": 0.4676623670513691, + "grad_norm": 0.6493883687712777, + "learning_rate": 5.764359398899035e-06, + "loss": 0.3582, + "step": 10333 + }, + { + "epoch": 0.46770762615976463, + "grad_norm": 0.6830461604852215, + "learning_rate": 5.763635084413124e-06, + "loss": 0.3433, + "step": 10334 + }, + { + "epoch": 0.4677528852681602, + "grad_norm": 0.5862373864261052, + "learning_rate": 5.762910753519041e-06, + "loss": 0.2892, + "step": 10335 + }, + { + "epoch": 0.46779814437655576, + "grad_norm": 0.6203705565094673, + "learning_rate": 5.7621864062323484e-06, + "loss": 0.284, + "step": 10336 + }, + { + "epoch": 0.46784340348495135, + "grad_norm": 0.6226866995996769, + "learning_rate": 5.7614620425686115e-06, + "loss": 0.3411, + "step": 10337 + }, + { + "epoch": 0.46788866259334694, + "grad_norm": 0.6302494720705131, + "learning_rate": 5.760737662543393e-06, + "loss": 0.3842, + "step": 10338 + }, + { + "epoch": 0.4679339217017425, + "grad_norm": 0.6905149999540106, + "learning_rate": 5.760013266172261e-06, + "loss": 0.3205, + "step": 10339 + }, + { + "epoch": 0.46797918081013806, + "grad_norm": 0.6035717777445082, + "learning_rate": 5.759288853470776e-06, + "loss": 0.3267, + "step": 10340 + }, + { + "epoch": 0.4680244399185336, + "grad_norm": 0.6758276913963119, + "learning_rate": 5.758564424454505e-06, + "loss": 0.3656, + "step": 10341 + }, + { + "epoch": 0.4680696990269292, + "grad_norm": 0.6773124521999553, + "learning_rate": 5.757839979139015e-06, + "loss": 0.3276, + "step": 10342 + }, + { + "epoch": 0.4681149581353247, + "grad_norm": 0.39690207641958924, + "learning_rate": 5.757115517539871e-06, + "loss": 0.4777, + "step": 10343 + }, + { + "epoch": 0.4681602172437203, + "grad_norm": 0.6325568396327806, + "learning_rate": 5.7563910396726406e-06, + "loss": 0.3494, + "step": 10344 + }, + { + "epoch": 0.46820547635211585, + "grad_norm": 0.6558991024020747, + "learning_rate": 5.7556665455528905e-06, + "loss": 0.3444, + "step": 10345 + }, + { + "epoch": 0.46825073546051144, + "grad_norm": 0.5942159698676818, + "learning_rate": 5.7549420351961845e-06, + "loss": 0.3216, + "step": 10346 + }, + { + "epoch": 0.46829599456890697, + "grad_norm": 0.7086765666045822, + "learning_rate": 5.754217508618096e-06, + "loss": 0.3273, + "step": 10347 + }, + { + "epoch": 0.46834125367730256, + "grad_norm": 1.5854606445523287, + "learning_rate": 5.7534929658341875e-06, + "loss": 0.3766, + "step": 10348 + }, + { + "epoch": 0.4683865127856981, + "grad_norm": 0.6060422652921184, + "learning_rate": 5.75276840686003e-06, + "loss": 0.3445, + "step": 10349 + }, + { + "epoch": 0.4684317718940937, + "grad_norm": 0.6780583191381162, + "learning_rate": 5.752043831711191e-06, + "loss": 0.368, + "step": 10350 + }, + { + "epoch": 0.4684770310024893, + "grad_norm": 0.6071328346358127, + "learning_rate": 5.75131924040324e-06, + "loss": 0.3302, + "step": 10351 + }, + { + "epoch": 0.4685222901108848, + "grad_norm": 0.6544464830019272, + "learning_rate": 5.750594632951746e-06, + "loss": 0.3249, + "step": 10352 + }, + { + "epoch": 0.4685675492192804, + "grad_norm": 0.6721548509646648, + "learning_rate": 5.749870009372279e-06, + "loss": 0.3449, + "step": 10353 + }, + { + "epoch": 0.46861280832767593, + "grad_norm": 0.6522678147244234, + "learning_rate": 5.7491453696804075e-06, + "loss": 0.3217, + "step": 10354 + }, + { + "epoch": 0.4686580674360715, + "grad_norm": 0.600514796449989, + "learning_rate": 5.7484207138917046e-06, + "loss": 0.3414, + "step": 10355 + }, + { + "epoch": 0.46870332654446706, + "grad_norm": 0.34556402129044617, + "learning_rate": 5.747696042021737e-06, + "loss": 0.4632, + "step": 10356 + }, + { + "epoch": 0.46874858565286265, + "grad_norm": 0.6345826108971352, + "learning_rate": 5.746971354086079e-06, + "loss": 0.3596, + "step": 10357 + }, + { + "epoch": 0.4687938447612582, + "grad_norm": 0.30393717099708784, + "learning_rate": 5.746246650100302e-06, + "loss": 0.4986, + "step": 10358 + }, + { + "epoch": 0.4688391038696538, + "grad_norm": 0.8897658153634812, + "learning_rate": 5.745521930079974e-06, + "loss": 0.3326, + "step": 10359 + }, + { + "epoch": 0.4688843629780493, + "grad_norm": 0.5911021530626996, + "learning_rate": 5.744797194040672e-06, + "loss": 0.3094, + "step": 10360 + }, + { + "epoch": 0.4689296220864449, + "grad_norm": 0.666037617818459, + "learning_rate": 5.744072441997964e-06, + "loss": 0.3444, + "step": 10361 + }, + { + "epoch": 0.4689748811948405, + "grad_norm": 0.30775239245639197, + "learning_rate": 5.743347673967425e-06, + "loss": 0.4974, + "step": 10362 + }, + { + "epoch": 0.469020140303236, + "grad_norm": 0.668637519512142, + "learning_rate": 5.742622889964628e-06, + "loss": 0.3626, + "step": 10363 + }, + { + "epoch": 0.4690653994116316, + "grad_norm": 0.6288653334835292, + "learning_rate": 5.7418980900051445e-06, + "loss": 0.3252, + "step": 10364 + }, + { + "epoch": 0.46911065852002715, + "grad_norm": 0.6235682340663499, + "learning_rate": 5.74117327410455e-06, + "loss": 0.3328, + "step": 10365 + }, + { + "epoch": 0.46915591762842274, + "grad_norm": 0.6477660006050718, + "learning_rate": 5.740448442278419e-06, + "loss": 0.3471, + "step": 10366 + }, + { + "epoch": 0.46920117673681827, + "grad_norm": 0.3137744726867989, + "learning_rate": 5.739723594542323e-06, + "loss": 0.4813, + "step": 10367 + }, + { + "epoch": 0.46924643584521386, + "grad_norm": 0.652174300270186, + "learning_rate": 5.738998730911842e-06, + "loss": 0.3605, + "step": 10368 + }, + { + "epoch": 0.4692916949536094, + "grad_norm": 0.6532414271831297, + "learning_rate": 5.738273851402547e-06, + "loss": 0.3369, + "step": 10369 + }, + { + "epoch": 0.469336954062005, + "grad_norm": 0.673193705744866, + "learning_rate": 5.737548956030014e-06, + "loss": 0.3022, + "step": 10370 + }, + { + "epoch": 0.4693822131704005, + "grad_norm": 0.6975430135053898, + "learning_rate": 5.736824044809818e-06, + "loss": 0.3666, + "step": 10371 + }, + { + "epoch": 0.4694274722787961, + "grad_norm": 0.6213955186406469, + "learning_rate": 5.736099117757536e-06, + "loss": 0.3067, + "step": 10372 + }, + { + "epoch": 0.4694727313871917, + "grad_norm": 0.33070487315086977, + "learning_rate": 5.735374174888747e-06, + "loss": 0.4613, + "step": 10373 + }, + { + "epoch": 0.46951799049558723, + "grad_norm": 0.5927061983360309, + "learning_rate": 5.734649216219025e-06, + "loss": 0.3415, + "step": 10374 + }, + { + "epoch": 0.4695632496039828, + "grad_norm": 0.3027285314363435, + "learning_rate": 5.733924241763946e-06, + "loss": 0.4421, + "step": 10375 + }, + { + "epoch": 0.46960850871237836, + "grad_norm": 0.6054536612662688, + "learning_rate": 5.733199251539091e-06, + "loss": 0.363, + "step": 10376 + }, + { + "epoch": 0.46965376782077395, + "grad_norm": 0.7190888725596375, + "learning_rate": 5.732474245560035e-06, + "loss": 0.3624, + "step": 10377 + }, + { + "epoch": 0.4696990269291695, + "grad_norm": 0.6658433291423098, + "learning_rate": 5.7317492238423565e-06, + "loss": 0.3174, + "step": 10378 + }, + { + "epoch": 0.4697442860375651, + "grad_norm": 0.6978929635468228, + "learning_rate": 5.731024186401636e-06, + "loss": 0.3492, + "step": 10379 + }, + { + "epoch": 0.4697895451459606, + "grad_norm": 0.5995286401107972, + "learning_rate": 5.730299133253449e-06, + "loss": 0.3135, + "step": 10380 + }, + { + "epoch": 0.4698348042543562, + "grad_norm": 0.6186094867021583, + "learning_rate": 5.729574064413378e-06, + "loss": 0.326, + "step": 10381 + }, + { + "epoch": 0.46988006336275173, + "grad_norm": 0.5917704318434182, + "learning_rate": 5.728848979897001e-06, + "loss": 0.2979, + "step": 10382 + }, + { + "epoch": 0.4699253224711473, + "grad_norm": 0.6118993093230494, + "learning_rate": 5.728123879719898e-06, + "loss": 0.3497, + "step": 10383 + }, + { + "epoch": 0.46997058157954286, + "grad_norm": 0.6497195551446878, + "learning_rate": 5.727398763897648e-06, + "loss": 0.3151, + "step": 10384 + }, + { + "epoch": 0.47001584068793845, + "grad_norm": 0.6050964668972018, + "learning_rate": 5.726673632445834e-06, + "loss": 0.3738, + "step": 10385 + }, + { + "epoch": 0.47006109979633404, + "grad_norm": 0.6806531900254521, + "learning_rate": 5.725948485380034e-06, + "loss": 0.3026, + "step": 10386 + }, + { + "epoch": 0.47010635890472957, + "grad_norm": 0.663190369071432, + "learning_rate": 5.725223322715833e-06, + "loss": 0.4091, + "step": 10387 + }, + { + "epoch": 0.47015161801312516, + "grad_norm": 0.38758399729658655, + "learning_rate": 5.724498144468807e-06, + "loss": 0.4785, + "step": 10388 + }, + { + "epoch": 0.4701968771215207, + "grad_norm": 0.6718509573949357, + "learning_rate": 5.7237729506545435e-06, + "loss": 0.3497, + "step": 10389 + }, + { + "epoch": 0.4702421362299163, + "grad_norm": 0.699050141291865, + "learning_rate": 5.723047741288621e-06, + "loss": 0.3236, + "step": 10390 + }, + { + "epoch": 0.4702873953383118, + "grad_norm": 0.3215874781013927, + "learning_rate": 5.722322516386623e-06, + "loss": 0.4859, + "step": 10391 + }, + { + "epoch": 0.4703326544467074, + "grad_norm": 0.8302704908623061, + "learning_rate": 5.7215972759641335e-06, + "loss": 0.3606, + "step": 10392 + }, + { + "epoch": 0.47037791355510294, + "grad_norm": 0.6579121221890892, + "learning_rate": 5.720872020036734e-06, + "loss": 0.3094, + "step": 10393 + }, + { + "epoch": 0.47042317266349853, + "grad_norm": 0.6251053099740931, + "learning_rate": 5.720146748620009e-06, + "loss": 0.3317, + "step": 10394 + }, + { + "epoch": 0.47046843177189407, + "grad_norm": 0.4553160396122376, + "learning_rate": 5.719421461729544e-06, + "loss": 0.4976, + "step": 10395 + }, + { + "epoch": 0.47051369088028966, + "grad_norm": 0.6385209815002308, + "learning_rate": 5.718696159380918e-06, + "loss": 0.3031, + "step": 10396 + }, + { + "epoch": 0.47055894998868525, + "grad_norm": 0.6550005906490711, + "learning_rate": 5.717970841589722e-06, + "loss": 0.3575, + "step": 10397 + }, + { + "epoch": 0.4706042090970808, + "grad_norm": 0.6144433391942784, + "learning_rate": 5.717245508371535e-06, + "loss": 0.3571, + "step": 10398 + }, + { + "epoch": 0.4706494682054764, + "grad_norm": 0.56431820576879, + "learning_rate": 5.716520159741946e-06, + "loss": 0.3345, + "step": 10399 + }, + { + "epoch": 0.4706947273138719, + "grad_norm": 0.31773722371787644, + "learning_rate": 5.715794795716539e-06, + "loss": 0.4817, + "step": 10400 + }, + { + "epoch": 0.4707399864222675, + "grad_norm": 0.6638658663750014, + "learning_rate": 5.7150694163109015e-06, + "loss": 0.3792, + "step": 10401 + }, + { + "epoch": 0.47078524553066303, + "grad_norm": 0.6109695735079277, + "learning_rate": 5.714344021540616e-06, + "loss": 0.3311, + "step": 10402 + }, + { + "epoch": 0.4708305046390586, + "grad_norm": 0.634634568023838, + "learning_rate": 5.713618611421273e-06, + "loss": 0.3193, + "step": 10403 + }, + { + "epoch": 0.47087576374745416, + "grad_norm": 0.2813397261233842, + "learning_rate": 5.712893185968458e-06, + "loss": 0.4807, + "step": 10404 + }, + { + "epoch": 0.47092102285584975, + "grad_norm": 0.6239281195599348, + "learning_rate": 5.712167745197757e-06, + "loss": 0.3455, + "step": 10405 + }, + { + "epoch": 0.4709662819642453, + "grad_norm": 0.6635538212106871, + "learning_rate": 5.71144228912476e-06, + "loss": 0.3336, + "step": 10406 + }, + { + "epoch": 0.47101154107264087, + "grad_norm": 0.7367756633644262, + "learning_rate": 5.710716817765052e-06, + "loss": 0.3805, + "step": 10407 + }, + { + "epoch": 0.4710568001810364, + "grad_norm": 0.30067415828969046, + "learning_rate": 5.709991331134224e-06, + "loss": 0.4866, + "step": 10408 + }, + { + "epoch": 0.471102059289432, + "grad_norm": 0.6064990682433967, + "learning_rate": 5.709265829247861e-06, + "loss": 0.2583, + "step": 10409 + }, + { + "epoch": 0.4711473183978276, + "grad_norm": 0.5816228931837361, + "learning_rate": 5.7085403121215545e-06, + "loss": 0.3187, + "step": 10410 + }, + { + "epoch": 0.4711925775062231, + "grad_norm": 0.6846169928806831, + "learning_rate": 5.707814779770892e-06, + "loss": 0.3383, + "step": 10411 + }, + { + "epoch": 0.4712378366146187, + "grad_norm": 0.2864393917728087, + "learning_rate": 5.707089232211463e-06, + "loss": 0.4872, + "step": 10412 + }, + { + "epoch": 0.47128309572301424, + "grad_norm": 0.7883095576233838, + "learning_rate": 5.70636366945886e-06, + "loss": 0.3431, + "step": 10413 + }, + { + "epoch": 0.47132835483140983, + "grad_norm": 0.671171558514905, + "learning_rate": 5.70563809152867e-06, + "loss": 0.3319, + "step": 10414 + }, + { + "epoch": 0.47137361393980537, + "grad_norm": 0.570402605761979, + "learning_rate": 5.704912498436486e-06, + "loss": 0.3253, + "step": 10415 + }, + { + "epoch": 0.47141887304820096, + "grad_norm": 0.6494215852181807, + "learning_rate": 5.704186890197897e-06, + "loss": 0.3538, + "step": 10416 + }, + { + "epoch": 0.4714641321565965, + "grad_norm": 0.6309491842680105, + "learning_rate": 5.703461266828493e-06, + "loss": 0.3157, + "step": 10417 + }, + { + "epoch": 0.4715093912649921, + "grad_norm": 0.6158447888774433, + "learning_rate": 5.702735628343869e-06, + "loss": 0.3297, + "step": 10418 + }, + { + "epoch": 0.4715546503733876, + "grad_norm": 0.6330071116585992, + "learning_rate": 5.702009974759612e-06, + "loss": 0.3274, + "step": 10419 + }, + { + "epoch": 0.4715999094817832, + "grad_norm": 0.6302672089025184, + "learning_rate": 5.701284306091319e-06, + "loss": 0.329, + "step": 10420 + }, + { + "epoch": 0.4716451685901788, + "grad_norm": 0.698076458574524, + "learning_rate": 5.700558622354579e-06, + "loss": 0.3319, + "step": 10421 + }, + { + "epoch": 0.47169042769857433, + "grad_norm": 0.6162874189440831, + "learning_rate": 5.699832923564986e-06, + "loss": 0.3318, + "step": 10422 + }, + { + "epoch": 0.4717356868069699, + "grad_norm": 0.6526134943107602, + "learning_rate": 5.699107209738133e-06, + "loss": 0.3253, + "step": 10423 + }, + { + "epoch": 0.47178094591536546, + "grad_norm": 0.680267091801362, + "learning_rate": 5.698381480889614e-06, + "loss": 0.4051, + "step": 10424 + }, + { + "epoch": 0.47182620502376105, + "grad_norm": 0.30455858467410707, + "learning_rate": 5.697655737035019e-06, + "loss": 0.4714, + "step": 10425 + }, + { + "epoch": 0.4718714641321566, + "grad_norm": 0.6318519040245598, + "learning_rate": 5.6969299781899486e-06, + "loss": 0.33, + "step": 10426 + }, + { + "epoch": 0.47191672324055217, + "grad_norm": 0.5964106428025935, + "learning_rate": 5.696204204369991e-06, + "loss": 0.3563, + "step": 10427 + }, + { + "epoch": 0.4719619823489477, + "grad_norm": 0.6215879264225173, + "learning_rate": 5.695478415590745e-06, + "loss": 0.3742, + "step": 10428 + }, + { + "epoch": 0.4720072414573433, + "grad_norm": 0.6929168753091881, + "learning_rate": 5.6947526118678024e-06, + "loss": 0.3318, + "step": 10429 + }, + { + "epoch": 0.47205250056573883, + "grad_norm": 0.3668308493260659, + "learning_rate": 5.69402679321676e-06, + "loss": 0.4975, + "step": 10430 + }, + { + "epoch": 0.4720977596741344, + "grad_norm": 0.6125920894562785, + "learning_rate": 5.693300959653214e-06, + "loss": 0.3537, + "step": 10431 + }, + { + "epoch": 0.47214301878253, + "grad_norm": 0.6896258816056711, + "learning_rate": 5.69257511119276e-06, + "loss": 0.346, + "step": 10432 + }, + { + "epoch": 0.47218827789092555, + "grad_norm": 0.29626764972290515, + "learning_rate": 5.691849247850993e-06, + "loss": 0.4737, + "step": 10433 + }, + { + "epoch": 0.47223353699932114, + "grad_norm": 0.6294195346534802, + "learning_rate": 5.691123369643511e-06, + "loss": 0.2943, + "step": 10434 + }, + { + "epoch": 0.47227879610771667, + "grad_norm": 0.6396559300429897, + "learning_rate": 5.690397476585909e-06, + "loss": 0.3178, + "step": 10435 + }, + { + "epoch": 0.47232405521611226, + "grad_norm": 0.6148819531156378, + "learning_rate": 5.689671568693788e-06, + "loss": 0.3441, + "step": 10436 + }, + { + "epoch": 0.4723693143245078, + "grad_norm": 0.6441787478142561, + "learning_rate": 5.688945645982743e-06, + "loss": 0.3151, + "step": 10437 + }, + { + "epoch": 0.4724145734329034, + "grad_norm": 0.6019671802140769, + "learning_rate": 5.68821970846837e-06, + "loss": 0.3661, + "step": 10438 + }, + { + "epoch": 0.4724598325412989, + "grad_norm": 0.4033827542244026, + "learning_rate": 5.687493756166272e-06, + "loss": 0.4804, + "step": 10439 + }, + { + "epoch": 0.4725050916496945, + "grad_norm": 0.6473518747090413, + "learning_rate": 5.686767789092041e-06, + "loss": 0.3211, + "step": 10440 + }, + { + "epoch": 0.47255035075809004, + "grad_norm": 0.3296715058419835, + "learning_rate": 5.6860418072612826e-06, + "loss": 0.4899, + "step": 10441 + }, + { + "epoch": 0.47259560986648563, + "grad_norm": 0.270224074842136, + "learning_rate": 5.6853158106895915e-06, + "loss": 0.4893, + "step": 10442 + }, + { + "epoch": 0.47264086897488117, + "grad_norm": 0.657635254692437, + "learning_rate": 5.684589799392568e-06, + "loss": 0.3276, + "step": 10443 + }, + { + "epoch": 0.47268612808327676, + "grad_norm": 0.6170343378678285, + "learning_rate": 5.683863773385813e-06, + "loss": 0.3139, + "step": 10444 + }, + { + "epoch": 0.47273138719167235, + "grad_norm": 0.6288497648807237, + "learning_rate": 5.683137732684926e-06, + "loss": 0.2783, + "step": 10445 + }, + { + "epoch": 0.4727766463000679, + "grad_norm": 0.42879652640310395, + "learning_rate": 5.682411677305506e-06, + "loss": 0.4675, + "step": 10446 + }, + { + "epoch": 0.47282190540846347, + "grad_norm": 0.666824200507803, + "learning_rate": 5.681685607263156e-06, + "loss": 0.3025, + "step": 10447 + }, + { + "epoch": 0.472867164516859, + "grad_norm": 0.6456795722225278, + "learning_rate": 5.680959522573476e-06, + "loss": 0.3202, + "step": 10448 + }, + { + "epoch": 0.4729124236252546, + "grad_norm": 0.6255694791465786, + "learning_rate": 5.680233423252066e-06, + "loss": 0.358, + "step": 10449 + }, + { + "epoch": 0.47295768273365013, + "grad_norm": 0.6549822291733464, + "learning_rate": 5.67950730931453e-06, + "loss": 0.298, + "step": 10450 + }, + { + "epoch": 0.4730029418420457, + "grad_norm": 0.3193477388383573, + "learning_rate": 5.678781180776469e-06, + "loss": 0.4755, + "step": 10451 + }, + { + "epoch": 0.47304820095044126, + "grad_norm": 0.6423122948962887, + "learning_rate": 5.678055037653485e-06, + "loss": 0.3221, + "step": 10452 + }, + { + "epoch": 0.47309346005883685, + "grad_norm": 0.5848128098939198, + "learning_rate": 5.677328879961182e-06, + "loss": 0.3089, + "step": 10453 + }, + { + "epoch": 0.4731387191672324, + "grad_norm": 0.617186467546983, + "learning_rate": 5.676602707715159e-06, + "loss": 0.3378, + "step": 10454 + }, + { + "epoch": 0.47318397827562797, + "grad_norm": 0.3529437506726879, + "learning_rate": 5.675876520931023e-06, + "loss": 0.4806, + "step": 10455 + }, + { + "epoch": 0.47322923738402356, + "grad_norm": 0.6428562390362067, + "learning_rate": 5.675150319624375e-06, + "loss": 0.3185, + "step": 10456 + }, + { + "epoch": 0.4732744964924191, + "grad_norm": 0.637439086908186, + "learning_rate": 5.674424103810822e-06, + "loss": 0.3537, + "step": 10457 + }, + { + "epoch": 0.4733197556008147, + "grad_norm": 0.6085350660379343, + "learning_rate": 5.6736978735059665e-06, + "loss": 0.3302, + "step": 10458 + }, + { + "epoch": 0.4733650147092102, + "grad_norm": 0.6298430509677303, + "learning_rate": 5.672971628725412e-06, + "loss": 0.3168, + "step": 10459 + }, + { + "epoch": 0.4734102738176058, + "grad_norm": 0.6593200109445425, + "learning_rate": 5.672245369484765e-06, + "loss": 0.3543, + "step": 10460 + }, + { + "epoch": 0.47345553292600134, + "grad_norm": 0.6021724432032531, + "learning_rate": 5.671519095799629e-06, + "loss": 0.3174, + "step": 10461 + }, + { + "epoch": 0.47350079203439693, + "grad_norm": 0.38837544330242485, + "learning_rate": 5.67079280768561e-06, + "loss": 0.4881, + "step": 10462 + }, + { + "epoch": 0.47354605114279247, + "grad_norm": 0.635804739256016, + "learning_rate": 5.670066505158314e-06, + "loss": 0.3139, + "step": 10463 + }, + { + "epoch": 0.47359131025118806, + "grad_norm": 0.6266395671735902, + "learning_rate": 5.6693401882333455e-06, + "loss": 0.3154, + "step": 10464 + }, + { + "epoch": 0.4736365693595836, + "grad_norm": 0.6620216762722568, + "learning_rate": 5.668613856926312e-06, + "loss": 0.3305, + "step": 10465 + }, + { + "epoch": 0.4736818284679792, + "grad_norm": 1.1799266727472142, + "learning_rate": 5.667887511252823e-06, + "loss": 0.3249, + "step": 10466 + }, + { + "epoch": 0.4737270875763748, + "grad_norm": 0.6204130806355646, + "learning_rate": 5.667161151228481e-06, + "loss": 0.3139, + "step": 10467 + }, + { + "epoch": 0.4737723466847703, + "grad_norm": 0.6310237088814322, + "learning_rate": 5.666434776868895e-06, + "loss": 0.322, + "step": 10468 + }, + { + "epoch": 0.4738176057931659, + "grad_norm": 0.6381134611880385, + "learning_rate": 5.665708388189672e-06, + "loss": 0.3484, + "step": 10469 + }, + { + "epoch": 0.47386286490156143, + "grad_norm": 0.5889469271310414, + "learning_rate": 5.664981985206421e-06, + "loss": 0.2972, + "step": 10470 + }, + { + "epoch": 0.473908124009957, + "grad_norm": 0.711647404359138, + "learning_rate": 5.664255567934749e-06, + "loss": 0.3813, + "step": 10471 + }, + { + "epoch": 0.47395338311835256, + "grad_norm": 0.636677445282444, + "learning_rate": 5.663529136390264e-06, + "loss": 0.3075, + "step": 10472 + }, + { + "epoch": 0.47399864222674815, + "grad_norm": 0.6477771180653775, + "learning_rate": 5.662802690588578e-06, + "loss": 0.3174, + "step": 10473 + }, + { + "epoch": 0.4740439013351437, + "grad_norm": 0.6224392332441187, + "learning_rate": 5.662076230545297e-06, + "loss": 0.3416, + "step": 10474 + }, + { + "epoch": 0.47408916044353927, + "grad_norm": 0.6727880059139598, + "learning_rate": 5.66134975627603e-06, + "loss": 0.3335, + "step": 10475 + }, + { + "epoch": 0.4741344195519348, + "grad_norm": 0.6891150407798442, + "learning_rate": 5.660623267796389e-06, + "loss": 0.3144, + "step": 10476 + }, + { + "epoch": 0.4741796786603304, + "grad_norm": 0.6435883187225452, + "learning_rate": 5.659896765121982e-06, + "loss": 0.3266, + "step": 10477 + }, + { + "epoch": 0.47422493776872593, + "grad_norm": 0.608530341011818, + "learning_rate": 5.659170248268422e-06, + "loss": 0.3437, + "step": 10478 + }, + { + "epoch": 0.4742701968771215, + "grad_norm": 0.7542906131639765, + "learning_rate": 5.658443717251316e-06, + "loss": 0.3254, + "step": 10479 + }, + { + "epoch": 0.4743154559855171, + "grad_norm": 0.6917075448860384, + "learning_rate": 5.657717172086278e-06, + "loss": 0.3608, + "step": 10480 + }, + { + "epoch": 0.47436071509391264, + "grad_norm": 0.5847839847757341, + "learning_rate": 5.656990612788918e-06, + "loss": 0.3296, + "step": 10481 + }, + { + "epoch": 0.47440597420230823, + "grad_norm": 0.5982518729555611, + "learning_rate": 5.656264039374846e-06, + "loss": 0.3042, + "step": 10482 + }, + { + "epoch": 0.47445123331070377, + "grad_norm": 0.5898479533174593, + "learning_rate": 5.6555374518596765e-06, + "loss": 0.2949, + "step": 10483 + }, + { + "epoch": 0.47449649241909936, + "grad_norm": 0.6297858839636593, + "learning_rate": 5.654810850259021e-06, + "loss": 0.3386, + "step": 10484 + }, + { + "epoch": 0.4745417515274949, + "grad_norm": 0.6019311826617614, + "learning_rate": 5.65408423458849e-06, + "loss": 0.3221, + "step": 10485 + }, + { + "epoch": 0.4745870106358905, + "grad_norm": 0.6191628764874518, + "learning_rate": 5.653357604863698e-06, + "loss": 0.318, + "step": 10486 + }, + { + "epoch": 0.474632269744286, + "grad_norm": 0.3862071876986101, + "learning_rate": 5.65263096110026e-06, + "loss": 0.4458, + "step": 10487 + }, + { + "epoch": 0.4746775288526816, + "grad_norm": 0.32403615101505817, + "learning_rate": 5.651904303313784e-06, + "loss": 0.4774, + "step": 10488 + }, + { + "epoch": 0.47472278796107714, + "grad_norm": 0.6090597584329148, + "learning_rate": 5.6511776315198886e-06, + "loss": 0.336, + "step": 10489 + }, + { + "epoch": 0.47476804706947273, + "grad_norm": 0.6378578217907716, + "learning_rate": 5.650450945734185e-06, + "loss": 0.3159, + "step": 10490 + }, + { + "epoch": 0.4748133061778683, + "grad_norm": 0.7852386464362128, + "learning_rate": 5.649724245972288e-06, + "loss": 0.3137, + "step": 10491 + }, + { + "epoch": 0.47485856528626386, + "grad_norm": 0.7013033187362679, + "learning_rate": 5.6489975322498124e-06, + "loss": 0.3738, + "step": 10492 + }, + { + "epoch": 0.47490382439465945, + "grad_norm": 0.6168953929319065, + "learning_rate": 5.6482708045823734e-06, + "loss": 0.3624, + "step": 10493 + }, + { + "epoch": 0.474949083503055, + "grad_norm": 0.6178354670293249, + "learning_rate": 5.647544062985586e-06, + "loss": 0.3126, + "step": 10494 + }, + { + "epoch": 0.47499434261145057, + "grad_norm": 0.6318537399545678, + "learning_rate": 5.646817307475066e-06, + "loss": 0.31, + "step": 10495 + }, + { + "epoch": 0.4750396017198461, + "grad_norm": 0.644458175134238, + "learning_rate": 5.646090538066426e-06, + "loss": 0.2877, + "step": 10496 + }, + { + "epoch": 0.4750848608282417, + "grad_norm": 0.642582769804473, + "learning_rate": 5.645363754775288e-06, + "loss": 0.3776, + "step": 10497 + }, + { + "epoch": 0.47513011993663723, + "grad_norm": 0.7798488205437791, + "learning_rate": 5.644636957617264e-06, + "loss": 0.3314, + "step": 10498 + }, + { + "epoch": 0.4751753790450328, + "grad_norm": 0.4828585852711643, + "learning_rate": 5.643910146607972e-06, + "loss": 0.5135, + "step": 10499 + }, + { + "epoch": 0.47522063815342835, + "grad_norm": 0.40002404946581677, + "learning_rate": 5.643183321763027e-06, + "loss": 0.4605, + "step": 10500 + }, + { + "epoch": 0.47526589726182394, + "grad_norm": 0.6665771337235158, + "learning_rate": 5.642456483098049e-06, + "loss": 0.3191, + "step": 10501 + }, + { + "epoch": 0.47531115637021953, + "grad_norm": 0.6147364813893643, + "learning_rate": 5.641729630628654e-06, + "loss": 0.2882, + "step": 10502 + }, + { + "epoch": 0.47535641547861507, + "grad_norm": 0.6099779425362699, + "learning_rate": 5.641002764370461e-06, + "loss": 0.2919, + "step": 10503 + }, + { + "epoch": 0.47540167458701066, + "grad_norm": 0.43910766007800495, + "learning_rate": 5.6402758843390844e-06, + "loss": 0.4744, + "step": 10504 + }, + { + "epoch": 0.4754469336954062, + "grad_norm": 0.6479746229972967, + "learning_rate": 5.63954899055015e-06, + "loss": 0.3509, + "step": 10505 + }, + { + "epoch": 0.4754921928038018, + "grad_norm": 0.6720997314016489, + "learning_rate": 5.638822083019267e-06, + "loss": 0.318, + "step": 10506 + }, + { + "epoch": 0.4755374519121973, + "grad_norm": 0.644017282983267, + "learning_rate": 5.638095161762064e-06, + "loss": 0.3371, + "step": 10507 + }, + { + "epoch": 0.4755827110205929, + "grad_norm": 0.6956189399848878, + "learning_rate": 5.637368226794153e-06, + "loss": 0.3441, + "step": 10508 + }, + { + "epoch": 0.47562797012898844, + "grad_norm": 0.7176511786720804, + "learning_rate": 5.6366412781311575e-06, + "loss": 0.2998, + "step": 10509 + }, + { + "epoch": 0.47567322923738403, + "grad_norm": 0.6742388327178932, + "learning_rate": 5.635914315788695e-06, + "loss": 0.3256, + "step": 10510 + }, + { + "epoch": 0.47571848834577957, + "grad_norm": 0.3869309011976393, + "learning_rate": 5.635187339782389e-06, + "loss": 0.4666, + "step": 10511 + }, + { + "epoch": 0.47576374745417516, + "grad_norm": 0.6723727392135527, + "learning_rate": 5.634460350127855e-06, + "loss": 0.3445, + "step": 10512 + }, + { + "epoch": 0.4758090065625707, + "grad_norm": 0.7130082870729761, + "learning_rate": 5.633733346840719e-06, + "loss": 0.3382, + "step": 10513 + }, + { + "epoch": 0.4758542656709663, + "grad_norm": 0.6353001641146917, + "learning_rate": 5.633006329936599e-06, + "loss": 0.3402, + "step": 10514 + }, + { + "epoch": 0.47589952477936187, + "grad_norm": 0.6832213982688691, + "learning_rate": 5.632279299431117e-06, + "loss": 0.3894, + "step": 10515 + }, + { + "epoch": 0.4759447838877574, + "grad_norm": 0.6601553798734152, + "learning_rate": 5.631552255339896e-06, + "loss": 0.3305, + "step": 10516 + }, + { + "epoch": 0.475990042996153, + "grad_norm": 0.5967921800961582, + "learning_rate": 5.630825197678556e-06, + "loss": 0.3038, + "step": 10517 + }, + { + "epoch": 0.47603530210454853, + "grad_norm": 0.6301548350456041, + "learning_rate": 5.630098126462719e-06, + "loss": 0.3315, + "step": 10518 + }, + { + "epoch": 0.4760805612129441, + "grad_norm": 0.32232962909444457, + "learning_rate": 5.629371041708009e-06, + "loss": 0.4891, + "step": 10519 + }, + { + "epoch": 0.47612582032133965, + "grad_norm": 0.6719022010093874, + "learning_rate": 5.6286439434300476e-06, + "loss": 0.3032, + "step": 10520 + }, + { + "epoch": 0.47617107942973524, + "grad_norm": 0.3066219988017249, + "learning_rate": 5.627916831644459e-06, + "loss": 0.4913, + "step": 10521 + }, + { + "epoch": 0.4762163385381308, + "grad_norm": 0.7207359042047378, + "learning_rate": 5.627189706366866e-06, + "loss": 0.3651, + "step": 10522 + }, + { + "epoch": 0.47626159764652637, + "grad_norm": 0.37989185098569295, + "learning_rate": 5.626462567612892e-06, + "loss": 0.4776, + "step": 10523 + }, + { + "epoch": 0.4763068567549219, + "grad_norm": 0.647918593072506, + "learning_rate": 5.625735415398164e-06, + "loss": 0.3261, + "step": 10524 + }, + { + "epoch": 0.4763521158633175, + "grad_norm": 0.6128445967722367, + "learning_rate": 5.625008249738301e-06, + "loss": 0.3424, + "step": 10525 + }, + { + "epoch": 0.4763973749717131, + "grad_norm": 0.6504907307873667, + "learning_rate": 5.624281070648933e-06, + "loss": 0.2983, + "step": 10526 + }, + { + "epoch": 0.4764426340801086, + "grad_norm": 0.6773751990955243, + "learning_rate": 5.623553878145679e-06, + "loss": 0.326, + "step": 10527 + }, + { + "epoch": 0.4764878931885042, + "grad_norm": 0.6813088755616269, + "learning_rate": 5.622826672244169e-06, + "loss": 0.3685, + "step": 10528 + }, + { + "epoch": 0.47653315229689974, + "grad_norm": 0.599714585578754, + "learning_rate": 5.622099452960027e-06, + "loss": 0.3429, + "step": 10529 + }, + { + "epoch": 0.47657841140529533, + "grad_norm": 0.6266975657425706, + "learning_rate": 5.621372220308877e-06, + "loss": 0.3029, + "step": 10530 + }, + { + "epoch": 0.47662367051369087, + "grad_norm": 0.638104136678306, + "learning_rate": 5.620644974306347e-06, + "loss": 0.3379, + "step": 10531 + }, + { + "epoch": 0.47666892962208646, + "grad_norm": 0.5813658371373994, + "learning_rate": 5.619917714968064e-06, + "loss": 0.3223, + "step": 10532 + }, + { + "epoch": 0.476714188730482, + "grad_norm": 0.6488863979765176, + "learning_rate": 5.619190442309651e-06, + "loss": 0.3755, + "step": 10533 + }, + { + "epoch": 0.4767594478388776, + "grad_norm": 0.6424566258027432, + "learning_rate": 5.61846315634674e-06, + "loss": 0.3545, + "step": 10534 + }, + { + "epoch": 0.4768047069472731, + "grad_norm": 0.6801460304007487, + "learning_rate": 5.617735857094951e-06, + "loss": 0.3029, + "step": 10535 + }, + { + "epoch": 0.4768499660556687, + "grad_norm": 0.5846304119121462, + "learning_rate": 5.61700854456992e-06, + "loss": 0.2941, + "step": 10536 + }, + { + "epoch": 0.47689522516406424, + "grad_norm": 0.6947565676732286, + "learning_rate": 5.616281218787268e-06, + "loss": 0.3463, + "step": 10537 + }, + { + "epoch": 0.47694048427245983, + "grad_norm": 0.588970540066978, + "learning_rate": 5.6155538797626254e-06, + "loss": 0.2924, + "step": 10538 + }, + { + "epoch": 0.4769857433808554, + "grad_norm": 0.5913635705662535, + "learning_rate": 5.614826527511621e-06, + "loss": 0.3402, + "step": 10539 + }, + { + "epoch": 0.47703100248925095, + "grad_norm": 0.6070944121687863, + "learning_rate": 5.614099162049883e-06, + "loss": 0.3182, + "step": 10540 + }, + { + "epoch": 0.47707626159764654, + "grad_norm": 0.6484577073713257, + "learning_rate": 5.613371783393039e-06, + "loss": 0.3275, + "step": 10541 + }, + { + "epoch": 0.4771215207060421, + "grad_norm": 0.4262153304553133, + "learning_rate": 5.612644391556721e-06, + "loss": 0.4919, + "step": 10542 + }, + { + "epoch": 0.47716677981443767, + "grad_norm": 0.3873556240946461, + "learning_rate": 5.611916986556555e-06, + "loss": 0.4972, + "step": 10543 + }, + { + "epoch": 0.4772120389228332, + "grad_norm": 0.6849575773347174, + "learning_rate": 5.611189568408173e-06, + "loss": 0.3236, + "step": 10544 + }, + { + "epoch": 0.4772572980312288, + "grad_norm": 0.5726982431800233, + "learning_rate": 5.610462137127205e-06, + "loss": 0.2837, + "step": 10545 + }, + { + "epoch": 0.47730255713962433, + "grad_norm": 0.4824256200216908, + "learning_rate": 5.609734692729278e-06, + "loss": 0.5073, + "step": 10546 + }, + { + "epoch": 0.4773478162480199, + "grad_norm": 0.6105993805076046, + "learning_rate": 5.609007235230029e-06, + "loss": 0.3515, + "step": 10547 + }, + { + "epoch": 0.47739307535641545, + "grad_norm": 0.5849218911471216, + "learning_rate": 5.60827976464508e-06, + "loss": 0.3263, + "step": 10548 + }, + { + "epoch": 0.47743833446481104, + "grad_norm": 0.6010479395747507, + "learning_rate": 5.607552280990071e-06, + "loss": 0.3844, + "step": 10549 + }, + { + "epoch": 0.47748359357320663, + "grad_norm": 0.5929995553550435, + "learning_rate": 5.606824784280629e-06, + "loss": 0.3451, + "step": 10550 + }, + { + "epoch": 0.47752885268160217, + "grad_norm": 0.5829228500969121, + "learning_rate": 5.606097274532385e-06, + "loss": 0.3029, + "step": 10551 + }, + { + "epoch": 0.47757411178999776, + "grad_norm": 0.6339342525556836, + "learning_rate": 5.6053697517609725e-06, + "loss": 0.3454, + "step": 10552 + }, + { + "epoch": 0.4776193708983933, + "grad_norm": 0.6276833410613624, + "learning_rate": 5.604642215982025e-06, + "loss": 0.2935, + "step": 10553 + }, + { + "epoch": 0.4776646300067889, + "grad_norm": 0.6209626862316334, + "learning_rate": 5.60391466721117e-06, + "loss": 0.3491, + "step": 10554 + }, + { + "epoch": 0.4777098891151844, + "grad_norm": 0.6916101322205742, + "learning_rate": 5.603187105464045e-06, + "loss": 0.3242, + "step": 10555 + }, + { + "epoch": 0.47775514822358, + "grad_norm": 0.5816352573986318, + "learning_rate": 5.6024595307562815e-06, + "loss": 0.3542, + "step": 10556 + }, + { + "epoch": 0.47780040733197554, + "grad_norm": 0.47474496295445967, + "learning_rate": 5.601731943103515e-06, + "loss": 0.4782, + "step": 10557 + }, + { + "epoch": 0.47784566644037113, + "grad_norm": 0.41798351738900213, + "learning_rate": 5.601004342521374e-06, + "loss": 0.4784, + "step": 10558 + }, + { + "epoch": 0.47789092554876667, + "grad_norm": 0.33629964526700756, + "learning_rate": 5.6002767290254975e-06, + "loss": 0.4703, + "step": 10559 + }, + { + "epoch": 0.47793618465716226, + "grad_norm": 0.5380229410130636, + "learning_rate": 5.599549102631516e-06, + "loss": 0.284, + "step": 10560 + }, + { + "epoch": 0.47798144376555785, + "grad_norm": 0.8051297804005851, + "learning_rate": 5.598821463355069e-06, + "loss": 0.3106, + "step": 10561 + }, + { + "epoch": 0.4780267028739534, + "grad_norm": 0.5170642766936449, + "learning_rate": 5.598093811211785e-06, + "loss": 0.4833, + "step": 10562 + }, + { + "epoch": 0.47807196198234897, + "grad_norm": 0.5963256229323173, + "learning_rate": 5.597366146217303e-06, + "loss": 0.2974, + "step": 10563 + }, + { + "epoch": 0.4781172210907445, + "grad_norm": 0.6599750262075692, + "learning_rate": 5.596638468387255e-06, + "loss": 0.3371, + "step": 10564 + }, + { + "epoch": 0.4781624801991401, + "grad_norm": 0.9292747092655834, + "learning_rate": 5.595910777737281e-06, + "loss": 0.297, + "step": 10565 + }, + { + "epoch": 0.47820773930753563, + "grad_norm": 0.5795709443727703, + "learning_rate": 5.5951830742830145e-06, + "loss": 0.482, + "step": 10566 + }, + { + "epoch": 0.4782529984159312, + "grad_norm": 0.6727811870569353, + "learning_rate": 5.594455358040091e-06, + "loss": 0.3363, + "step": 10567 + }, + { + "epoch": 0.47829825752432675, + "grad_norm": 0.657959114215397, + "learning_rate": 5.5937276290241486e-06, + "loss": 0.3043, + "step": 10568 + }, + { + "epoch": 0.47834351663272234, + "grad_norm": 0.6521324485995212, + "learning_rate": 5.5929998872508215e-06, + "loss": 0.36, + "step": 10569 + }, + { + "epoch": 0.4783887757411179, + "grad_norm": 0.5943329694166126, + "learning_rate": 5.592272132735749e-06, + "loss": 0.34, + "step": 10570 + }, + { + "epoch": 0.47843403484951347, + "grad_norm": 0.6612431740464342, + "learning_rate": 5.591544365494567e-06, + "loss": 0.3467, + "step": 10571 + }, + { + "epoch": 0.478479293957909, + "grad_norm": 0.6059079878953904, + "learning_rate": 5.590816585542913e-06, + "loss": 0.2876, + "step": 10572 + }, + { + "epoch": 0.4785245530663046, + "grad_norm": 0.4460620888325759, + "learning_rate": 5.590088792896427e-06, + "loss": 0.4569, + "step": 10573 + }, + { + "epoch": 0.4785698121747002, + "grad_norm": 0.6267320935882589, + "learning_rate": 5.589360987570745e-06, + "loss": 0.3438, + "step": 10574 + }, + { + "epoch": 0.4786150712830957, + "grad_norm": 0.6944370493131874, + "learning_rate": 5.588633169581502e-06, + "loss": 0.3631, + "step": 10575 + }, + { + "epoch": 0.4786603303914913, + "grad_norm": 0.5718505842229085, + "learning_rate": 5.5879053389443435e-06, + "loss": 0.3306, + "step": 10576 + }, + { + "epoch": 0.47870558949988684, + "grad_norm": 0.654911543789487, + "learning_rate": 5.587177495674902e-06, + "loss": 0.3318, + "step": 10577 + }, + { + "epoch": 0.47875084860828243, + "grad_norm": 0.38311412169455544, + "learning_rate": 5.586449639788822e-06, + "loss": 0.459, + "step": 10578 + }, + { + "epoch": 0.47879610771667797, + "grad_norm": 0.296067994927196, + "learning_rate": 5.5857217713017394e-06, + "loss": 0.4551, + "step": 10579 + }, + { + "epoch": 0.47884136682507356, + "grad_norm": 0.6393993738142472, + "learning_rate": 5.584993890229296e-06, + "loss": 0.3378, + "step": 10580 + }, + { + "epoch": 0.4788866259334691, + "grad_norm": 0.6430280142761382, + "learning_rate": 5.584265996587129e-06, + "loss": 0.3513, + "step": 10581 + }, + { + "epoch": 0.4789318850418647, + "grad_norm": 0.6254186892649977, + "learning_rate": 5.583538090390882e-06, + "loss": 0.3143, + "step": 10582 + }, + { + "epoch": 0.4789771441502602, + "grad_norm": 0.6131120850240496, + "learning_rate": 5.582810171656191e-06, + "loss": 0.3176, + "step": 10583 + }, + { + "epoch": 0.4790224032586558, + "grad_norm": 0.6319043952102459, + "learning_rate": 5.582082240398702e-06, + "loss": 0.2955, + "step": 10584 + }, + { + "epoch": 0.4790676623670514, + "grad_norm": 0.6220473199791497, + "learning_rate": 5.5813542966340514e-06, + "loss": 0.3094, + "step": 10585 + }, + { + "epoch": 0.47911292147544693, + "grad_norm": 0.6886628697806193, + "learning_rate": 5.580626340377884e-06, + "loss": 0.347, + "step": 10586 + }, + { + "epoch": 0.4791581805838425, + "grad_norm": 0.6260532435901625, + "learning_rate": 5.579898371645839e-06, + "loss": 0.3447, + "step": 10587 + }, + { + "epoch": 0.47920343969223805, + "grad_norm": 0.6086583527185936, + "learning_rate": 5.5791703904535584e-06, + "loss": 0.3037, + "step": 10588 + }, + { + "epoch": 0.47924869880063364, + "grad_norm": 0.41963576368214195, + "learning_rate": 5.578442396816685e-06, + "loss": 0.4868, + "step": 10589 + }, + { + "epoch": 0.4792939579090292, + "grad_norm": 0.8103420874521906, + "learning_rate": 5.577714390750862e-06, + "loss": 0.3631, + "step": 10590 + }, + { + "epoch": 0.47933921701742477, + "grad_norm": 0.8064511619876602, + "learning_rate": 5.576986372271731e-06, + "loss": 0.3607, + "step": 10591 + }, + { + "epoch": 0.4793844761258203, + "grad_norm": 0.31885495383354473, + "learning_rate": 5.576258341394936e-06, + "loss": 0.4842, + "step": 10592 + }, + { + "epoch": 0.4794297352342159, + "grad_norm": 0.6218266829354264, + "learning_rate": 5.575530298136116e-06, + "loss": 0.3272, + "step": 10593 + }, + { + "epoch": 0.4794749943426114, + "grad_norm": 0.704607964592815, + "learning_rate": 5.574802242510921e-06, + "loss": 0.3527, + "step": 10594 + }, + { + "epoch": 0.479520253451007, + "grad_norm": 0.6220004783827727, + "learning_rate": 5.574074174534989e-06, + "loss": 0.3301, + "step": 10595 + }, + { + "epoch": 0.4795655125594026, + "grad_norm": 0.3650889133524078, + "learning_rate": 5.573346094223966e-06, + "loss": 0.5055, + "step": 10596 + }, + { + "epoch": 0.47961077166779814, + "grad_norm": 0.35017989979027714, + "learning_rate": 5.5726180015934976e-06, + "loss": 0.4825, + "step": 10597 + }, + { + "epoch": 0.47965603077619373, + "grad_norm": 0.5869712702415046, + "learning_rate": 5.571889896659225e-06, + "loss": 0.2814, + "step": 10598 + }, + { + "epoch": 0.47970128988458927, + "grad_norm": 0.814425945966293, + "learning_rate": 5.571161779436797e-06, + "loss": 0.3136, + "step": 10599 + }, + { + "epoch": 0.47974654899298486, + "grad_norm": 0.6462944916736755, + "learning_rate": 5.570433649941855e-06, + "loss": 0.3181, + "step": 10600 + }, + { + "epoch": 0.4797918081013804, + "grad_norm": 0.293109634291855, + "learning_rate": 5.5697055081900465e-06, + "loss": 0.508, + "step": 10601 + }, + { + "epoch": 0.479837067209776, + "grad_norm": 0.5914173537503168, + "learning_rate": 5.568977354197016e-06, + "loss": 0.3507, + "step": 10602 + }, + { + "epoch": 0.4798823263181715, + "grad_norm": 0.6150955968191683, + "learning_rate": 5.568249187978412e-06, + "loss": 0.3485, + "step": 10603 + }, + { + "epoch": 0.4799275854265671, + "grad_norm": 0.32318681448665276, + "learning_rate": 5.567521009549874e-06, + "loss": 0.4621, + "step": 10604 + }, + { + "epoch": 0.47997284453496264, + "grad_norm": 0.7618492618904363, + "learning_rate": 5.566792818927056e-06, + "loss": 0.3452, + "step": 10605 + }, + { + "epoch": 0.48001810364335823, + "grad_norm": 0.6691405458194113, + "learning_rate": 5.566064616125599e-06, + "loss": 0.3312, + "step": 10606 + }, + { + "epoch": 0.48006336275175376, + "grad_norm": 0.6048012011493709, + "learning_rate": 5.565336401161153e-06, + "loss": 0.3358, + "step": 10607 + }, + { + "epoch": 0.48010862186014935, + "grad_norm": 0.3210420083699887, + "learning_rate": 5.564608174049364e-06, + "loss": 0.4653, + "step": 10608 + }, + { + "epoch": 0.48015388096854494, + "grad_norm": 0.6075408372209316, + "learning_rate": 5.5638799348058795e-06, + "loss": 0.3502, + "step": 10609 + }, + { + "epoch": 0.4801991400769405, + "grad_norm": 0.30198056087813385, + "learning_rate": 5.563151683446346e-06, + "loss": 0.4962, + "step": 10610 + }, + { + "epoch": 0.48024439918533607, + "grad_norm": 0.6459826113080686, + "learning_rate": 5.562423419986415e-06, + "loss": 0.3455, + "step": 10611 + }, + { + "epoch": 0.4802896582937316, + "grad_norm": 0.26951371419713627, + "learning_rate": 5.561695144441729e-06, + "loss": 0.457, + "step": 10612 + }, + { + "epoch": 0.4803349174021272, + "grad_norm": 0.624369197658816, + "learning_rate": 5.5609668568279415e-06, + "loss": 0.3443, + "step": 10613 + }, + { + "epoch": 0.4803801765105227, + "grad_norm": 0.6414411152346636, + "learning_rate": 5.560238557160698e-06, + "loss": 0.301, + "step": 10614 + }, + { + "epoch": 0.4804254356189183, + "grad_norm": 0.7575620000309918, + "learning_rate": 5.559510245455649e-06, + "loss": 0.3197, + "step": 10615 + }, + { + "epoch": 0.48047069472731385, + "grad_norm": 0.6573322938311212, + "learning_rate": 5.558781921728443e-06, + "loss": 0.3071, + "step": 10616 + }, + { + "epoch": 0.48051595383570944, + "grad_norm": 0.7094660698496261, + "learning_rate": 5.558053585994729e-06, + "loss": 0.3711, + "step": 10617 + }, + { + "epoch": 0.480561212944105, + "grad_norm": 0.6309626577717088, + "learning_rate": 5.557325238270158e-06, + "loss": 0.3501, + "step": 10618 + }, + { + "epoch": 0.48060647205250057, + "grad_norm": 0.5516977033728775, + "learning_rate": 5.5565968785703795e-06, + "loss": 0.4785, + "step": 10619 + }, + { + "epoch": 0.48065173116089616, + "grad_norm": 0.6460332565278315, + "learning_rate": 5.5558685069110444e-06, + "loss": 0.3558, + "step": 10620 + }, + { + "epoch": 0.4806969902692917, + "grad_norm": 0.6748996044819889, + "learning_rate": 5.5551401233078e-06, + "loss": 0.3051, + "step": 10621 + }, + { + "epoch": 0.4807422493776873, + "grad_norm": 0.6158170111840895, + "learning_rate": 5.554411727776301e-06, + "loss": 0.3576, + "step": 10622 + }, + { + "epoch": 0.4807875084860828, + "grad_norm": 0.6420214315777303, + "learning_rate": 5.553683320332196e-06, + "loss": 0.3463, + "step": 10623 + }, + { + "epoch": 0.4808327675944784, + "grad_norm": 0.6657582922286736, + "learning_rate": 5.552954900991139e-06, + "loss": 0.3237, + "step": 10624 + }, + { + "epoch": 0.48087802670287394, + "grad_norm": 0.2967738912280458, + "learning_rate": 5.552226469768777e-06, + "loss": 0.5012, + "step": 10625 + }, + { + "epoch": 0.48092328581126953, + "grad_norm": 0.6290390226298818, + "learning_rate": 5.551498026680766e-06, + "loss": 0.3351, + "step": 10626 + }, + { + "epoch": 0.48096854491966506, + "grad_norm": 0.6340394446650136, + "learning_rate": 5.550769571742755e-06, + "loss": 0.3395, + "step": 10627 + }, + { + "epoch": 0.48101380402806065, + "grad_norm": 0.6845926201588363, + "learning_rate": 5.550041104970398e-06, + "loss": 0.3375, + "step": 10628 + }, + { + "epoch": 0.4810590631364562, + "grad_norm": 0.637658919649331, + "learning_rate": 5.5493126263793465e-06, + "loss": 0.3517, + "step": 10629 + }, + { + "epoch": 0.4811043222448518, + "grad_norm": 0.6112584722416579, + "learning_rate": 5.548584135985253e-06, + "loss": 0.3642, + "step": 10630 + }, + { + "epoch": 0.48114958135324737, + "grad_norm": 0.29810893204105327, + "learning_rate": 5.547855633803773e-06, + "loss": 0.464, + "step": 10631 + }, + { + "epoch": 0.4811948404616429, + "grad_norm": 0.2780123534377946, + "learning_rate": 5.547127119850557e-06, + "loss": 0.4612, + "step": 10632 + }, + { + "epoch": 0.4812400995700385, + "grad_norm": 0.7624268380123517, + "learning_rate": 5.546398594141259e-06, + "loss": 0.319, + "step": 10633 + }, + { + "epoch": 0.481285358678434, + "grad_norm": 0.6333384073958104, + "learning_rate": 5.545670056691535e-06, + "loss": 0.3073, + "step": 10634 + }, + { + "epoch": 0.4813306177868296, + "grad_norm": 0.6160571792514077, + "learning_rate": 5.544941507517036e-06, + "loss": 0.324, + "step": 10635 + }, + { + "epoch": 0.48137587689522515, + "grad_norm": 0.8222033027521287, + "learning_rate": 5.544212946633418e-06, + "loss": 0.3533, + "step": 10636 + }, + { + "epoch": 0.48142113600362074, + "grad_norm": 0.3127573927438028, + "learning_rate": 5.543484374056336e-06, + "loss": 0.4877, + "step": 10637 + }, + { + "epoch": 0.4814663951120163, + "grad_norm": 0.30309197025768586, + "learning_rate": 5.542755789801442e-06, + "loss": 0.4931, + "step": 10638 + }, + { + "epoch": 0.48151165422041187, + "grad_norm": 0.7781188479398874, + "learning_rate": 5.542027193884395e-06, + "loss": 0.3357, + "step": 10639 + }, + { + "epoch": 0.4815569133288074, + "grad_norm": 0.6058750001614668, + "learning_rate": 5.541298586320848e-06, + "loss": 0.3053, + "step": 10640 + }, + { + "epoch": 0.481602172437203, + "grad_norm": 0.7037355994647424, + "learning_rate": 5.540569967126457e-06, + "loss": 0.3496, + "step": 10641 + }, + { + "epoch": 0.4816474315455985, + "grad_norm": 0.2824906190415698, + "learning_rate": 5.539841336316878e-06, + "loss": 0.4922, + "step": 10642 + }, + { + "epoch": 0.4816926906539941, + "grad_norm": 0.6428244763588142, + "learning_rate": 5.539112693907765e-06, + "loss": 0.3252, + "step": 10643 + }, + { + "epoch": 0.4817379497623897, + "grad_norm": 0.8841811706079792, + "learning_rate": 5.538384039914777e-06, + "loss": 0.3226, + "step": 10644 + }, + { + "epoch": 0.48178320887078524, + "grad_norm": 0.687525201512995, + "learning_rate": 5.53765537435357e-06, + "loss": 0.2941, + "step": 10645 + }, + { + "epoch": 0.48182846797918083, + "grad_norm": 0.27305283829515287, + "learning_rate": 5.536926697239799e-06, + "loss": 0.4843, + "step": 10646 + }, + { + "epoch": 0.48187372708757636, + "grad_norm": 0.2857701176133048, + "learning_rate": 5.536198008589123e-06, + "loss": 0.4813, + "step": 10647 + }, + { + "epoch": 0.48191898619597195, + "grad_norm": 0.6667747799625429, + "learning_rate": 5.535469308417198e-06, + "loss": 0.3229, + "step": 10648 + }, + { + "epoch": 0.4819642453043675, + "grad_norm": 0.2865961501721208, + "learning_rate": 5.5347405967396825e-06, + "loss": 0.4841, + "step": 10649 + }, + { + "epoch": 0.4820095044127631, + "grad_norm": 0.6549681438750801, + "learning_rate": 5.534011873572235e-06, + "loss": 0.3061, + "step": 10650 + }, + { + "epoch": 0.4820547635211586, + "grad_norm": 0.3055832816720461, + "learning_rate": 5.533283138930511e-06, + "loss": 0.4667, + "step": 10651 + }, + { + "epoch": 0.4821000226295542, + "grad_norm": 0.288024145984902, + "learning_rate": 5.532554392830171e-06, + "loss": 0.4667, + "step": 10652 + }, + { + "epoch": 0.48214528173794974, + "grad_norm": 0.6439717321302626, + "learning_rate": 5.531825635286872e-06, + "loss": 0.2995, + "step": 10653 + }, + { + "epoch": 0.48219054084634533, + "grad_norm": 0.7070447804383497, + "learning_rate": 5.531096866316273e-06, + "loss": 0.3659, + "step": 10654 + }, + { + "epoch": 0.4822357999547409, + "grad_norm": 0.6090117333768033, + "learning_rate": 5.530368085934036e-06, + "loss": 0.3284, + "step": 10655 + }, + { + "epoch": 0.48228105906313645, + "grad_norm": 0.649044910192023, + "learning_rate": 5.529639294155815e-06, + "loss": 0.3244, + "step": 10656 + }, + { + "epoch": 0.48232631817153204, + "grad_norm": 0.6588154605789093, + "learning_rate": 5.528910490997275e-06, + "loss": 0.3237, + "step": 10657 + }, + { + "epoch": 0.4823715772799276, + "grad_norm": 0.6840278031937022, + "learning_rate": 5.528181676474071e-06, + "loss": 0.3157, + "step": 10658 + }, + { + "epoch": 0.48241683638832317, + "grad_norm": 0.7609837095962596, + "learning_rate": 5.527452850601864e-06, + "loss": 0.3481, + "step": 10659 + }, + { + "epoch": 0.4824620954967187, + "grad_norm": 0.766232069555319, + "learning_rate": 5.526724013396317e-06, + "loss": 0.3152, + "step": 10660 + }, + { + "epoch": 0.4825073546051143, + "grad_norm": 0.870320948169904, + "learning_rate": 5.5259951648730885e-06, + "loss": 0.2826, + "step": 10661 + }, + { + "epoch": 0.4825526137135098, + "grad_norm": 0.7347925460790161, + "learning_rate": 5.525266305047838e-06, + "loss": 0.3707, + "step": 10662 + }, + { + "epoch": 0.4825978728219054, + "grad_norm": 0.6507573047099565, + "learning_rate": 5.52453743393623e-06, + "loss": 0.3407, + "step": 10663 + }, + { + "epoch": 0.48264313193030095, + "grad_norm": 0.6773965666426126, + "learning_rate": 5.523808551553922e-06, + "loss": 0.3185, + "step": 10664 + }, + { + "epoch": 0.48268839103869654, + "grad_norm": 0.726493244938831, + "learning_rate": 5.523079657916578e-06, + "loss": 0.3734, + "step": 10665 + }, + { + "epoch": 0.4827336501470921, + "grad_norm": 0.7505689732391188, + "learning_rate": 5.522350753039858e-06, + "loss": 0.3267, + "step": 10666 + }, + { + "epoch": 0.48277890925548766, + "grad_norm": 0.6373008470479439, + "learning_rate": 5.521621836939424e-06, + "loss": 0.3462, + "step": 10667 + }, + { + "epoch": 0.48282416836388325, + "grad_norm": 0.656718497793086, + "learning_rate": 5.520892909630939e-06, + "loss": 0.347, + "step": 10668 + }, + { + "epoch": 0.4828694274722788, + "grad_norm": 0.7163210793168534, + "learning_rate": 5.520163971130066e-06, + "loss": 0.3255, + "step": 10669 + }, + { + "epoch": 0.4829146865806744, + "grad_norm": 0.8900367229797369, + "learning_rate": 5.519435021452466e-06, + "loss": 0.3682, + "step": 10670 + }, + { + "epoch": 0.4829599456890699, + "grad_norm": 0.6803573374057947, + "learning_rate": 5.518706060613805e-06, + "loss": 0.3681, + "step": 10671 + }, + { + "epoch": 0.4830052047974655, + "grad_norm": 0.41030123308996586, + "learning_rate": 5.5179770886297405e-06, + "loss": 0.483, + "step": 10672 + }, + { + "epoch": 0.48305046390586104, + "grad_norm": 0.6613579069249995, + "learning_rate": 5.517248105515941e-06, + "loss": 0.3629, + "step": 10673 + }, + { + "epoch": 0.48309572301425663, + "grad_norm": 0.7609906339983002, + "learning_rate": 5.5165191112880674e-06, + "loss": 0.313, + "step": 10674 + }, + { + "epoch": 0.48314098212265216, + "grad_norm": 0.6434155323688646, + "learning_rate": 5.515790105961785e-06, + "loss": 0.3746, + "step": 10675 + }, + { + "epoch": 0.48318624123104775, + "grad_norm": 0.6547562892095287, + "learning_rate": 5.515061089552758e-06, + "loss": 0.3438, + "step": 10676 + }, + { + "epoch": 0.4832315003394433, + "grad_norm": 0.6847596676297534, + "learning_rate": 5.514332062076649e-06, + "loss": 0.362, + "step": 10677 + }, + { + "epoch": 0.4832767594478389, + "grad_norm": 0.6315597904582344, + "learning_rate": 5.513603023549124e-06, + "loss": 0.3041, + "step": 10678 + }, + { + "epoch": 0.48332201855623447, + "grad_norm": 0.9328268878726889, + "learning_rate": 5.512873973985847e-06, + "loss": 0.3337, + "step": 10679 + }, + { + "epoch": 0.48336727766463, + "grad_norm": 0.6916736855340677, + "learning_rate": 5.512144913402485e-06, + "loss": 0.3443, + "step": 10680 + }, + { + "epoch": 0.4834125367730256, + "grad_norm": 0.6105004747411769, + "learning_rate": 5.5114158418147005e-06, + "loss": 0.3758, + "step": 10681 + }, + { + "epoch": 0.4834577958814211, + "grad_norm": 0.6197122850557252, + "learning_rate": 5.51068675923816e-06, + "loss": 0.348, + "step": 10682 + }, + { + "epoch": 0.4835030549898167, + "grad_norm": 0.6173874073638086, + "learning_rate": 5.50995766568853e-06, + "loss": 0.296, + "step": 10683 + }, + { + "epoch": 0.48354831409821225, + "grad_norm": 0.7139824073742472, + "learning_rate": 5.509228561181476e-06, + "loss": 0.3266, + "step": 10684 + }, + { + "epoch": 0.48359357320660784, + "grad_norm": 0.6004396779219929, + "learning_rate": 5.508499445732664e-06, + "loss": 0.3495, + "step": 10685 + }, + { + "epoch": 0.4836388323150034, + "grad_norm": 0.3708540694305002, + "learning_rate": 5.507770319357762e-06, + "loss": 0.4929, + "step": 10686 + }, + { + "epoch": 0.48368409142339897, + "grad_norm": 0.6407768627436697, + "learning_rate": 5.507041182072434e-06, + "loss": 0.3306, + "step": 10687 + }, + { + "epoch": 0.4837293505317945, + "grad_norm": 0.6358955180217345, + "learning_rate": 5.506312033892348e-06, + "loss": 0.3176, + "step": 10688 + }, + { + "epoch": 0.4837746096401901, + "grad_norm": 0.6569460673575273, + "learning_rate": 5.505582874833172e-06, + "loss": 0.3283, + "step": 10689 + }, + { + "epoch": 0.4838198687485857, + "grad_norm": 0.644320307937128, + "learning_rate": 5.5048537049105725e-06, + "loss": 0.3068, + "step": 10690 + }, + { + "epoch": 0.4838651278569812, + "grad_norm": 0.7781673045915627, + "learning_rate": 5.504124524140218e-06, + "loss": 0.3419, + "step": 10691 + }, + { + "epoch": 0.4839103869653768, + "grad_norm": 0.6220391667566061, + "learning_rate": 5.503395332537775e-06, + "loss": 0.3007, + "step": 10692 + }, + { + "epoch": 0.48395564607377234, + "grad_norm": 0.6090721522407699, + "learning_rate": 5.502666130118912e-06, + "loss": 0.3194, + "step": 10693 + }, + { + "epoch": 0.48400090518216793, + "grad_norm": 0.6498239850687649, + "learning_rate": 5.501936916899299e-06, + "loss": 0.3303, + "step": 10694 + }, + { + "epoch": 0.48404616429056346, + "grad_norm": 0.5225679995389199, + "learning_rate": 5.5012076928946035e-06, + "loss": 0.504, + "step": 10695 + }, + { + "epoch": 0.48409142339895905, + "grad_norm": 0.6396807582547848, + "learning_rate": 5.500478458120493e-06, + "loss": 0.3156, + "step": 10696 + }, + { + "epoch": 0.4841366825073546, + "grad_norm": 0.6745967780966945, + "learning_rate": 5.499749212592638e-06, + "loss": 0.3393, + "step": 10697 + }, + { + "epoch": 0.4841819416157502, + "grad_norm": 0.5948531132657466, + "learning_rate": 5.499019956326707e-06, + "loss": 0.3375, + "step": 10698 + }, + { + "epoch": 0.4842272007241457, + "grad_norm": 0.7187557241178281, + "learning_rate": 5.498290689338369e-06, + "loss": 0.365, + "step": 10699 + }, + { + "epoch": 0.4842724598325413, + "grad_norm": 0.6229183218355234, + "learning_rate": 5.497561411643295e-06, + "loss": 0.3192, + "step": 10700 + }, + { + "epoch": 0.48431771894093684, + "grad_norm": 0.65019446915603, + "learning_rate": 5.496832123257154e-06, + "loss": 0.3165, + "step": 10701 + }, + { + "epoch": 0.4843629780493324, + "grad_norm": 0.6392369740491995, + "learning_rate": 5.496102824195618e-06, + "loss": 0.3165, + "step": 10702 + }, + { + "epoch": 0.484408237157728, + "grad_norm": 0.6310505782519518, + "learning_rate": 5.495373514474356e-06, + "loss": 0.3322, + "step": 10703 + }, + { + "epoch": 0.48445349626612355, + "grad_norm": 0.3597350218284494, + "learning_rate": 5.494644194109037e-06, + "loss": 0.4691, + "step": 10704 + }, + { + "epoch": 0.48449875537451914, + "grad_norm": 0.7423058383703874, + "learning_rate": 5.493914863115334e-06, + "loss": 0.3297, + "step": 10705 + }, + { + "epoch": 0.4845440144829147, + "grad_norm": 0.6461765373067897, + "learning_rate": 5.493185521508918e-06, + "loss": 0.3338, + "step": 10706 + }, + { + "epoch": 0.48458927359131027, + "grad_norm": 0.6258937227965276, + "learning_rate": 5.492456169305459e-06, + "loss": 0.3471, + "step": 10707 + }, + { + "epoch": 0.4846345326997058, + "grad_norm": 0.6575981123752621, + "learning_rate": 5.49172680652063e-06, + "loss": 0.2936, + "step": 10708 + }, + { + "epoch": 0.4846797918081014, + "grad_norm": 0.6326251364558374, + "learning_rate": 5.490997433170102e-06, + "loss": 0.3563, + "step": 10709 + }, + { + "epoch": 0.4847250509164969, + "grad_norm": 0.621611401740867, + "learning_rate": 5.490268049269547e-06, + "loss": 0.3364, + "step": 10710 + }, + { + "epoch": 0.4847703100248925, + "grad_norm": 0.6495453676007767, + "learning_rate": 5.489538654834638e-06, + "loss": 0.3161, + "step": 10711 + }, + { + "epoch": 0.48481556913328805, + "grad_norm": 0.7567639136682123, + "learning_rate": 5.488809249881046e-06, + "loss": 0.3478, + "step": 10712 + }, + { + "epoch": 0.48486082824168364, + "grad_norm": 0.6084504260324419, + "learning_rate": 5.488079834424446e-06, + "loss": 0.3391, + "step": 10713 + }, + { + "epoch": 0.48490608735007923, + "grad_norm": 0.6199904202505113, + "learning_rate": 5.487350408480507e-06, + "loss": 0.3693, + "step": 10714 + }, + { + "epoch": 0.48495134645847476, + "grad_norm": 0.694471474371993, + "learning_rate": 5.486620972064907e-06, + "loss": 0.3143, + "step": 10715 + }, + { + "epoch": 0.48499660556687035, + "grad_norm": 0.6589954660090999, + "learning_rate": 5.485891525193316e-06, + "loss": 0.3475, + "step": 10716 + }, + { + "epoch": 0.4850418646752659, + "grad_norm": 0.7121177870279708, + "learning_rate": 5.485162067881407e-06, + "loss": 0.3641, + "step": 10717 + }, + { + "epoch": 0.4850871237836615, + "grad_norm": 0.6038698862707603, + "learning_rate": 5.484432600144857e-06, + "loss": 0.2968, + "step": 10718 + }, + { + "epoch": 0.485132382892057, + "grad_norm": 0.3526127088415516, + "learning_rate": 5.483703121999337e-06, + "loss": 0.4761, + "step": 10719 + }, + { + "epoch": 0.4851776420004526, + "grad_norm": 0.6175444607040002, + "learning_rate": 5.482973633460524e-06, + "loss": 0.3395, + "step": 10720 + }, + { + "epoch": 0.48522290110884814, + "grad_norm": 0.6735534842917275, + "learning_rate": 5.48224413454409e-06, + "loss": 0.3059, + "step": 10721 + }, + { + "epoch": 0.4852681602172437, + "grad_norm": 0.6806734732368638, + "learning_rate": 5.481514625265709e-06, + "loss": 0.3289, + "step": 10722 + }, + { + "epoch": 0.48531341932563926, + "grad_norm": 0.6232723889284854, + "learning_rate": 5.480785105641061e-06, + "loss": 0.3057, + "step": 10723 + }, + { + "epoch": 0.48535867843403485, + "grad_norm": 0.3112196698409816, + "learning_rate": 5.480055575685815e-06, + "loss": 0.4743, + "step": 10724 + }, + { + "epoch": 0.48540393754243044, + "grad_norm": 0.6491875086620946, + "learning_rate": 5.479326035415651e-06, + "loss": 0.3451, + "step": 10725 + }, + { + "epoch": 0.485449196650826, + "grad_norm": 0.5653696631158297, + "learning_rate": 5.47859648484624e-06, + "loss": 0.3236, + "step": 10726 + }, + { + "epoch": 0.48549445575922157, + "grad_norm": 0.6438329748116453, + "learning_rate": 5.477866923993262e-06, + "loss": 0.3604, + "step": 10727 + }, + { + "epoch": 0.4855397148676171, + "grad_norm": 0.6606980186635552, + "learning_rate": 5.477137352872393e-06, + "loss": 0.3349, + "step": 10728 + }, + { + "epoch": 0.4855849739760127, + "grad_norm": 0.6504920369442497, + "learning_rate": 5.476407771499305e-06, + "loss": 0.3571, + "step": 10729 + }, + { + "epoch": 0.4856302330844082, + "grad_norm": 0.6187465532547302, + "learning_rate": 5.475678179889678e-06, + "loss": 0.3178, + "step": 10730 + }, + { + "epoch": 0.4856754921928038, + "grad_norm": 0.618073859499965, + "learning_rate": 5.474948578059188e-06, + "loss": 0.3314, + "step": 10731 + }, + { + "epoch": 0.48572075130119935, + "grad_norm": 0.60108084486261, + "learning_rate": 5.474218966023512e-06, + "loss": 0.2919, + "step": 10732 + }, + { + "epoch": 0.48576601040959494, + "grad_norm": 0.6285127287027362, + "learning_rate": 5.473489343798327e-06, + "loss": 0.3514, + "step": 10733 + }, + { + "epoch": 0.4858112695179905, + "grad_norm": 0.5864677323792765, + "learning_rate": 5.472759711399311e-06, + "loss": 0.302, + "step": 10734 + }, + { + "epoch": 0.48585652862638606, + "grad_norm": 0.6108680980516022, + "learning_rate": 5.472030068842139e-06, + "loss": 0.3118, + "step": 10735 + }, + { + "epoch": 0.4859017877347816, + "grad_norm": 0.6878846950476892, + "learning_rate": 5.471300416142492e-06, + "loss": 0.3982, + "step": 10736 + }, + { + "epoch": 0.4859470468431772, + "grad_norm": 0.5736098866508658, + "learning_rate": 5.470570753316046e-06, + "loss": 0.3123, + "step": 10737 + }, + { + "epoch": 0.4859923059515728, + "grad_norm": 0.39332779899616394, + "learning_rate": 5.469841080378479e-06, + "loss": 0.486, + "step": 10738 + }, + { + "epoch": 0.4860375650599683, + "grad_norm": 0.6134887660443886, + "learning_rate": 5.469111397345471e-06, + "loss": 0.2917, + "step": 10739 + }, + { + "epoch": 0.4860828241683639, + "grad_norm": 0.616320225429893, + "learning_rate": 5.468381704232699e-06, + "loss": 0.3249, + "step": 10740 + }, + { + "epoch": 0.48612808327675944, + "grad_norm": 0.30036757827431215, + "learning_rate": 5.467652001055844e-06, + "loss": 0.4679, + "step": 10741 + }, + { + "epoch": 0.486173342385155, + "grad_norm": 0.627407516553, + "learning_rate": 5.466922287830584e-06, + "loss": 0.314, + "step": 10742 + }, + { + "epoch": 0.48621860149355056, + "grad_norm": 0.6642704155718921, + "learning_rate": 5.466192564572597e-06, + "loss": 0.36, + "step": 10743 + }, + { + "epoch": 0.48626386060194615, + "grad_norm": 0.6556948496626056, + "learning_rate": 5.465462831297564e-06, + "loss": 0.3316, + "step": 10744 + }, + { + "epoch": 0.4863091197103417, + "grad_norm": 0.3150740101579165, + "learning_rate": 5.464733088021165e-06, + "loss": 0.4845, + "step": 10745 + }, + { + "epoch": 0.4863543788187373, + "grad_norm": 0.3502713140017343, + "learning_rate": 5.464003334759077e-06, + "loss": 0.4992, + "step": 10746 + }, + { + "epoch": 0.4863996379271328, + "grad_norm": 0.675623605474163, + "learning_rate": 5.463273571526985e-06, + "loss": 0.324, + "step": 10747 + }, + { + "epoch": 0.4864448970355284, + "grad_norm": 0.6686506686394733, + "learning_rate": 5.462543798340565e-06, + "loss": 0.3361, + "step": 10748 + }, + { + "epoch": 0.486490156143924, + "grad_norm": 0.6719577676699698, + "learning_rate": 5.4618140152155e-06, + "loss": 0.3557, + "step": 10749 + }, + { + "epoch": 0.4865354152523195, + "grad_norm": 0.625166400770162, + "learning_rate": 5.461084222167471e-06, + "loss": 0.3191, + "step": 10750 + }, + { + "epoch": 0.4865806743607151, + "grad_norm": 0.6150470753407132, + "learning_rate": 5.460354419212156e-06, + "loss": 0.3423, + "step": 10751 + }, + { + "epoch": 0.48662593346911065, + "grad_norm": 0.6496271771479472, + "learning_rate": 5.4596246063652405e-06, + "loss": 0.3481, + "step": 10752 + }, + { + "epoch": 0.48667119257750624, + "grad_norm": 0.35633811231314305, + "learning_rate": 5.458894783642402e-06, + "loss": 0.4824, + "step": 10753 + }, + { + "epoch": 0.4867164516859018, + "grad_norm": 0.6279048917219101, + "learning_rate": 5.458164951059326e-06, + "loss": 0.3248, + "step": 10754 + }, + { + "epoch": 0.48676171079429736, + "grad_norm": 0.8146324246424269, + "learning_rate": 5.457435108631691e-06, + "loss": 0.3321, + "step": 10755 + }, + { + "epoch": 0.4868069699026929, + "grad_norm": 0.6477127892981791, + "learning_rate": 5.456705256375181e-06, + "loss": 0.2981, + "step": 10756 + }, + { + "epoch": 0.4868522290110885, + "grad_norm": 0.6579613350271492, + "learning_rate": 5.455975394305477e-06, + "loss": 0.3271, + "step": 10757 + }, + { + "epoch": 0.486897488119484, + "grad_norm": 0.5833393576827078, + "learning_rate": 5.455245522438263e-06, + "loss": 0.3102, + "step": 10758 + }, + { + "epoch": 0.4869427472278796, + "grad_norm": 0.6247727229746702, + "learning_rate": 5.4545156407892204e-06, + "loss": 0.3344, + "step": 10759 + }, + { + "epoch": 0.48698800633627515, + "grad_norm": 0.689951121413877, + "learning_rate": 5.453785749374033e-06, + "loss": 0.2856, + "step": 10760 + }, + { + "epoch": 0.48703326544467074, + "grad_norm": 0.64134476580352, + "learning_rate": 5.453055848208383e-06, + "loss": 0.348, + "step": 10761 + }, + { + "epoch": 0.4870785245530663, + "grad_norm": 0.6136023603777939, + "learning_rate": 5.452325937307955e-06, + "loss": 0.3274, + "step": 10762 + }, + { + "epoch": 0.48712378366146186, + "grad_norm": 0.3744441172659562, + "learning_rate": 5.4515960166884315e-06, + "loss": 0.4868, + "step": 10763 + }, + { + "epoch": 0.48716904276985745, + "grad_norm": 0.6212186162849846, + "learning_rate": 5.450866086365496e-06, + "loss": 0.3281, + "step": 10764 + }, + { + "epoch": 0.487214301878253, + "grad_norm": 0.2842456058853913, + "learning_rate": 5.450136146354834e-06, + "loss": 0.4618, + "step": 10765 + }, + { + "epoch": 0.4872595609866486, + "grad_norm": 0.6214680943609159, + "learning_rate": 5.449406196672129e-06, + "loss": 0.3403, + "step": 10766 + }, + { + "epoch": 0.4873048200950441, + "grad_norm": 0.6344077106540689, + "learning_rate": 5.448676237333064e-06, + "loss": 0.3273, + "step": 10767 + }, + { + "epoch": 0.4873500792034397, + "grad_norm": 0.7736892506859431, + "learning_rate": 5.447946268353324e-06, + "loss": 0.3384, + "step": 10768 + }, + { + "epoch": 0.48739533831183524, + "grad_norm": 0.6034017418949362, + "learning_rate": 5.447216289748596e-06, + "loss": 0.296, + "step": 10769 + }, + { + "epoch": 0.4874405974202308, + "grad_norm": 0.6784861654016594, + "learning_rate": 5.446486301534564e-06, + "loss": 0.3702, + "step": 10770 + }, + { + "epoch": 0.48748585652862636, + "grad_norm": 0.6188537207264507, + "learning_rate": 5.445756303726913e-06, + "loss": 0.2995, + "step": 10771 + }, + { + "epoch": 0.48753111563702195, + "grad_norm": 0.7374548514598774, + "learning_rate": 5.445026296341325e-06, + "loss": 0.3689, + "step": 10772 + }, + { + "epoch": 0.48757637474541754, + "grad_norm": 0.5612933828257421, + "learning_rate": 5.44429627939349e-06, + "loss": 0.3288, + "step": 10773 + }, + { + "epoch": 0.4876216338538131, + "grad_norm": 0.5757263653169219, + "learning_rate": 5.443566252899093e-06, + "loss": 0.3163, + "step": 10774 + }, + { + "epoch": 0.48766689296220866, + "grad_norm": 0.6066722995925015, + "learning_rate": 5.442836216873819e-06, + "loss": 0.3361, + "step": 10775 + }, + { + "epoch": 0.4877121520706042, + "grad_norm": 0.6216811471150734, + "learning_rate": 5.442106171333355e-06, + "loss": 0.3517, + "step": 10776 + }, + { + "epoch": 0.4877574111789998, + "grad_norm": 0.6070444961735875, + "learning_rate": 5.441376116293388e-06, + "loss": 0.3058, + "step": 10777 + }, + { + "epoch": 0.4878026702873953, + "grad_norm": 0.4419608326387728, + "learning_rate": 5.4406460517696035e-06, + "loss": 0.4778, + "step": 10778 + }, + { + "epoch": 0.4878479293957909, + "grad_norm": 0.6615232986768225, + "learning_rate": 5.439915977777689e-06, + "loss": 0.297, + "step": 10779 + }, + { + "epoch": 0.48789318850418645, + "grad_norm": 0.6232505080950455, + "learning_rate": 5.43918589433333e-06, + "loss": 0.287, + "step": 10780 + }, + { + "epoch": 0.48793844761258204, + "grad_norm": 0.6065556060776699, + "learning_rate": 5.438455801452216e-06, + "loss": 0.3024, + "step": 10781 + }, + { + "epoch": 0.4879837067209776, + "grad_norm": 0.2888300571280535, + "learning_rate": 5.437725699150031e-06, + "loss": 0.4714, + "step": 10782 + }, + { + "epoch": 0.48802896582937316, + "grad_norm": 0.6129089740283804, + "learning_rate": 5.43699558744247e-06, + "loss": 0.3645, + "step": 10783 + }, + { + "epoch": 0.48807422493776875, + "grad_norm": 0.6321064368855184, + "learning_rate": 5.4362654663452115e-06, + "loss": 0.3244, + "step": 10784 + }, + { + "epoch": 0.4881194840461643, + "grad_norm": 0.6236913706818845, + "learning_rate": 5.435535335873951e-06, + "loss": 0.3237, + "step": 10785 + }, + { + "epoch": 0.4881647431545599, + "grad_norm": 0.6794406833180068, + "learning_rate": 5.434805196044372e-06, + "loss": 0.3217, + "step": 10786 + }, + { + "epoch": 0.4882100022629554, + "grad_norm": 0.6015193977905489, + "learning_rate": 5.434075046872165e-06, + "loss": 0.3482, + "step": 10787 + }, + { + "epoch": 0.488255261371351, + "grad_norm": 0.7173251165967416, + "learning_rate": 5.4333448883730176e-06, + "loss": 0.3236, + "step": 10788 + }, + { + "epoch": 0.48830052047974654, + "grad_norm": 0.5847772835682842, + "learning_rate": 5.432614720562621e-06, + "loss": 0.3191, + "step": 10789 + }, + { + "epoch": 0.4883457795881421, + "grad_norm": 0.564475806463531, + "learning_rate": 5.431884543456662e-06, + "loss": 0.3342, + "step": 10790 + }, + { + "epoch": 0.48839103869653766, + "grad_norm": 0.3581390264447621, + "learning_rate": 5.43115435707083e-06, + "loss": 0.4891, + "step": 10791 + }, + { + "epoch": 0.48843629780493325, + "grad_norm": 0.31725716739829074, + "learning_rate": 5.430424161420817e-06, + "loss": 0.4979, + "step": 10792 + }, + { + "epoch": 0.4884815569133288, + "grad_norm": 0.6605082182680612, + "learning_rate": 5.429693956522308e-06, + "loss": 0.3351, + "step": 10793 + }, + { + "epoch": 0.4885268160217244, + "grad_norm": 0.6187867157877088, + "learning_rate": 5.428963742390998e-06, + "loss": 0.276, + "step": 10794 + }, + { + "epoch": 0.4885720751301199, + "grad_norm": 0.6339357376584758, + "learning_rate": 5.428233519042574e-06, + "loss": 0.3166, + "step": 10795 + }, + { + "epoch": 0.4886173342385155, + "grad_norm": 0.6775069897284436, + "learning_rate": 5.427503286492727e-06, + "loss": 0.3244, + "step": 10796 + }, + { + "epoch": 0.4886625933469111, + "grad_norm": 0.5769499742725703, + "learning_rate": 5.426773044757146e-06, + "loss": 0.3392, + "step": 10797 + }, + { + "epoch": 0.4887078524553066, + "grad_norm": 0.6189106653236469, + "learning_rate": 5.426042793851525e-06, + "loss": 0.3598, + "step": 10798 + }, + { + "epoch": 0.4887531115637022, + "grad_norm": 0.4653135340607896, + "learning_rate": 5.4253125337915514e-06, + "loss": 0.4796, + "step": 10799 + }, + { + "epoch": 0.48879837067209775, + "grad_norm": 0.5879197568638728, + "learning_rate": 5.424582264592919e-06, + "loss": 0.325, + "step": 10800 + }, + { + "epoch": 0.48884362978049334, + "grad_norm": 0.6698195305676378, + "learning_rate": 5.423851986271316e-06, + "loss": 0.3432, + "step": 10801 + }, + { + "epoch": 0.4888888888888889, + "grad_norm": 0.5785312045865859, + "learning_rate": 5.423121698842437e-06, + "loss": 0.3195, + "step": 10802 + }, + { + "epoch": 0.48893414799728446, + "grad_norm": 0.6450008538019762, + "learning_rate": 5.422391402321971e-06, + "loss": 0.317, + "step": 10803 + }, + { + "epoch": 0.48897940710568, + "grad_norm": 0.6335218164038602, + "learning_rate": 5.421661096725612e-06, + "loss": 0.3354, + "step": 10804 + }, + { + "epoch": 0.4890246662140756, + "grad_norm": 0.5882818683954184, + "learning_rate": 5.42093078206905e-06, + "loss": 0.3516, + "step": 10805 + }, + { + "epoch": 0.4890699253224711, + "grad_norm": 0.6605788221478196, + "learning_rate": 5.42020045836798e-06, + "loss": 0.3559, + "step": 10806 + }, + { + "epoch": 0.4891151844308667, + "grad_norm": 0.6831599714525608, + "learning_rate": 5.419470125638091e-06, + "loss": 0.3454, + "step": 10807 + }, + { + "epoch": 0.4891604435392623, + "grad_norm": 0.820649625790173, + "learning_rate": 5.418739783895079e-06, + "loss": 0.3969, + "step": 10808 + }, + { + "epoch": 0.48920570264765784, + "grad_norm": 0.5944023432998939, + "learning_rate": 5.418009433154633e-06, + "loss": 0.3134, + "step": 10809 + }, + { + "epoch": 0.4892509617560534, + "grad_norm": 0.6119284376862187, + "learning_rate": 5.41727907343245e-06, + "loss": 0.3646, + "step": 10810 + }, + { + "epoch": 0.48929622086444896, + "grad_norm": 0.33465584922221014, + "learning_rate": 5.41654870474422e-06, + "loss": 0.5179, + "step": 10811 + }, + { + "epoch": 0.48934147997284455, + "grad_norm": 0.6614144868552617, + "learning_rate": 5.4158183271056385e-06, + "loss": 0.3378, + "step": 10812 + }, + { + "epoch": 0.4893867390812401, + "grad_norm": 0.2797843285100049, + "learning_rate": 5.415087940532398e-06, + "loss": 0.4535, + "step": 10813 + }, + { + "epoch": 0.4894319981896357, + "grad_norm": 0.6000761307553717, + "learning_rate": 5.414357545040193e-06, + "loss": 0.3209, + "step": 10814 + }, + { + "epoch": 0.4894772572980312, + "grad_norm": 0.6367329656813222, + "learning_rate": 5.413627140644716e-06, + "loss": 0.3127, + "step": 10815 + }, + { + "epoch": 0.4895225164064268, + "grad_norm": 0.6852619243332301, + "learning_rate": 5.412896727361663e-06, + "loss": 0.3298, + "step": 10816 + }, + { + "epoch": 0.48956777551482233, + "grad_norm": 0.6331187290160614, + "learning_rate": 5.4121663052067265e-06, + "loss": 0.2659, + "step": 10817 + }, + { + "epoch": 0.4896130346232179, + "grad_norm": 0.6319135130209138, + "learning_rate": 5.411435874195602e-06, + "loss": 0.3313, + "step": 10818 + }, + { + "epoch": 0.4896582937316135, + "grad_norm": 0.6181685095469309, + "learning_rate": 5.410705434343985e-06, + "loss": 0.3094, + "step": 10819 + }, + { + "epoch": 0.48970355284000905, + "grad_norm": 0.5959127812339418, + "learning_rate": 5.409974985667569e-06, + "loss": 0.2987, + "step": 10820 + }, + { + "epoch": 0.48974881194840464, + "grad_norm": 0.6046669266642816, + "learning_rate": 5.409244528182051e-06, + "loss": 0.3174, + "step": 10821 + }, + { + "epoch": 0.4897940710568002, + "grad_norm": 0.6079813229979458, + "learning_rate": 5.408514061903123e-06, + "loss": 0.32, + "step": 10822 + }, + { + "epoch": 0.48983933016519576, + "grad_norm": 0.6102670213077448, + "learning_rate": 5.407783586846484e-06, + "loss": 0.3443, + "step": 10823 + }, + { + "epoch": 0.4898845892735913, + "grad_norm": 0.6393248736433033, + "learning_rate": 5.407053103027826e-06, + "loss": 0.3698, + "step": 10824 + }, + { + "epoch": 0.4899298483819869, + "grad_norm": 0.36085845063400557, + "learning_rate": 5.40632261046285e-06, + "loss": 0.4866, + "step": 10825 + }, + { + "epoch": 0.4899751074903824, + "grad_norm": 0.6483175296284592, + "learning_rate": 5.405592109167247e-06, + "loss": 0.3598, + "step": 10826 + }, + { + "epoch": 0.490020366598778, + "grad_norm": 0.6434207102297304, + "learning_rate": 5.404861599156715e-06, + "loss": 0.2913, + "step": 10827 + }, + { + "epoch": 0.49006562570717355, + "grad_norm": 0.6106852104150455, + "learning_rate": 5.404131080446952e-06, + "loss": 0.328, + "step": 10828 + }, + { + "epoch": 0.49011088481556914, + "grad_norm": 0.5672917334952595, + "learning_rate": 5.403400553053654e-06, + "loss": 0.3257, + "step": 10829 + }, + { + "epoch": 0.49015614392396467, + "grad_norm": 0.5955844527154185, + "learning_rate": 5.402670016992514e-06, + "loss": 0.3027, + "step": 10830 + }, + { + "epoch": 0.49020140303236026, + "grad_norm": 0.6209346722321435, + "learning_rate": 5.401939472279235e-06, + "loss": 0.3474, + "step": 10831 + }, + { + "epoch": 0.49024666214075585, + "grad_norm": 0.5779865260605683, + "learning_rate": 5.401208918929509e-06, + "loss": 0.2958, + "step": 10832 + }, + { + "epoch": 0.4902919212491514, + "grad_norm": 0.6041288922310544, + "learning_rate": 5.400478356959037e-06, + "loss": 0.3225, + "step": 10833 + }, + { + "epoch": 0.490337180357547, + "grad_norm": 0.5982865724226629, + "learning_rate": 5.399747786383515e-06, + "loss": 0.3329, + "step": 10834 + }, + { + "epoch": 0.4903824394659425, + "grad_norm": 0.6113716925853488, + "learning_rate": 5.39901720721864e-06, + "loss": 0.3717, + "step": 10835 + }, + { + "epoch": 0.4904276985743381, + "grad_norm": 0.6375109781082008, + "learning_rate": 5.398286619480111e-06, + "loss": 0.3493, + "step": 10836 + }, + { + "epoch": 0.49047295768273363, + "grad_norm": 0.6280684294342922, + "learning_rate": 5.397556023183627e-06, + "loss": 0.3364, + "step": 10837 + }, + { + "epoch": 0.4905182167911292, + "grad_norm": 0.632665793054112, + "learning_rate": 5.396825418344883e-06, + "loss": 0.3083, + "step": 10838 + }, + { + "epoch": 0.49056347589952476, + "grad_norm": 0.35018151725738517, + "learning_rate": 5.39609480497958e-06, + "loss": 0.4969, + "step": 10839 + }, + { + "epoch": 0.49060873500792035, + "grad_norm": 0.3135080903253166, + "learning_rate": 5.395364183103418e-06, + "loss": 0.4889, + "step": 10840 + }, + { + "epoch": 0.4906539941163159, + "grad_norm": 0.6470675002316936, + "learning_rate": 5.394633552732091e-06, + "loss": 0.3122, + "step": 10841 + }, + { + "epoch": 0.4906992532247115, + "grad_norm": 0.6641179250395611, + "learning_rate": 5.393902913881304e-06, + "loss": 0.3127, + "step": 10842 + }, + { + "epoch": 0.49074451233310706, + "grad_norm": 0.6837994695623595, + "learning_rate": 5.393172266566751e-06, + "loss": 0.2972, + "step": 10843 + }, + { + "epoch": 0.4907897714415026, + "grad_norm": 0.57976310001045, + "learning_rate": 5.392441610804135e-06, + "loss": 0.3528, + "step": 10844 + }, + { + "epoch": 0.4908350305498982, + "grad_norm": 0.6808006276626665, + "learning_rate": 5.391710946609152e-06, + "loss": 0.3338, + "step": 10845 + }, + { + "epoch": 0.4908802896582937, + "grad_norm": 0.6162543213535839, + "learning_rate": 5.390980273997507e-06, + "loss": 0.3185, + "step": 10846 + }, + { + "epoch": 0.4909255487666893, + "grad_norm": 0.4064457148287276, + "learning_rate": 5.390249592984894e-06, + "loss": 0.4859, + "step": 10847 + }, + { + "epoch": 0.49097080787508485, + "grad_norm": 0.6968576972450587, + "learning_rate": 5.389518903587016e-06, + "loss": 0.3389, + "step": 10848 + }, + { + "epoch": 0.49101606698348044, + "grad_norm": 0.5918707805528982, + "learning_rate": 5.388788205819575e-06, + "loss": 0.3141, + "step": 10849 + }, + { + "epoch": 0.49106132609187597, + "grad_norm": 0.6700538372595888, + "learning_rate": 5.38805749969827e-06, + "loss": 0.3083, + "step": 10850 + }, + { + "epoch": 0.49110658520027156, + "grad_norm": 0.5692254658928143, + "learning_rate": 5.387326785238798e-06, + "loss": 0.3105, + "step": 10851 + }, + { + "epoch": 0.4911518443086671, + "grad_norm": 0.7663508797023766, + "learning_rate": 5.386596062456865e-06, + "loss": 0.3289, + "step": 10852 + }, + { + "epoch": 0.4911971034170627, + "grad_norm": 0.7226289709380682, + "learning_rate": 5.385865331368169e-06, + "loss": 0.329, + "step": 10853 + }, + { + "epoch": 0.4912423625254583, + "grad_norm": 0.6834416027758144, + "learning_rate": 5.385134591988412e-06, + "loss": 0.3443, + "step": 10854 + }, + { + "epoch": 0.4912876216338538, + "grad_norm": 0.6554737185398497, + "learning_rate": 5.384403844333297e-06, + "loss": 0.3413, + "step": 10855 + }, + { + "epoch": 0.4913328807422494, + "grad_norm": 0.6095854602314272, + "learning_rate": 5.383673088418523e-06, + "loss": 0.3498, + "step": 10856 + }, + { + "epoch": 0.49137813985064493, + "grad_norm": 0.6254987626059457, + "learning_rate": 5.382942324259792e-06, + "loss": 0.3505, + "step": 10857 + }, + { + "epoch": 0.4914233989590405, + "grad_norm": 0.600840135233815, + "learning_rate": 5.382211551872808e-06, + "loss": 0.3281, + "step": 10858 + }, + { + "epoch": 0.49146865806743606, + "grad_norm": 0.7062759844203941, + "learning_rate": 5.38148077127327e-06, + "loss": 0.3398, + "step": 10859 + }, + { + "epoch": 0.49151391717583165, + "grad_norm": 0.6182077333196346, + "learning_rate": 5.380749982476884e-06, + "loss": 0.3508, + "step": 10860 + }, + { + "epoch": 0.4915591762842272, + "grad_norm": 0.6250150814729192, + "learning_rate": 5.380019185499348e-06, + "loss": 0.3335, + "step": 10861 + }, + { + "epoch": 0.4916044353926228, + "grad_norm": 0.644831247436958, + "learning_rate": 5.379288380356369e-06, + "loss": 0.3034, + "step": 10862 + }, + { + "epoch": 0.4916496945010183, + "grad_norm": 0.6896682788376114, + "learning_rate": 5.378557567063646e-06, + "loss": 0.3236, + "step": 10863 + }, + { + "epoch": 0.4916949536094139, + "grad_norm": 0.6116663195157774, + "learning_rate": 5.3778267456368836e-06, + "loss": 0.3136, + "step": 10864 + }, + { + "epoch": 0.49174021271780943, + "grad_norm": 0.686230452116269, + "learning_rate": 5.377095916091786e-06, + "loss": 0.4097, + "step": 10865 + }, + { + "epoch": 0.491785471826205, + "grad_norm": 0.6313581308698523, + "learning_rate": 5.376365078444053e-06, + "loss": 0.3125, + "step": 10866 + }, + { + "epoch": 0.4918307309346006, + "grad_norm": 0.6151971347833678, + "learning_rate": 5.375634232709392e-06, + "loss": 0.3094, + "step": 10867 + }, + { + "epoch": 0.49187599004299615, + "grad_norm": 0.6493310508228657, + "learning_rate": 5.374903378903506e-06, + "loss": 0.3611, + "step": 10868 + }, + { + "epoch": 0.49192124915139174, + "grad_norm": 0.624810363256607, + "learning_rate": 5.374172517042095e-06, + "loss": 0.3345, + "step": 10869 + }, + { + "epoch": 0.49196650825978727, + "grad_norm": 0.620486668426, + "learning_rate": 5.373441647140868e-06, + "loss": 0.3351, + "step": 10870 + }, + { + "epoch": 0.49201176736818286, + "grad_norm": 0.6993162835016654, + "learning_rate": 5.372710769215528e-06, + "loss": 0.3263, + "step": 10871 + }, + { + "epoch": 0.4920570264765784, + "grad_norm": 0.6195139398422571, + "learning_rate": 5.371979883281775e-06, + "loss": 0.3029, + "step": 10872 + }, + { + "epoch": 0.492102285584974, + "grad_norm": 0.6278648577458785, + "learning_rate": 5.37124898935532e-06, + "loss": 0.3384, + "step": 10873 + }, + { + "epoch": 0.4921475446933695, + "grad_norm": 0.3736919014627429, + "learning_rate": 5.370518087451861e-06, + "loss": 0.4994, + "step": 10874 + }, + { + "epoch": 0.4921928038017651, + "grad_norm": 0.31140376218161614, + "learning_rate": 5.36978717758711e-06, + "loss": 0.467, + "step": 10875 + }, + { + "epoch": 0.49223806291016065, + "grad_norm": 0.6264908346834731, + "learning_rate": 5.369056259776766e-06, + "loss": 0.3101, + "step": 10876 + }, + { + "epoch": 0.49228332201855624, + "grad_norm": 0.6107193434356737, + "learning_rate": 5.368325334036537e-06, + "loss": 0.3226, + "step": 10877 + }, + { + "epoch": 0.4923285811269518, + "grad_norm": 0.6140612466191963, + "learning_rate": 5.367594400382128e-06, + "loss": 0.3546, + "step": 10878 + }, + { + "epoch": 0.49237384023534736, + "grad_norm": 0.6609013483376353, + "learning_rate": 5.366863458829245e-06, + "loss": 0.3197, + "step": 10879 + }, + { + "epoch": 0.49241909934374295, + "grad_norm": 0.6760753743411584, + "learning_rate": 5.36613250939359e-06, + "loss": 0.3521, + "step": 10880 + }, + { + "epoch": 0.4924643584521385, + "grad_norm": 0.6654052260248408, + "learning_rate": 5.365401552090876e-06, + "loss": 0.3776, + "step": 10881 + }, + { + "epoch": 0.4925096175605341, + "grad_norm": 0.6076148237516632, + "learning_rate": 5.364670586936801e-06, + "loss": 0.3614, + "step": 10882 + }, + { + "epoch": 0.4925548766689296, + "grad_norm": 0.6324147862809746, + "learning_rate": 5.363939613947078e-06, + "loss": 0.3794, + "step": 10883 + }, + { + "epoch": 0.4926001357773252, + "grad_norm": 0.674557660483406, + "learning_rate": 5.363208633137409e-06, + "loss": 0.3159, + "step": 10884 + }, + { + "epoch": 0.49264539488572073, + "grad_norm": 0.6074914429601798, + "learning_rate": 5.3624776445235025e-06, + "loss": 0.3391, + "step": 10885 + }, + { + "epoch": 0.4926906539941163, + "grad_norm": 0.6394413452281954, + "learning_rate": 5.361746648121064e-06, + "loss": 0.3425, + "step": 10886 + }, + { + "epoch": 0.49273591310251186, + "grad_norm": 0.5797041229041443, + "learning_rate": 5.361015643945803e-06, + "loss": 0.3405, + "step": 10887 + }, + { + "epoch": 0.49278117221090745, + "grad_norm": 0.6373249682044135, + "learning_rate": 5.3602846320134216e-06, + "loss": 0.3199, + "step": 10888 + }, + { + "epoch": 0.492826431319303, + "grad_norm": 0.6562131440044063, + "learning_rate": 5.359553612339633e-06, + "loss": 0.2873, + "step": 10889 + }, + { + "epoch": 0.49287169042769857, + "grad_norm": 0.7090555888596984, + "learning_rate": 5.358822584940139e-06, + "loss": 0.3358, + "step": 10890 + }, + { + "epoch": 0.49291694953609416, + "grad_norm": 0.605339486263966, + "learning_rate": 5.358091549830651e-06, + "loss": 0.336, + "step": 10891 + }, + { + "epoch": 0.4929622086444897, + "grad_norm": 0.6203741456837942, + "learning_rate": 5.357360507026875e-06, + "loss": 0.3124, + "step": 10892 + }, + { + "epoch": 0.4930074677528853, + "grad_norm": 0.685886739956919, + "learning_rate": 5.35662945654452e-06, + "loss": 0.3535, + "step": 10893 + }, + { + "epoch": 0.4930527268612808, + "grad_norm": 0.5849598914637093, + "learning_rate": 5.3558983983992915e-06, + "loss": 0.4987, + "step": 10894 + }, + { + "epoch": 0.4930979859696764, + "grad_norm": 0.6099707630801916, + "learning_rate": 5.355167332606901e-06, + "loss": 0.2888, + "step": 10895 + }, + { + "epoch": 0.49314324507807195, + "grad_norm": 0.387638619022389, + "learning_rate": 5.354436259183054e-06, + "loss": 0.4624, + "step": 10896 + }, + { + "epoch": 0.49318850418646754, + "grad_norm": 0.6293204916290952, + "learning_rate": 5.353705178143462e-06, + "loss": 0.3194, + "step": 10897 + }, + { + "epoch": 0.49323376329486307, + "grad_norm": 0.6166390666247227, + "learning_rate": 5.352974089503832e-06, + "loss": 0.3186, + "step": 10898 + }, + { + "epoch": 0.49327902240325866, + "grad_norm": 0.6305222876486003, + "learning_rate": 5.352242993279871e-06, + "loss": 0.3787, + "step": 10899 + }, + { + "epoch": 0.4933242815116542, + "grad_norm": 0.6176998912923989, + "learning_rate": 5.351511889487293e-06, + "loss": 0.3472, + "step": 10900 + }, + { + "epoch": 0.4933695406200498, + "grad_norm": 0.6752658070048627, + "learning_rate": 5.350780778141801e-06, + "loss": 0.3142, + "step": 10901 + }, + { + "epoch": 0.4934147997284454, + "grad_norm": 0.7026151470603141, + "learning_rate": 5.35004965925911e-06, + "loss": 0.4815, + "step": 10902 + }, + { + "epoch": 0.4934600588368409, + "grad_norm": 0.6056281896817226, + "learning_rate": 5.349318532854924e-06, + "loss": 0.3572, + "step": 10903 + }, + { + "epoch": 0.4935053179452365, + "grad_norm": 0.6059450088427075, + "learning_rate": 5.348587398944959e-06, + "loss": 0.3166, + "step": 10904 + }, + { + "epoch": 0.49355057705363203, + "grad_norm": 0.6160544560726442, + "learning_rate": 5.347856257544919e-06, + "loss": 0.2925, + "step": 10905 + }, + { + "epoch": 0.4935958361620276, + "grad_norm": 0.6557893318809236, + "learning_rate": 5.347125108670516e-06, + "loss": 0.3096, + "step": 10906 + }, + { + "epoch": 0.49364109527042316, + "grad_norm": 0.6031112497162145, + "learning_rate": 5.3463939523374616e-06, + "loss": 0.3395, + "step": 10907 + }, + { + "epoch": 0.49368635437881875, + "grad_norm": 0.7063980157611425, + "learning_rate": 5.345662788561466e-06, + "loss": 0.362, + "step": 10908 + }, + { + "epoch": 0.4937316134872143, + "grad_norm": 0.6090917761194881, + "learning_rate": 5.344931617358237e-06, + "loss": 0.3011, + "step": 10909 + }, + { + "epoch": 0.4937768725956099, + "grad_norm": 0.6698751151563621, + "learning_rate": 5.344200438743489e-06, + "loss": 0.3522, + "step": 10910 + }, + { + "epoch": 0.4938221317040054, + "grad_norm": 0.3498846920345231, + "learning_rate": 5.343469252732928e-06, + "loss": 0.4968, + "step": 10911 + }, + { + "epoch": 0.493867390812401, + "grad_norm": 0.6144344328879905, + "learning_rate": 5.34273805934227e-06, + "loss": 0.3478, + "step": 10912 + }, + { + "epoch": 0.4939126499207966, + "grad_norm": 0.6833336709587083, + "learning_rate": 5.342006858587222e-06, + "loss": 0.3395, + "step": 10913 + }, + { + "epoch": 0.4939579090291921, + "grad_norm": 0.6541159079724937, + "learning_rate": 5.341275650483497e-06, + "loss": 0.3197, + "step": 10914 + }, + { + "epoch": 0.4940031681375877, + "grad_norm": 0.5724005590293713, + "learning_rate": 5.340544435046807e-06, + "loss": 0.3453, + "step": 10915 + }, + { + "epoch": 0.49404842724598325, + "grad_norm": 0.6626481102515759, + "learning_rate": 5.3398132122928635e-06, + "loss": 0.3212, + "step": 10916 + }, + { + "epoch": 0.49409368635437884, + "grad_norm": 0.6123494656814092, + "learning_rate": 5.339081982237377e-06, + "loss": 0.3028, + "step": 10917 + }, + { + "epoch": 0.49413894546277437, + "grad_norm": 0.3694123866230547, + "learning_rate": 5.3383507448960605e-06, + "loss": 0.4727, + "step": 10918 + }, + { + "epoch": 0.49418420457116996, + "grad_norm": 0.38805313791080737, + "learning_rate": 5.3376195002846255e-06, + "loss": 0.4782, + "step": 10919 + }, + { + "epoch": 0.4942294636795655, + "grad_norm": 0.27574780421912554, + "learning_rate": 5.336888248418784e-06, + "loss": 0.4617, + "step": 10920 + }, + { + "epoch": 0.4942747227879611, + "grad_norm": 0.5884279939932177, + "learning_rate": 5.3361569893142505e-06, + "loss": 0.3636, + "step": 10921 + }, + { + "epoch": 0.4943199818963566, + "grad_norm": 0.6648088950131924, + "learning_rate": 5.335425722986735e-06, + "loss": 0.3125, + "step": 10922 + }, + { + "epoch": 0.4943652410047522, + "grad_norm": 0.6967573887842057, + "learning_rate": 5.334694449451949e-06, + "loss": 0.3286, + "step": 10923 + }, + { + "epoch": 0.49441050011314774, + "grad_norm": 0.5998366409788873, + "learning_rate": 5.3339631687256085e-06, + "loss": 0.3723, + "step": 10924 + }, + { + "epoch": 0.49445575922154333, + "grad_norm": 0.6340096059926653, + "learning_rate": 5.333231880823425e-06, + "loss": 0.308, + "step": 10925 + }, + { + "epoch": 0.4945010183299389, + "grad_norm": 0.6382985585954123, + "learning_rate": 5.3325005857611126e-06, + "loss": 0.3365, + "step": 10926 + }, + { + "epoch": 0.49454627743833446, + "grad_norm": 0.6236564404757622, + "learning_rate": 5.331769283554382e-06, + "loss": 0.3088, + "step": 10927 + }, + { + "epoch": 0.49459153654673005, + "grad_norm": 0.6272800491529169, + "learning_rate": 5.33103797421895e-06, + "loss": 0.3432, + "step": 10928 + }, + { + "epoch": 0.4946367956551256, + "grad_norm": 0.6036608440507453, + "learning_rate": 5.33030665777053e-06, + "loss": 0.292, + "step": 10929 + }, + { + "epoch": 0.4946820547635212, + "grad_norm": 0.6449185476085723, + "learning_rate": 5.329575334224832e-06, + "loss": 0.3284, + "step": 10930 + }, + { + "epoch": 0.4947273138719167, + "grad_norm": 0.6568644023628296, + "learning_rate": 5.328844003597573e-06, + "loss": 0.3207, + "step": 10931 + }, + { + "epoch": 0.4947725729803123, + "grad_norm": 0.6664172500534838, + "learning_rate": 5.328112665904465e-06, + "loss": 0.3314, + "step": 10932 + }, + { + "epoch": 0.49481783208870783, + "grad_norm": 0.7000066487436837, + "learning_rate": 5.3273813211612254e-06, + "loss": 0.4498, + "step": 10933 + }, + { + "epoch": 0.4948630911971034, + "grad_norm": 0.6227058982525127, + "learning_rate": 5.3266499693835664e-06, + "loss": 0.3427, + "step": 10934 + }, + { + "epoch": 0.49490835030549896, + "grad_norm": 0.6609441288231116, + "learning_rate": 5.325918610587202e-06, + "loss": 0.3172, + "step": 10935 + }, + { + "epoch": 0.49495360941389455, + "grad_norm": 0.6541893702664565, + "learning_rate": 5.325187244787848e-06, + "loss": 0.3608, + "step": 10936 + }, + { + "epoch": 0.49499886852229014, + "grad_norm": 0.6666335731827807, + "learning_rate": 5.324455872001221e-06, + "loss": 0.299, + "step": 10937 + }, + { + "epoch": 0.49504412763068567, + "grad_norm": 0.3023990661780046, + "learning_rate": 5.32372449224303e-06, + "loss": 0.4673, + "step": 10938 + }, + { + "epoch": 0.49508938673908126, + "grad_norm": 0.6952874310170034, + "learning_rate": 5.322993105528996e-06, + "loss": 0.307, + "step": 10939 + }, + { + "epoch": 0.4951346458474768, + "grad_norm": 0.654364686935079, + "learning_rate": 5.322261711874831e-06, + "loss": 0.3121, + "step": 10940 + }, + { + "epoch": 0.4951799049558724, + "grad_norm": 0.6533502738870345, + "learning_rate": 5.321530311296253e-06, + "loss": 0.346, + "step": 10941 + }, + { + "epoch": 0.4952251640642679, + "grad_norm": 0.36164641617759674, + "learning_rate": 5.320798903808976e-06, + "loss": 0.4691, + "step": 10942 + }, + { + "epoch": 0.4952704231726635, + "grad_norm": 0.6153895176913012, + "learning_rate": 5.320067489428715e-06, + "loss": 0.3039, + "step": 10943 + }, + { + "epoch": 0.49531568228105904, + "grad_norm": 0.6072482821425039, + "learning_rate": 5.319336068171187e-06, + "loss": 0.3082, + "step": 10944 + }, + { + "epoch": 0.49536094138945463, + "grad_norm": 0.6703237557809038, + "learning_rate": 5.318604640052107e-06, + "loss": 0.3422, + "step": 10945 + }, + { + "epoch": 0.49540620049785017, + "grad_norm": 0.617543960875071, + "learning_rate": 5.317873205087193e-06, + "loss": 0.3292, + "step": 10946 + }, + { + "epoch": 0.49545145960624576, + "grad_norm": 0.6251190285600072, + "learning_rate": 5.31714176329216e-06, + "loss": 0.3505, + "step": 10947 + }, + { + "epoch": 0.49549671871464135, + "grad_norm": 0.6543858933536746, + "learning_rate": 5.3164103146827225e-06, + "loss": 0.3389, + "step": 10948 + }, + { + "epoch": 0.4955419778230369, + "grad_norm": 0.7297640462728502, + "learning_rate": 5.315678859274601e-06, + "loss": 0.3592, + "step": 10949 + }, + { + "epoch": 0.4955872369314325, + "grad_norm": 0.6665550483127983, + "learning_rate": 5.314947397083512e-06, + "loss": 0.3158, + "step": 10950 + }, + { + "epoch": 0.495632496039828, + "grad_norm": 0.6365177479194217, + "learning_rate": 5.314215928125167e-06, + "loss": 0.3262, + "step": 10951 + }, + { + "epoch": 0.4956777551482236, + "grad_norm": 0.6694352069351751, + "learning_rate": 5.313484452415289e-06, + "loss": 0.3406, + "step": 10952 + }, + { + "epoch": 0.49572301425661913, + "grad_norm": 0.667497336946359, + "learning_rate": 5.312752969969592e-06, + "loss": 0.3397, + "step": 10953 + }, + { + "epoch": 0.4957682733650147, + "grad_norm": 0.6467036143338789, + "learning_rate": 5.3120214808037954e-06, + "loss": 0.3268, + "step": 10954 + }, + { + "epoch": 0.49581353247341026, + "grad_norm": 0.6394702341304671, + "learning_rate": 5.311289984933615e-06, + "loss": 0.3347, + "step": 10955 + }, + { + "epoch": 0.49585879158180585, + "grad_norm": 0.6664012078186998, + "learning_rate": 5.310558482374768e-06, + "loss": 0.3179, + "step": 10956 + }, + { + "epoch": 0.4959040506902014, + "grad_norm": 0.6043814803870569, + "learning_rate": 5.309826973142974e-06, + "loss": 0.3369, + "step": 10957 + }, + { + "epoch": 0.49594930979859697, + "grad_norm": 0.37418229367821654, + "learning_rate": 5.30909545725395e-06, + "loss": 0.4913, + "step": 10958 + }, + { + "epoch": 0.4959945689069925, + "grad_norm": 0.3446965224369728, + "learning_rate": 5.308363934723412e-06, + "loss": 0.4493, + "step": 10959 + }, + { + "epoch": 0.4960398280153881, + "grad_norm": 0.29210342646383686, + "learning_rate": 5.307632405567084e-06, + "loss": 0.4693, + "step": 10960 + }, + { + "epoch": 0.4960850871237837, + "grad_norm": 0.6848138089237998, + "learning_rate": 5.306900869800676e-06, + "loss": 0.3238, + "step": 10961 + }, + { + "epoch": 0.4961303462321792, + "grad_norm": 0.6610513471135908, + "learning_rate": 5.306169327439914e-06, + "loss": 0.3727, + "step": 10962 + }, + { + "epoch": 0.4961756053405748, + "grad_norm": 0.6254059324403175, + "learning_rate": 5.3054377785005114e-06, + "loss": 0.3011, + "step": 10963 + }, + { + "epoch": 0.49622086444897034, + "grad_norm": 0.6963258697924865, + "learning_rate": 5.30470622299819e-06, + "loss": 0.3804, + "step": 10964 + }, + { + "epoch": 0.49626612355736593, + "grad_norm": 0.6253787977907792, + "learning_rate": 5.303974660948669e-06, + "loss": 0.2995, + "step": 10965 + }, + { + "epoch": 0.49631138266576147, + "grad_norm": 0.5194836757059452, + "learning_rate": 5.3032430923676635e-06, + "loss": 0.4814, + "step": 10966 + }, + { + "epoch": 0.49635664177415706, + "grad_norm": 0.7144946249067363, + "learning_rate": 5.302511517270897e-06, + "loss": 0.3655, + "step": 10967 + }, + { + "epoch": 0.4964019008825526, + "grad_norm": 0.6770453635481531, + "learning_rate": 5.301779935674087e-06, + "loss": 0.3143, + "step": 10968 + }, + { + "epoch": 0.4964471599909482, + "grad_norm": 0.6699003525392917, + "learning_rate": 5.301048347592952e-06, + "loss": 0.3671, + "step": 10969 + }, + { + "epoch": 0.4964924190993437, + "grad_norm": 0.7024479803516988, + "learning_rate": 5.300316753043214e-06, + "loss": 0.3028, + "step": 10970 + }, + { + "epoch": 0.4965376782077393, + "grad_norm": 0.6159114504436758, + "learning_rate": 5.299585152040592e-06, + "loss": 0.3316, + "step": 10971 + }, + { + "epoch": 0.4965829373161349, + "grad_norm": 0.6961387616550401, + "learning_rate": 5.298853544600802e-06, + "loss": 0.3352, + "step": 10972 + }, + { + "epoch": 0.49662819642453043, + "grad_norm": 0.628516422333494, + "learning_rate": 5.298121930739571e-06, + "loss": 0.3507, + "step": 10973 + }, + { + "epoch": 0.496673455532926, + "grad_norm": 0.6162577726628683, + "learning_rate": 5.297390310472612e-06, + "loss": 0.3468, + "step": 10974 + }, + { + "epoch": 0.49671871464132156, + "grad_norm": 0.6160097811332337, + "learning_rate": 5.29665868381565e-06, + "loss": 0.3158, + "step": 10975 + }, + { + "epoch": 0.49676397374971715, + "grad_norm": 0.3708650924765746, + "learning_rate": 5.295927050784404e-06, + "loss": 0.4435, + "step": 10976 + }, + { + "epoch": 0.4968092328581127, + "grad_norm": 0.34981254449603477, + "learning_rate": 5.295195411394595e-06, + "loss": 0.4923, + "step": 10977 + }, + { + "epoch": 0.49685449196650827, + "grad_norm": 0.6413983531235229, + "learning_rate": 5.2944637656619415e-06, + "loss": 0.3009, + "step": 10978 + }, + { + "epoch": 0.4968997510749038, + "grad_norm": 0.2988611839833011, + "learning_rate": 5.293732113602169e-06, + "loss": 0.4772, + "step": 10979 + }, + { + "epoch": 0.4969450101832994, + "grad_norm": 0.30784643760060076, + "learning_rate": 5.293000455230992e-06, + "loss": 0.457, + "step": 10980 + }, + { + "epoch": 0.49699026929169493, + "grad_norm": 0.5847235092637628, + "learning_rate": 5.292268790564138e-06, + "loss": 0.3285, + "step": 10981 + }, + { + "epoch": 0.4970355284000905, + "grad_norm": 0.33329237159104297, + "learning_rate": 5.291537119617322e-06, + "loss": 0.467, + "step": 10982 + }, + { + "epoch": 0.4970807875084861, + "grad_norm": 0.7333630104412473, + "learning_rate": 5.290805442406273e-06, + "loss": 0.3166, + "step": 10983 + }, + { + "epoch": 0.49712604661688164, + "grad_norm": 0.651647334533816, + "learning_rate": 5.290073758946705e-06, + "loss": 0.3379, + "step": 10984 + }, + { + "epoch": 0.49717130572527723, + "grad_norm": 0.6267299723598055, + "learning_rate": 5.289342069254345e-06, + "loss": 0.3138, + "step": 10985 + }, + { + "epoch": 0.49721656483367277, + "grad_norm": 0.348045624189992, + "learning_rate": 5.288610373344911e-06, + "loss": 0.5256, + "step": 10986 + }, + { + "epoch": 0.49726182394206836, + "grad_norm": 0.642822543396966, + "learning_rate": 5.287878671234127e-06, + "loss": 0.3337, + "step": 10987 + }, + { + "epoch": 0.4973070830504639, + "grad_norm": 0.6261370129498127, + "learning_rate": 5.287146962937715e-06, + "loss": 0.3539, + "step": 10988 + }, + { + "epoch": 0.4973523421588595, + "grad_norm": 0.31546532431943075, + "learning_rate": 5.286415248471397e-06, + "loss": 0.4861, + "step": 10989 + }, + { + "epoch": 0.497397601267255, + "grad_norm": 0.8233396237313507, + "learning_rate": 5.285683527850892e-06, + "loss": 0.3291, + "step": 10990 + }, + { + "epoch": 0.4974428603756506, + "grad_norm": 0.6464844367231789, + "learning_rate": 5.284951801091929e-06, + "loss": 0.3274, + "step": 10991 + }, + { + "epoch": 0.49748811948404614, + "grad_norm": 0.6104061184013967, + "learning_rate": 5.284220068210225e-06, + "loss": 0.3618, + "step": 10992 + }, + { + "epoch": 0.49753337859244173, + "grad_norm": 0.6293890350828011, + "learning_rate": 5.283488329221506e-06, + "loss": 0.3895, + "step": 10993 + }, + { + "epoch": 0.49757863770083727, + "grad_norm": 0.6475500629074776, + "learning_rate": 5.2827565841414915e-06, + "loss": 0.3484, + "step": 10994 + }, + { + "epoch": 0.49762389680923286, + "grad_norm": 0.35003481742079523, + "learning_rate": 5.282024832985908e-06, + "loss": 0.4795, + "step": 10995 + }, + { + "epoch": 0.49766915591762845, + "grad_norm": 0.6124094693385026, + "learning_rate": 5.281293075770476e-06, + "loss": 0.3034, + "step": 10996 + }, + { + "epoch": 0.497714415026024, + "grad_norm": 0.6289972302871515, + "learning_rate": 5.280561312510921e-06, + "loss": 0.3426, + "step": 10997 + }, + { + "epoch": 0.49775967413441957, + "grad_norm": 0.6095955157067299, + "learning_rate": 5.279829543222963e-06, + "loss": 0.326, + "step": 10998 + }, + { + "epoch": 0.4978049332428151, + "grad_norm": 0.660604168797498, + "learning_rate": 5.27909776792233e-06, + "loss": 0.3287, + "step": 10999 + }, + { + "epoch": 0.4978501923512107, + "grad_norm": 0.6395277156110963, + "learning_rate": 5.278365986624743e-06, + "loss": 0.3563, + "step": 11000 + }, + { + "epoch": 0.49789545145960623, + "grad_norm": 0.6254936495960255, + "learning_rate": 5.277634199345924e-06, + "loss": 0.3451, + "step": 11001 + }, + { + "epoch": 0.4979407105680018, + "grad_norm": 0.2960462330487585, + "learning_rate": 5.2769024061016e-06, + "loss": 0.4898, + "step": 11002 + }, + { + "epoch": 0.49798596967639736, + "grad_norm": 0.6469695831600601, + "learning_rate": 5.276170606907492e-06, + "loss": 0.3291, + "step": 11003 + }, + { + "epoch": 0.49803122878479295, + "grad_norm": 0.2650054780274163, + "learning_rate": 5.275438801779328e-06, + "loss": 0.4513, + "step": 11004 + }, + { + "epoch": 0.4980764878931885, + "grad_norm": 0.5869666126213625, + "learning_rate": 5.27470699073283e-06, + "loss": 0.3008, + "step": 11005 + }, + { + "epoch": 0.49812174700158407, + "grad_norm": 0.2852893613309126, + "learning_rate": 5.273975173783721e-06, + "loss": 0.4997, + "step": 11006 + }, + { + "epoch": 0.49816700610997966, + "grad_norm": 0.5978448571071595, + "learning_rate": 5.273243350947728e-06, + "loss": 0.3148, + "step": 11007 + }, + { + "epoch": 0.4982122652183752, + "grad_norm": 0.2849176493083047, + "learning_rate": 5.272511522240574e-06, + "loss": 0.4952, + "step": 11008 + }, + { + "epoch": 0.4982575243267708, + "grad_norm": 0.586350458099298, + "learning_rate": 5.271779687677984e-06, + "loss": 0.3416, + "step": 11009 + }, + { + "epoch": 0.4983027834351663, + "grad_norm": 0.6386831237999933, + "learning_rate": 5.271047847275685e-06, + "loss": 0.3097, + "step": 11010 + }, + { + "epoch": 0.4983480425435619, + "grad_norm": 0.6339958723931352, + "learning_rate": 5.270316001049398e-06, + "loss": 0.3842, + "step": 11011 + }, + { + "epoch": 0.49839330165195744, + "grad_norm": 0.29838913407663087, + "learning_rate": 5.269584149014852e-06, + "loss": 0.4807, + "step": 11012 + }, + { + "epoch": 0.49843856076035303, + "grad_norm": 0.6826512568699339, + "learning_rate": 5.268852291187771e-06, + "loss": 0.3287, + "step": 11013 + }, + { + "epoch": 0.49848381986874857, + "grad_norm": 0.5931840879498935, + "learning_rate": 5.2681204275838785e-06, + "loss": 0.3152, + "step": 11014 + }, + { + "epoch": 0.49852907897714416, + "grad_norm": 0.6657488830808198, + "learning_rate": 5.267388558218902e-06, + "loss": 0.3263, + "step": 11015 + }, + { + "epoch": 0.4985743380855397, + "grad_norm": 0.6057858608335768, + "learning_rate": 5.266656683108566e-06, + "loss": 0.3041, + "step": 11016 + }, + { + "epoch": 0.4986195971939353, + "grad_norm": 0.6745789119505499, + "learning_rate": 5.265924802268598e-06, + "loss": 0.3572, + "step": 11017 + }, + { + "epoch": 0.4986648563023308, + "grad_norm": 0.5747209305871905, + "learning_rate": 5.265192915714723e-06, + "loss": 0.3098, + "step": 11018 + }, + { + "epoch": 0.4987101154107264, + "grad_norm": 0.6218352907198297, + "learning_rate": 5.2644610234626646e-06, + "loss": 0.2639, + "step": 11019 + }, + { + "epoch": 0.498755374519122, + "grad_norm": 0.3348861378427615, + "learning_rate": 5.2637291255281545e-06, + "loss": 0.4646, + "step": 11020 + }, + { + "epoch": 0.49880063362751753, + "grad_norm": 0.6534352481784338, + "learning_rate": 5.262997221926912e-06, + "loss": 0.304, + "step": 11021 + }, + { + "epoch": 0.4988458927359131, + "grad_norm": 0.6371580418242253, + "learning_rate": 5.262265312674669e-06, + "loss": 0.3518, + "step": 11022 + }, + { + "epoch": 0.49889115184430866, + "grad_norm": 0.29895083134067113, + "learning_rate": 5.261533397787149e-06, + "loss": 0.4731, + "step": 11023 + }, + { + "epoch": 0.49893641095270425, + "grad_norm": 0.6893328823430755, + "learning_rate": 5.26080147728008e-06, + "loss": 0.3198, + "step": 11024 + }, + { + "epoch": 0.4989816700610998, + "grad_norm": 0.6131173186560834, + "learning_rate": 5.260069551169187e-06, + "loss": 0.3078, + "step": 11025 + }, + { + "epoch": 0.49902692916949537, + "grad_norm": 0.6330615061810914, + "learning_rate": 5.2593376194702e-06, + "loss": 0.3216, + "step": 11026 + }, + { + "epoch": 0.4990721882778909, + "grad_norm": 0.6459216134848859, + "learning_rate": 5.258605682198842e-06, + "loss": 0.3118, + "step": 11027 + }, + { + "epoch": 0.4991174473862865, + "grad_norm": 0.6205966481412192, + "learning_rate": 5.2578737393708435e-06, + "loss": 0.3307, + "step": 11028 + }, + { + "epoch": 0.49916270649468203, + "grad_norm": 0.7227561225552619, + "learning_rate": 5.257141791001931e-06, + "loss": 0.3494, + "step": 11029 + }, + { + "epoch": 0.4992079656030776, + "grad_norm": 0.5829741951280177, + "learning_rate": 5.256409837107828e-06, + "loss": 0.3502, + "step": 11030 + }, + { + "epoch": 0.4992532247114732, + "grad_norm": 0.3573351925053009, + "learning_rate": 5.255677877704269e-06, + "loss": 0.5144, + "step": 11031 + }, + { + "epoch": 0.49929848381986874, + "grad_norm": 0.4306011684363518, + "learning_rate": 5.254945912806977e-06, + "loss": 0.4578, + "step": 11032 + }, + { + "epoch": 0.49934374292826433, + "grad_norm": 0.5924615467188293, + "learning_rate": 5.254213942431679e-06, + "loss": 0.3542, + "step": 11033 + }, + { + "epoch": 0.49938900203665987, + "grad_norm": 0.6294281367848047, + "learning_rate": 5.253481966594104e-06, + "loss": 0.3409, + "step": 11034 + }, + { + "epoch": 0.49943426114505546, + "grad_norm": 0.624064714320788, + "learning_rate": 5.25274998530998e-06, + "loss": 0.3123, + "step": 11035 + }, + { + "epoch": 0.499479520253451, + "grad_norm": 0.7475311399192298, + "learning_rate": 5.252017998595036e-06, + "loss": 0.2917, + "step": 11036 + }, + { + "epoch": 0.4995247793618466, + "grad_norm": 0.6201713220823594, + "learning_rate": 5.2512860064649985e-06, + "loss": 0.3123, + "step": 11037 + }, + { + "epoch": 0.4995700384702421, + "grad_norm": 0.6376182014202468, + "learning_rate": 5.250554008935596e-06, + "loss": 0.3383, + "step": 11038 + }, + { + "epoch": 0.4996152975786377, + "grad_norm": 0.6267144751580137, + "learning_rate": 5.24982200602256e-06, + "loss": 0.2996, + "step": 11039 + }, + { + "epoch": 0.49966055668703324, + "grad_norm": 0.6305389310730376, + "learning_rate": 5.249089997741613e-06, + "loss": 0.3086, + "step": 11040 + }, + { + "epoch": 0.49970581579542883, + "grad_norm": 0.6201450886917379, + "learning_rate": 5.248357984108489e-06, + "loss": 0.2934, + "step": 11041 + }, + { + "epoch": 0.4997510749038244, + "grad_norm": 0.6182082306856194, + "learning_rate": 5.247625965138915e-06, + "loss": 0.3555, + "step": 11042 + }, + { + "epoch": 0.49979633401221996, + "grad_norm": 0.6015189328325169, + "learning_rate": 5.246893940848619e-06, + "loss": 0.35, + "step": 11043 + }, + { + "epoch": 0.49984159312061555, + "grad_norm": 0.5770996555177424, + "learning_rate": 5.24616191125333e-06, + "loss": 0.3129, + "step": 11044 + }, + { + "epoch": 0.4998868522290111, + "grad_norm": 0.6121094238797752, + "learning_rate": 5.245429876368777e-06, + "loss": 0.3085, + "step": 11045 + }, + { + "epoch": 0.49993211133740667, + "grad_norm": 0.6523235892928385, + "learning_rate": 5.244697836210691e-06, + "loss": 0.373, + "step": 11046 + }, + { + "epoch": 0.4999773704458022, + "grad_norm": 0.6345743591337244, + "learning_rate": 5.2439657907948005e-06, + "loss": 0.3459, + "step": 11047 + }, + { + "epoch": 0.5000226295541977, + "grad_norm": 0.6277061830033281, + "learning_rate": 5.243233740136833e-06, + "loss": 0.3521, + "step": 11048 + }, + { + "epoch": 0.5000678886625933, + "grad_norm": 0.584450845482722, + "learning_rate": 5.24250168425252e-06, + "loss": 0.3166, + "step": 11049 + }, + { + "epoch": 0.5001131477709889, + "grad_norm": 0.6260654492263931, + "learning_rate": 5.241769623157591e-06, + "loss": 0.3242, + "step": 11050 + }, + { + "epoch": 0.5001584068793845, + "grad_norm": 0.6012131043558517, + "learning_rate": 5.241037556867775e-06, + "loss": 0.3088, + "step": 11051 + }, + { + "epoch": 0.50020366598778, + "grad_norm": 0.7609827413186744, + "learning_rate": 5.2403054853988025e-06, + "loss": 0.3373, + "step": 11052 + }, + { + "epoch": 0.5002489250961756, + "grad_norm": 0.6481397861005885, + "learning_rate": 5.239573408766402e-06, + "loss": 0.3348, + "step": 11053 + }, + { + "epoch": 0.5002941842045712, + "grad_norm": 0.6230503792736052, + "learning_rate": 5.2388413269863046e-06, + "loss": 0.3084, + "step": 11054 + }, + { + "epoch": 0.5003394433129668, + "grad_norm": 0.6175391721691664, + "learning_rate": 5.238109240074242e-06, + "loss": 0.3683, + "step": 11055 + }, + { + "epoch": 0.5003847024213623, + "grad_norm": 0.6082765218977666, + "learning_rate": 5.237377148045942e-06, + "loss": 0.3168, + "step": 11056 + }, + { + "epoch": 0.5004299615297578, + "grad_norm": 0.6186954600485816, + "learning_rate": 5.236645050917137e-06, + "loss": 0.322, + "step": 11057 + }, + { + "epoch": 0.5004752206381534, + "grad_norm": 0.4754584167898786, + "learning_rate": 5.235912948703557e-06, + "loss": 0.5052, + "step": 11058 + }, + { + "epoch": 0.500520479746549, + "grad_norm": 0.5948232513771863, + "learning_rate": 5.235180841420932e-06, + "loss": 0.3247, + "step": 11059 + }, + { + "epoch": 0.5005657388549446, + "grad_norm": 0.3340250369796509, + "learning_rate": 5.234448729084993e-06, + "loss": 0.4682, + "step": 11060 + }, + { + "epoch": 0.5006109979633401, + "grad_norm": 0.6822242617146005, + "learning_rate": 5.233716611711469e-06, + "loss": 0.3387, + "step": 11061 + }, + { + "epoch": 0.5006562570717357, + "grad_norm": 0.6499428614474025, + "learning_rate": 5.232984489316095e-06, + "loss": 0.3623, + "step": 11062 + }, + { + "epoch": 0.5007015161801313, + "grad_norm": 0.5985211579875924, + "learning_rate": 5.2322523619146e-06, + "loss": 0.2922, + "step": 11063 + }, + { + "epoch": 0.5007467752885268, + "grad_norm": 0.6165914090170149, + "learning_rate": 5.2315202295227144e-06, + "loss": 0.3392, + "step": 11064 + }, + { + "epoch": 0.5007920343969223, + "grad_norm": 0.5905261648096668, + "learning_rate": 5.2307880921561695e-06, + "loss": 0.3121, + "step": 11065 + }, + { + "epoch": 0.5008372935053179, + "grad_norm": 0.6898713814030945, + "learning_rate": 5.230055949830698e-06, + "loss": 0.3261, + "step": 11066 + }, + { + "epoch": 0.5008825526137135, + "grad_norm": 0.6222355567404058, + "learning_rate": 5.229323802562031e-06, + "loss": 0.353, + "step": 11067 + }, + { + "epoch": 0.5009278117221091, + "grad_norm": 0.5075586203431988, + "learning_rate": 5.2285916503659e-06, + "loss": 0.4744, + "step": 11068 + }, + { + "epoch": 0.5009730708305047, + "grad_norm": 0.6291194527210394, + "learning_rate": 5.227859493258035e-06, + "loss": 0.3207, + "step": 11069 + }, + { + "epoch": 0.5010183299389002, + "grad_norm": 0.6418232667533473, + "learning_rate": 5.227127331254171e-06, + "loss": 0.3069, + "step": 11070 + }, + { + "epoch": 0.5010635890472958, + "grad_norm": 0.6018724740067458, + "learning_rate": 5.226395164370038e-06, + "loss": 0.3068, + "step": 11071 + }, + { + "epoch": 0.5011088481556913, + "grad_norm": 0.3252219454503633, + "learning_rate": 5.225662992621367e-06, + "loss": 0.454, + "step": 11072 + }, + { + "epoch": 0.5011541072640869, + "grad_norm": 0.7285264576358847, + "learning_rate": 5.224930816023892e-06, + "loss": 0.35, + "step": 11073 + }, + { + "epoch": 0.5011993663724824, + "grad_norm": 0.6205010962097538, + "learning_rate": 5.224198634593344e-06, + "loss": 0.3039, + "step": 11074 + }, + { + "epoch": 0.501244625480878, + "grad_norm": 0.594041615008846, + "learning_rate": 5.223466448345457e-06, + "loss": 0.3091, + "step": 11075 + }, + { + "epoch": 0.5012898845892736, + "grad_norm": 0.6321829785744144, + "learning_rate": 5.222734257295963e-06, + "loss": 0.3225, + "step": 11076 + }, + { + "epoch": 0.5013351436976692, + "grad_norm": 0.6118644687548812, + "learning_rate": 5.222002061460592e-06, + "loss": 0.2942, + "step": 11077 + }, + { + "epoch": 0.5013804028060648, + "grad_norm": 0.7053326924588188, + "learning_rate": 5.22126986085508e-06, + "loss": 0.3398, + "step": 11078 + }, + { + "epoch": 0.5014256619144603, + "grad_norm": 0.6527266573297478, + "learning_rate": 5.220537655495156e-06, + "loss": 0.3353, + "step": 11079 + }, + { + "epoch": 0.5014709210228558, + "grad_norm": 0.6638030771401946, + "learning_rate": 5.219805445396558e-06, + "loss": 0.362, + "step": 11080 + }, + { + "epoch": 0.5015161801312514, + "grad_norm": 0.620148977612418, + "learning_rate": 5.219073230575014e-06, + "loss": 0.3098, + "step": 11081 + }, + { + "epoch": 0.501561439239647, + "grad_norm": 0.6372724691585553, + "learning_rate": 5.218341011046259e-06, + "loss": 0.319, + "step": 11082 + }, + { + "epoch": 0.5016066983480425, + "grad_norm": 0.6373736598102927, + "learning_rate": 5.217608786826028e-06, + "loss": 0.3637, + "step": 11083 + }, + { + "epoch": 0.5016519574564381, + "grad_norm": 0.605732810497086, + "learning_rate": 5.216876557930052e-06, + "loss": 0.3337, + "step": 11084 + }, + { + "epoch": 0.5016972165648337, + "grad_norm": 0.6222119929539311, + "learning_rate": 5.216144324374064e-06, + "loss": 0.3213, + "step": 11085 + }, + { + "epoch": 0.5017424756732293, + "grad_norm": 0.6386771222508453, + "learning_rate": 5.215412086173798e-06, + "loss": 0.3204, + "step": 11086 + }, + { + "epoch": 0.5017877347816248, + "grad_norm": 0.6350812082463306, + "learning_rate": 5.214679843344989e-06, + "loss": 0.3172, + "step": 11087 + }, + { + "epoch": 0.5018329938900203, + "grad_norm": 0.5767092330974389, + "learning_rate": 5.213947595903369e-06, + "loss": 0.477, + "step": 11088 + }, + { + "epoch": 0.5018782529984159, + "grad_norm": 0.6352459911259863, + "learning_rate": 5.213215343864674e-06, + "loss": 0.3516, + "step": 11089 + }, + { + "epoch": 0.5019235121068115, + "grad_norm": 2.2769096522106915, + "learning_rate": 5.212483087244633e-06, + "loss": 0.3, + "step": 11090 + }, + { + "epoch": 0.5019687712152071, + "grad_norm": 0.6258735223163712, + "learning_rate": 5.211750826058986e-06, + "loss": 0.3866, + "step": 11091 + }, + { + "epoch": 0.5020140303236026, + "grad_norm": 0.3613147591509796, + "learning_rate": 5.211018560323462e-06, + "loss": 0.4903, + "step": 11092 + }, + { + "epoch": 0.5020592894319982, + "grad_norm": 0.36778750299336693, + "learning_rate": 5.2102862900537975e-06, + "loss": 0.4728, + "step": 11093 + }, + { + "epoch": 0.5021045485403938, + "grad_norm": 0.6867057430629306, + "learning_rate": 5.209554015265727e-06, + "loss": 0.3507, + "step": 11094 + }, + { + "epoch": 0.5021498076487894, + "grad_norm": 0.668992589318168, + "learning_rate": 5.208821735974984e-06, + "loss": 0.3298, + "step": 11095 + }, + { + "epoch": 0.5021950667571848, + "grad_norm": 0.6752741459092744, + "learning_rate": 5.208089452197302e-06, + "loss": 0.3499, + "step": 11096 + }, + { + "epoch": 0.5022403258655804, + "grad_norm": 0.6438409777835392, + "learning_rate": 5.20735716394842e-06, + "loss": 0.3207, + "step": 11097 + }, + { + "epoch": 0.502285584973976, + "grad_norm": 0.6102737159370142, + "learning_rate": 5.206624871244066e-06, + "loss": 0.3713, + "step": 11098 + }, + { + "epoch": 0.5023308440823716, + "grad_norm": 0.433094389848305, + "learning_rate": 5.205892574099981e-06, + "loss": 0.4836, + "step": 11099 + }, + { + "epoch": 0.5023761031907671, + "grad_norm": 0.6388238597155772, + "learning_rate": 5.205160272531895e-06, + "loss": 0.3217, + "step": 11100 + }, + { + "epoch": 0.5024213622991627, + "grad_norm": 0.6274527566329003, + "learning_rate": 5.204427966555545e-06, + "loss": 0.3346, + "step": 11101 + }, + { + "epoch": 0.5024666214075583, + "grad_norm": 0.6025656163217872, + "learning_rate": 5.203695656186667e-06, + "loss": 0.3253, + "step": 11102 + }, + { + "epoch": 0.5025118805159539, + "grad_norm": 0.6551152806125176, + "learning_rate": 5.202963341440994e-06, + "loss": 0.3418, + "step": 11103 + }, + { + "epoch": 0.5025571396243494, + "grad_norm": 0.816953226291965, + "learning_rate": 5.202231022334262e-06, + "loss": 0.3047, + "step": 11104 + }, + { + "epoch": 0.5026023987327449, + "grad_norm": 0.6103218202483534, + "learning_rate": 5.201498698882207e-06, + "loss": 0.3312, + "step": 11105 + }, + { + "epoch": 0.5026476578411405, + "grad_norm": 0.6104779818808732, + "learning_rate": 5.200766371100564e-06, + "loss": 0.3227, + "step": 11106 + }, + { + "epoch": 0.5026929169495361, + "grad_norm": 0.33092996473441333, + "learning_rate": 5.200034039005068e-06, + "loss": 0.4819, + "step": 11107 + }, + { + "epoch": 0.5027381760579317, + "grad_norm": 0.5760559752047034, + "learning_rate": 5.199301702611454e-06, + "loss": 0.3256, + "step": 11108 + }, + { + "epoch": 0.5027834351663272, + "grad_norm": 0.7355190355427469, + "learning_rate": 5.1985693619354604e-06, + "loss": 0.3257, + "step": 11109 + }, + { + "epoch": 0.5028286942747228, + "grad_norm": 0.5909027528222714, + "learning_rate": 5.197837016992819e-06, + "loss": 0.3348, + "step": 11110 + }, + { + "epoch": 0.5028739533831184, + "grad_norm": 0.7936770031677421, + "learning_rate": 5.1971046677992695e-06, + "loss": 0.3381, + "step": 11111 + }, + { + "epoch": 0.502919212491514, + "grad_norm": 0.6005895346477476, + "learning_rate": 5.196372314370545e-06, + "loss": 0.3098, + "step": 11112 + }, + { + "epoch": 0.5029644715999095, + "grad_norm": 0.6393324770340331, + "learning_rate": 5.195639956722382e-06, + "loss": 0.3555, + "step": 11113 + }, + { + "epoch": 0.503009730708305, + "grad_norm": 0.7363714170001564, + "learning_rate": 5.194907594870519e-06, + "loss": 0.3672, + "step": 11114 + }, + { + "epoch": 0.5030549898167006, + "grad_norm": 0.6282518354837773, + "learning_rate": 5.194175228830689e-06, + "loss": 0.3095, + "step": 11115 + }, + { + "epoch": 0.5031002489250962, + "grad_norm": 0.6451065493325112, + "learning_rate": 5.19344285861863e-06, + "loss": 0.3435, + "step": 11116 + }, + { + "epoch": 0.5031455080334918, + "grad_norm": 0.6009264106991573, + "learning_rate": 5.192710484250078e-06, + "loss": 0.327, + "step": 11117 + }, + { + "epoch": 0.5031907671418873, + "grad_norm": 0.36986139771972265, + "learning_rate": 5.19197810574077e-06, + "loss": 0.4864, + "step": 11118 + }, + { + "epoch": 0.5032360262502829, + "grad_norm": 0.6092739699754802, + "learning_rate": 5.191245723106442e-06, + "loss": 0.2666, + "step": 11119 + }, + { + "epoch": 0.5032812853586784, + "grad_norm": 0.6406215574886052, + "learning_rate": 5.1905133363628314e-06, + "loss": 0.3426, + "step": 11120 + }, + { + "epoch": 0.503326544467074, + "grad_norm": 0.6530649316219899, + "learning_rate": 5.189780945525673e-06, + "loss": 0.3212, + "step": 11121 + }, + { + "epoch": 0.5033718035754695, + "grad_norm": 0.2882004458790363, + "learning_rate": 5.189048550610706e-06, + "loss": 0.4628, + "step": 11122 + }, + { + "epoch": 0.5034170626838651, + "grad_norm": 0.6282642020762652, + "learning_rate": 5.188316151633665e-06, + "loss": 0.3331, + "step": 11123 + }, + { + "epoch": 0.5034623217922607, + "grad_norm": 0.6610170936830095, + "learning_rate": 5.187583748610289e-06, + "loss": 0.3781, + "step": 11124 + }, + { + "epoch": 0.5035075809006563, + "grad_norm": 0.6332532439068955, + "learning_rate": 5.186851341556315e-06, + "loss": 0.3464, + "step": 11125 + }, + { + "epoch": 0.5035528400090519, + "grad_norm": 0.6949540478270081, + "learning_rate": 5.186118930487479e-06, + "loss": 0.293, + "step": 11126 + }, + { + "epoch": 0.5035980991174474, + "grad_norm": 0.67081655894998, + "learning_rate": 5.185386515419518e-06, + "loss": 0.3259, + "step": 11127 + }, + { + "epoch": 0.5036433582258429, + "grad_norm": 0.3210088732142528, + "learning_rate": 5.184654096368172e-06, + "loss": 0.4637, + "step": 11128 + }, + { + "epoch": 0.5036886173342385, + "grad_norm": 0.5915652837478227, + "learning_rate": 5.183921673349174e-06, + "loss": 0.3081, + "step": 11129 + }, + { + "epoch": 0.5037338764426341, + "grad_norm": 0.6268015361227933, + "learning_rate": 5.183189246378266e-06, + "loss": 0.3451, + "step": 11130 + }, + { + "epoch": 0.5037791355510296, + "grad_norm": 0.7243887671807692, + "learning_rate": 5.182456815471184e-06, + "loss": 0.3106, + "step": 11131 + }, + { + "epoch": 0.5038243946594252, + "grad_norm": 0.6726835956341942, + "learning_rate": 5.181724380643664e-06, + "loss": 0.3437, + "step": 11132 + }, + { + "epoch": 0.5038696537678208, + "grad_norm": 0.6491434431452936, + "learning_rate": 5.180991941911446e-06, + "loss": 0.2941, + "step": 11133 + }, + { + "epoch": 0.5039149128762164, + "grad_norm": 0.6497408706120921, + "learning_rate": 5.180259499290268e-06, + "loss": 0.3248, + "step": 11134 + }, + { + "epoch": 0.5039601719846118, + "grad_norm": 0.657340082524694, + "learning_rate": 5.179527052795865e-06, + "loss": 0.2914, + "step": 11135 + }, + { + "epoch": 0.5040054310930074, + "grad_norm": 0.6404778084269498, + "learning_rate": 5.178794602443978e-06, + "loss": 0.3023, + "step": 11136 + }, + { + "epoch": 0.504050690201403, + "grad_norm": 0.7219616940330175, + "learning_rate": 5.178062148250343e-06, + "loss": 0.3541, + "step": 11137 + }, + { + "epoch": 0.5040959493097986, + "grad_norm": 0.4489197089113747, + "learning_rate": 5.177329690230702e-06, + "loss": 0.4824, + "step": 11138 + }, + { + "epoch": 0.5041412084181942, + "grad_norm": 0.619375488651365, + "learning_rate": 5.176597228400789e-06, + "loss": 0.3333, + "step": 11139 + }, + { + "epoch": 0.5041864675265897, + "grad_norm": 0.31081728274069303, + "learning_rate": 5.175864762776343e-06, + "loss": 0.4816, + "step": 11140 + }, + { + "epoch": 0.5042317266349853, + "grad_norm": 0.6297193844551721, + "learning_rate": 5.175132293373105e-06, + "loss": 0.347, + "step": 11141 + }, + { + "epoch": 0.5042769857433809, + "grad_norm": 0.6650090584331564, + "learning_rate": 5.174399820206811e-06, + "loss": 0.3354, + "step": 11142 + }, + { + "epoch": 0.5043222448517765, + "grad_norm": 0.6580033293915457, + "learning_rate": 5.1736673432932e-06, + "loss": 0.3263, + "step": 11143 + }, + { + "epoch": 0.5043675039601719, + "grad_norm": 0.3533605652723998, + "learning_rate": 5.172934862648012e-06, + "loss": 0.4612, + "step": 11144 + }, + { + "epoch": 0.5044127630685675, + "grad_norm": 0.3096719747489321, + "learning_rate": 5.172202378286986e-06, + "loss": 0.4858, + "step": 11145 + }, + { + "epoch": 0.5044580221769631, + "grad_norm": 0.976650936575463, + "learning_rate": 5.171469890225857e-06, + "loss": 0.3483, + "step": 11146 + }, + { + "epoch": 0.5045032812853587, + "grad_norm": 0.6514853901406809, + "learning_rate": 5.17073739848037e-06, + "loss": 0.354, + "step": 11147 + }, + { + "epoch": 0.5045485403937543, + "grad_norm": 0.6230159049535104, + "learning_rate": 5.170004903066258e-06, + "loss": 0.3374, + "step": 11148 + }, + { + "epoch": 0.5045937995021498, + "grad_norm": 0.5977594402004174, + "learning_rate": 5.169272403999265e-06, + "loss": 0.3315, + "step": 11149 + }, + { + "epoch": 0.5046390586105454, + "grad_norm": 0.6652306304159178, + "learning_rate": 5.1685399012951244e-06, + "loss": 0.3276, + "step": 11150 + }, + { + "epoch": 0.504684317718941, + "grad_norm": 0.6319080666281376, + "learning_rate": 5.167807394969583e-06, + "loss": 0.3413, + "step": 11151 + }, + { + "epoch": 0.5047295768273365, + "grad_norm": 0.6793929270264313, + "learning_rate": 5.1670748850383734e-06, + "loss": 0.3306, + "step": 11152 + }, + { + "epoch": 0.504774835935732, + "grad_norm": 0.6600474914651242, + "learning_rate": 5.166342371517239e-06, + "loss": 0.3292, + "step": 11153 + }, + { + "epoch": 0.5048200950441276, + "grad_norm": 0.5960777358642412, + "learning_rate": 5.165609854421917e-06, + "loss": 0.3273, + "step": 11154 + }, + { + "epoch": 0.5048653541525232, + "grad_norm": 0.6414202022312718, + "learning_rate": 5.164877333768149e-06, + "loss": 0.3084, + "step": 11155 + }, + { + "epoch": 0.5049106132609188, + "grad_norm": 0.5750570450752185, + "learning_rate": 5.1641448095716715e-06, + "loss": 0.3788, + "step": 11156 + }, + { + "epoch": 0.5049558723693143, + "grad_norm": 0.5541977367870765, + "learning_rate": 5.163412281848229e-06, + "loss": 0.2903, + "step": 11157 + }, + { + "epoch": 0.5050011314777099, + "grad_norm": 0.6017463923431956, + "learning_rate": 5.162679750613555e-06, + "loss": 0.3191, + "step": 11158 + }, + { + "epoch": 0.5050463905861055, + "grad_norm": 0.6164484835509431, + "learning_rate": 5.1619472158833964e-06, + "loss": 0.2896, + "step": 11159 + }, + { + "epoch": 0.505091649694501, + "grad_norm": 0.5741582036164451, + "learning_rate": 5.161214677673487e-06, + "loss": 0.3643, + "step": 11160 + }, + { + "epoch": 0.5051369088028966, + "grad_norm": 0.6408902888090979, + "learning_rate": 5.16048213599957e-06, + "loss": 0.326, + "step": 11161 + }, + { + "epoch": 0.5051821679112921, + "grad_norm": 0.3830985888230755, + "learning_rate": 5.159749590877384e-06, + "loss": 0.471, + "step": 11162 + }, + { + "epoch": 0.5052274270196877, + "grad_norm": 0.35226583525158767, + "learning_rate": 5.159017042322671e-06, + "loss": 0.477, + "step": 11163 + }, + { + "epoch": 0.5052726861280833, + "grad_norm": 0.6303737577199218, + "learning_rate": 5.158284490351169e-06, + "loss": 0.2958, + "step": 11164 + }, + { + "epoch": 0.5053179452364789, + "grad_norm": 0.6127947670780783, + "learning_rate": 5.157551934978622e-06, + "loss": 0.3251, + "step": 11165 + }, + { + "epoch": 0.5053632043448744, + "grad_norm": 0.6064243651942174, + "learning_rate": 5.156819376220765e-06, + "loss": 0.3087, + "step": 11166 + }, + { + "epoch": 0.50540846345327, + "grad_norm": 1.068945333103363, + "learning_rate": 5.1560868140933425e-06, + "loss": 0.3313, + "step": 11167 + }, + { + "epoch": 0.5054537225616655, + "grad_norm": 0.41989461112524973, + "learning_rate": 5.155354248612095e-06, + "loss": 0.4639, + "step": 11168 + }, + { + "epoch": 0.5054989816700611, + "grad_norm": 0.7009044558515606, + "learning_rate": 5.1546216797927594e-06, + "loss": 0.3175, + "step": 11169 + }, + { + "epoch": 0.5055442407784566, + "grad_norm": 0.6599507990452416, + "learning_rate": 5.1538891076510815e-06, + "loss": 0.353, + "step": 11170 + }, + { + "epoch": 0.5055894998868522, + "grad_norm": 0.34121769850879935, + "learning_rate": 5.153156532202795e-06, + "loss": 0.4709, + "step": 11171 + }, + { + "epoch": 0.5056347589952478, + "grad_norm": 0.6500256984129645, + "learning_rate": 5.152423953463649e-06, + "loss": 0.3466, + "step": 11172 + }, + { + "epoch": 0.5056800181036434, + "grad_norm": 0.6725159093189567, + "learning_rate": 5.151691371449378e-06, + "loss": 0.3298, + "step": 11173 + }, + { + "epoch": 0.505725277212039, + "grad_norm": 0.5950271952157588, + "learning_rate": 5.150958786175727e-06, + "loss": 0.2905, + "step": 11174 + }, + { + "epoch": 0.5057705363204344, + "grad_norm": 0.6326018823306971, + "learning_rate": 5.1502261976584354e-06, + "loss": 0.282, + "step": 11175 + }, + { + "epoch": 0.50581579542883, + "grad_norm": 0.589511282902164, + "learning_rate": 5.149493605913244e-06, + "loss": 0.3124, + "step": 11176 + }, + { + "epoch": 0.5058610545372256, + "grad_norm": 0.9978463493192055, + "learning_rate": 5.148761010955893e-06, + "loss": 0.3334, + "step": 11177 + }, + { + "epoch": 0.5059063136456212, + "grad_norm": 0.6701595925463175, + "learning_rate": 5.1480284128021265e-06, + "loss": 0.3836, + "step": 11178 + }, + { + "epoch": 0.5059515727540167, + "grad_norm": 0.6096570947381155, + "learning_rate": 5.147295811467681e-06, + "loss": 0.3671, + "step": 11179 + }, + { + "epoch": 0.5059968318624123, + "grad_norm": 0.6534848644153554, + "learning_rate": 5.146563206968303e-06, + "loss": 0.3427, + "step": 11180 + }, + { + "epoch": 0.5060420909708079, + "grad_norm": 0.6840519353875196, + "learning_rate": 5.1458305993197326e-06, + "loss": 0.3127, + "step": 11181 + }, + { + "epoch": 0.5060873500792035, + "grad_norm": 0.6609155547256175, + "learning_rate": 5.145097988537709e-06, + "loss": 0.3149, + "step": 11182 + }, + { + "epoch": 0.5061326091875991, + "grad_norm": 0.6332826976514082, + "learning_rate": 5.144365374637976e-06, + "loss": 0.3082, + "step": 11183 + }, + { + "epoch": 0.5061778682959945, + "grad_norm": 0.6580709619987227, + "learning_rate": 5.143632757636275e-06, + "loss": 0.3321, + "step": 11184 + }, + { + "epoch": 0.5062231274043901, + "grad_norm": 0.6328645701871171, + "learning_rate": 5.142900137548346e-06, + "loss": 0.2912, + "step": 11185 + }, + { + "epoch": 0.5062683865127857, + "grad_norm": 0.6329476213869506, + "learning_rate": 5.142167514389933e-06, + "loss": 0.3509, + "step": 11186 + }, + { + "epoch": 0.5063136456211813, + "grad_norm": 0.6852797503715907, + "learning_rate": 5.141434888176775e-06, + "loss": 0.3406, + "step": 11187 + }, + { + "epoch": 0.5063589047295768, + "grad_norm": 0.46909894318034845, + "learning_rate": 5.140702258924618e-06, + "loss": 0.487, + "step": 11188 + }, + { + "epoch": 0.5064041638379724, + "grad_norm": 0.3779597483581766, + "learning_rate": 5.1399696266491996e-06, + "loss": 0.4912, + "step": 11189 + }, + { + "epoch": 0.506449422946368, + "grad_norm": 0.2965653746703262, + "learning_rate": 5.1392369913662646e-06, + "loss": 0.486, + "step": 11190 + }, + { + "epoch": 0.5064946820547636, + "grad_norm": 0.6462902080209708, + "learning_rate": 5.138504353091555e-06, + "loss": 0.3288, + "step": 11191 + }, + { + "epoch": 0.506539941163159, + "grad_norm": 0.6200576019698926, + "learning_rate": 5.137771711840811e-06, + "loss": 0.317, + "step": 11192 + }, + { + "epoch": 0.5065852002715546, + "grad_norm": 0.6191220053106656, + "learning_rate": 5.137039067629776e-06, + "loss": 0.3249, + "step": 11193 + }, + { + "epoch": 0.5066304593799502, + "grad_norm": 0.6109383276969177, + "learning_rate": 5.136306420474193e-06, + "loss": 0.3895, + "step": 11194 + }, + { + "epoch": 0.5066757184883458, + "grad_norm": 0.540868332291682, + "learning_rate": 5.135573770389804e-06, + "loss": 0.4816, + "step": 11195 + }, + { + "epoch": 0.5067209775967414, + "grad_norm": 0.5663607782035478, + "learning_rate": 5.134841117392349e-06, + "loss": 0.3157, + "step": 11196 + }, + { + "epoch": 0.5067662367051369, + "grad_norm": 0.6249994867232201, + "learning_rate": 5.134108461497576e-06, + "loss": 0.3748, + "step": 11197 + }, + { + "epoch": 0.5068114958135325, + "grad_norm": 0.644905525457812, + "learning_rate": 5.133375802721221e-06, + "loss": 0.3007, + "step": 11198 + }, + { + "epoch": 0.506856754921928, + "grad_norm": 0.5906218504091207, + "learning_rate": 5.132643141079031e-06, + "loss": 0.3555, + "step": 11199 + }, + { + "epoch": 0.5069020140303236, + "grad_norm": 0.5924636723678586, + "learning_rate": 5.131910476586747e-06, + "loss": 0.3488, + "step": 11200 + }, + { + "epoch": 0.5069472731387191, + "grad_norm": 0.617360493101454, + "learning_rate": 5.131177809260113e-06, + "loss": 0.2914, + "step": 11201 + }, + { + "epoch": 0.5069925322471147, + "grad_norm": 0.37941447725663624, + "learning_rate": 5.130445139114869e-06, + "loss": 0.5056, + "step": 11202 + }, + { + "epoch": 0.5070377913555103, + "grad_norm": 0.6042484854768109, + "learning_rate": 5.129712466166761e-06, + "loss": 0.3294, + "step": 11203 + }, + { + "epoch": 0.5070830504639059, + "grad_norm": 0.6631656173398461, + "learning_rate": 5.1289797904315295e-06, + "loss": 0.3125, + "step": 11204 + }, + { + "epoch": 0.5071283095723014, + "grad_norm": 0.6901252842609684, + "learning_rate": 5.12824711192492e-06, + "loss": 0.2989, + "step": 11205 + }, + { + "epoch": 0.507173568680697, + "grad_norm": 0.3076522049578289, + "learning_rate": 5.127514430662671e-06, + "loss": 0.4765, + "step": 11206 + }, + { + "epoch": 0.5072188277890926, + "grad_norm": 0.28162741026972016, + "learning_rate": 5.126781746660532e-06, + "loss": 0.4618, + "step": 11207 + }, + { + "epoch": 0.5072640868974881, + "grad_norm": 0.7051014395322401, + "learning_rate": 5.126049059934239e-06, + "loss": 0.3878, + "step": 11208 + }, + { + "epoch": 0.5073093460058837, + "grad_norm": 0.659795061917355, + "learning_rate": 5.1253163704995425e-06, + "loss": 0.3245, + "step": 11209 + }, + { + "epoch": 0.5073546051142792, + "grad_norm": 0.6166099899942831, + "learning_rate": 5.124583678372179e-06, + "loss": 0.3236, + "step": 11210 + }, + { + "epoch": 0.5073998642226748, + "grad_norm": 0.6118398319259457, + "learning_rate": 5.1238509835678966e-06, + "loss": 0.3747, + "step": 11211 + }, + { + "epoch": 0.5074451233310704, + "grad_norm": 0.645756325863196, + "learning_rate": 5.1231182861024365e-06, + "loss": 0.3546, + "step": 11212 + }, + { + "epoch": 0.507490382439466, + "grad_norm": 1.0032332349126258, + "learning_rate": 5.122385585991543e-06, + "loss": 0.3146, + "step": 11213 + }, + { + "epoch": 0.5075356415478615, + "grad_norm": 0.6702414915122858, + "learning_rate": 5.121652883250958e-06, + "loss": 0.3278, + "step": 11214 + }, + { + "epoch": 0.507580900656257, + "grad_norm": 0.6723140979649809, + "learning_rate": 5.120920177896427e-06, + "loss": 0.3213, + "step": 11215 + }, + { + "epoch": 0.5076261597646526, + "grad_norm": 0.5790786377335796, + "learning_rate": 5.120187469943693e-06, + "loss": 0.3214, + "step": 11216 + }, + { + "epoch": 0.5076714188730482, + "grad_norm": 0.6766593472851177, + "learning_rate": 5.1194547594085e-06, + "loss": 0.3392, + "step": 11217 + }, + { + "epoch": 0.5077166779814438, + "grad_norm": 0.6590885592070846, + "learning_rate": 5.11872204630659e-06, + "loss": 0.3047, + "step": 11218 + }, + { + "epoch": 0.5077619370898393, + "grad_norm": 0.5935583909536788, + "learning_rate": 5.117989330653708e-06, + "loss": 0.3255, + "step": 11219 + }, + { + "epoch": 0.5078071961982349, + "grad_norm": 0.6477657735336387, + "learning_rate": 5.117256612465598e-06, + "loss": 0.3217, + "step": 11220 + }, + { + "epoch": 0.5078524553066305, + "grad_norm": 0.6513408752224927, + "learning_rate": 5.116523891758002e-06, + "loss": 0.3173, + "step": 11221 + }, + { + "epoch": 0.5078977144150261, + "grad_norm": 0.6476151623312782, + "learning_rate": 5.115791168546667e-06, + "loss": 0.3308, + "step": 11222 + }, + { + "epoch": 0.5079429735234215, + "grad_norm": 0.6360775434592607, + "learning_rate": 5.115058442847335e-06, + "loss": 0.3149, + "step": 11223 + }, + { + "epoch": 0.5079882326318171, + "grad_norm": 0.6270942302739652, + "learning_rate": 5.1143257146757495e-06, + "loss": 0.31, + "step": 11224 + }, + { + "epoch": 0.5080334917402127, + "grad_norm": 0.9616494452634846, + "learning_rate": 5.113592984047657e-06, + "loss": 0.3273, + "step": 11225 + }, + { + "epoch": 0.5080787508486083, + "grad_norm": 0.6000044934131067, + "learning_rate": 5.1128602509788e-06, + "loss": 0.3141, + "step": 11226 + }, + { + "epoch": 0.5081240099570038, + "grad_norm": 0.605687051745947, + "learning_rate": 5.112127515484923e-06, + "loss": 0.2911, + "step": 11227 + }, + { + "epoch": 0.5081692690653994, + "grad_norm": 0.6413182962936163, + "learning_rate": 5.111394777581769e-06, + "loss": 0.3262, + "step": 11228 + }, + { + "epoch": 0.508214528173795, + "grad_norm": 0.6508238650826035, + "learning_rate": 5.110662037285084e-06, + "loss": 0.3477, + "step": 11229 + }, + { + "epoch": 0.5082597872821906, + "grad_norm": 0.584862532364185, + "learning_rate": 5.109929294610611e-06, + "loss": 0.3572, + "step": 11230 + }, + { + "epoch": 0.5083050463905862, + "grad_norm": 0.6690967334167736, + "learning_rate": 5.109196549574097e-06, + "loss": 0.327, + "step": 11231 + }, + { + "epoch": 0.5083503054989816, + "grad_norm": 0.6375743641978658, + "learning_rate": 5.108463802191282e-06, + "loss": 0.303, + "step": 11232 + }, + { + "epoch": 0.5083955646073772, + "grad_norm": 0.6475998504990564, + "learning_rate": 5.1077310524779144e-06, + "loss": 0.3497, + "step": 11233 + }, + { + "epoch": 0.5084408237157728, + "grad_norm": 0.7126058888744391, + "learning_rate": 5.106998300449738e-06, + "loss": 0.3137, + "step": 11234 + }, + { + "epoch": 0.5084860828241684, + "grad_norm": 0.6367622694735621, + "learning_rate": 5.106265546122495e-06, + "loss": 0.3389, + "step": 11235 + }, + { + "epoch": 0.5085313419325639, + "grad_norm": 0.6321256652100014, + "learning_rate": 5.105532789511935e-06, + "loss": 0.3334, + "step": 11236 + }, + { + "epoch": 0.5085766010409595, + "grad_norm": 0.5958640465462083, + "learning_rate": 5.104800030633795e-06, + "loss": 0.3278, + "step": 11237 + }, + { + "epoch": 0.5086218601493551, + "grad_norm": 0.5115932366401839, + "learning_rate": 5.104067269503828e-06, + "loss": 0.4554, + "step": 11238 + }, + { + "epoch": 0.5086671192577507, + "grad_norm": 0.60649625395521, + "learning_rate": 5.103334506137773e-06, + "loss": 0.3469, + "step": 11239 + }, + { + "epoch": 0.5087123783661461, + "grad_norm": 0.6774631407983585, + "learning_rate": 5.102601740551376e-06, + "loss": 0.3477, + "step": 11240 + }, + { + "epoch": 0.5087576374745417, + "grad_norm": 1.0746385590831669, + "learning_rate": 5.101868972760384e-06, + "loss": 0.3248, + "step": 11241 + }, + { + "epoch": 0.5088028965829373, + "grad_norm": 0.3245666411781549, + "learning_rate": 5.101136202780541e-06, + "loss": 0.4738, + "step": 11242 + }, + { + "epoch": 0.5088481556913329, + "grad_norm": 0.5872003958509445, + "learning_rate": 5.100403430627591e-06, + "loss": 0.3212, + "step": 11243 + }, + { + "epoch": 0.5088934147997285, + "grad_norm": 0.315392723450212, + "learning_rate": 5.099670656317279e-06, + "loss": 0.5003, + "step": 11244 + }, + { + "epoch": 0.508938673908124, + "grad_norm": 0.6502171119613663, + "learning_rate": 5.098937879865352e-06, + "loss": 0.3016, + "step": 11245 + }, + { + "epoch": 0.5089839330165196, + "grad_norm": 0.6229935855299926, + "learning_rate": 5.098205101287554e-06, + "loss": 0.3329, + "step": 11246 + }, + { + "epoch": 0.5090291921249152, + "grad_norm": 0.6350228907401154, + "learning_rate": 5.09747232059963e-06, + "loss": 0.3536, + "step": 11247 + }, + { + "epoch": 0.5090744512333107, + "grad_norm": 0.5996675389445115, + "learning_rate": 5.096739537817324e-06, + "loss": 0.3211, + "step": 11248 + }, + { + "epoch": 0.5091197103417062, + "grad_norm": 0.28875990227643683, + "learning_rate": 5.096006752956383e-06, + "loss": 0.4433, + "step": 11249 + }, + { + "epoch": 0.5091649694501018, + "grad_norm": 0.6452691574848929, + "learning_rate": 5.09527396603255e-06, + "loss": 0.3585, + "step": 11250 + }, + { + "epoch": 0.5092102285584974, + "grad_norm": 0.6289696919418744, + "learning_rate": 5.094541177061575e-06, + "loss": 0.3045, + "step": 11251 + }, + { + "epoch": 0.509255487666893, + "grad_norm": 0.6431979381717381, + "learning_rate": 5.093808386059199e-06, + "loss": 0.3139, + "step": 11252 + }, + { + "epoch": 0.5093007467752886, + "grad_norm": 0.3145776248691224, + "learning_rate": 5.093075593041169e-06, + "loss": 0.4865, + "step": 11253 + }, + { + "epoch": 0.5093460058836841, + "grad_norm": 0.6125843389798492, + "learning_rate": 5.092342798023231e-06, + "loss": 0.2992, + "step": 11254 + }, + { + "epoch": 0.5093912649920797, + "grad_norm": 0.6316823394050614, + "learning_rate": 5.09161000102113e-06, + "loss": 0.3389, + "step": 11255 + }, + { + "epoch": 0.5094365241004752, + "grad_norm": 0.6598121599800412, + "learning_rate": 5.09087720205061e-06, + "loss": 0.3666, + "step": 11256 + }, + { + "epoch": 0.5094817832088708, + "grad_norm": 0.28874403727307535, + "learning_rate": 5.09014440112742e-06, + "loss": 0.4977, + "step": 11257 + }, + { + "epoch": 0.5095270423172663, + "grad_norm": 0.583549177944878, + "learning_rate": 5.089411598267301e-06, + "loss": 0.308, + "step": 11258 + }, + { + "epoch": 0.5095723014256619, + "grad_norm": 0.6404139081606993, + "learning_rate": 5.0886787934860035e-06, + "loss": 0.3034, + "step": 11259 + }, + { + "epoch": 0.5096175605340575, + "grad_norm": 0.7182314766797886, + "learning_rate": 5.087945986799271e-06, + "loss": 0.3312, + "step": 11260 + }, + { + "epoch": 0.5096628196424531, + "grad_norm": 0.575133981644903, + "learning_rate": 5.087213178222849e-06, + "loss": 0.3477, + "step": 11261 + }, + { + "epoch": 0.5097080787508486, + "grad_norm": 0.6096373866619839, + "learning_rate": 5.086480367772483e-06, + "loss": 0.3013, + "step": 11262 + }, + { + "epoch": 0.5097533378592441, + "grad_norm": 0.6335065629864226, + "learning_rate": 5.085747555463921e-06, + "loss": 0.3047, + "step": 11263 + }, + { + "epoch": 0.5097985969676397, + "grad_norm": 0.7209290500060698, + "learning_rate": 5.0850147413129054e-06, + "loss": 0.34, + "step": 11264 + }, + { + "epoch": 0.5098438560760353, + "grad_norm": 0.5751831416002953, + "learning_rate": 5.084281925335186e-06, + "loss": 0.3312, + "step": 11265 + }, + { + "epoch": 0.5098891151844309, + "grad_norm": 0.5940786723235196, + "learning_rate": 5.083549107546505e-06, + "loss": 0.33, + "step": 11266 + }, + { + "epoch": 0.5099343742928264, + "grad_norm": 0.5952467247815935, + "learning_rate": 5.082816287962612e-06, + "loss": 0.3349, + "step": 11267 + }, + { + "epoch": 0.509979633401222, + "grad_norm": 0.6749626366271329, + "learning_rate": 5.08208346659925e-06, + "loss": 0.3127, + "step": 11268 + }, + { + "epoch": 0.5100248925096176, + "grad_norm": 0.6412464993754627, + "learning_rate": 5.0813506434721675e-06, + "loss": 0.3224, + "step": 11269 + }, + { + "epoch": 0.5100701516180132, + "grad_norm": 0.6567906439007134, + "learning_rate": 5.080617818597109e-06, + "loss": 0.333, + "step": 11270 + }, + { + "epoch": 0.5101154107264086, + "grad_norm": 0.6648483420021117, + "learning_rate": 5.07988499198982e-06, + "loss": 0.305, + "step": 11271 + }, + { + "epoch": 0.5101606698348042, + "grad_norm": 0.5562962246107327, + "learning_rate": 5.07915216366605e-06, + "loss": 0.3293, + "step": 11272 + }, + { + "epoch": 0.5102059289431998, + "grad_norm": 0.6539207385618393, + "learning_rate": 5.078419333641542e-06, + "loss": 0.3272, + "step": 11273 + }, + { + "epoch": 0.5102511880515954, + "grad_norm": 0.5975765423928, + "learning_rate": 5.0776865019320435e-06, + "loss": 0.3564, + "step": 11274 + }, + { + "epoch": 0.5102964471599909, + "grad_norm": 0.6711899726697937, + "learning_rate": 5.0769536685533005e-06, + "loss": 0.3093, + "step": 11275 + }, + { + "epoch": 0.5103417062683865, + "grad_norm": 0.6561532773126665, + "learning_rate": 5.07622083352106e-06, + "loss": 0.3185, + "step": 11276 + }, + { + "epoch": 0.5103869653767821, + "grad_norm": 0.6482268957086683, + "learning_rate": 5.075487996851067e-06, + "loss": 0.3302, + "step": 11277 + }, + { + "epoch": 0.5104322244851777, + "grad_norm": 0.6693747821770387, + "learning_rate": 5.074755158559071e-06, + "loss": 0.3065, + "step": 11278 + }, + { + "epoch": 0.5104774835935733, + "grad_norm": 0.3465167500866455, + "learning_rate": 5.074022318660813e-06, + "loss": 0.4807, + "step": 11279 + }, + { + "epoch": 0.5105227427019687, + "grad_norm": 0.6136755901790223, + "learning_rate": 5.073289477172045e-06, + "loss": 0.321, + "step": 11280 + }, + { + "epoch": 0.5105680018103643, + "grad_norm": 0.6950315561088651, + "learning_rate": 5.072556634108511e-06, + "loss": 0.3116, + "step": 11281 + }, + { + "epoch": 0.5106132609187599, + "grad_norm": 0.629696999529083, + "learning_rate": 5.0718237894859564e-06, + "loss": 0.3314, + "step": 11282 + }, + { + "epoch": 0.5106585200271555, + "grad_norm": 0.32162850665106396, + "learning_rate": 5.0710909433201305e-06, + "loss": 0.4862, + "step": 11283 + }, + { + "epoch": 0.510703779135551, + "grad_norm": 0.6296288455006523, + "learning_rate": 5.07035809562678e-06, + "loss": 0.3205, + "step": 11284 + }, + { + "epoch": 0.5107490382439466, + "grad_norm": 0.6243151764913002, + "learning_rate": 5.069625246421646e-06, + "loss": 0.328, + "step": 11285 + }, + { + "epoch": 0.5107942973523422, + "grad_norm": 0.6881553488129036, + "learning_rate": 5.068892395720482e-06, + "loss": 0.3147, + "step": 11286 + }, + { + "epoch": 0.5108395564607378, + "grad_norm": 0.6794158457913292, + "learning_rate": 5.068159543539031e-06, + "loss": 0.3508, + "step": 11287 + }, + { + "epoch": 0.5108848155691332, + "grad_norm": 0.679924159439684, + "learning_rate": 5.067426689893043e-06, + "loss": 0.3202, + "step": 11288 + }, + { + "epoch": 0.5109300746775288, + "grad_norm": 0.5821612922899742, + "learning_rate": 5.0666938347982595e-06, + "loss": 0.3471, + "step": 11289 + }, + { + "epoch": 0.5109753337859244, + "grad_norm": 0.29824764368092205, + "learning_rate": 5.065960978270432e-06, + "loss": 0.471, + "step": 11290 + }, + { + "epoch": 0.51102059289432, + "grad_norm": 0.8062790804593769, + "learning_rate": 5.065228120325305e-06, + "loss": 0.3381, + "step": 11291 + }, + { + "epoch": 0.5110658520027156, + "grad_norm": 0.8877515178860867, + "learning_rate": 5.064495260978627e-06, + "loss": 0.3005, + "step": 11292 + }, + { + "epoch": 0.5111111111111111, + "grad_norm": 0.3518551067373952, + "learning_rate": 5.063762400246142e-06, + "loss": 0.4749, + "step": 11293 + }, + { + "epoch": 0.5111563702195067, + "grad_norm": 0.5545536333545275, + "learning_rate": 5.0630295381436024e-06, + "loss": 0.3357, + "step": 11294 + }, + { + "epoch": 0.5112016293279023, + "grad_norm": 0.6323065540402106, + "learning_rate": 5.0622966746867474e-06, + "loss": 0.3143, + "step": 11295 + }, + { + "epoch": 0.5112468884362978, + "grad_norm": 0.6076090835310118, + "learning_rate": 5.061563809891331e-06, + "loss": 0.3411, + "step": 11296 + }, + { + "epoch": 0.5112921475446933, + "grad_norm": 0.653226132656761, + "learning_rate": 5.060830943773096e-06, + "loss": 0.342, + "step": 11297 + }, + { + "epoch": 0.5113374066530889, + "grad_norm": 0.6571716045609006, + "learning_rate": 5.060098076347793e-06, + "loss": 0.3033, + "step": 11298 + }, + { + "epoch": 0.5113826657614845, + "grad_norm": 0.7121177366376688, + "learning_rate": 5.059365207631164e-06, + "loss": 0.3646, + "step": 11299 + }, + { + "epoch": 0.5114279248698801, + "grad_norm": 0.6071299612800266, + "learning_rate": 5.05863233763896e-06, + "loss": 0.316, + "step": 11300 + }, + { + "epoch": 0.5114731839782757, + "grad_norm": 0.6936881305190062, + "learning_rate": 5.057899466386927e-06, + "loss": 0.3396, + "step": 11301 + }, + { + "epoch": 0.5115184430866712, + "grad_norm": 0.6665339230126445, + "learning_rate": 5.057166593890813e-06, + "loss": 0.3067, + "step": 11302 + }, + { + "epoch": 0.5115637021950667, + "grad_norm": 0.6278926273373675, + "learning_rate": 5.056433720166365e-06, + "loss": 0.3372, + "step": 11303 + }, + { + "epoch": 0.5116089613034623, + "grad_norm": 0.7060565458924746, + "learning_rate": 5.0557008452293275e-06, + "loss": 0.3061, + "step": 11304 + }, + { + "epoch": 0.5116542204118579, + "grad_norm": 0.5859283629350893, + "learning_rate": 5.054967969095453e-06, + "loss": 0.2859, + "step": 11305 + }, + { + "epoch": 0.5116994795202534, + "grad_norm": 0.624192608388278, + "learning_rate": 5.054235091780483e-06, + "loss": 0.3336, + "step": 11306 + }, + { + "epoch": 0.511744738628649, + "grad_norm": 0.6051449201263233, + "learning_rate": 5.0535022133001684e-06, + "loss": 0.3499, + "step": 11307 + }, + { + "epoch": 0.5117899977370446, + "grad_norm": 0.33856949948383025, + "learning_rate": 5.052769333670255e-06, + "loss": 0.4626, + "step": 11308 + }, + { + "epoch": 0.5118352568454402, + "grad_norm": 0.3127064522869994, + "learning_rate": 5.052036452906493e-06, + "loss": 0.4817, + "step": 11309 + }, + { + "epoch": 0.5118805159538357, + "grad_norm": 0.692213222529521, + "learning_rate": 5.051303571024625e-06, + "loss": 0.2916, + "step": 11310 + }, + { + "epoch": 0.5119257750622312, + "grad_norm": 0.6157695962818099, + "learning_rate": 5.050570688040402e-06, + "loss": 0.3441, + "step": 11311 + }, + { + "epoch": 0.5119710341706268, + "grad_norm": 0.703509672456279, + "learning_rate": 5.0498378039695685e-06, + "loss": 0.3353, + "step": 11312 + }, + { + "epoch": 0.5120162932790224, + "grad_norm": 0.3098488163330391, + "learning_rate": 5.0491049188278755e-06, + "loss": 0.4974, + "step": 11313 + }, + { + "epoch": 0.512061552387418, + "grad_norm": 0.6039855223359988, + "learning_rate": 5.048372032631067e-06, + "loss": 0.2952, + "step": 11314 + }, + { + "epoch": 0.5121068114958135, + "grad_norm": 0.6613158443050209, + "learning_rate": 5.047639145394895e-06, + "loss": 0.314, + "step": 11315 + }, + { + "epoch": 0.5121520706042091, + "grad_norm": 0.6614902158701181, + "learning_rate": 5.0469062571351e-06, + "loss": 0.3231, + "step": 11316 + }, + { + "epoch": 0.5121973297126047, + "grad_norm": 0.9304745086510386, + "learning_rate": 5.046173367867438e-06, + "loss": 0.3198, + "step": 11317 + }, + { + "epoch": 0.5122425888210003, + "grad_norm": 0.5757091533057657, + "learning_rate": 5.045440477607649e-06, + "loss": 0.2927, + "step": 11318 + }, + { + "epoch": 0.5122878479293957, + "grad_norm": 0.6358379134113163, + "learning_rate": 5.0447075863714845e-06, + "loss": 0.3387, + "step": 11319 + }, + { + "epoch": 0.5123331070377913, + "grad_norm": 0.6245896771537696, + "learning_rate": 5.0439746941746914e-06, + "loss": 0.3569, + "step": 11320 + }, + { + "epoch": 0.5123783661461869, + "grad_norm": 0.5986751300931925, + "learning_rate": 5.043241801033016e-06, + "loss": 0.3068, + "step": 11321 + }, + { + "epoch": 0.5124236252545825, + "grad_norm": 0.6944406862173468, + "learning_rate": 5.0425089069622094e-06, + "loss": 0.3433, + "step": 11322 + }, + { + "epoch": 0.512468884362978, + "grad_norm": 0.6517562624227294, + "learning_rate": 5.041776011978016e-06, + "loss": 0.3522, + "step": 11323 + }, + { + "epoch": 0.5125141434713736, + "grad_norm": 0.6062106577965188, + "learning_rate": 5.041043116096184e-06, + "loss": 0.3264, + "step": 11324 + }, + { + "epoch": 0.5125594025797692, + "grad_norm": 0.6044463216389774, + "learning_rate": 5.040310219332462e-06, + "loss": 0.282, + "step": 11325 + }, + { + "epoch": 0.5126046616881648, + "grad_norm": 0.6393686203566962, + "learning_rate": 5.039577321702597e-06, + "loss": 0.3062, + "step": 11326 + }, + { + "epoch": 0.5126499207965604, + "grad_norm": 0.6003692003449826, + "learning_rate": 5.038844423222337e-06, + "loss": 0.2901, + "step": 11327 + }, + { + "epoch": 0.5126951799049558, + "grad_norm": 0.588872996754063, + "learning_rate": 5.038111523907429e-06, + "loss": 0.2987, + "step": 11328 + }, + { + "epoch": 0.5127404390133514, + "grad_norm": 0.4081869555533952, + "learning_rate": 5.037378623773622e-06, + "loss": 0.4625, + "step": 11329 + }, + { + "epoch": 0.512785698121747, + "grad_norm": 0.5975544563016666, + "learning_rate": 5.0366457228366625e-06, + "loss": 0.3555, + "step": 11330 + }, + { + "epoch": 0.5128309572301426, + "grad_norm": 0.6545036025728562, + "learning_rate": 5.0359128211123e-06, + "loss": 0.3226, + "step": 11331 + }, + { + "epoch": 0.5128762163385381, + "grad_norm": 0.6590359641589497, + "learning_rate": 5.03517991861628e-06, + "loss": 0.3669, + "step": 11332 + }, + { + "epoch": 0.5129214754469337, + "grad_norm": 0.6099890479128197, + "learning_rate": 5.0344470153643525e-06, + "loss": 0.3039, + "step": 11333 + }, + { + "epoch": 0.5129667345553293, + "grad_norm": 0.28940934663786255, + "learning_rate": 5.033714111372264e-06, + "loss": 0.4761, + "step": 11334 + }, + { + "epoch": 0.5130119936637249, + "grad_norm": 0.27877054420668457, + "learning_rate": 5.0329812066557625e-06, + "loss": 0.4659, + "step": 11335 + }, + { + "epoch": 0.5130572527721204, + "grad_norm": 0.6698297968837902, + "learning_rate": 5.032248301230598e-06, + "loss": 0.3442, + "step": 11336 + }, + { + "epoch": 0.5131025118805159, + "grad_norm": 0.6981877078120617, + "learning_rate": 5.031515395112514e-06, + "loss": 0.3132, + "step": 11337 + }, + { + "epoch": 0.5131477709889115, + "grad_norm": 0.29108841447013006, + "learning_rate": 5.030782488317264e-06, + "loss": 0.4765, + "step": 11338 + }, + { + "epoch": 0.5131930300973071, + "grad_norm": 0.6216874067123067, + "learning_rate": 5.0300495808605905e-06, + "loss": 0.3438, + "step": 11339 + }, + { + "epoch": 0.5132382892057027, + "grad_norm": 0.6140580587652936, + "learning_rate": 5.029316672758244e-06, + "loss": 0.295, + "step": 11340 + }, + { + "epoch": 0.5132835483140982, + "grad_norm": 0.6596484104592925, + "learning_rate": 5.028583764025973e-06, + "loss": 0.3516, + "step": 11341 + }, + { + "epoch": 0.5133288074224938, + "grad_norm": 0.3130909373251786, + "learning_rate": 5.027850854679525e-06, + "loss": 0.5116, + "step": 11342 + }, + { + "epoch": 0.5133740665308894, + "grad_norm": 0.2835968055163799, + "learning_rate": 5.0271179447346465e-06, + "loss": 0.4425, + "step": 11343 + }, + { + "epoch": 0.5134193256392849, + "grad_norm": 0.7193538124059337, + "learning_rate": 5.026385034207087e-06, + "loss": 0.3356, + "step": 11344 + }, + { + "epoch": 0.5134645847476804, + "grad_norm": 0.5762159401245821, + "learning_rate": 5.0256521231125945e-06, + "loss": 0.3215, + "step": 11345 + }, + { + "epoch": 0.513509843856076, + "grad_norm": 0.27371331037951185, + "learning_rate": 5.024919211466916e-06, + "loss": 0.457, + "step": 11346 + }, + { + "epoch": 0.5135551029644716, + "grad_norm": 0.6497819866175213, + "learning_rate": 5.024186299285801e-06, + "loss": 0.3389, + "step": 11347 + }, + { + "epoch": 0.5136003620728672, + "grad_norm": 0.5756216408873701, + "learning_rate": 5.023453386584997e-06, + "loss": 0.3606, + "step": 11348 + }, + { + "epoch": 0.5136456211812628, + "grad_norm": 0.581163075070484, + "learning_rate": 5.02272047338025e-06, + "loss": 0.3028, + "step": 11349 + }, + { + "epoch": 0.5136908802896583, + "grad_norm": 0.5916833302080331, + "learning_rate": 5.021987559687311e-06, + "loss": 0.3246, + "step": 11350 + }, + { + "epoch": 0.5137361393980538, + "grad_norm": 0.6405202372136433, + "learning_rate": 5.021254645521927e-06, + "loss": 0.3248, + "step": 11351 + }, + { + "epoch": 0.5137813985064494, + "grad_norm": 0.5926543425677399, + "learning_rate": 5.020521730899846e-06, + "loss": 0.2805, + "step": 11352 + }, + { + "epoch": 0.513826657614845, + "grad_norm": 0.676497412921749, + "learning_rate": 5.019788815836816e-06, + "loss": 0.3524, + "step": 11353 + }, + { + "epoch": 0.5138719167232405, + "grad_norm": 0.6304341978323126, + "learning_rate": 5.019055900348584e-06, + "loss": 0.4115, + "step": 11354 + }, + { + "epoch": 0.5139171758316361, + "grad_norm": 0.5920342969027814, + "learning_rate": 5.018322984450902e-06, + "loss": 0.3192, + "step": 11355 + }, + { + "epoch": 0.5139624349400317, + "grad_norm": 0.6092487652218684, + "learning_rate": 5.0175900681595116e-06, + "loss": 0.3172, + "step": 11356 + }, + { + "epoch": 0.5140076940484273, + "grad_norm": 0.6443659421278713, + "learning_rate": 5.016857151490167e-06, + "loss": 0.352, + "step": 11357 + }, + { + "epoch": 0.5140529531568228, + "grad_norm": 0.6284718857153448, + "learning_rate": 5.016124234458612e-06, + "loss": 0.3265, + "step": 11358 + }, + { + "epoch": 0.5140982122652183, + "grad_norm": 0.3664084196581059, + "learning_rate": 5.0153913170806e-06, + "loss": 0.4694, + "step": 11359 + }, + { + "epoch": 0.5141434713736139, + "grad_norm": 0.6508373755946546, + "learning_rate": 5.0146583993718746e-06, + "loss": 0.3564, + "step": 11360 + }, + { + "epoch": 0.5141887304820095, + "grad_norm": 0.7032696127025374, + "learning_rate": 5.013925481348184e-06, + "loss": 0.3143, + "step": 11361 + }, + { + "epoch": 0.5142339895904051, + "grad_norm": 0.6514752461003346, + "learning_rate": 5.013192563025279e-06, + "loss": 0.335, + "step": 11362 + }, + { + "epoch": 0.5142792486988006, + "grad_norm": 0.6165901346699098, + "learning_rate": 5.012459644418905e-06, + "loss": 0.3743, + "step": 11363 + }, + { + "epoch": 0.5143245078071962, + "grad_norm": 0.6550129626238077, + "learning_rate": 5.0117267255448125e-06, + "loss": 0.3444, + "step": 11364 + }, + { + "epoch": 0.5143697669155918, + "grad_norm": 0.6607140941538844, + "learning_rate": 5.010993806418749e-06, + "loss": 0.3336, + "step": 11365 + }, + { + "epoch": 0.5144150260239874, + "grad_norm": 0.6011220664787138, + "learning_rate": 5.010260887056461e-06, + "loss": 0.3137, + "step": 11366 + }, + { + "epoch": 0.5144602851323828, + "grad_norm": 0.7794734661688585, + "learning_rate": 5.0095279674736985e-06, + "loss": 0.2907, + "step": 11367 + }, + { + "epoch": 0.5145055442407784, + "grad_norm": 0.6761020044403644, + "learning_rate": 5.00879504768621e-06, + "loss": 0.359, + "step": 11368 + }, + { + "epoch": 0.514550803349174, + "grad_norm": 0.3171163849441446, + "learning_rate": 5.0080621277097415e-06, + "loss": 0.4823, + "step": 11369 + }, + { + "epoch": 0.5145960624575696, + "grad_norm": 0.6345630972622137, + "learning_rate": 5.007329207560045e-06, + "loss": 0.3481, + "step": 11370 + }, + { + "epoch": 0.5146413215659652, + "grad_norm": 0.6281598512262935, + "learning_rate": 5.006596287252864e-06, + "loss": 0.3116, + "step": 11371 + }, + { + "epoch": 0.5146865806743607, + "grad_norm": 0.5969350370380021, + "learning_rate": 5.005863366803949e-06, + "loss": 0.3117, + "step": 11372 + }, + { + "epoch": 0.5147318397827563, + "grad_norm": 0.3021692952870953, + "learning_rate": 5.005130446229051e-06, + "loss": 0.4937, + "step": 11373 + }, + { + "epoch": 0.5147770988911519, + "grad_norm": 0.6542062893315373, + "learning_rate": 5.004397525543912e-06, + "loss": 0.3018, + "step": 11374 + }, + { + "epoch": 0.5148223579995475, + "grad_norm": 0.6304255438734256, + "learning_rate": 5.003664604764287e-06, + "loss": 0.3452, + "step": 11375 + }, + { + "epoch": 0.5148676171079429, + "grad_norm": 0.6261344215797569, + "learning_rate": 5.0029316839059185e-06, + "loss": 0.3184, + "step": 11376 + }, + { + "epoch": 0.5149128762163385, + "grad_norm": 0.6652000712960027, + "learning_rate": 5.002198762984558e-06, + "loss": 0.3365, + "step": 11377 + }, + { + "epoch": 0.5149581353247341, + "grad_norm": 0.6554706239445697, + "learning_rate": 5.001465842015952e-06, + "loss": 0.3421, + "step": 11378 + }, + { + "epoch": 0.5150033944331297, + "grad_norm": 0.6191293900481116, + "learning_rate": 5.00073292101585e-06, + "loss": 0.3314, + "step": 11379 + }, + { + "epoch": 0.5150486535415252, + "grad_norm": 0.6021614928686941, + "learning_rate": 5e-06, + "loss": 0.3071, + "step": 11380 + }, + { + "epoch": 0.5150939126499208, + "grad_norm": 0.6004614620135654, + "learning_rate": 4.999267078984151e-06, + "loss": 0.3469, + "step": 11381 + }, + { + "epoch": 0.5151391717583164, + "grad_norm": 0.6382424917561594, + "learning_rate": 4.9985341579840505e-06, + "loss": 0.2796, + "step": 11382 + }, + { + "epoch": 0.515184430866712, + "grad_norm": 0.3659738748625842, + "learning_rate": 4.997801237015443e-06, + "loss": 0.5161, + "step": 11383 + }, + { + "epoch": 0.5152296899751075, + "grad_norm": 0.3201373508939831, + "learning_rate": 4.997068316094082e-06, + "loss": 0.4659, + "step": 11384 + }, + { + "epoch": 0.515274949083503, + "grad_norm": 0.6206303491788823, + "learning_rate": 4.996335395235715e-06, + "loss": 0.3298, + "step": 11385 + }, + { + "epoch": 0.5153202081918986, + "grad_norm": 0.27895766028848795, + "learning_rate": 4.9956024744560895e-06, + "loss": 0.4794, + "step": 11386 + }, + { + "epoch": 0.5153654673002942, + "grad_norm": 0.679592436620248, + "learning_rate": 4.994869553770951e-06, + "loss": 0.3421, + "step": 11387 + }, + { + "epoch": 0.5154107264086898, + "grad_norm": 0.28995181663669395, + "learning_rate": 4.99413663319605e-06, + "loss": 0.4615, + "step": 11388 + }, + { + "epoch": 0.5154559855170853, + "grad_norm": 0.6250008664608733, + "learning_rate": 4.9934037127471375e-06, + "loss": 0.3421, + "step": 11389 + }, + { + "epoch": 0.5155012446254809, + "grad_norm": 0.6482942267558705, + "learning_rate": 4.992670792439958e-06, + "loss": 0.3616, + "step": 11390 + }, + { + "epoch": 0.5155465037338764, + "grad_norm": 0.6523165173209333, + "learning_rate": 4.9919378722902585e-06, + "loss": 0.366, + "step": 11391 + }, + { + "epoch": 0.515591762842272, + "grad_norm": 0.6858262611847109, + "learning_rate": 4.991204952313793e-06, + "loss": 0.3269, + "step": 11392 + }, + { + "epoch": 0.5156370219506675, + "grad_norm": 0.6718800704422592, + "learning_rate": 4.990472032526302e-06, + "loss": 0.3338, + "step": 11393 + }, + { + "epoch": 0.5156822810590631, + "grad_norm": 0.6642519794348746, + "learning_rate": 4.98973911294354e-06, + "loss": 0.3295, + "step": 11394 + }, + { + "epoch": 0.5157275401674587, + "grad_norm": 0.9539961494847039, + "learning_rate": 4.989006193581254e-06, + "loss": 0.3274, + "step": 11395 + }, + { + "epoch": 0.5157727992758543, + "grad_norm": 0.7514131009659355, + "learning_rate": 4.98827327445519e-06, + "loss": 0.3493, + "step": 11396 + }, + { + "epoch": 0.5158180583842499, + "grad_norm": 0.5885314984600968, + "learning_rate": 4.987540355581095e-06, + "loss": 0.303, + "step": 11397 + }, + { + "epoch": 0.5158633174926454, + "grad_norm": 0.6082433375446966, + "learning_rate": 4.986807436974723e-06, + "loss": 0.3593, + "step": 11398 + }, + { + "epoch": 0.515908576601041, + "grad_norm": 0.6451130905415389, + "learning_rate": 4.986074518651817e-06, + "loss": 0.3339, + "step": 11399 + }, + { + "epoch": 0.5159538357094365, + "grad_norm": 0.5772202526007606, + "learning_rate": 4.985341600628127e-06, + "loss": 0.3196, + "step": 11400 + }, + { + "epoch": 0.5159990948178321, + "grad_norm": 0.5913749203713625, + "learning_rate": 4.984608682919402e-06, + "loss": 0.3233, + "step": 11401 + }, + { + "epoch": 0.5160443539262276, + "grad_norm": 0.3501031557695471, + "learning_rate": 4.983875765541389e-06, + "loss": 0.4645, + "step": 11402 + }, + { + "epoch": 0.5160896130346232, + "grad_norm": 0.6909893434416505, + "learning_rate": 4.9831428485098336e-06, + "loss": 0.2862, + "step": 11403 + }, + { + "epoch": 0.5161348721430188, + "grad_norm": 0.6190287034727685, + "learning_rate": 4.982409931840489e-06, + "loss": 0.2947, + "step": 11404 + }, + { + "epoch": 0.5161801312514144, + "grad_norm": 0.608323485565074, + "learning_rate": 4.981677015549101e-06, + "loss": 0.3384, + "step": 11405 + }, + { + "epoch": 0.51622539035981, + "grad_norm": 0.2996223595119399, + "learning_rate": 4.9809440996514175e-06, + "loss": 0.4672, + "step": 11406 + }, + { + "epoch": 0.5162706494682054, + "grad_norm": 0.6549238199747524, + "learning_rate": 4.980211184163185e-06, + "loss": 0.4021, + "step": 11407 + }, + { + "epoch": 0.516315908576601, + "grad_norm": 0.545305893167641, + "learning_rate": 4.979478269100156e-06, + "loss": 0.301, + "step": 11408 + }, + { + "epoch": 0.5163611676849966, + "grad_norm": 0.5946659894475338, + "learning_rate": 4.978745354478074e-06, + "loss": 0.3317, + "step": 11409 + }, + { + "epoch": 0.5164064267933922, + "grad_norm": 0.34696223925319947, + "learning_rate": 4.97801244031269e-06, + "loss": 0.5026, + "step": 11410 + }, + { + "epoch": 0.5164516859017877, + "grad_norm": 0.5886503604054116, + "learning_rate": 4.977279526619752e-06, + "loss": 0.3588, + "step": 11411 + }, + { + "epoch": 0.5164969450101833, + "grad_norm": 0.6532625865775712, + "learning_rate": 4.976546613415005e-06, + "loss": 0.3335, + "step": 11412 + }, + { + "epoch": 0.5165422041185789, + "grad_norm": 0.5755714762624607, + "learning_rate": 4.9758137007141996e-06, + "loss": 0.3029, + "step": 11413 + }, + { + "epoch": 0.5165874632269745, + "grad_norm": 0.6314543801653296, + "learning_rate": 4.975080788533086e-06, + "loss": 0.2644, + "step": 11414 + }, + { + "epoch": 0.5166327223353699, + "grad_norm": 0.6535819873033479, + "learning_rate": 4.974347876887408e-06, + "loss": 0.3214, + "step": 11415 + }, + { + "epoch": 0.5166779814437655, + "grad_norm": 0.6748569895089654, + "learning_rate": 4.9736149657929136e-06, + "loss": 0.3325, + "step": 11416 + }, + { + "epoch": 0.5167232405521611, + "grad_norm": 0.5890510812062223, + "learning_rate": 4.972882055265354e-06, + "loss": 0.2784, + "step": 11417 + }, + { + "epoch": 0.5167684996605567, + "grad_norm": 0.669667646454698, + "learning_rate": 4.9721491453204775e-06, + "loss": 0.3645, + "step": 11418 + }, + { + "epoch": 0.5168137587689523, + "grad_norm": 0.6469370141276698, + "learning_rate": 4.971416235974029e-06, + "loss": 0.356, + "step": 11419 + }, + { + "epoch": 0.5168590178773478, + "grad_norm": 0.28727861727393994, + "learning_rate": 4.970683327241756e-06, + "loss": 0.4433, + "step": 11420 + }, + { + "epoch": 0.5169042769857434, + "grad_norm": 0.5721570053047997, + "learning_rate": 4.969950419139412e-06, + "loss": 0.3127, + "step": 11421 + }, + { + "epoch": 0.516949536094139, + "grad_norm": 0.6452400902352549, + "learning_rate": 4.969217511682738e-06, + "loss": 0.2943, + "step": 11422 + }, + { + "epoch": 0.5169947952025346, + "grad_norm": 0.6123018177339106, + "learning_rate": 4.968484604887486e-06, + "loss": 0.3301, + "step": 11423 + }, + { + "epoch": 0.51704005431093, + "grad_norm": 0.6869000275438506, + "learning_rate": 4.967751698769404e-06, + "loss": 0.3226, + "step": 11424 + }, + { + "epoch": 0.5170853134193256, + "grad_norm": 0.33237830928716483, + "learning_rate": 4.967018793344238e-06, + "loss": 0.4798, + "step": 11425 + }, + { + "epoch": 0.5171305725277212, + "grad_norm": 0.6681644421207563, + "learning_rate": 4.966285888627737e-06, + "loss": 0.3272, + "step": 11426 + }, + { + "epoch": 0.5171758316361168, + "grad_norm": 0.35537307589594674, + "learning_rate": 4.965552984635649e-06, + "loss": 0.4678, + "step": 11427 + }, + { + "epoch": 0.5172210907445123, + "grad_norm": 0.5954443500581541, + "learning_rate": 4.964820081383721e-06, + "loss": 0.3503, + "step": 11428 + }, + { + "epoch": 0.5172663498529079, + "grad_norm": 1.2948284935381316, + "learning_rate": 4.964087178887702e-06, + "loss": 0.3322, + "step": 11429 + }, + { + "epoch": 0.5173116089613035, + "grad_norm": 0.6466216348525649, + "learning_rate": 4.9633542771633374e-06, + "loss": 0.3744, + "step": 11430 + }, + { + "epoch": 0.517356868069699, + "grad_norm": 0.28981058998753617, + "learning_rate": 4.96262137622638e-06, + "loss": 0.4401, + "step": 11431 + }, + { + "epoch": 0.5174021271780946, + "grad_norm": 0.6355869243233664, + "learning_rate": 4.961888476092572e-06, + "loss": 0.3396, + "step": 11432 + }, + { + "epoch": 0.5174473862864901, + "grad_norm": 0.669479187452769, + "learning_rate": 4.961155576777665e-06, + "loss": 0.3626, + "step": 11433 + }, + { + "epoch": 0.5174926453948857, + "grad_norm": 0.2818465609687323, + "learning_rate": 4.960422678297405e-06, + "loss": 0.4604, + "step": 11434 + }, + { + "epoch": 0.5175379045032813, + "grad_norm": 0.643082002501434, + "learning_rate": 4.959689780667541e-06, + "loss": 0.3208, + "step": 11435 + }, + { + "epoch": 0.5175831636116769, + "grad_norm": 0.6454671244405962, + "learning_rate": 4.958956883903816e-06, + "loss": 0.3216, + "step": 11436 + }, + { + "epoch": 0.5176284227200724, + "grad_norm": 0.6115329967832458, + "learning_rate": 4.958223988021986e-06, + "loss": 0.2993, + "step": 11437 + }, + { + "epoch": 0.517673681828468, + "grad_norm": 0.6792218491410067, + "learning_rate": 4.957491093037792e-06, + "loss": 0.3235, + "step": 11438 + }, + { + "epoch": 0.5177189409368635, + "grad_norm": 0.6469072805006693, + "learning_rate": 4.9567581989669846e-06, + "loss": 0.3097, + "step": 11439 + }, + { + "epoch": 0.5177642000452591, + "grad_norm": 0.5611885638407211, + "learning_rate": 4.956025305825311e-06, + "loss": 0.332, + "step": 11440 + }, + { + "epoch": 0.5178094591536547, + "grad_norm": 0.646053881191501, + "learning_rate": 4.955292413628517e-06, + "loss": 0.3216, + "step": 11441 + }, + { + "epoch": 0.5178547182620502, + "grad_norm": 0.6438452394996295, + "learning_rate": 4.954559522392353e-06, + "loss": 0.3362, + "step": 11442 + }, + { + "epoch": 0.5178999773704458, + "grad_norm": 0.5463017390785394, + "learning_rate": 4.953826632132565e-06, + "loss": 0.3116, + "step": 11443 + }, + { + "epoch": 0.5179452364788414, + "grad_norm": 0.6006738174475235, + "learning_rate": 4.953093742864901e-06, + "loss": 0.3027, + "step": 11444 + }, + { + "epoch": 0.517990495587237, + "grad_norm": 0.6338506106940345, + "learning_rate": 4.952360854605107e-06, + "loss": 0.3432, + "step": 11445 + }, + { + "epoch": 0.5180357546956325, + "grad_norm": 0.30322297477801935, + "learning_rate": 4.9516279673689325e-06, + "loss": 0.4782, + "step": 11446 + }, + { + "epoch": 0.518081013804028, + "grad_norm": 0.2954691434464442, + "learning_rate": 4.950895081172126e-06, + "loss": 0.4573, + "step": 11447 + }, + { + "epoch": 0.5181262729124236, + "grad_norm": 0.60676611158701, + "learning_rate": 4.950162196030432e-06, + "loss": 0.3375, + "step": 11448 + }, + { + "epoch": 0.5181715320208192, + "grad_norm": 0.6813301058139706, + "learning_rate": 4.949429311959599e-06, + "loss": 0.3341, + "step": 11449 + }, + { + "epoch": 0.5182167911292147, + "grad_norm": 0.6132195451458272, + "learning_rate": 4.948696428975378e-06, + "loss": 0.3329, + "step": 11450 + }, + { + "epoch": 0.5182620502376103, + "grad_norm": 0.3179752179157839, + "learning_rate": 4.94796354709351e-06, + "loss": 0.4948, + "step": 11451 + }, + { + "epoch": 0.5183073093460059, + "grad_norm": 0.6149764342268045, + "learning_rate": 4.947230666329746e-06, + "loss": 0.307, + "step": 11452 + }, + { + "epoch": 0.5183525684544015, + "grad_norm": 0.6185752635572527, + "learning_rate": 4.946497786699834e-06, + "loss": 0.3167, + "step": 11453 + }, + { + "epoch": 0.5183978275627971, + "grad_norm": 0.6235007636911877, + "learning_rate": 4.945764908219518e-06, + "loss": 0.3347, + "step": 11454 + }, + { + "epoch": 0.5184430866711925, + "grad_norm": 0.5803562587200566, + "learning_rate": 4.945032030904549e-06, + "loss": 0.3323, + "step": 11455 + }, + { + "epoch": 0.5184883457795881, + "grad_norm": 0.30145818078884856, + "learning_rate": 4.944299154770673e-06, + "loss": 0.4791, + "step": 11456 + }, + { + "epoch": 0.5185336048879837, + "grad_norm": 0.5993805071540821, + "learning_rate": 4.943566279833637e-06, + "loss": 0.3044, + "step": 11457 + }, + { + "epoch": 0.5185788639963793, + "grad_norm": 0.7045733996670277, + "learning_rate": 4.942833406109188e-06, + "loss": 0.334, + "step": 11458 + }, + { + "epoch": 0.5186241231047748, + "grad_norm": 0.7412287485525683, + "learning_rate": 4.942100533613073e-06, + "loss": 0.3033, + "step": 11459 + }, + { + "epoch": 0.5186693822131704, + "grad_norm": 0.6682742478151386, + "learning_rate": 4.9413676623610415e-06, + "loss": 0.3015, + "step": 11460 + }, + { + "epoch": 0.518714641321566, + "grad_norm": 0.6134757832433286, + "learning_rate": 4.940634792368838e-06, + "loss": 0.3295, + "step": 11461 + }, + { + "epoch": 0.5187599004299616, + "grad_norm": 0.5984181963560531, + "learning_rate": 4.93990192365221e-06, + "loss": 0.3094, + "step": 11462 + }, + { + "epoch": 0.518805159538357, + "grad_norm": 0.2826570298747315, + "learning_rate": 4.939169056226905e-06, + "loss": 0.4776, + "step": 11463 + }, + { + "epoch": 0.5188504186467526, + "grad_norm": 0.6281200564906452, + "learning_rate": 4.93843619010867e-06, + "loss": 0.3114, + "step": 11464 + }, + { + "epoch": 0.5188956777551482, + "grad_norm": 0.5984727108969583, + "learning_rate": 4.9377033253132525e-06, + "loss": 0.3136, + "step": 11465 + }, + { + "epoch": 0.5189409368635438, + "grad_norm": 0.2765346528439434, + "learning_rate": 4.936970461856401e-06, + "loss": 0.4897, + "step": 11466 + }, + { + "epoch": 0.5189861959719394, + "grad_norm": 0.5694173173865967, + "learning_rate": 4.9362375997538585e-06, + "loss": 0.2882, + "step": 11467 + }, + { + "epoch": 0.5190314550803349, + "grad_norm": 0.6698294625395641, + "learning_rate": 4.935504739021373e-06, + "loss": 0.33, + "step": 11468 + }, + { + "epoch": 0.5190767141887305, + "grad_norm": 0.6551708984371104, + "learning_rate": 4.934771879674697e-06, + "loss": 0.3066, + "step": 11469 + }, + { + "epoch": 0.5191219732971261, + "grad_norm": 0.6106802527555026, + "learning_rate": 4.9340390217295695e-06, + "loss": 0.3236, + "step": 11470 + }, + { + "epoch": 0.5191672324055217, + "grad_norm": 0.6727567886294621, + "learning_rate": 4.933306165201741e-06, + "loss": 0.359, + "step": 11471 + }, + { + "epoch": 0.5192124915139171, + "grad_norm": 0.6107305453605221, + "learning_rate": 4.93257331010696e-06, + "loss": 0.3012, + "step": 11472 + }, + { + "epoch": 0.5192577506223127, + "grad_norm": 0.6547948104154359, + "learning_rate": 4.93184045646097e-06, + "loss": 0.2876, + "step": 11473 + }, + { + "epoch": 0.5193030097307083, + "grad_norm": 0.7767733500126526, + "learning_rate": 4.9311076042795185e-06, + "loss": 0.401, + "step": 11474 + }, + { + "epoch": 0.5193482688391039, + "grad_norm": 0.6586598931865223, + "learning_rate": 4.9303747535783546e-06, + "loss": 0.3949, + "step": 11475 + }, + { + "epoch": 0.5193935279474995, + "grad_norm": 0.6023598627621121, + "learning_rate": 4.929641904373224e-06, + "loss": 0.3286, + "step": 11476 + }, + { + "epoch": 0.519438787055895, + "grad_norm": 0.6771772438763664, + "learning_rate": 4.928909056679871e-06, + "loss": 0.3372, + "step": 11477 + }, + { + "epoch": 0.5194840461642906, + "grad_norm": 0.6669304323314843, + "learning_rate": 4.9281762105140435e-06, + "loss": 0.3104, + "step": 11478 + }, + { + "epoch": 0.5195293052726861, + "grad_norm": 0.5836886978307488, + "learning_rate": 4.927443365891491e-06, + "loss": 0.3041, + "step": 11479 + }, + { + "epoch": 0.5195745643810817, + "grad_norm": 0.2876301646577038, + "learning_rate": 4.926710522827956e-06, + "loss": 0.4568, + "step": 11480 + }, + { + "epoch": 0.5196198234894772, + "grad_norm": 0.6165277494595637, + "learning_rate": 4.925977681339187e-06, + "loss": 0.3551, + "step": 11481 + }, + { + "epoch": 0.5196650825978728, + "grad_norm": 0.27093643070250045, + "learning_rate": 4.925244841440932e-06, + "loss": 0.4685, + "step": 11482 + }, + { + "epoch": 0.5197103417062684, + "grad_norm": 0.6405904609560221, + "learning_rate": 4.924512003148934e-06, + "loss": 0.3018, + "step": 11483 + }, + { + "epoch": 0.519755600814664, + "grad_norm": 0.9864860414307957, + "learning_rate": 4.923779166478941e-06, + "loss": 0.3549, + "step": 11484 + }, + { + "epoch": 0.5198008599230595, + "grad_norm": 0.6053341823481457, + "learning_rate": 4.923046331446701e-06, + "loss": 0.3307, + "step": 11485 + }, + { + "epoch": 0.5198461190314551, + "grad_norm": 0.687499422803569, + "learning_rate": 4.922313498067957e-06, + "loss": 0.4052, + "step": 11486 + }, + { + "epoch": 0.5198913781398506, + "grad_norm": 0.30206540097624307, + "learning_rate": 4.921580666358459e-06, + "loss": 0.5052, + "step": 11487 + }, + { + "epoch": 0.5199366372482462, + "grad_norm": 0.637064127625573, + "learning_rate": 4.92084783633395e-06, + "loss": 0.2951, + "step": 11488 + }, + { + "epoch": 0.5199818963566418, + "grad_norm": 0.6493410177667419, + "learning_rate": 4.92011500801018e-06, + "loss": 0.3268, + "step": 11489 + }, + { + "epoch": 0.5200271554650373, + "grad_norm": 0.2834604949324538, + "learning_rate": 4.919382181402892e-06, + "loss": 0.4844, + "step": 11490 + }, + { + "epoch": 0.5200724145734329, + "grad_norm": 1.102465837368673, + "learning_rate": 4.918649356527833e-06, + "loss": 0.4206, + "step": 11491 + }, + { + "epoch": 0.5201176736818285, + "grad_norm": 0.32046127818036063, + "learning_rate": 4.917916533400751e-06, + "loss": 0.4756, + "step": 11492 + }, + { + "epoch": 0.5201629327902241, + "grad_norm": 0.7355754043795567, + "learning_rate": 4.917183712037389e-06, + "loss": 0.3912, + "step": 11493 + }, + { + "epoch": 0.5202081918986196, + "grad_norm": 0.638127357125275, + "learning_rate": 4.916450892453495e-06, + "loss": 0.3091, + "step": 11494 + }, + { + "epoch": 0.5202534510070151, + "grad_norm": 0.6315955219941163, + "learning_rate": 4.915718074664816e-06, + "loss": 0.3264, + "step": 11495 + }, + { + "epoch": 0.5202987101154107, + "grad_norm": 0.7076413970554138, + "learning_rate": 4.914985258687096e-06, + "loss": 0.2954, + "step": 11496 + }, + { + "epoch": 0.5203439692238063, + "grad_norm": 0.27708411098994, + "learning_rate": 4.91425244453608e-06, + "loss": 0.4464, + "step": 11497 + }, + { + "epoch": 0.5203892283322018, + "grad_norm": 0.31059135604941385, + "learning_rate": 4.9135196322275195e-06, + "loss": 0.4828, + "step": 11498 + }, + { + "epoch": 0.5204344874405974, + "grad_norm": 0.6448738124679796, + "learning_rate": 4.912786821777152e-06, + "loss": 0.3207, + "step": 11499 + }, + { + "epoch": 0.520479746548993, + "grad_norm": 0.6242971855514119, + "learning_rate": 4.912054013200731e-06, + "loss": 0.3071, + "step": 11500 + }, + { + "epoch": 0.5205250056573886, + "grad_norm": 0.5925596570301539, + "learning_rate": 4.911321206513996e-06, + "loss": 0.32, + "step": 11501 + }, + { + "epoch": 0.5205702647657842, + "grad_norm": 0.5976703369179355, + "learning_rate": 4.9105884017327e-06, + "loss": 0.3359, + "step": 11502 + }, + { + "epoch": 0.5206155238741796, + "grad_norm": 0.6275887504151985, + "learning_rate": 4.9098555988725814e-06, + "loss": 0.3594, + "step": 11503 + }, + { + "epoch": 0.5206607829825752, + "grad_norm": 0.6165560172448781, + "learning_rate": 4.909122797949391e-06, + "loss": 0.3609, + "step": 11504 + }, + { + "epoch": 0.5207060420909708, + "grad_norm": 0.5923730829115844, + "learning_rate": 4.908389998978872e-06, + "loss": 0.2873, + "step": 11505 + }, + { + "epoch": 0.5207513011993664, + "grad_norm": 0.625363493919042, + "learning_rate": 4.90765720197677e-06, + "loss": 0.3318, + "step": 11506 + }, + { + "epoch": 0.5207965603077619, + "grad_norm": 0.6421525850337274, + "learning_rate": 4.9069244069588305e-06, + "loss": 0.3188, + "step": 11507 + }, + { + "epoch": 0.5208418194161575, + "grad_norm": 0.6784976499251623, + "learning_rate": 4.906191613940802e-06, + "loss": 0.2584, + "step": 11508 + }, + { + "epoch": 0.5208870785245531, + "grad_norm": 0.5921940899188074, + "learning_rate": 4.905458822938426e-06, + "loss": 0.3429, + "step": 11509 + }, + { + "epoch": 0.5209323376329487, + "grad_norm": 0.3467998770991752, + "learning_rate": 4.904726033967449e-06, + "loss": 0.4917, + "step": 11510 + }, + { + "epoch": 0.5209775967413443, + "grad_norm": 0.6696798074266498, + "learning_rate": 4.903993247043619e-06, + "loss": 0.2988, + "step": 11511 + }, + { + "epoch": 0.5210228558497397, + "grad_norm": 0.6357144820286588, + "learning_rate": 4.903260462182679e-06, + "loss": 0.3349, + "step": 11512 + }, + { + "epoch": 0.5210681149581353, + "grad_norm": 0.2981795596572952, + "learning_rate": 4.9025276794003715e-06, + "loss": 0.4921, + "step": 11513 + }, + { + "epoch": 0.5211133740665309, + "grad_norm": 0.2738371351530118, + "learning_rate": 4.901794898712448e-06, + "loss": 0.4528, + "step": 11514 + }, + { + "epoch": 0.5211586331749265, + "grad_norm": 0.6725567010375184, + "learning_rate": 4.901062120134649e-06, + "loss": 0.3219, + "step": 11515 + }, + { + "epoch": 0.521203892283322, + "grad_norm": 0.652312178444958, + "learning_rate": 4.900329343682722e-06, + "loss": 0.3185, + "step": 11516 + }, + { + "epoch": 0.5212491513917176, + "grad_norm": 0.30183426080952397, + "learning_rate": 4.899596569372409e-06, + "loss": 0.5009, + "step": 11517 + }, + { + "epoch": 0.5212944105001132, + "grad_norm": 0.6150731244812301, + "learning_rate": 4.898863797219461e-06, + "loss": 0.3348, + "step": 11518 + }, + { + "epoch": 0.5213396696085087, + "grad_norm": 0.6168255764535435, + "learning_rate": 4.898131027239617e-06, + "loss": 0.3102, + "step": 11519 + }, + { + "epoch": 0.5213849287169042, + "grad_norm": 0.6182232484985482, + "learning_rate": 4.897398259448625e-06, + "loss": 0.3573, + "step": 11520 + }, + { + "epoch": 0.5214301878252998, + "grad_norm": 0.6503476877876679, + "learning_rate": 4.89666549386223e-06, + "loss": 0.3264, + "step": 11521 + }, + { + "epoch": 0.5214754469336954, + "grad_norm": 0.6180965256332858, + "learning_rate": 4.895932730496174e-06, + "loss": 0.3065, + "step": 11522 + }, + { + "epoch": 0.521520706042091, + "grad_norm": 0.6210216574725677, + "learning_rate": 4.895199969366206e-06, + "loss": 0.3076, + "step": 11523 + }, + { + "epoch": 0.5215659651504866, + "grad_norm": 0.35488493465928805, + "learning_rate": 4.894467210488069e-06, + "loss": 0.498, + "step": 11524 + }, + { + "epoch": 0.5216112242588821, + "grad_norm": 0.5825552011467424, + "learning_rate": 4.893734453877506e-06, + "loss": 0.3155, + "step": 11525 + }, + { + "epoch": 0.5216564833672777, + "grad_norm": 0.723602606465192, + "learning_rate": 4.893001699550263e-06, + "loss": 0.325, + "step": 11526 + }, + { + "epoch": 0.5217017424756732, + "grad_norm": 0.6108827405279276, + "learning_rate": 4.892268947522088e-06, + "loss": 0.3568, + "step": 11527 + }, + { + "epoch": 0.5217470015840688, + "grad_norm": 0.6114150637030855, + "learning_rate": 4.891536197808719e-06, + "loss": 0.2834, + "step": 11528 + }, + { + "epoch": 0.5217922606924643, + "grad_norm": 0.6360114618316366, + "learning_rate": 4.890803450425905e-06, + "loss": 0.3451, + "step": 11529 + }, + { + "epoch": 0.5218375198008599, + "grad_norm": 0.6545738871505882, + "learning_rate": 4.890070705389388e-06, + "loss": 0.3402, + "step": 11530 + }, + { + "epoch": 0.5218827789092555, + "grad_norm": 0.6206975055805826, + "learning_rate": 4.889337962714918e-06, + "loss": 0.337, + "step": 11531 + }, + { + "epoch": 0.5219280380176511, + "grad_norm": 0.7917530209205013, + "learning_rate": 4.888605222418232e-06, + "loss": 0.3216, + "step": 11532 + }, + { + "epoch": 0.5219732971260466, + "grad_norm": 1.048978913657367, + "learning_rate": 4.887872484515078e-06, + "loss": 0.2912, + "step": 11533 + }, + { + "epoch": 0.5220185562344422, + "grad_norm": 0.6780769881876405, + "learning_rate": 4.8871397490212015e-06, + "loss": 0.3535, + "step": 11534 + }, + { + "epoch": 0.5220638153428377, + "grad_norm": 0.33566560310405, + "learning_rate": 4.886407015952344e-06, + "loss": 0.4988, + "step": 11535 + }, + { + "epoch": 0.5221090744512333, + "grad_norm": 0.33959867580729625, + "learning_rate": 4.8856742853242504e-06, + "loss": 0.4726, + "step": 11536 + }, + { + "epoch": 0.5221543335596289, + "grad_norm": 0.6735425814731475, + "learning_rate": 4.884941557152666e-06, + "loss": 0.2987, + "step": 11537 + }, + { + "epoch": 0.5221995926680244, + "grad_norm": 0.654398883844253, + "learning_rate": 4.884208831453335e-06, + "loss": 0.2959, + "step": 11538 + }, + { + "epoch": 0.52224485177642, + "grad_norm": 0.28491726359378344, + "learning_rate": 4.883476108241999e-06, + "loss": 0.4815, + "step": 11539 + }, + { + "epoch": 0.5222901108848156, + "grad_norm": 0.632877892939454, + "learning_rate": 4.882743387534406e-06, + "loss": 0.383, + "step": 11540 + }, + { + "epoch": 0.5223353699932112, + "grad_norm": 0.678673092334952, + "learning_rate": 4.882010669346294e-06, + "loss": 0.3215, + "step": 11541 + }, + { + "epoch": 0.5223806291016067, + "grad_norm": 0.5418262072495581, + "learning_rate": 4.881277953693412e-06, + "loss": 0.2891, + "step": 11542 + }, + { + "epoch": 0.5224258882100022, + "grad_norm": 0.6492432998704616, + "learning_rate": 4.8805452405915025e-06, + "loss": 0.3584, + "step": 11543 + }, + { + "epoch": 0.5224711473183978, + "grad_norm": 0.6027133643001226, + "learning_rate": 4.879812530056309e-06, + "loss": 0.3324, + "step": 11544 + }, + { + "epoch": 0.5225164064267934, + "grad_norm": 0.37705179015063844, + "learning_rate": 4.879079822103575e-06, + "loss": 0.4705, + "step": 11545 + }, + { + "epoch": 0.5225616655351889, + "grad_norm": 0.6388994946240203, + "learning_rate": 4.878347116749042e-06, + "loss": 0.2968, + "step": 11546 + }, + { + "epoch": 0.5226069246435845, + "grad_norm": 1.0835052752581886, + "learning_rate": 4.877614414008459e-06, + "loss": 0.2954, + "step": 11547 + }, + { + "epoch": 0.5226521837519801, + "grad_norm": 0.6052171996998544, + "learning_rate": 4.876881713897565e-06, + "loss": 0.3191, + "step": 11548 + }, + { + "epoch": 0.5226974428603757, + "grad_norm": 0.2959331868748914, + "learning_rate": 4.876149016432104e-06, + "loss": 0.4955, + "step": 11549 + }, + { + "epoch": 0.5227427019687713, + "grad_norm": 0.6744346120219048, + "learning_rate": 4.875416321627823e-06, + "loss": 0.3521, + "step": 11550 + }, + { + "epoch": 0.5227879610771667, + "grad_norm": 0.6485308645284827, + "learning_rate": 4.87468362950046e-06, + "loss": 0.3312, + "step": 11551 + }, + { + "epoch": 0.5228332201855623, + "grad_norm": 0.6479885264159125, + "learning_rate": 4.873950940065762e-06, + "loss": 0.327, + "step": 11552 + }, + { + "epoch": 0.5228784792939579, + "grad_norm": 0.6287323547050803, + "learning_rate": 4.8732182533394716e-06, + "loss": 0.3253, + "step": 11553 + }, + { + "epoch": 0.5229237384023535, + "grad_norm": 0.5957143013642198, + "learning_rate": 4.87248556933733e-06, + "loss": 0.3297, + "step": 11554 + }, + { + "epoch": 0.522968997510749, + "grad_norm": 0.7821061377676696, + "learning_rate": 4.871752888075082e-06, + "loss": 0.3196, + "step": 11555 + }, + { + "epoch": 0.5230142566191446, + "grad_norm": 0.5824779569512576, + "learning_rate": 4.871020209568473e-06, + "loss": 0.2934, + "step": 11556 + }, + { + "epoch": 0.5230595157275402, + "grad_norm": 0.6703735640538666, + "learning_rate": 4.870287533833241e-06, + "loss": 0.3042, + "step": 11557 + }, + { + "epoch": 0.5231047748359358, + "grad_norm": 0.6302364020589345, + "learning_rate": 4.8695548608851326e-06, + "loss": 0.3039, + "step": 11558 + }, + { + "epoch": 0.5231500339443313, + "grad_norm": 0.3537950301088894, + "learning_rate": 4.868822190739888e-06, + "loss": 0.4762, + "step": 11559 + }, + { + "epoch": 0.5231952930527268, + "grad_norm": 0.32810713616438986, + "learning_rate": 4.868089523413255e-06, + "loss": 0.4747, + "step": 11560 + }, + { + "epoch": 0.5232405521611224, + "grad_norm": 0.6764435138048115, + "learning_rate": 4.86735685892097e-06, + "loss": 0.3298, + "step": 11561 + }, + { + "epoch": 0.523285811269518, + "grad_norm": 0.665599259745116, + "learning_rate": 4.8666241972787794e-06, + "loss": 0.2964, + "step": 11562 + }, + { + "epoch": 0.5233310703779136, + "grad_norm": 0.29847661924202107, + "learning_rate": 4.865891538502427e-06, + "loss": 0.4723, + "step": 11563 + }, + { + "epoch": 0.5233763294863091, + "grad_norm": 0.7256024754638591, + "learning_rate": 4.8651588826076514e-06, + "loss": 0.3278, + "step": 11564 + }, + { + "epoch": 0.5234215885947047, + "grad_norm": 0.633130441389736, + "learning_rate": 4.864426229610197e-06, + "loss": 0.3186, + "step": 11565 + }, + { + "epoch": 0.5234668477031003, + "grad_norm": 0.7023084421439088, + "learning_rate": 4.863693579525809e-06, + "loss": 0.2952, + "step": 11566 + }, + { + "epoch": 0.5235121068114958, + "grad_norm": 0.32772249762550243, + "learning_rate": 4.862960932370225e-06, + "loss": 0.4613, + "step": 11567 + }, + { + "epoch": 0.5235573659198913, + "grad_norm": 0.481162743463051, + "learning_rate": 4.862228288159191e-06, + "loss": 0.4735, + "step": 11568 + }, + { + "epoch": 0.5236026250282869, + "grad_norm": 0.5770663986524796, + "learning_rate": 4.861495646908448e-06, + "loss": 0.3308, + "step": 11569 + }, + { + "epoch": 0.5236478841366825, + "grad_norm": 0.7454149959163855, + "learning_rate": 4.860763008633736e-06, + "loss": 0.3551, + "step": 11570 + }, + { + "epoch": 0.5236931432450781, + "grad_norm": 1.1015416066071197, + "learning_rate": 4.860030373350801e-06, + "loss": 0.3473, + "step": 11571 + }, + { + "epoch": 0.5237384023534737, + "grad_norm": 0.6270488120483275, + "learning_rate": 4.859297741075384e-06, + "loss": 0.3224, + "step": 11572 + }, + { + "epoch": 0.5237836614618692, + "grad_norm": 0.6364875464470632, + "learning_rate": 4.858565111823226e-06, + "loss": 0.3071, + "step": 11573 + }, + { + "epoch": 0.5238289205702648, + "grad_norm": 0.6294535199966994, + "learning_rate": 4.857832485610068e-06, + "loss": 0.3626, + "step": 11574 + }, + { + "epoch": 0.5238741796786603, + "grad_norm": 0.6204239645520809, + "learning_rate": 4.857099862451654e-06, + "loss": 0.3054, + "step": 11575 + }, + { + "epoch": 0.5239194387870559, + "grad_norm": 0.6299344433260511, + "learning_rate": 4.856367242363727e-06, + "loss": 0.3272, + "step": 11576 + }, + { + "epoch": 0.5239646978954514, + "grad_norm": 0.7757501561103395, + "learning_rate": 4.8556346253620256e-06, + "loss": 0.3414, + "step": 11577 + }, + { + "epoch": 0.524009957003847, + "grad_norm": 0.6331417772382716, + "learning_rate": 4.854902011462291e-06, + "loss": 0.3632, + "step": 11578 + }, + { + "epoch": 0.5240552161122426, + "grad_norm": 0.5749494648969943, + "learning_rate": 4.85416940068027e-06, + "loss": 0.289, + "step": 11579 + }, + { + "epoch": 0.5241004752206382, + "grad_norm": 0.6052192024100661, + "learning_rate": 4.853436793031698e-06, + "loss": 0.2715, + "step": 11580 + }, + { + "epoch": 0.5241457343290337, + "grad_norm": 0.6163087970541375, + "learning_rate": 4.852704188532319e-06, + "loss": 0.3065, + "step": 11581 + }, + { + "epoch": 0.5241909934374293, + "grad_norm": 0.650723415979576, + "learning_rate": 4.851971587197877e-06, + "loss": 0.3127, + "step": 11582 + }, + { + "epoch": 0.5242362525458248, + "grad_norm": 0.694249073684704, + "learning_rate": 4.8512389890441085e-06, + "loss": 0.3626, + "step": 11583 + }, + { + "epoch": 0.5242815116542204, + "grad_norm": 0.6401599007480614, + "learning_rate": 4.850506394086758e-06, + "loss": 0.2904, + "step": 11584 + }, + { + "epoch": 0.524326770762616, + "grad_norm": 0.6155923503742697, + "learning_rate": 4.849773802341567e-06, + "loss": 0.3397, + "step": 11585 + }, + { + "epoch": 0.5243720298710115, + "grad_norm": 0.5400596329824081, + "learning_rate": 4.849041213824274e-06, + "loss": 0.4777, + "step": 11586 + }, + { + "epoch": 0.5244172889794071, + "grad_norm": 0.6594264393188617, + "learning_rate": 4.8483086285506224e-06, + "loss": 0.3501, + "step": 11587 + }, + { + "epoch": 0.5244625480878027, + "grad_norm": 0.5901626896745396, + "learning_rate": 4.847576046536351e-06, + "loss": 0.2842, + "step": 11588 + }, + { + "epoch": 0.5245078071961983, + "grad_norm": 0.313815295453429, + "learning_rate": 4.8468434677972055e-06, + "loss": 0.467, + "step": 11589 + }, + { + "epoch": 0.5245530663045938, + "grad_norm": 0.6058793660821811, + "learning_rate": 4.846110892348921e-06, + "loss": 0.3314, + "step": 11590 + }, + { + "epoch": 0.5245983254129893, + "grad_norm": 0.6104563403943248, + "learning_rate": 4.845378320207241e-06, + "loss": 0.3176, + "step": 11591 + }, + { + "epoch": 0.5246435845213849, + "grad_norm": 0.6523091636477124, + "learning_rate": 4.844645751387908e-06, + "loss": 0.2999, + "step": 11592 + }, + { + "epoch": 0.5246888436297805, + "grad_norm": 0.7426514955082142, + "learning_rate": 4.843913185906658e-06, + "loss": 0.3172, + "step": 11593 + }, + { + "epoch": 0.5247341027381761, + "grad_norm": 0.7036324622303272, + "learning_rate": 4.843180623779235e-06, + "loss": 0.3273, + "step": 11594 + }, + { + "epoch": 0.5247793618465716, + "grad_norm": 0.5821863957649411, + "learning_rate": 4.84244806502138e-06, + "loss": 0.311, + "step": 11595 + }, + { + "epoch": 0.5248246209549672, + "grad_norm": 0.5758372192065354, + "learning_rate": 4.8417155096488315e-06, + "loss": 0.3352, + "step": 11596 + }, + { + "epoch": 0.5248698800633628, + "grad_norm": 0.5169097065158664, + "learning_rate": 4.84098295767733e-06, + "loss": 0.4698, + "step": 11597 + }, + { + "epoch": 0.5249151391717584, + "grad_norm": 0.6203385669439295, + "learning_rate": 4.840250409122617e-06, + "loss": 0.3466, + "step": 11598 + }, + { + "epoch": 0.5249603982801538, + "grad_norm": 0.6227472133526765, + "learning_rate": 4.8395178640004316e-06, + "loss": 0.2755, + "step": 11599 + }, + { + "epoch": 0.5250056573885494, + "grad_norm": 0.6781019728826658, + "learning_rate": 4.838785322326514e-06, + "loss": 0.3559, + "step": 11600 + }, + { + "epoch": 0.525050916496945, + "grad_norm": 0.6091503037903379, + "learning_rate": 4.838052784116606e-06, + "loss": 0.3216, + "step": 11601 + }, + { + "epoch": 0.5250961756053406, + "grad_norm": 0.6534442212691152, + "learning_rate": 4.837320249386446e-06, + "loss": 0.3209, + "step": 11602 + }, + { + "epoch": 0.5251414347137361, + "grad_norm": 0.629883307804516, + "learning_rate": 4.836587718151773e-06, + "loss": 0.3384, + "step": 11603 + }, + { + "epoch": 0.5251866938221317, + "grad_norm": 0.6153519783719406, + "learning_rate": 4.8358551904283285e-06, + "loss": 0.34, + "step": 11604 + }, + { + "epoch": 0.5252319529305273, + "grad_norm": 0.6418449757511626, + "learning_rate": 4.835122666231854e-06, + "loss": 0.343, + "step": 11605 + }, + { + "epoch": 0.5252772120389229, + "grad_norm": 0.6674292870560887, + "learning_rate": 4.834390145578085e-06, + "loss": 0.3172, + "step": 11606 + }, + { + "epoch": 0.5253224711473184, + "grad_norm": 0.6841831017464953, + "learning_rate": 4.833657628482762e-06, + "loss": 0.33, + "step": 11607 + }, + { + "epoch": 0.5253677302557139, + "grad_norm": 0.656399605022193, + "learning_rate": 4.832925114961629e-06, + "loss": 0.3138, + "step": 11608 + }, + { + "epoch": 0.5254129893641095, + "grad_norm": 0.6375523783307073, + "learning_rate": 4.832192605030419e-06, + "loss": 0.3187, + "step": 11609 + }, + { + "epoch": 0.5254582484725051, + "grad_norm": 0.6306405944594975, + "learning_rate": 4.8314600987048755e-06, + "loss": 0.3179, + "step": 11610 + }, + { + "epoch": 0.5255035075809007, + "grad_norm": 0.6388363788630063, + "learning_rate": 4.8307275960007385e-06, + "loss": 0.3457, + "step": 11611 + }, + { + "epoch": 0.5255487666892962, + "grad_norm": 0.6314349162576515, + "learning_rate": 4.829995096933744e-06, + "loss": 0.3226, + "step": 11612 + }, + { + "epoch": 0.5255940257976918, + "grad_norm": 0.6298986320385729, + "learning_rate": 4.829262601519632e-06, + "loss": 0.3318, + "step": 11613 + }, + { + "epoch": 0.5256392849060874, + "grad_norm": 0.5812589402658017, + "learning_rate": 4.828530109774143e-06, + "loss": 0.3065, + "step": 11614 + }, + { + "epoch": 0.525684544014483, + "grad_norm": 0.6130135799885693, + "learning_rate": 4.827797621713017e-06, + "loss": 0.3103, + "step": 11615 + }, + { + "epoch": 0.5257298031228784, + "grad_norm": 0.6913402064094225, + "learning_rate": 4.827065137351989e-06, + "loss": 0.3412, + "step": 11616 + }, + { + "epoch": 0.525775062231274, + "grad_norm": 0.609221764111646, + "learning_rate": 4.8263326567068e-06, + "loss": 0.3162, + "step": 11617 + }, + { + "epoch": 0.5258203213396696, + "grad_norm": 0.6355749663590441, + "learning_rate": 4.82560017979319e-06, + "loss": 0.3289, + "step": 11618 + }, + { + "epoch": 0.5258655804480652, + "grad_norm": 0.5889558380160951, + "learning_rate": 4.824867706626896e-06, + "loss": 0.3576, + "step": 11619 + }, + { + "epoch": 0.5259108395564608, + "grad_norm": 0.6284121917912118, + "learning_rate": 4.824135237223657e-06, + "loss": 0.3272, + "step": 11620 + }, + { + "epoch": 0.5259560986648563, + "grad_norm": 0.7364661098326682, + "learning_rate": 4.823402771599213e-06, + "loss": 0.3105, + "step": 11621 + }, + { + "epoch": 0.5260013577732519, + "grad_norm": 0.6478860127947099, + "learning_rate": 4.8226703097693e-06, + "loss": 0.3555, + "step": 11622 + }, + { + "epoch": 0.5260466168816474, + "grad_norm": 0.6213982022709907, + "learning_rate": 4.821937851749656e-06, + "loss": 0.3183, + "step": 11623 + }, + { + "epoch": 0.526091875990043, + "grad_norm": 0.6582634639981945, + "learning_rate": 4.8212053975560234e-06, + "loss": 0.3429, + "step": 11624 + }, + { + "epoch": 0.5261371350984385, + "grad_norm": 0.8059204720277006, + "learning_rate": 4.820472947204136e-06, + "loss": 0.3219, + "step": 11625 + }, + { + "epoch": 0.5261823942068341, + "grad_norm": 0.6953029754941127, + "learning_rate": 4.8197405007097346e-06, + "loss": 0.305, + "step": 11626 + }, + { + "epoch": 0.5262276533152297, + "grad_norm": 0.7495626317789454, + "learning_rate": 4.819008058088557e-06, + "loss": 0.3335, + "step": 11627 + }, + { + "epoch": 0.5262729124236253, + "grad_norm": 1.065609757684985, + "learning_rate": 4.8182756193563365e-06, + "loss": 0.3214, + "step": 11628 + }, + { + "epoch": 0.5263181715320209, + "grad_norm": 0.6059383855298436, + "learning_rate": 4.817543184528817e-06, + "loss": 0.3208, + "step": 11629 + }, + { + "epoch": 0.5263634306404164, + "grad_norm": 0.6914288972480178, + "learning_rate": 4.816810753621735e-06, + "loss": 0.2865, + "step": 11630 + }, + { + "epoch": 0.5264086897488119, + "grad_norm": 0.7088506050035354, + "learning_rate": 4.816078326650827e-06, + "loss": 0.3535, + "step": 11631 + }, + { + "epoch": 0.5264539488572075, + "grad_norm": 0.6231122942346944, + "learning_rate": 4.8153459036318295e-06, + "loss": 0.3199, + "step": 11632 + }, + { + "epoch": 0.5264992079656031, + "grad_norm": 0.7525461765440211, + "learning_rate": 4.8146134845804825e-06, + "loss": 0.3321, + "step": 11633 + }, + { + "epoch": 0.5265444670739986, + "grad_norm": 0.6610384539648507, + "learning_rate": 4.813881069512523e-06, + "loss": 0.3682, + "step": 11634 + }, + { + "epoch": 0.5265897261823942, + "grad_norm": 0.6021964475738, + "learning_rate": 4.813148658443687e-06, + "loss": 0.3105, + "step": 11635 + }, + { + "epoch": 0.5266349852907898, + "grad_norm": 0.7096372561872358, + "learning_rate": 4.812416251389711e-06, + "loss": 0.3171, + "step": 11636 + }, + { + "epoch": 0.5266802443991854, + "grad_norm": 0.6117060167385769, + "learning_rate": 4.811683848366337e-06, + "loss": 0.2988, + "step": 11637 + }, + { + "epoch": 0.5267255035075809, + "grad_norm": 0.6371498511510651, + "learning_rate": 4.810951449389296e-06, + "loss": 0.3235, + "step": 11638 + }, + { + "epoch": 0.5267707626159764, + "grad_norm": 0.42872180050104086, + "learning_rate": 4.810219054474328e-06, + "loss": 0.5039, + "step": 11639 + }, + { + "epoch": 0.526816021724372, + "grad_norm": 0.6127103887238807, + "learning_rate": 4.809486663637171e-06, + "loss": 0.2954, + "step": 11640 + }, + { + "epoch": 0.5268612808327676, + "grad_norm": 0.6174635844891634, + "learning_rate": 4.808754276893561e-06, + "loss": 0.3038, + "step": 11641 + }, + { + "epoch": 0.5269065399411632, + "grad_norm": 0.6389246437029213, + "learning_rate": 4.808021894259231e-06, + "loss": 0.2961, + "step": 11642 + }, + { + "epoch": 0.5269517990495587, + "grad_norm": 0.33784446396240825, + "learning_rate": 4.807289515749922e-06, + "loss": 0.4631, + "step": 11643 + }, + { + "epoch": 0.5269970581579543, + "grad_norm": 0.6487534965345723, + "learning_rate": 4.806557141381372e-06, + "loss": 0.3594, + "step": 11644 + }, + { + "epoch": 0.5270423172663499, + "grad_norm": 0.6231353459848805, + "learning_rate": 4.8058247711693125e-06, + "loss": 0.3189, + "step": 11645 + }, + { + "epoch": 0.5270875763747455, + "grad_norm": 0.6998728150085539, + "learning_rate": 4.805092405129482e-06, + "loss": 0.3131, + "step": 11646 + }, + { + "epoch": 0.5271328354831409, + "grad_norm": 0.6695260173459374, + "learning_rate": 4.8043600432776186e-06, + "loss": 0.3058, + "step": 11647 + }, + { + "epoch": 0.5271780945915365, + "grad_norm": 0.6494845805375873, + "learning_rate": 4.803627685629456e-06, + "loss": 0.352, + "step": 11648 + }, + { + "epoch": 0.5272233536999321, + "grad_norm": 0.593318239997085, + "learning_rate": 4.802895332200732e-06, + "loss": 0.3103, + "step": 11649 + }, + { + "epoch": 0.5272686128083277, + "grad_norm": 0.6348791995672993, + "learning_rate": 4.8021629830071824e-06, + "loss": 0.3239, + "step": 11650 + }, + { + "epoch": 0.5273138719167232, + "grad_norm": 0.6810575830189948, + "learning_rate": 4.801430638064541e-06, + "loss": 0.361, + "step": 11651 + }, + { + "epoch": 0.5273591310251188, + "grad_norm": 0.3611467775995699, + "learning_rate": 4.800698297388546e-06, + "loss": 0.4956, + "step": 11652 + }, + { + "epoch": 0.5274043901335144, + "grad_norm": 0.7888433588801044, + "learning_rate": 4.799965960994934e-06, + "loss": 0.3363, + "step": 11653 + }, + { + "epoch": 0.52744964924191, + "grad_norm": 0.6675214604942749, + "learning_rate": 4.799233628899438e-06, + "loss": 0.3621, + "step": 11654 + }, + { + "epoch": 0.5274949083503055, + "grad_norm": 0.6828697170062485, + "learning_rate": 4.798501301117795e-06, + "loss": 0.2869, + "step": 11655 + }, + { + "epoch": 0.527540167458701, + "grad_norm": 0.8840805701229352, + "learning_rate": 4.79776897766574e-06, + "loss": 0.342, + "step": 11656 + }, + { + "epoch": 0.5275854265670966, + "grad_norm": 0.6454186370060752, + "learning_rate": 4.797036658559008e-06, + "loss": 0.2812, + "step": 11657 + }, + { + "epoch": 0.5276306856754922, + "grad_norm": 0.7100634649960617, + "learning_rate": 4.796304343813334e-06, + "loss": 0.3479, + "step": 11658 + }, + { + "epoch": 0.5276759447838878, + "grad_norm": 0.6197573068724485, + "learning_rate": 4.795572033444456e-06, + "loss": 0.3524, + "step": 11659 + }, + { + "epoch": 0.5277212038922833, + "grad_norm": 0.9175159401326038, + "learning_rate": 4.794839727468107e-06, + "loss": 0.3208, + "step": 11660 + }, + { + "epoch": 0.5277664630006789, + "grad_norm": 0.6546605773311394, + "learning_rate": 4.7941074259000205e-06, + "loss": 0.359, + "step": 11661 + }, + { + "epoch": 0.5278117221090745, + "grad_norm": 0.6532201480708522, + "learning_rate": 4.793375128755934e-06, + "loss": 0.3707, + "step": 11662 + }, + { + "epoch": 0.52785698121747, + "grad_norm": 0.6959393775248613, + "learning_rate": 4.792642836051582e-06, + "loss": 0.3373, + "step": 11663 + }, + { + "epoch": 0.5279022403258656, + "grad_norm": 0.6183465447029041, + "learning_rate": 4.7919105478026985e-06, + "loss": 0.3366, + "step": 11664 + }, + { + "epoch": 0.5279474994342611, + "grad_norm": 0.6034941995919838, + "learning_rate": 4.791178264025017e-06, + "loss": 0.305, + "step": 11665 + }, + { + "epoch": 0.5279927585426567, + "grad_norm": 0.6695252246471096, + "learning_rate": 4.790445984734276e-06, + "loss": 0.3577, + "step": 11666 + }, + { + "epoch": 0.5280380176510523, + "grad_norm": 0.6469582967562887, + "learning_rate": 4.789713709946204e-06, + "loss": 0.3265, + "step": 11667 + }, + { + "epoch": 0.5280832767594479, + "grad_norm": 0.614241262918158, + "learning_rate": 4.78898143967654e-06, + "loss": 0.3115, + "step": 11668 + }, + { + "epoch": 0.5281285358678434, + "grad_norm": 0.6096260352455624, + "learning_rate": 4.788249173941018e-06, + "loss": 0.2803, + "step": 11669 + }, + { + "epoch": 0.528173794976239, + "grad_norm": 0.7147214143476651, + "learning_rate": 4.787516912755369e-06, + "loss": 0.3376, + "step": 11670 + }, + { + "epoch": 0.5282190540846345, + "grad_norm": 0.5721691495643542, + "learning_rate": 4.786784656135328e-06, + "loss": 0.3161, + "step": 11671 + }, + { + "epoch": 0.5282643131930301, + "grad_norm": 0.6560440703019803, + "learning_rate": 4.7860524040966316e-06, + "loss": 0.2999, + "step": 11672 + }, + { + "epoch": 0.5283095723014256, + "grad_norm": 0.35524932920415997, + "learning_rate": 4.785320156655013e-06, + "loss": 0.4573, + "step": 11673 + }, + { + "epoch": 0.5283548314098212, + "grad_norm": 0.5887683799085369, + "learning_rate": 4.784587913826203e-06, + "loss": 0.3017, + "step": 11674 + }, + { + "epoch": 0.5284000905182168, + "grad_norm": 0.5629299722363656, + "learning_rate": 4.7838556756259365e-06, + "loss": 0.3053, + "step": 11675 + }, + { + "epoch": 0.5284453496266124, + "grad_norm": 0.30189560182789005, + "learning_rate": 4.78312344206995e-06, + "loss": 0.4844, + "step": 11676 + }, + { + "epoch": 0.528490608735008, + "grad_norm": 0.6264257087723134, + "learning_rate": 4.782391213173973e-06, + "loss": 0.3099, + "step": 11677 + }, + { + "epoch": 0.5285358678434035, + "grad_norm": 0.6104495057889123, + "learning_rate": 4.7816589889537415e-06, + "loss": 0.2973, + "step": 11678 + }, + { + "epoch": 0.528581126951799, + "grad_norm": 0.6157904204618331, + "learning_rate": 4.780926769424988e-06, + "loss": 0.3305, + "step": 11679 + }, + { + "epoch": 0.5286263860601946, + "grad_norm": 0.3013608723061759, + "learning_rate": 4.780194554603444e-06, + "loss": 0.4591, + "step": 11680 + }, + { + "epoch": 0.5286716451685902, + "grad_norm": 0.6707113511900932, + "learning_rate": 4.779462344504845e-06, + "loss": 0.3438, + "step": 11681 + }, + { + "epoch": 0.5287169042769857, + "grad_norm": 0.6069204030793611, + "learning_rate": 4.778730139144923e-06, + "loss": 0.3302, + "step": 11682 + }, + { + "epoch": 0.5287621633853813, + "grad_norm": 0.30036751368928316, + "learning_rate": 4.777997938539411e-06, + "loss": 0.4674, + "step": 11683 + }, + { + "epoch": 0.5288074224937769, + "grad_norm": 0.6392490111304531, + "learning_rate": 4.777265742704039e-06, + "loss": 0.3414, + "step": 11684 + }, + { + "epoch": 0.5288526816021725, + "grad_norm": 0.6088450841981451, + "learning_rate": 4.776533551654543e-06, + "loss": 0.293, + "step": 11685 + }, + { + "epoch": 0.528897940710568, + "grad_norm": 0.6248569779027412, + "learning_rate": 4.775801365406657e-06, + "loss": 0.3402, + "step": 11686 + }, + { + "epoch": 0.5289431998189635, + "grad_norm": 0.6331192626517268, + "learning_rate": 4.77506918397611e-06, + "loss": 0.3631, + "step": 11687 + }, + { + "epoch": 0.5289884589273591, + "grad_norm": 0.5615598344171098, + "learning_rate": 4.774337007378633e-06, + "loss": 0.2863, + "step": 11688 + }, + { + "epoch": 0.5290337180357547, + "grad_norm": 0.6345617408032932, + "learning_rate": 4.773604835629965e-06, + "loss": 0.379, + "step": 11689 + }, + { + "epoch": 0.5290789771441503, + "grad_norm": 0.6236578233133567, + "learning_rate": 4.77287266874583e-06, + "loss": 0.3255, + "step": 11690 + }, + { + "epoch": 0.5291242362525458, + "grad_norm": 0.6598727164174507, + "learning_rate": 4.772140506741966e-06, + "loss": 0.3424, + "step": 11691 + }, + { + "epoch": 0.5291694953609414, + "grad_norm": 0.33072579076807246, + "learning_rate": 4.771408349634103e-06, + "loss": 0.4797, + "step": 11692 + }, + { + "epoch": 0.529214754469337, + "grad_norm": 0.3321147784678341, + "learning_rate": 4.770676197437971e-06, + "loss": 0.4725, + "step": 11693 + }, + { + "epoch": 0.5292600135777326, + "grad_norm": 0.6858860519788593, + "learning_rate": 4.769944050169303e-06, + "loss": 0.3207, + "step": 11694 + }, + { + "epoch": 0.529305272686128, + "grad_norm": 0.6174437330371626, + "learning_rate": 4.769211907843833e-06, + "loss": 0.3649, + "step": 11695 + }, + { + "epoch": 0.5293505317945236, + "grad_norm": 0.6631526550644542, + "learning_rate": 4.768479770477287e-06, + "loss": 0.32, + "step": 11696 + }, + { + "epoch": 0.5293957909029192, + "grad_norm": 0.6050793631720137, + "learning_rate": 4.767747638085402e-06, + "loss": 0.3154, + "step": 11697 + }, + { + "epoch": 0.5294410500113148, + "grad_norm": 0.6513012927299898, + "learning_rate": 4.767015510683906e-06, + "loss": 0.3788, + "step": 11698 + }, + { + "epoch": 0.5294863091197104, + "grad_norm": 0.7069803491491897, + "learning_rate": 4.766283388288532e-06, + "loss": 0.3555, + "step": 11699 + }, + { + "epoch": 0.5295315682281059, + "grad_norm": 0.6905703982268144, + "learning_rate": 4.765551270915008e-06, + "loss": 0.3533, + "step": 11700 + }, + { + "epoch": 0.5295768273365015, + "grad_norm": 0.6164310287508942, + "learning_rate": 4.764819158579069e-06, + "loss": 0.2813, + "step": 11701 + }, + { + "epoch": 0.5296220864448971, + "grad_norm": 0.6334263773508509, + "learning_rate": 4.764087051296445e-06, + "loss": 0.3171, + "step": 11702 + }, + { + "epoch": 0.5296673455532926, + "grad_norm": 0.4756404789746504, + "learning_rate": 4.763354949082864e-06, + "loss": 0.4844, + "step": 11703 + }, + { + "epoch": 0.5297126046616881, + "grad_norm": 0.8110841726189192, + "learning_rate": 4.762622851954058e-06, + "loss": 0.3588, + "step": 11704 + }, + { + "epoch": 0.5297578637700837, + "grad_norm": 0.6715038490678876, + "learning_rate": 4.761890759925759e-06, + "loss": 0.3587, + "step": 11705 + }, + { + "epoch": 0.5298031228784793, + "grad_norm": 0.29191115580705324, + "learning_rate": 4.761158673013696e-06, + "loss": 0.4792, + "step": 11706 + }, + { + "epoch": 0.5298483819868749, + "grad_norm": 0.67648546466302, + "learning_rate": 4.7604265912336e-06, + "loss": 0.342, + "step": 11707 + }, + { + "epoch": 0.5298936410952704, + "grad_norm": 0.3268432712087116, + "learning_rate": 4.759694514601201e-06, + "loss": 0.4784, + "step": 11708 + }, + { + "epoch": 0.529938900203666, + "grad_norm": 0.6779791178983064, + "learning_rate": 4.758962443132227e-06, + "loss": 0.304, + "step": 11709 + }, + { + "epoch": 0.5299841593120616, + "grad_norm": 0.6503922808669877, + "learning_rate": 4.75823037684241e-06, + "loss": 0.3333, + "step": 11710 + }, + { + "epoch": 0.5300294184204571, + "grad_norm": 0.6275606411625194, + "learning_rate": 4.757498315747482e-06, + "loss": 0.3482, + "step": 11711 + }, + { + "epoch": 0.5300746775288527, + "grad_norm": 0.6126662598486359, + "learning_rate": 4.756766259863169e-06, + "loss": 0.3182, + "step": 11712 + }, + { + "epoch": 0.5301199366372482, + "grad_norm": 0.604527940770792, + "learning_rate": 4.756034209205201e-06, + "loss": 0.3135, + "step": 11713 + }, + { + "epoch": 0.5301651957456438, + "grad_norm": 0.6128021844596666, + "learning_rate": 4.75530216378931e-06, + "loss": 0.2854, + "step": 11714 + }, + { + "epoch": 0.5302104548540394, + "grad_norm": 0.6237930727497758, + "learning_rate": 4.754570123631224e-06, + "loss": 0.313, + "step": 11715 + }, + { + "epoch": 0.530255713962435, + "grad_norm": 0.41318223344137056, + "learning_rate": 4.753838088746672e-06, + "loss": 0.468, + "step": 11716 + }, + { + "epoch": 0.5303009730708305, + "grad_norm": 0.6638863894739259, + "learning_rate": 4.753106059151382e-06, + "loss": 0.3176, + "step": 11717 + }, + { + "epoch": 0.530346232179226, + "grad_norm": 0.32642953381378825, + "learning_rate": 4.752374034861088e-06, + "loss": 0.4704, + "step": 11718 + }, + { + "epoch": 0.5303914912876216, + "grad_norm": 0.621866477687391, + "learning_rate": 4.7516420158915115e-06, + "loss": 0.3375, + "step": 11719 + }, + { + "epoch": 0.5304367503960172, + "grad_norm": 0.6712285445401825, + "learning_rate": 4.750910002258387e-06, + "loss": 0.3308, + "step": 11720 + }, + { + "epoch": 0.5304820095044127, + "grad_norm": 0.7017424575427466, + "learning_rate": 4.750177993977442e-06, + "loss": 0.293, + "step": 11721 + }, + { + "epoch": 0.5305272686128083, + "grad_norm": 0.7136507819466259, + "learning_rate": 4.7494459910644044e-06, + "loss": 0.3111, + "step": 11722 + }, + { + "epoch": 0.5305725277212039, + "grad_norm": 0.6721779070751936, + "learning_rate": 4.7487139935350015e-06, + "loss": 0.3338, + "step": 11723 + }, + { + "epoch": 0.5306177868295995, + "grad_norm": 0.6295776044002247, + "learning_rate": 4.747982001404965e-06, + "loss": 0.2902, + "step": 11724 + }, + { + "epoch": 0.5306630459379951, + "grad_norm": 0.64717952938952, + "learning_rate": 4.7472500146900206e-06, + "loss": 0.3294, + "step": 11725 + }, + { + "epoch": 0.5307083050463905, + "grad_norm": 0.6384251289927297, + "learning_rate": 4.746518033405897e-06, + "loss": 0.3328, + "step": 11726 + }, + { + "epoch": 0.5307535641547861, + "grad_norm": 0.6299181902472535, + "learning_rate": 4.745786057568324e-06, + "loss": 0.3558, + "step": 11727 + }, + { + "epoch": 0.5307988232631817, + "grad_norm": 0.6467058997655225, + "learning_rate": 4.745054087193025e-06, + "loss": 0.4017, + "step": 11728 + }, + { + "epoch": 0.5308440823715773, + "grad_norm": 0.6051062425231508, + "learning_rate": 4.744322122295732e-06, + "loss": 0.3431, + "step": 11729 + }, + { + "epoch": 0.5308893414799728, + "grad_norm": 0.4436395210006455, + "learning_rate": 4.743590162892171e-06, + "loss": 0.503, + "step": 11730 + }, + { + "epoch": 0.5309346005883684, + "grad_norm": 0.7358701096074631, + "learning_rate": 4.742858208998072e-06, + "loss": 0.3252, + "step": 11731 + }, + { + "epoch": 0.530979859696764, + "grad_norm": 0.5952948832084214, + "learning_rate": 4.742126260629158e-06, + "loss": 0.284, + "step": 11732 + }, + { + "epoch": 0.5310251188051596, + "grad_norm": 0.6126645832337427, + "learning_rate": 4.741394317801158e-06, + "loss": 0.336, + "step": 11733 + }, + { + "epoch": 0.5310703779135552, + "grad_norm": 0.5700785120288356, + "learning_rate": 4.740662380529802e-06, + "loss": 0.3016, + "step": 11734 + }, + { + "epoch": 0.5311156370219506, + "grad_norm": 0.6028915201145363, + "learning_rate": 4.739930448830814e-06, + "loss": 0.2852, + "step": 11735 + }, + { + "epoch": 0.5311608961303462, + "grad_norm": 0.29438188912144864, + "learning_rate": 4.739198522719922e-06, + "loss": 0.4607, + "step": 11736 + }, + { + "epoch": 0.5312061552387418, + "grad_norm": 0.6169329863522592, + "learning_rate": 4.738466602212854e-06, + "loss": 0.3175, + "step": 11737 + }, + { + "epoch": 0.5312514143471374, + "grad_norm": 0.6408398342033451, + "learning_rate": 4.737734687325332e-06, + "loss": 0.3394, + "step": 11738 + }, + { + "epoch": 0.5312966734555329, + "grad_norm": 0.2797451455880777, + "learning_rate": 4.737002778073089e-06, + "loss": 0.4713, + "step": 11739 + }, + { + "epoch": 0.5313419325639285, + "grad_norm": 0.8173420620683368, + "learning_rate": 4.736270874471849e-06, + "loss": 0.3008, + "step": 11740 + }, + { + "epoch": 0.5313871916723241, + "grad_norm": 0.6721444700649927, + "learning_rate": 4.735538976537336e-06, + "loss": 0.3178, + "step": 11741 + }, + { + "epoch": 0.5314324507807197, + "grad_norm": 0.2953942616732436, + "learning_rate": 4.734807084285278e-06, + "loss": 0.4832, + "step": 11742 + }, + { + "epoch": 0.5314777098891151, + "grad_norm": 0.6173867128773916, + "learning_rate": 4.734075197731403e-06, + "loss": 0.2878, + "step": 11743 + }, + { + "epoch": 0.5315229689975107, + "grad_norm": 0.6106232963497055, + "learning_rate": 4.733343316891435e-06, + "loss": 0.3174, + "step": 11744 + }, + { + "epoch": 0.5315682281059063, + "grad_norm": 0.6509230072417117, + "learning_rate": 4.7326114417811e-06, + "loss": 0.3542, + "step": 11745 + }, + { + "epoch": 0.5316134872143019, + "grad_norm": 0.627189292175373, + "learning_rate": 4.7318795724161214e-06, + "loss": 0.2748, + "step": 11746 + }, + { + "epoch": 0.5316587463226975, + "grad_norm": 0.7136115075309686, + "learning_rate": 4.731147708812232e-06, + "loss": 0.3616, + "step": 11747 + }, + { + "epoch": 0.531704005431093, + "grad_norm": 0.6629747069512445, + "learning_rate": 4.730415850985149e-06, + "loss": 0.3701, + "step": 11748 + }, + { + "epoch": 0.5317492645394886, + "grad_norm": 0.6906180915021461, + "learning_rate": 4.729683998950602e-06, + "loss": 0.3288, + "step": 11749 + }, + { + "epoch": 0.5317945236478842, + "grad_norm": 0.5979704596044886, + "learning_rate": 4.728952152724317e-06, + "loss": 0.3461, + "step": 11750 + }, + { + "epoch": 0.5318397827562797, + "grad_norm": 0.6733068733347735, + "learning_rate": 4.728220312322017e-06, + "loss": 0.3514, + "step": 11751 + }, + { + "epoch": 0.5318850418646752, + "grad_norm": 0.6288131092820356, + "learning_rate": 4.7274884777594265e-06, + "loss": 0.3166, + "step": 11752 + }, + { + "epoch": 0.5319303009730708, + "grad_norm": 0.34938270335009647, + "learning_rate": 4.726756649052274e-06, + "loss": 0.4755, + "step": 11753 + }, + { + "epoch": 0.5319755600814664, + "grad_norm": 0.8193599994439758, + "learning_rate": 4.726024826216281e-06, + "loss": 0.2964, + "step": 11754 + }, + { + "epoch": 0.532020819189862, + "grad_norm": 0.6055021071486388, + "learning_rate": 4.725293009267173e-06, + "loss": 0.2954, + "step": 11755 + }, + { + "epoch": 0.5320660782982575, + "grad_norm": 0.31638532661399055, + "learning_rate": 4.724561198220672e-06, + "loss": 0.4802, + "step": 11756 + }, + { + "epoch": 0.5321113374066531, + "grad_norm": 0.6145143406936915, + "learning_rate": 4.7238293930925085e-06, + "loss": 0.3484, + "step": 11757 + }, + { + "epoch": 0.5321565965150487, + "grad_norm": 1.0571107759072538, + "learning_rate": 4.723097593898402e-06, + "loss": 0.3007, + "step": 11758 + }, + { + "epoch": 0.5322018556234442, + "grad_norm": 0.6978453898443266, + "learning_rate": 4.7223658006540775e-06, + "loss": 0.3161, + "step": 11759 + }, + { + "epoch": 0.5322471147318398, + "grad_norm": 0.7433654020085223, + "learning_rate": 4.7216340133752604e-06, + "loss": 0.3334, + "step": 11760 + }, + { + "epoch": 0.5322923738402353, + "grad_norm": 0.30745122387587526, + "learning_rate": 4.720902232077671e-06, + "loss": 0.4618, + "step": 11761 + }, + { + "epoch": 0.5323376329486309, + "grad_norm": 1.7468362884445348, + "learning_rate": 4.720170456777036e-06, + "loss": 0.2957, + "step": 11762 + }, + { + "epoch": 0.5323828920570265, + "grad_norm": 0.5927317587947376, + "learning_rate": 4.719438687489081e-06, + "loss": 0.3068, + "step": 11763 + }, + { + "epoch": 0.5324281511654221, + "grad_norm": 0.773194991169054, + "learning_rate": 4.718706924229525e-06, + "loss": 0.3427, + "step": 11764 + }, + { + "epoch": 0.5324734102738176, + "grad_norm": 0.6088699127754879, + "learning_rate": 4.7179751670140936e-06, + "loss": 0.303, + "step": 11765 + }, + { + "epoch": 0.5325186693822132, + "grad_norm": 0.6149492255144813, + "learning_rate": 4.717243415858511e-06, + "loss": 0.3374, + "step": 11766 + }, + { + "epoch": 0.5325639284906087, + "grad_norm": 0.6561851623753537, + "learning_rate": 4.716511670778496e-06, + "loss": 0.3539, + "step": 11767 + }, + { + "epoch": 0.5326091875990043, + "grad_norm": 0.6043910413103322, + "learning_rate": 4.715779931789776e-06, + "loss": 0.3041, + "step": 11768 + }, + { + "epoch": 0.5326544467073998, + "grad_norm": 0.6451846146810193, + "learning_rate": 4.715048198908074e-06, + "loss": 0.2939, + "step": 11769 + }, + { + "epoch": 0.5326997058157954, + "grad_norm": 0.3657427690358148, + "learning_rate": 4.7143164721491095e-06, + "loss": 0.509, + "step": 11770 + }, + { + "epoch": 0.532744964924191, + "grad_norm": 0.3300517167423059, + "learning_rate": 4.713584751528605e-06, + "loss": 0.4882, + "step": 11771 + }, + { + "epoch": 0.5327902240325866, + "grad_norm": 0.8410660502421503, + "learning_rate": 4.712853037062286e-06, + "loss": 0.3101, + "step": 11772 + }, + { + "epoch": 0.5328354831409822, + "grad_norm": 0.3151118849340022, + "learning_rate": 4.712121328765875e-06, + "loss": 0.4687, + "step": 11773 + }, + { + "epoch": 0.5328807422493776, + "grad_norm": 0.6545005536550695, + "learning_rate": 4.71138962665509e-06, + "loss": 0.3459, + "step": 11774 + }, + { + "epoch": 0.5329260013577732, + "grad_norm": 0.30887497244294765, + "learning_rate": 4.710657930745656e-06, + "loss": 0.481, + "step": 11775 + }, + { + "epoch": 0.5329712604661688, + "grad_norm": 0.6612389703396073, + "learning_rate": 4.709926241053296e-06, + "loss": 0.3226, + "step": 11776 + }, + { + "epoch": 0.5330165195745644, + "grad_norm": 0.6661687889059671, + "learning_rate": 4.709194557593729e-06, + "loss": 0.3174, + "step": 11777 + }, + { + "epoch": 0.5330617786829599, + "grad_norm": 0.6595961119315379, + "learning_rate": 4.708462880382677e-06, + "loss": 0.3221, + "step": 11778 + }, + { + "epoch": 0.5331070377913555, + "grad_norm": 0.7566412686848811, + "learning_rate": 4.707731209435864e-06, + "loss": 0.2952, + "step": 11779 + }, + { + "epoch": 0.5331522968997511, + "grad_norm": 0.6319506838900698, + "learning_rate": 4.706999544769009e-06, + "loss": 0.3253, + "step": 11780 + }, + { + "epoch": 0.5331975560081467, + "grad_norm": 0.5638834312022738, + "learning_rate": 4.706267886397833e-06, + "loss": 0.3122, + "step": 11781 + }, + { + "epoch": 0.5332428151165423, + "grad_norm": 0.570363300428756, + "learning_rate": 4.705536234338059e-06, + "loss": 0.3171, + "step": 11782 + }, + { + "epoch": 0.5332880742249377, + "grad_norm": 0.6580123748014653, + "learning_rate": 4.704804588605407e-06, + "loss": 0.32, + "step": 11783 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 1.0069993479241546, + "learning_rate": 4.704072949215598e-06, + "loss": 0.3385, + "step": 11784 + }, + { + "epoch": 0.5333785924417289, + "grad_norm": 0.43109927661137915, + "learning_rate": 4.703341316184351e-06, + "loss": 0.4799, + "step": 11785 + }, + { + "epoch": 0.5334238515501245, + "grad_norm": 0.6811949904047957, + "learning_rate": 4.702609689527389e-06, + "loss": 0.3446, + "step": 11786 + }, + { + "epoch": 0.53346911065852, + "grad_norm": 0.6131719461888363, + "learning_rate": 4.701878069260432e-06, + "loss": 0.386, + "step": 11787 + }, + { + "epoch": 0.5335143697669156, + "grad_norm": 0.6740418379070753, + "learning_rate": 4.701146455399198e-06, + "loss": 0.3326, + "step": 11788 + }, + { + "epoch": 0.5335596288753112, + "grad_norm": 0.6461944062048777, + "learning_rate": 4.7004148479594114e-06, + "loss": 0.3025, + "step": 11789 + }, + { + "epoch": 0.5336048879837068, + "grad_norm": 0.6409431886352649, + "learning_rate": 4.699683246956787e-06, + "loss": 0.3169, + "step": 11790 + }, + { + "epoch": 0.5336501470921022, + "grad_norm": 0.7155276422903248, + "learning_rate": 4.698951652407048e-06, + "loss": 0.3526, + "step": 11791 + }, + { + "epoch": 0.5336954062004978, + "grad_norm": 0.6465783751741991, + "learning_rate": 4.698220064325915e-06, + "loss": 0.3639, + "step": 11792 + }, + { + "epoch": 0.5337406653088934, + "grad_norm": 0.6097472048185627, + "learning_rate": 4.697488482729105e-06, + "loss": 0.3421, + "step": 11793 + }, + { + "epoch": 0.533785924417289, + "grad_norm": 0.6409515693192006, + "learning_rate": 4.696756907632336e-06, + "loss": 0.3602, + "step": 11794 + }, + { + "epoch": 0.5338311835256846, + "grad_norm": 0.599249241356723, + "learning_rate": 4.6960253390513346e-06, + "loss": 0.3309, + "step": 11795 + }, + { + "epoch": 0.5338764426340801, + "grad_norm": 0.6196382148914346, + "learning_rate": 4.6952937770018105e-06, + "loss": 0.3502, + "step": 11796 + }, + { + "epoch": 0.5339217017424757, + "grad_norm": 0.6316758562418318, + "learning_rate": 4.694562221499489e-06, + "loss": 0.2808, + "step": 11797 + }, + { + "epoch": 0.5339669608508713, + "grad_norm": 0.6741241563752027, + "learning_rate": 4.693830672560089e-06, + "loss": 0.3623, + "step": 11798 + }, + { + "epoch": 0.5340122199592668, + "grad_norm": 0.587340740877701, + "learning_rate": 4.6930991301993255e-06, + "loss": 0.3471, + "step": 11799 + }, + { + "epoch": 0.5340574790676623, + "grad_norm": 0.32964651554241226, + "learning_rate": 4.692367594432919e-06, + "loss": 0.4839, + "step": 11800 + }, + { + "epoch": 0.5341027381760579, + "grad_norm": 0.6854707444673713, + "learning_rate": 4.6916360652765876e-06, + "loss": 0.3331, + "step": 11801 + }, + { + "epoch": 0.5341479972844535, + "grad_norm": 0.7373786452883535, + "learning_rate": 4.690904542746052e-06, + "loss": 0.3483, + "step": 11802 + }, + { + "epoch": 0.5341932563928491, + "grad_norm": 0.6466194356635377, + "learning_rate": 4.690173026857028e-06, + "loss": 0.3411, + "step": 11803 + }, + { + "epoch": 0.5342385155012446, + "grad_norm": 0.6102879667747352, + "learning_rate": 4.689441517625232e-06, + "loss": 0.3298, + "step": 11804 + }, + { + "epoch": 0.5342837746096402, + "grad_norm": 0.30553155491046163, + "learning_rate": 4.688710015066388e-06, + "loss": 0.4822, + "step": 11805 + }, + { + "epoch": 0.5343290337180358, + "grad_norm": 0.650611281210423, + "learning_rate": 4.687978519196205e-06, + "loss": 0.2957, + "step": 11806 + }, + { + "epoch": 0.5343742928264313, + "grad_norm": 0.5941169919812131, + "learning_rate": 4.687247030030409e-06, + "loss": 0.3534, + "step": 11807 + }, + { + "epoch": 0.5344195519348269, + "grad_norm": 0.6493684457101689, + "learning_rate": 4.686515547584713e-06, + "loss": 0.29, + "step": 11808 + }, + { + "epoch": 0.5344648110432224, + "grad_norm": 0.6147351712741508, + "learning_rate": 4.685784071874835e-06, + "loss": 0.3088, + "step": 11809 + }, + { + "epoch": 0.534510070151618, + "grad_norm": 0.6363524473472438, + "learning_rate": 4.68505260291649e-06, + "loss": 0.3271, + "step": 11810 + }, + { + "epoch": 0.5345553292600136, + "grad_norm": 0.618546025225323, + "learning_rate": 4.6843211407254e-06, + "loss": 0.3188, + "step": 11811 + }, + { + "epoch": 0.5346005883684092, + "grad_norm": 0.6070095148382589, + "learning_rate": 4.683589685317278e-06, + "loss": 0.3589, + "step": 11812 + }, + { + "epoch": 0.5346458474768047, + "grad_norm": 0.619057250383705, + "learning_rate": 4.682858236707842e-06, + "loss": 0.3006, + "step": 11813 + }, + { + "epoch": 0.5346911065852002, + "grad_norm": 0.6694414833050002, + "learning_rate": 4.682126794912808e-06, + "loss": 0.2924, + "step": 11814 + }, + { + "epoch": 0.5347363656935958, + "grad_norm": 0.6538425008294642, + "learning_rate": 4.681395359947894e-06, + "loss": 0.3283, + "step": 11815 + }, + { + "epoch": 0.5347816248019914, + "grad_norm": 0.618884000810644, + "learning_rate": 4.680663931828815e-06, + "loss": 0.3126, + "step": 11816 + }, + { + "epoch": 0.534826883910387, + "grad_norm": 0.6622986593486823, + "learning_rate": 4.679932510571286e-06, + "loss": 0.3021, + "step": 11817 + }, + { + "epoch": 0.5348721430187825, + "grad_norm": 0.6046152483107878, + "learning_rate": 4.679201096191027e-06, + "loss": 0.3415, + "step": 11818 + }, + { + "epoch": 0.5349174021271781, + "grad_norm": 0.6457583281268079, + "learning_rate": 4.6784696887037475e-06, + "loss": 0.2878, + "step": 11819 + }, + { + "epoch": 0.5349626612355737, + "grad_norm": 0.7731153771800899, + "learning_rate": 4.6777382881251695e-06, + "loss": 0.3254, + "step": 11820 + }, + { + "epoch": 0.5350079203439693, + "grad_norm": 0.5818533268500036, + "learning_rate": 4.677006894471006e-06, + "loss": 0.3089, + "step": 11821 + }, + { + "epoch": 0.5350531794523647, + "grad_norm": 0.6437935399271492, + "learning_rate": 4.676275507756972e-06, + "loss": 0.3404, + "step": 11822 + }, + { + "epoch": 0.5350984385607603, + "grad_norm": 0.6062609675532306, + "learning_rate": 4.6755441279987815e-06, + "loss": 0.3071, + "step": 11823 + }, + { + "epoch": 0.5351436976691559, + "grad_norm": 0.6875800441401139, + "learning_rate": 4.674812755212154e-06, + "loss": 0.3151, + "step": 11824 + }, + { + "epoch": 0.5351889567775515, + "grad_norm": 0.6476206078752045, + "learning_rate": 4.674081389412799e-06, + "loss": 0.3227, + "step": 11825 + }, + { + "epoch": 0.535234215885947, + "grad_norm": 0.7150470473074529, + "learning_rate": 4.673350030616435e-06, + "loss": 0.3612, + "step": 11826 + }, + { + "epoch": 0.5352794749943426, + "grad_norm": 0.8702744022584356, + "learning_rate": 4.6726186788387745e-06, + "loss": 0.3064, + "step": 11827 + }, + { + "epoch": 0.5353247341027382, + "grad_norm": 0.6838788147840302, + "learning_rate": 4.671887334095537e-06, + "loss": 0.3063, + "step": 11828 + }, + { + "epoch": 0.5353699932111338, + "grad_norm": 0.3631132216259713, + "learning_rate": 4.671155996402429e-06, + "loss": 0.4708, + "step": 11829 + }, + { + "epoch": 0.5354152523195294, + "grad_norm": 0.7416558030623343, + "learning_rate": 4.670424665775169e-06, + "loss": 0.2972, + "step": 11830 + }, + { + "epoch": 0.5354605114279248, + "grad_norm": 0.3263187846566589, + "learning_rate": 4.669693342229473e-06, + "loss": 0.4574, + "step": 11831 + }, + { + "epoch": 0.5355057705363204, + "grad_norm": 0.6714728109702466, + "learning_rate": 4.668962025781051e-06, + "loss": 0.3016, + "step": 11832 + }, + { + "epoch": 0.535551029644716, + "grad_norm": 0.6091662070395258, + "learning_rate": 4.668230716445618e-06, + "loss": 0.308, + "step": 11833 + }, + { + "epoch": 0.5355962887531116, + "grad_norm": 0.6675165747541576, + "learning_rate": 4.66749941423889e-06, + "loss": 0.365, + "step": 11834 + }, + { + "epoch": 0.5356415478615071, + "grad_norm": 0.3165058569241368, + "learning_rate": 4.666768119176576e-06, + "loss": 0.4817, + "step": 11835 + }, + { + "epoch": 0.5356868069699027, + "grad_norm": 0.6903447640884273, + "learning_rate": 4.666036831274392e-06, + "loss": 0.3129, + "step": 11836 + }, + { + "epoch": 0.5357320660782983, + "grad_norm": 0.37165014809763863, + "learning_rate": 4.665305550548053e-06, + "loss": 0.4939, + "step": 11837 + }, + { + "epoch": 0.5357773251866939, + "grad_norm": 0.6023690777695794, + "learning_rate": 4.664574277013267e-06, + "loss": 0.2911, + "step": 11838 + }, + { + "epoch": 0.5358225842950893, + "grad_norm": 0.6341556166653007, + "learning_rate": 4.663843010685751e-06, + "loss": 0.3533, + "step": 11839 + }, + { + "epoch": 0.5358678434034849, + "grad_norm": 0.3014725055190625, + "learning_rate": 4.663111751581217e-06, + "loss": 0.4711, + "step": 11840 + }, + { + "epoch": 0.5359131025118805, + "grad_norm": 0.2846751634981725, + "learning_rate": 4.662380499715376e-06, + "loss": 0.4784, + "step": 11841 + }, + { + "epoch": 0.5359583616202761, + "grad_norm": 0.6283147441083387, + "learning_rate": 4.661649255103941e-06, + "loss": 0.3562, + "step": 11842 + }, + { + "epoch": 0.5360036207286717, + "grad_norm": 0.7101453282416704, + "learning_rate": 4.660918017762624e-06, + "loss": 0.4033, + "step": 11843 + }, + { + "epoch": 0.5360488798370672, + "grad_norm": 0.6798369669668829, + "learning_rate": 4.660186787707137e-06, + "loss": 0.3172, + "step": 11844 + }, + { + "epoch": 0.5360941389454628, + "grad_norm": 0.6234796803913034, + "learning_rate": 4.6594555649531935e-06, + "loss": 0.3335, + "step": 11845 + }, + { + "epoch": 0.5361393980538584, + "grad_norm": 0.6276398422477145, + "learning_rate": 4.658724349516504e-06, + "loss": 0.3402, + "step": 11846 + }, + { + "epoch": 0.5361846571622539, + "grad_norm": 0.6032229180107805, + "learning_rate": 4.657993141412781e-06, + "loss": 0.2826, + "step": 11847 + }, + { + "epoch": 0.5362299162706494, + "grad_norm": 0.6492236595026835, + "learning_rate": 4.657261940657732e-06, + "loss": 0.3014, + "step": 11848 + }, + { + "epoch": 0.536275175379045, + "grad_norm": 0.6576568803938141, + "learning_rate": 4.656530747267073e-06, + "loss": 0.3241, + "step": 11849 + }, + { + "epoch": 0.5363204344874406, + "grad_norm": 0.6720134070854751, + "learning_rate": 4.6557995612565146e-06, + "loss": 0.2732, + "step": 11850 + }, + { + "epoch": 0.5363656935958362, + "grad_norm": 0.6301081903669309, + "learning_rate": 4.655068382641764e-06, + "loss": 0.324, + "step": 11851 + }, + { + "epoch": 0.5364109527042318, + "grad_norm": 0.649505180454233, + "learning_rate": 4.654337211438535e-06, + "loss": 0.3196, + "step": 11852 + }, + { + "epoch": 0.5364562118126273, + "grad_norm": 0.9158481667574583, + "learning_rate": 4.653606047662541e-06, + "loss": 0.3554, + "step": 11853 + }, + { + "epoch": 0.5365014709210228, + "grad_norm": 0.5772610368338478, + "learning_rate": 4.652874891329484e-06, + "loss": 0.3233, + "step": 11854 + }, + { + "epoch": 0.5365467300294184, + "grad_norm": 0.6187458467750405, + "learning_rate": 4.652143742455082e-06, + "loss": 0.3304, + "step": 11855 + }, + { + "epoch": 0.536591989137814, + "grad_norm": 0.6887980125160978, + "learning_rate": 4.651412601055042e-06, + "loss": 0.3128, + "step": 11856 + }, + { + "epoch": 0.5366372482462095, + "grad_norm": 0.5715217572713842, + "learning_rate": 4.650681467145077e-06, + "loss": 0.2979, + "step": 11857 + }, + { + "epoch": 0.5366825073546051, + "grad_norm": 0.47967059834371656, + "learning_rate": 4.649950340740892e-06, + "loss": 0.5086, + "step": 11858 + }, + { + "epoch": 0.5367277664630007, + "grad_norm": 0.6855366105109115, + "learning_rate": 4.649219221858199e-06, + "loss": 0.3593, + "step": 11859 + }, + { + "epoch": 0.5367730255713963, + "grad_norm": 0.5939512965029445, + "learning_rate": 4.64848811051271e-06, + "loss": 0.2789, + "step": 11860 + }, + { + "epoch": 0.5368182846797918, + "grad_norm": 0.7928225031359858, + "learning_rate": 4.6477570067201295e-06, + "loss": 0.33, + "step": 11861 + }, + { + "epoch": 0.5368635437881873, + "grad_norm": 0.6282339858757565, + "learning_rate": 4.647025910496169e-06, + "loss": 0.3168, + "step": 11862 + }, + { + "epoch": 0.5369088028965829, + "grad_norm": 0.6349383292936635, + "learning_rate": 4.646294821856539e-06, + "loss": 0.3152, + "step": 11863 + }, + { + "epoch": 0.5369540620049785, + "grad_norm": 0.636330307450891, + "learning_rate": 4.6455637408169466e-06, + "loss": 0.3244, + "step": 11864 + }, + { + "epoch": 0.5369993211133741, + "grad_norm": 0.3136639017064418, + "learning_rate": 4.6448326673931e-06, + "loss": 0.5009, + "step": 11865 + }, + { + "epoch": 0.5370445802217696, + "grad_norm": 0.26578378504096073, + "learning_rate": 4.644101601600711e-06, + "loss": 0.4579, + "step": 11866 + }, + { + "epoch": 0.5370898393301652, + "grad_norm": 0.6772093834795655, + "learning_rate": 4.6433705434554825e-06, + "loss": 0.3301, + "step": 11867 + }, + { + "epoch": 0.5371350984385608, + "grad_norm": 0.6869899521420297, + "learning_rate": 4.6426394929731264e-06, + "loss": 0.3938, + "step": 11868 + }, + { + "epoch": 0.5371803575469564, + "grad_norm": 0.6032550183160156, + "learning_rate": 4.641908450169351e-06, + "loss": 0.287, + "step": 11869 + }, + { + "epoch": 0.5372256166553518, + "grad_norm": 0.2729589981531489, + "learning_rate": 4.641177415059863e-06, + "loss": 0.4619, + "step": 11870 + }, + { + "epoch": 0.5372708757637474, + "grad_norm": 0.2930555677217789, + "learning_rate": 4.640446387660369e-06, + "loss": 0.4725, + "step": 11871 + }, + { + "epoch": 0.537316134872143, + "grad_norm": 0.6367420254060255, + "learning_rate": 4.639715367986578e-06, + "loss": 0.3643, + "step": 11872 + }, + { + "epoch": 0.5373613939805386, + "grad_norm": 0.2707564173906194, + "learning_rate": 4.6389843560541995e-06, + "loss": 0.4915, + "step": 11873 + }, + { + "epoch": 0.5374066530889341, + "grad_norm": 0.7680386608088853, + "learning_rate": 4.638253351878937e-06, + "loss": 0.3589, + "step": 11874 + }, + { + "epoch": 0.5374519121973297, + "grad_norm": 0.6348233087753784, + "learning_rate": 4.637522355476499e-06, + "loss": 0.3109, + "step": 11875 + }, + { + "epoch": 0.5374971713057253, + "grad_norm": 0.7222645882308814, + "learning_rate": 4.636791366862593e-06, + "loss": 0.2914, + "step": 11876 + }, + { + "epoch": 0.5375424304141209, + "grad_norm": 0.5781883641735861, + "learning_rate": 4.636060386052924e-06, + "loss": 0.3326, + "step": 11877 + }, + { + "epoch": 0.5375876895225165, + "grad_norm": 0.593240330734267, + "learning_rate": 4.635329413063199e-06, + "loss": 0.2968, + "step": 11878 + }, + { + "epoch": 0.5376329486309119, + "grad_norm": 0.846105349465719, + "learning_rate": 4.634598447909127e-06, + "loss": 0.2873, + "step": 11879 + }, + { + "epoch": 0.5376782077393075, + "grad_norm": 0.9134058698420537, + "learning_rate": 4.633867490606411e-06, + "loss": 0.3117, + "step": 11880 + }, + { + "epoch": 0.5377234668477031, + "grad_norm": 0.6051682716152033, + "learning_rate": 4.633136541170757e-06, + "loss": 0.3077, + "step": 11881 + }, + { + "epoch": 0.5377687259560987, + "grad_norm": 0.6438228832444766, + "learning_rate": 4.632405599617875e-06, + "loss": 0.3233, + "step": 11882 + }, + { + "epoch": 0.5378139850644942, + "grad_norm": 0.6111067636628326, + "learning_rate": 4.631674665963464e-06, + "loss": 0.3048, + "step": 11883 + }, + { + "epoch": 0.5378592441728898, + "grad_norm": 0.5708830172022095, + "learning_rate": 4.630943740223235e-06, + "loss": 0.3436, + "step": 11884 + }, + { + "epoch": 0.5379045032812854, + "grad_norm": 0.385893873408589, + "learning_rate": 4.630212822412891e-06, + "loss": 0.4633, + "step": 11885 + }, + { + "epoch": 0.537949762389681, + "grad_norm": 0.6496981603736385, + "learning_rate": 4.62948191254814e-06, + "loss": 0.3244, + "step": 11886 + }, + { + "epoch": 0.5379950214980765, + "grad_norm": 0.5687704852137276, + "learning_rate": 4.6287510106446814e-06, + "loss": 0.3126, + "step": 11887 + }, + { + "epoch": 0.538040280606472, + "grad_norm": 0.6763024486567406, + "learning_rate": 4.628020116718225e-06, + "loss": 0.3365, + "step": 11888 + }, + { + "epoch": 0.5380855397148676, + "grad_norm": 0.3024211002191299, + "learning_rate": 4.627289230784474e-06, + "loss": 0.4631, + "step": 11889 + }, + { + "epoch": 0.5381307988232632, + "grad_norm": 0.6528307867202187, + "learning_rate": 4.626558352859133e-06, + "loss": 0.2705, + "step": 11890 + }, + { + "epoch": 0.5381760579316588, + "grad_norm": 0.6384263104660005, + "learning_rate": 4.625827482957904e-06, + "loss": 0.3328, + "step": 11891 + }, + { + "epoch": 0.5382213170400543, + "grad_norm": 0.6278570776518351, + "learning_rate": 4.625096621096497e-06, + "loss": 0.3629, + "step": 11892 + }, + { + "epoch": 0.5382665761484499, + "grad_norm": 0.6506955725953594, + "learning_rate": 4.624365767290609e-06, + "loss": 0.3031, + "step": 11893 + }, + { + "epoch": 0.5383118352568455, + "grad_norm": 0.6058536493638982, + "learning_rate": 4.6236349215559476e-06, + "loss": 0.3065, + "step": 11894 + }, + { + "epoch": 0.538357094365241, + "grad_norm": 0.6477269762566259, + "learning_rate": 4.6229040839082174e-06, + "loss": 0.3567, + "step": 11895 + }, + { + "epoch": 0.5384023534736365, + "grad_norm": 0.7315641361074862, + "learning_rate": 4.622173254363117e-06, + "loss": 0.358, + "step": 11896 + }, + { + "epoch": 0.5384476125820321, + "grad_norm": 0.6390613193796425, + "learning_rate": 4.621442432936355e-06, + "loss": 0.3315, + "step": 11897 + }, + { + "epoch": 0.5384928716904277, + "grad_norm": 0.6932636661113306, + "learning_rate": 4.620711619643633e-06, + "loss": 0.3121, + "step": 11898 + }, + { + "epoch": 0.5385381307988233, + "grad_norm": 0.6466938654027299, + "learning_rate": 4.619980814500654e-06, + "loss": 0.3536, + "step": 11899 + }, + { + "epoch": 0.5385833899072189, + "grad_norm": 0.3754239818716435, + "learning_rate": 4.619250017523118e-06, + "loss": 0.5017, + "step": 11900 + }, + { + "epoch": 0.5386286490156144, + "grad_norm": 0.6201964027603627, + "learning_rate": 4.61851922872673e-06, + "loss": 0.3208, + "step": 11901 + }, + { + "epoch": 0.53867390812401, + "grad_norm": 0.6683817172821697, + "learning_rate": 4.617788448127194e-06, + "loss": 0.3018, + "step": 11902 + }, + { + "epoch": 0.5387191672324055, + "grad_norm": 0.6207113423464251, + "learning_rate": 4.6170576757402095e-06, + "loss": 0.3449, + "step": 11903 + }, + { + "epoch": 0.5387644263408011, + "grad_norm": 0.6061645330534171, + "learning_rate": 4.616326911581478e-06, + "loss": 0.3246, + "step": 11904 + }, + { + "epoch": 0.5388096854491966, + "grad_norm": 0.6102535429940648, + "learning_rate": 4.6155961556667064e-06, + "loss": 0.2933, + "step": 11905 + }, + { + "epoch": 0.5388549445575922, + "grad_norm": 0.6458502246912481, + "learning_rate": 4.614865408011589e-06, + "loss": 0.3462, + "step": 11906 + }, + { + "epoch": 0.5389002036659878, + "grad_norm": 0.7296757629234377, + "learning_rate": 4.614134668631832e-06, + "loss": 0.3193, + "step": 11907 + }, + { + "epoch": 0.5389454627743834, + "grad_norm": 0.6260925635905281, + "learning_rate": 4.613403937543138e-06, + "loss": 0.3622, + "step": 11908 + }, + { + "epoch": 0.5389907218827789, + "grad_norm": 0.3092383604250637, + "learning_rate": 4.612673214761204e-06, + "loss": 0.4524, + "step": 11909 + }, + { + "epoch": 0.5390359809911744, + "grad_norm": 0.641101217881589, + "learning_rate": 4.611942500301733e-06, + "loss": 0.3545, + "step": 11910 + }, + { + "epoch": 0.53908124009957, + "grad_norm": 0.729212585377849, + "learning_rate": 4.611211794180427e-06, + "loss": 0.3894, + "step": 11911 + }, + { + "epoch": 0.5391264992079656, + "grad_norm": 0.6985165644437046, + "learning_rate": 4.610481096412985e-06, + "loss": 0.3575, + "step": 11912 + }, + { + "epoch": 0.5391717583163612, + "grad_norm": 0.5623032596839105, + "learning_rate": 4.609750407015107e-06, + "loss": 0.3114, + "step": 11913 + }, + { + "epoch": 0.5392170174247567, + "grad_norm": 0.6227257576443729, + "learning_rate": 4.609019726002494e-06, + "loss": 0.3008, + "step": 11914 + }, + { + "epoch": 0.5392622765331523, + "grad_norm": 0.6173945881059006, + "learning_rate": 4.608289053390849e-06, + "loss": 0.3494, + "step": 11915 + }, + { + "epoch": 0.5393075356415479, + "grad_norm": 0.3187630793576256, + "learning_rate": 4.6075583891958665e-06, + "loss": 0.5019, + "step": 11916 + }, + { + "epoch": 0.5393527947499435, + "grad_norm": 0.6328780046178049, + "learning_rate": 4.606827733433249e-06, + "loss": 0.3434, + "step": 11917 + }, + { + "epoch": 0.5393980538583389, + "grad_norm": 0.2991991417478379, + "learning_rate": 4.606097086118699e-06, + "loss": 0.46, + "step": 11918 + }, + { + "epoch": 0.5394433129667345, + "grad_norm": 0.5951216847187082, + "learning_rate": 4.60536644726791e-06, + "loss": 0.3301, + "step": 11919 + }, + { + "epoch": 0.5394885720751301, + "grad_norm": 0.6331957282164389, + "learning_rate": 4.604635816896583e-06, + "loss": 0.2981, + "step": 11920 + }, + { + "epoch": 0.5395338311835257, + "grad_norm": 0.28609010619166414, + "learning_rate": 4.6039051950204215e-06, + "loss": 0.4919, + "step": 11921 + }, + { + "epoch": 0.5395790902919213, + "grad_norm": 0.5983856542995213, + "learning_rate": 4.603174581655118e-06, + "loss": 0.3241, + "step": 11922 + }, + { + "epoch": 0.5396243494003168, + "grad_norm": 0.65502210479594, + "learning_rate": 4.602443976816375e-06, + "loss": 0.3307, + "step": 11923 + }, + { + "epoch": 0.5396696085087124, + "grad_norm": 0.6122185417503756, + "learning_rate": 4.601713380519891e-06, + "loss": 0.3455, + "step": 11924 + }, + { + "epoch": 0.539714867617108, + "grad_norm": 0.6313224879589558, + "learning_rate": 4.600982792781361e-06, + "loss": 0.3278, + "step": 11925 + }, + { + "epoch": 0.5397601267255036, + "grad_norm": 0.608001776525165, + "learning_rate": 4.600252213616486e-06, + "loss": 0.3315, + "step": 11926 + }, + { + "epoch": 0.539805385833899, + "grad_norm": 0.6289475431513395, + "learning_rate": 4.599521643040964e-06, + "loss": 0.3284, + "step": 11927 + }, + { + "epoch": 0.5398506449422946, + "grad_norm": 0.6749502573548707, + "learning_rate": 4.598791081070493e-06, + "loss": 0.3224, + "step": 11928 + }, + { + "epoch": 0.5398959040506902, + "grad_norm": 0.5380611898001285, + "learning_rate": 4.598060527720766e-06, + "loss": 0.2764, + "step": 11929 + }, + { + "epoch": 0.5399411631590858, + "grad_norm": 0.6538404966143314, + "learning_rate": 4.597329983007486e-06, + "loss": 0.329, + "step": 11930 + }, + { + "epoch": 0.5399864222674813, + "grad_norm": 0.6662952534304654, + "learning_rate": 4.5965994469463485e-06, + "loss": 0.3138, + "step": 11931 + }, + { + "epoch": 0.5400316813758769, + "grad_norm": 0.59477988751589, + "learning_rate": 4.595868919553049e-06, + "loss": 0.3088, + "step": 11932 + }, + { + "epoch": 0.5400769404842725, + "grad_norm": 0.5960129169306702, + "learning_rate": 4.595138400843285e-06, + "loss": 0.3148, + "step": 11933 + }, + { + "epoch": 0.540122199592668, + "grad_norm": 0.8784161525700802, + "learning_rate": 4.594407890832755e-06, + "loss": 0.3107, + "step": 11934 + }, + { + "epoch": 0.5401674587010636, + "grad_norm": 0.7629956582515477, + "learning_rate": 4.5936773895371525e-06, + "loss": 0.3674, + "step": 11935 + }, + { + "epoch": 0.5402127178094591, + "grad_norm": 0.38093627317634576, + "learning_rate": 4.592946896972174e-06, + "loss": 0.4688, + "step": 11936 + }, + { + "epoch": 0.5402579769178547, + "grad_norm": 0.5874686472601677, + "learning_rate": 4.592216413153519e-06, + "loss": 0.2927, + "step": 11937 + }, + { + "epoch": 0.5403032360262503, + "grad_norm": 0.6845177792070586, + "learning_rate": 4.591485938096879e-06, + "loss": 0.2726, + "step": 11938 + }, + { + "epoch": 0.5403484951346459, + "grad_norm": 0.6942333260757441, + "learning_rate": 4.590755471817951e-06, + "loss": 0.2617, + "step": 11939 + }, + { + "epoch": 0.5403937542430414, + "grad_norm": 0.5685941422602182, + "learning_rate": 4.590025014332431e-06, + "loss": 0.3057, + "step": 11940 + }, + { + "epoch": 0.540439013351437, + "grad_norm": 0.6192698308865857, + "learning_rate": 4.589294565656017e-06, + "loss": 0.3292, + "step": 11941 + }, + { + "epoch": 0.5404842724598325, + "grad_norm": 0.7505932995096428, + "learning_rate": 4.5885641258044e-06, + "loss": 0.3679, + "step": 11942 + }, + { + "epoch": 0.5405295315682281, + "grad_norm": 0.5872677963439269, + "learning_rate": 4.587833694793274e-06, + "loss": 0.3437, + "step": 11943 + }, + { + "epoch": 0.5405747906766236, + "grad_norm": 0.29940284734855005, + "learning_rate": 4.587103272638339e-06, + "loss": 0.4798, + "step": 11944 + }, + { + "epoch": 0.5406200497850192, + "grad_norm": 0.7274460135206671, + "learning_rate": 4.586372859355285e-06, + "loss": 0.3247, + "step": 11945 + }, + { + "epoch": 0.5406653088934148, + "grad_norm": 0.6549271468756779, + "learning_rate": 4.585642454959809e-06, + "loss": 0.3083, + "step": 11946 + }, + { + "epoch": 0.5407105680018104, + "grad_norm": 0.6872198529055045, + "learning_rate": 4.584912059467604e-06, + "loss": 0.2975, + "step": 11947 + }, + { + "epoch": 0.540755827110206, + "grad_norm": 0.7216276634194774, + "learning_rate": 4.584181672894362e-06, + "loss": 0.3594, + "step": 11948 + }, + { + "epoch": 0.5408010862186015, + "grad_norm": 0.6088900714811756, + "learning_rate": 4.5834512952557805e-06, + "loss": 0.3129, + "step": 11949 + }, + { + "epoch": 0.540846345326997, + "grad_norm": 0.5911821508409031, + "learning_rate": 4.582720926567552e-06, + "loss": 0.3398, + "step": 11950 + }, + { + "epoch": 0.5408916044353926, + "grad_norm": 0.6734692864936836, + "learning_rate": 4.581990566845368e-06, + "loss": 0.3065, + "step": 11951 + }, + { + "epoch": 0.5409368635437882, + "grad_norm": 0.6074726667239979, + "learning_rate": 4.581260216104923e-06, + "loss": 0.3369, + "step": 11952 + }, + { + "epoch": 0.5409821226521837, + "grad_norm": 0.779060196209588, + "learning_rate": 4.580529874361911e-06, + "loss": 0.3316, + "step": 11953 + }, + { + "epoch": 0.5410273817605793, + "grad_norm": 0.6229799956881542, + "learning_rate": 4.579799541632022e-06, + "loss": 0.3096, + "step": 11954 + }, + { + "epoch": 0.5410726408689749, + "grad_norm": 0.30134750706287144, + "learning_rate": 4.5790692179309506e-06, + "loss": 0.4898, + "step": 11955 + }, + { + "epoch": 0.5411178999773705, + "grad_norm": 0.6001279768603515, + "learning_rate": 4.578338903274389e-06, + "loss": 0.2994, + "step": 11956 + }, + { + "epoch": 0.5411631590857661, + "grad_norm": 0.6335982180014519, + "learning_rate": 4.577608597678031e-06, + "loss": 0.3237, + "step": 11957 + }, + { + "epoch": 0.5412084181941615, + "grad_norm": 0.6388397554978568, + "learning_rate": 4.576878301157564e-06, + "loss": 0.3291, + "step": 11958 + }, + { + "epoch": 0.5412536773025571, + "grad_norm": 0.6780612805093535, + "learning_rate": 4.576148013728685e-06, + "loss": 0.2893, + "step": 11959 + }, + { + "epoch": 0.5412989364109527, + "grad_norm": 0.6035805490342991, + "learning_rate": 4.575417735407084e-06, + "loss": 0.288, + "step": 11960 + }, + { + "epoch": 0.5413441955193483, + "grad_norm": 0.5938415113037409, + "learning_rate": 4.57468746620845e-06, + "loss": 0.3083, + "step": 11961 + }, + { + "epoch": 0.5413894546277438, + "grad_norm": 0.6037836247611696, + "learning_rate": 4.573957206148476e-06, + "loss": 0.2967, + "step": 11962 + }, + { + "epoch": 0.5414347137361394, + "grad_norm": 0.5917513817545543, + "learning_rate": 4.573226955242856e-06, + "loss": 0.3599, + "step": 11963 + }, + { + "epoch": 0.541479972844535, + "grad_norm": 0.6220484958209552, + "learning_rate": 4.5724967135072746e-06, + "loss": 0.3089, + "step": 11964 + }, + { + "epoch": 0.5415252319529306, + "grad_norm": 0.6425750847613974, + "learning_rate": 4.571766480957427e-06, + "loss": 0.3187, + "step": 11965 + }, + { + "epoch": 0.541570491061326, + "grad_norm": 0.6090489388183843, + "learning_rate": 4.571036257609004e-06, + "loss": 0.3204, + "step": 11966 + }, + { + "epoch": 0.5416157501697216, + "grad_norm": 0.31832057329905533, + "learning_rate": 4.570306043477693e-06, + "loss": 0.4816, + "step": 11967 + }, + { + "epoch": 0.5416610092781172, + "grad_norm": 0.6710364857290778, + "learning_rate": 4.569575838579184e-06, + "loss": 0.3218, + "step": 11968 + }, + { + "epoch": 0.5417062683865128, + "grad_norm": 0.6389355538423641, + "learning_rate": 4.56884564292917e-06, + "loss": 0.3415, + "step": 11969 + }, + { + "epoch": 0.5417515274949084, + "grad_norm": 0.682930932654138, + "learning_rate": 4.568115456543339e-06, + "loss": 0.3246, + "step": 11970 + }, + { + "epoch": 0.5417967866033039, + "grad_norm": 0.2845147003828224, + "learning_rate": 4.567385279437381e-06, + "loss": 0.4868, + "step": 11971 + }, + { + "epoch": 0.5418420457116995, + "grad_norm": 0.2978005161473394, + "learning_rate": 4.566655111626982e-06, + "loss": 0.4951, + "step": 11972 + }, + { + "epoch": 0.5418873048200951, + "grad_norm": 0.688595153123291, + "learning_rate": 4.565924953127837e-06, + "loss": 0.362, + "step": 11973 + }, + { + "epoch": 0.5419325639284907, + "grad_norm": 0.28024177680746104, + "learning_rate": 4.56519480395563e-06, + "loss": 0.4786, + "step": 11974 + }, + { + "epoch": 0.5419778230368861, + "grad_norm": 0.6663854522634133, + "learning_rate": 4.564464664126052e-06, + "loss": 0.3376, + "step": 11975 + }, + { + "epoch": 0.5420230821452817, + "grad_norm": 0.6908051407952255, + "learning_rate": 4.56373453365479e-06, + "loss": 0.3519, + "step": 11976 + }, + { + "epoch": 0.5420683412536773, + "grad_norm": 0.658050688240133, + "learning_rate": 4.563004412557532e-06, + "loss": 0.3238, + "step": 11977 + }, + { + "epoch": 0.5421136003620729, + "grad_norm": 0.6400947024626424, + "learning_rate": 4.562274300849968e-06, + "loss": 0.3674, + "step": 11978 + }, + { + "epoch": 0.5421588594704684, + "grad_norm": 0.611671600869507, + "learning_rate": 4.561544198547786e-06, + "loss": 0.3258, + "step": 11979 + }, + { + "epoch": 0.542204118578864, + "grad_norm": 0.6596228395551971, + "learning_rate": 4.560814105666672e-06, + "loss": 0.317, + "step": 11980 + }, + { + "epoch": 0.5422493776872596, + "grad_norm": 0.3664205560652866, + "learning_rate": 4.560084022222313e-06, + "loss": 0.4869, + "step": 11981 + }, + { + "epoch": 0.5422946367956551, + "grad_norm": 0.655928615674818, + "learning_rate": 4.559353948230399e-06, + "loss": 0.3114, + "step": 11982 + }, + { + "epoch": 0.5423398959040507, + "grad_norm": 0.591209692657875, + "learning_rate": 4.558623883706613e-06, + "loss": 0.2704, + "step": 11983 + }, + { + "epoch": 0.5423851550124462, + "grad_norm": 0.6378830156479883, + "learning_rate": 4.5578938286666455e-06, + "loss": 0.3081, + "step": 11984 + }, + { + "epoch": 0.5424304141208418, + "grad_norm": 0.28458597921919504, + "learning_rate": 4.557163783126181e-06, + "loss": 0.4662, + "step": 11985 + }, + { + "epoch": 0.5424756732292374, + "grad_norm": 0.29464568064879143, + "learning_rate": 4.556433747100909e-06, + "loss": 0.4925, + "step": 11986 + }, + { + "epoch": 0.542520932337633, + "grad_norm": 0.2928511770969385, + "learning_rate": 4.5557037206065105e-06, + "loss": 0.4858, + "step": 11987 + }, + { + "epoch": 0.5425661914460285, + "grad_norm": 0.6863478044764233, + "learning_rate": 4.554973703658676e-06, + "loss": 0.3027, + "step": 11988 + }, + { + "epoch": 0.5426114505544241, + "grad_norm": 0.3077387580483022, + "learning_rate": 4.554243696273091e-06, + "loss": 0.4724, + "step": 11989 + }, + { + "epoch": 0.5426567096628196, + "grad_norm": 0.5953582888405915, + "learning_rate": 4.553513698465438e-06, + "loss": 0.3115, + "step": 11990 + }, + { + "epoch": 0.5427019687712152, + "grad_norm": 0.6285184049048317, + "learning_rate": 4.552783710251404e-06, + "loss": 0.3038, + "step": 11991 + }, + { + "epoch": 0.5427472278796107, + "grad_norm": 0.5900146272819862, + "learning_rate": 4.5520537316466775e-06, + "loss": 0.3, + "step": 11992 + }, + { + "epoch": 0.5427924869880063, + "grad_norm": 0.5857511473636708, + "learning_rate": 4.551323762666937e-06, + "loss": 0.3211, + "step": 11993 + }, + { + "epoch": 0.5428377460964019, + "grad_norm": 0.35763795383161173, + "learning_rate": 4.550593803327873e-06, + "loss": 0.4651, + "step": 11994 + }, + { + "epoch": 0.5428830052047975, + "grad_norm": 0.6251418565575303, + "learning_rate": 4.5498638536451675e-06, + "loss": 0.3648, + "step": 11995 + }, + { + "epoch": 0.5429282643131931, + "grad_norm": 0.6511923733741692, + "learning_rate": 4.5491339136345055e-06, + "loss": 0.3317, + "step": 11996 + }, + { + "epoch": 0.5429735234215886, + "grad_norm": 0.6871015698177254, + "learning_rate": 4.548403983311569e-06, + "loss": 0.3394, + "step": 11997 + }, + { + "epoch": 0.5430187825299841, + "grad_norm": 0.3014881726501736, + "learning_rate": 4.547674062692046e-06, + "loss": 0.505, + "step": 11998 + }, + { + "epoch": 0.5430640416383797, + "grad_norm": 0.28402470065143265, + "learning_rate": 4.546944151791618e-06, + "loss": 0.4723, + "step": 11999 + }, + { + "epoch": 0.5431093007467753, + "grad_norm": 0.5945012448408755, + "learning_rate": 4.546214250625969e-06, + "loss": 0.3266, + "step": 12000 + }, + { + "epoch": 0.5431545598551708, + "grad_norm": 0.6330897895361995, + "learning_rate": 4.54548435921078e-06, + "loss": 0.3621, + "step": 12001 + }, + { + "epoch": 0.5431998189635664, + "grad_norm": 0.3276877083329625, + "learning_rate": 4.544754477561739e-06, + "loss": 0.474, + "step": 12002 + }, + { + "epoch": 0.543245078071962, + "grad_norm": 0.6783486670274087, + "learning_rate": 4.544024605694524e-06, + "loss": 0.3111, + "step": 12003 + }, + { + "epoch": 0.5432903371803576, + "grad_norm": 0.6288752587564099, + "learning_rate": 4.54329474362482e-06, + "loss": 0.3129, + "step": 12004 + }, + { + "epoch": 0.5433355962887532, + "grad_norm": 0.6338755715175465, + "learning_rate": 4.542564891368311e-06, + "loss": 0.2944, + "step": 12005 + }, + { + "epoch": 0.5433808553971486, + "grad_norm": 0.7239986062645524, + "learning_rate": 4.541835048940675e-06, + "loss": 0.3262, + "step": 12006 + }, + { + "epoch": 0.5434261145055442, + "grad_norm": 0.6078227665196527, + "learning_rate": 4.5411052163575986e-06, + "loss": 0.3215, + "step": 12007 + }, + { + "epoch": 0.5434713736139398, + "grad_norm": 0.6332652552296952, + "learning_rate": 4.540375393634762e-06, + "loss": 0.343, + "step": 12008 + }, + { + "epoch": 0.5435166327223354, + "grad_norm": 0.29713938921676397, + "learning_rate": 4.539645580787845e-06, + "loss": 0.4671, + "step": 12009 + }, + { + "epoch": 0.5435618918307309, + "grad_norm": 0.30752786135562743, + "learning_rate": 4.538915777832531e-06, + "loss": 0.4816, + "step": 12010 + }, + { + "epoch": 0.5436071509391265, + "grad_norm": 0.30628064591608223, + "learning_rate": 4.538185984784501e-06, + "loss": 0.4879, + "step": 12011 + }, + { + "epoch": 0.5436524100475221, + "grad_norm": 0.6034792349851319, + "learning_rate": 4.537456201659437e-06, + "loss": 0.3286, + "step": 12012 + }, + { + "epoch": 0.5436976691559177, + "grad_norm": 0.6460022175329486, + "learning_rate": 4.536726428473017e-06, + "loss": 0.3018, + "step": 12013 + }, + { + "epoch": 0.5437429282643131, + "grad_norm": 0.6330256849552708, + "learning_rate": 4.535996665240923e-06, + "loss": 0.3531, + "step": 12014 + }, + { + "epoch": 0.5437881873727087, + "grad_norm": 1.0554318969867602, + "learning_rate": 4.535266911978838e-06, + "loss": 0.3037, + "step": 12015 + }, + { + "epoch": 0.5438334464811043, + "grad_norm": 1.0406055103027143, + "learning_rate": 4.534537168702437e-06, + "loss": 0.3666, + "step": 12016 + }, + { + "epoch": 0.5438787055894999, + "grad_norm": 0.6586450980212927, + "learning_rate": 4.533807435427404e-06, + "loss": 0.318, + "step": 12017 + }, + { + "epoch": 0.5439239646978955, + "grad_norm": 0.6321733642129694, + "learning_rate": 4.533077712169418e-06, + "loss": 0.3146, + "step": 12018 + }, + { + "epoch": 0.543969223806291, + "grad_norm": 0.5850842470544956, + "learning_rate": 4.532347998944158e-06, + "loss": 0.2886, + "step": 12019 + }, + { + "epoch": 0.5440144829146866, + "grad_norm": 0.356016927067565, + "learning_rate": 4.531618295767301e-06, + "loss": 0.4809, + "step": 12020 + }, + { + "epoch": 0.5440597420230822, + "grad_norm": 0.30494498859019264, + "learning_rate": 4.53088860265453e-06, + "loss": 0.4538, + "step": 12021 + }, + { + "epoch": 0.5441050011314778, + "grad_norm": 0.27791137503165086, + "learning_rate": 4.5301589196215214e-06, + "loss": 0.4459, + "step": 12022 + }, + { + "epoch": 0.5441502602398732, + "grad_norm": 0.7059627342048436, + "learning_rate": 4.529429246683956e-06, + "loss": 0.3609, + "step": 12023 + }, + { + "epoch": 0.5441955193482688, + "grad_norm": 0.6517913855189322, + "learning_rate": 4.52869958385751e-06, + "loss": 0.333, + "step": 12024 + }, + { + "epoch": 0.5442407784566644, + "grad_norm": 0.6495883155524725, + "learning_rate": 4.527969931157863e-06, + "loss": 0.3143, + "step": 12025 + }, + { + "epoch": 0.54428603756506, + "grad_norm": 0.633776785633181, + "learning_rate": 4.5272402886006904e-06, + "loss": 0.3674, + "step": 12026 + }, + { + "epoch": 0.5443312966734555, + "grad_norm": 0.3472350589231652, + "learning_rate": 4.526510656201673e-06, + "loss": 0.4945, + "step": 12027 + }, + { + "epoch": 0.5443765557818511, + "grad_norm": 0.682612710667583, + "learning_rate": 4.525781033976489e-06, + "loss": 0.3017, + "step": 12028 + }, + { + "epoch": 0.5444218148902467, + "grad_norm": 0.7569396141176389, + "learning_rate": 4.525051421940813e-06, + "loss": 0.3625, + "step": 12029 + }, + { + "epoch": 0.5444670739986422, + "grad_norm": 0.7007330874287369, + "learning_rate": 4.524321820110322e-06, + "loss": 0.3546, + "step": 12030 + }, + { + "epoch": 0.5445123331070378, + "grad_norm": 0.624317023029584, + "learning_rate": 4.523592228500696e-06, + "loss": 0.3169, + "step": 12031 + }, + { + "epoch": 0.5445575922154333, + "grad_norm": 0.6360197728590014, + "learning_rate": 4.522862647127609e-06, + "loss": 0.2608, + "step": 12032 + }, + { + "epoch": 0.5446028513238289, + "grad_norm": 0.622608362495375, + "learning_rate": 4.5221330760067386e-06, + "loss": 0.2985, + "step": 12033 + }, + { + "epoch": 0.5446481104322245, + "grad_norm": 0.6777283117689842, + "learning_rate": 4.521403515153762e-06, + "loss": 0.2987, + "step": 12034 + }, + { + "epoch": 0.5446933695406201, + "grad_norm": 0.6558982457517104, + "learning_rate": 4.520673964584351e-06, + "loss": 0.312, + "step": 12035 + }, + { + "epoch": 0.5447386286490156, + "grad_norm": 0.5762145784272449, + "learning_rate": 4.519944424314186e-06, + "loss": 0.273, + "step": 12036 + }, + { + "epoch": 0.5447838877574112, + "grad_norm": 0.7052212045652171, + "learning_rate": 4.519214894358942e-06, + "loss": 0.3186, + "step": 12037 + }, + { + "epoch": 0.5448291468658067, + "grad_norm": 0.6014994041697003, + "learning_rate": 4.5184853747342926e-06, + "loss": 0.3102, + "step": 12038 + }, + { + "epoch": 0.5448744059742023, + "grad_norm": 0.3189094508896656, + "learning_rate": 4.517755865455912e-06, + "loss": 0.4806, + "step": 12039 + }, + { + "epoch": 0.5449196650825979, + "grad_norm": 0.6859969141969133, + "learning_rate": 4.517026366539477e-06, + "loss": 0.3474, + "step": 12040 + }, + { + "epoch": 0.5449649241909934, + "grad_norm": 0.6115840685105562, + "learning_rate": 4.516296878000664e-06, + "loss": 0.323, + "step": 12041 + }, + { + "epoch": 0.545010183299389, + "grad_norm": 0.6546584864922931, + "learning_rate": 4.515567399855145e-06, + "loss": 0.3155, + "step": 12042 + }, + { + "epoch": 0.5450554424077846, + "grad_norm": 0.600731391425279, + "learning_rate": 4.514837932118593e-06, + "loss": 0.3146, + "step": 12043 + }, + { + "epoch": 0.5451007015161802, + "grad_norm": 0.5894286013671077, + "learning_rate": 4.514108474806687e-06, + "loss": 0.295, + "step": 12044 + }, + { + "epoch": 0.5451459606245757, + "grad_norm": 0.7288286128601018, + "learning_rate": 4.513379027935094e-06, + "loss": 0.3349, + "step": 12045 + }, + { + "epoch": 0.5451912197329712, + "grad_norm": 0.5718145121685675, + "learning_rate": 4.5126495915194936e-06, + "loss": 0.3451, + "step": 12046 + }, + { + "epoch": 0.5452364788413668, + "grad_norm": 0.6346709284490044, + "learning_rate": 4.5119201655755565e-06, + "loss": 0.3341, + "step": 12047 + }, + { + "epoch": 0.5452817379497624, + "grad_norm": 0.6521556040336637, + "learning_rate": 4.511190750118955e-06, + "loss": 0.3268, + "step": 12048 + }, + { + "epoch": 0.5453269970581579, + "grad_norm": 0.3027426900568299, + "learning_rate": 4.510461345165362e-06, + "loss": 0.4741, + "step": 12049 + }, + { + "epoch": 0.5453722561665535, + "grad_norm": 0.649681449175025, + "learning_rate": 4.509731950730454e-06, + "loss": 0.3516, + "step": 12050 + }, + { + "epoch": 0.5454175152749491, + "grad_norm": 0.7190082909611529, + "learning_rate": 4.509002566829899e-06, + "loss": 0.3321, + "step": 12051 + }, + { + "epoch": 0.5454627743833447, + "grad_norm": 0.30540548048002153, + "learning_rate": 4.508273193479371e-06, + "loss": 0.4746, + "step": 12052 + }, + { + "epoch": 0.5455080334917403, + "grad_norm": 0.6481182817233865, + "learning_rate": 4.507543830694543e-06, + "loss": 0.3647, + "step": 12053 + }, + { + "epoch": 0.5455532926001357, + "grad_norm": 0.6334004009278218, + "learning_rate": 4.506814478491084e-06, + "loss": 0.308, + "step": 12054 + }, + { + "epoch": 0.5455985517085313, + "grad_norm": 0.6768165181731143, + "learning_rate": 4.506085136884667e-06, + "loss": 0.3341, + "step": 12055 + }, + { + "epoch": 0.5456438108169269, + "grad_norm": 0.6072803233190442, + "learning_rate": 4.505355805890964e-06, + "loss": 0.3679, + "step": 12056 + }, + { + "epoch": 0.5456890699253225, + "grad_norm": 0.6449685267219651, + "learning_rate": 4.504626485525647e-06, + "loss": 0.2941, + "step": 12057 + }, + { + "epoch": 0.545734329033718, + "grad_norm": 0.3259735884598887, + "learning_rate": 4.503897175804383e-06, + "loss": 0.4699, + "step": 12058 + }, + { + "epoch": 0.5457795881421136, + "grad_norm": 0.622368199343704, + "learning_rate": 4.503167876742846e-06, + "loss": 0.3379, + "step": 12059 + }, + { + "epoch": 0.5458248472505092, + "grad_norm": 0.6202468893832425, + "learning_rate": 4.502438588356707e-06, + "loss": 0.3012, + "step": 12060 + }, + { + "epoch": 0.5458701063589048, + "grad_norm": 0.6416966220784046, + "learning_rate": 4.501709310661632e-06, + "loss": 0.297, + "step": 12061 + }, + { + "epoch": 0.5459153654673002, + "grad_norm": 0.4889086177698067, + "learning_rate": 4.500980043673295e-06, + "loss": 0.5102, + "step": 12062 + }, + { + "epoch": 0.5459606245756958, + "grad_norm": 0.25808571648322653, + "learning_rate": 4.5002507874073655e-06, + "loss": 0.4713, + "step": 12063 + }, + { + "epoch": 0.5460058836840914, + "grad_norm": 0.6337181113011422, + "learning_rate": 4.499521541879508e-06, + "loss": 0.3067, + "step": 12064 + }, + { + "epoch": 0.546051142792487, + "grad_norm": 0.5806705729297652, + "learning_rate": 4.498792307105398e-06, + "loss": 0.3187, + "step": 12065 + }, + { + "epoch": 0.5460964019008826, + "grad_norm": 0.6180438362672532, + "learning_rate": 4.498063083100703e-06, + "loss": 0.3391, + "step": 12066 + }, + { + "epoch": 0.5461416610092781, + "grad_norm": 0.6576832425490626, + "learning_rate": 4.497333869881089e-06, + "loss": 0.3072, + "step": 12067 + }, + { + "epoch": 0.5461869201176737, + "grad_norm": 0.32216628821352994, + "learning_rate": 4.496604667462225e-06, + "loss": 0.4592, + "step": 12068 + }, + { + "epoch": 0.5462321792260693, + "grad_norm": 0.7037977697870069, + "learning_rate": 4.495875475859783e-06, + "loss": 0.3309, + "step": 12069 + }, + { + "epoch": 0.5462774383344648, + "grad_norm": 0.6512723214925447, + "learning_rate": 4.495146295089428e-06, + "loss": 0.3421, + "step": 12070 + }, + { + "epoch": 0.5463226974428603, + "grad_norm": 0.6249409724190828, + "learning_rate": 4.49441712516683e-06, + "loss": 0.3383, + "step": 12071 + }, + { + "epoch": 0.5463679565512559, + "grad_norm": 0.5988598636905825, + "learning_rate": 4.493687966107652e-06, + "loss": 0.3084, + "step": 12072 + }, + { + "epoch": 0.5464132156596515, + "grad_norm": 0.666932683877474, + "learning_rate": 4.492958817927569e-06, + "loss": 0.3239, + "step": 12073 + }, + { + "epoch": 0.5464584747680471, + "grad_norm": 0.7087470576939989, + "learning_rate": 4.492229680642239e-06, + "loss": 0.3071, + "step": 12074 + }, + { + "epoch": 0.5465037338764427, + "grad_norm": 0.5911853754264186, + "learning_rate": 4.4915005542673365e-06, + "loss": 0.3713, + "step": 12075 + }, + { + "epoch": 0.5465489929848382, + "grad_norm": 0.6432801875991805, + "learning_rate": 4.490771438818525e-06, + "loss": 0.3105, + "step": 12076 + }, + { + "epoch": 0.5465942520932338, + "grad_norm": 0.6452758095884515, + "learning_rate": 4.490042334311472e-06, + "loss": 0.3419, + "step": 12077 + }, + { + "epoch": 0.5466395112016293, + "grad_norm": 0.6089284177796053, + "learning_rate": 4.48931324076184e-06, + "loss": 0.269, + "step": 12078 + }, + { + "epoch": 0.5466847703100249, + "grad_norm": 0.6470458332930936, + "learning_rate": 4.488584158185301e-06, + "loss": 0.3078, + "step": 12079 + }, + { + "epoch": 0.5467300294184204, + "grad_norm": 0.6071068578697375, + "learning_rate": 4.487855086597517e-06, + "loss": 0.2899, + "step": 12080 + }, + { + "epoch": 0.546775288526816, + "grad_norm": 0.6715335365000962, + "learning_rate": 4.487126026014154e-06, + "loss": 0.3827, + "step": 12081 + }, + { + "epoch": 0.5468205476352116, + "grad_norm": 0.6113273788313882, + "learning_rate": 4.486396976450876e-06, + "loss": 0.3398, + "step": 12082 + }, + { + "epoch": 0.5468658067436072, + "grad_norm": 0.6478775139279586, + "learning_rate": 4.485667937923352e-06, + "loss": 0.2983, + "step": 12083 + }, + { + "epoch": 0.5469110658520027, + "grad_norm": 0.34625857371418256, + "learning_rate": 4.4849389104472435e-06, + "loss": 0.4905, + "step": 12084 + }, + { + "epoch": 0.5469563249603983, + "grad_norm": 0.6405866789560921, + "learning_rate": 4.4842098940382155e-06, + "loss": 0.3285, + "step": 12085 + }, + { + "epoch": 0.5470015840687938, + "grad_norm": 0.5824949965901939, + "learning_rate": 4.483480888711935e-06, + "loss": 0.3227, + "step": 12086 + }, + { + "epoch": 0.5470468431771894, + "grad_norm": 0.6195648132210937, + "learning_rate": 4.4827518944840606e-06, + "loss": 0.3531, + "step": 12087 + }, + { + "epoch": 0.547092102285585, + "grad_norm": 0.6462744084113154, + "learning_rate": 4.48202291137026e-06, + "loss": 0.3271, + "step": 12088 + }, + { + "epoch": 0.5471373613939805, + "grad_norm": 0.6415272922809488, + "learning_rate": 4.481293939386198e-06, + "loss": 0.3474, + "step": 12089 + }, + { + "epoch": 0.5471826205023761, + "grad_norm": 0.7149404102600072, + "learning_rate": 4.480564978547535e-06, + "loss": 0.3007, + "step": 12090 + }, + { + "epoch": 0.5472278796107717, + "grad_norm": 0.7008732021330117, + "learning_rate": 4.479836028869935e-06, + "loss": 0.3111, + "step": 12091 + }, + { + "epoch": 0.5472731387191673, + "grad_norm": 0.583088917034706, + "learning_rate": 4.479107090369063e-06, + "loss": 0.3306, + "step": 12092 + }, + { + "epoch": 0.5473183978275628, + "grad_norm": 0.6070470620347299, + "learning_rate": 4.478378163060577e-06, + "loss": 0.2929, + "step": 12093 + }, + { + "epoch": 0.5473636569359583, + "grad_norm": 0.7040018121624776, + "learning_rate": 4.477649246960144e-06, + "loss": 0.292, + "step": 12094 + }, + { + "epoch": 0.5474089160443539, + "grad_norm": 0.6153340024288626, + "learning_rate": 4.476920342083425e-06, + "loss": 0.293, + "step": 12095 + }, + { + "epoch": 0.5474541751527495, + "grad_norm": 0.6445238387160752, + "learning_rate": 4.47619144844608e-06, + "loss": 0.3593, + "step": 12096 + }, + { + "epoch": 0.547499434261145, + "grad_norm": 0.5952893306035464, + "learning_rate": 4.475462566063771e-06, + "loss": 0.3116, + "step": 12097 + }, + { + "epoch": 0.5475446933695406, + "grad_norm": 0.5764937417401543, + "learning_rate": 4.474733694952162e-06, + "loss": 0.3125, + "step": 12098 + }, + { + "epoch": 0.5475899524779362, + "grad_norm": 0.5910636591909206, + "learning_rate": 4.474004835126913e-06, + "loss": 0.3149, + "step": 12099 + }, + { + "epoch": 0.5476352115863318, + "grad_norm": 0.6165891092203315, + "learning_rate": 4.4732759866036846e-06, + "loss": 0.3249, + "step": 12100 + }, + { + "epoch": 0.5476804706947274, + "grad_norm": 0.6241000733082706, + "learning_rate": 4.472547149398136e-06, + "loss": 0.3324, + "step": 12101 + }, + { + "epoch": 0.5477257298031228, + "grad_norm": 0.6730294073739986, + "learning_rate": 4.471818323525932e-06, + "loss": 0.284, + "step": 12102 + }, + { + "epoch": 0.5477709889115184, + "grad_norm": 0.6396666882471538, + "learning_rate": 4.471089509002728e-06, + "loss": 0.301, + "step": 12103 + }, + { + "epoch": 0.547816248019914, + "grad_norm": 0.33717396214770234, + "learning_rate": 4.470360705844186e-06, + "loss": 0.4623, + "step": 12104 + }, + { + "epoch": 0.5478615071283096, + "grad_norm": 0.5946426926801431, + "learning_rate": 4.469631914065967e-06, + "loss": 0.2796, + "step": 12105 + }, + { + "epoch": 0.5479067662367051, + "grad_norm": 0.3031311469575686, + "learning_rate": 4.468903133683728e-06, + "loss": 0.4492, + "step": 12106 + }, + { + "epoch": 0.5479520253451007, + "grad_norm": 0.5894908589290491, + "learning_rate": 4.4681743647131285e-06, + "loss": 0.3431, + "step": 12107 + }, + { + "epoch": 0.5479972844534963, + "grad_norm": 0.6107167467803513, + "learning_rate": 4.4674456071698315e-06, + "loss": 0.3392, + "step": 12108 + }, + { + "epoch": 0.5480425435618919, + "grad_norm": 0.7012730961307557, + "learning_rate": 4.466716861069491e-06, + "loss": 0.3126, + "step": 12109 + }, + { + "epoch": 0.5480878026702874, + "grad_norm": 0.29570784932734145, + "learning_rate": 4.465988126427767e-06, + "loss": 0.4605, + "step": 12110 + }, + { + "epoch": 0.5481330617786829, + "grad_norm": 0.3330322126985907, + "learning_rate": 4.4652594032603174e-06, + "loss": 0.4735, + "step": 12111 + }, + { + "epoch": 0.5481783208870785, + "grad_norm": 0.6404707219224894, + "learning_rate": 4.4645306915828025e-06, + "loss": 0.3188, + "step": 12112 + }, + { + "epoch": 0.5482235799954741, + "grad_norm": 0.6781019519562312, + "learning_rate": 4.463801991410878e-06, + "loss": 0.2984, + "step": 12113 + }, + { + "epoch": 0.5482688391038697, + "grad_norm": 0.6193509541981514, + "learning_rate": 4.463073302760202e-06, + "loss": 0.3858, + "step": 12114 + }, + { + "epoch": 0.5483140982122652, + "grad_norm": 0.6195827035062909, + "learning_rate": 4.462344625646433e-06, + "loss": 0.3567, + "step": 12115 + }, + { + "epoch": 0.5483593573206608, + "grad_norm": 0.29883589279706985, + "learning_rate": 4.461615960085224e-06, + "loss": 0.4641, + "step": 12116 + }, + { + "epoch": 0.5484046164290564, + "grad_norm": 0.6455256843550773, + "learning_rate": 4.460887306092236e-06, + "loss": 0.3359, + "step": 12117 + }, + { + "epoch": 0.548449875537452, + "grad_norm": 0.6054056715448799, + "learning_rate": 4.460158663683125e-06, + "loss": 0.3094, + "step": 12118 + }, + { + "epoch": 0.5484951346458474, + "grad_norm": 0.641684405299334, + "learning_rate": 4.459430032873545e-06, + "loss": 0.3059, + "step": 12119 + }, + { + "epoch": 0.548540393754243, + "grad_norm": 0.6473034497692319, + "learning_rate": 4.458701413679152e-06, + "loss": 0.3443, + "step": 12120 + }, + { + "epoch": 0.5485856528626386, + "grad_norm": 0.578951718971156, + "learning_rate": 4.457972806115607e-06, + "loss": 0.2921, + "step": 12121 + }, + { + "epoch": 0.5486309119710342, + "grad_norm": 0.66634270501242, + "learning_rate": 4.4572442101985584e-06, + "loss": 0.311, + "step": 12122 + }, + { + "epoch": 0.5486761710794298, + "grad_norm": 0.2952537233831481, + "learning_rate": 4.456515625943666e-06, + "loss": 0.4561, + "step": 12123 + }, + { + "epoch": 0.5487214301878253, + "grad_norm": 0.6761764609665392, + "learning_rate": 4.455787053366583e-06, + "loss": 0.3031, + "step": 12124 + }, + { + "epoch": 0.5487666892962209, + "grad_norm": 0.6259223256492764, + "learning_rate": 4.455058492482966e-06, + "loss": 0.331, + "step": 12125 + }, + { + "epoch": 0.5488119484046164, + "grad_norm": 0.27580866668673604, + "learning_rate": 4.454329943308466e-06, + "loss": 0.458, + "step": 12126 + }, + { + "epoch": 0.548857207513012, + "grad_norm": 0.693568649449279, + "learning_rate": 4.453601405858741e-06, + "loss": 0.3242, + "step": 12127 + }, + { + "epoch": 0.5489024666214075, + "grad_norm": 0.29380132883853927, + "learning_rate": 4.4528728801494455e-06, + "loss": 0.501, + "step": 12128 + }, + { + "epoch": 0.5489477257298031, + "grad_norm": 0.6456773713755553, + "learning_rate": 4.452144366196229e-06, + "loss": 0.297, + "step": 12129 + }, + { + "epoch": 0.5489929848381987, + "grad_norm": 0.6466053776304937, + "learning_rate": 4.451415864014747e-06, + "loss": 0.3567, + "step": 12130 + }, + { + "epoch": 0.5490382439465943, + "grad_norm": 0.6261122547635842, + "learning_rate": 4.450687373620656e-06, + "loss": 0.2942, + "step": 12131 + }, + { + "epoch": 0.5490835030549898, + "grad_norm": 0.6552351948762685, + "learning_rate": 4.449958895029604e-06, + "loss": 0.3534, + "step": 12132 + }, + { + "epoch": 0.5491287621633854, + "grad_norm": 0.28897300839931045, + "learning_rate": 4.449230428257247e-06, + "loss": 0.4713, + "step": 12133 + }, + { + "epoch": 0.5491740212717809, + "grad_norm": 0.6544743726214356, + "learning_rate": 4.448501973319237e-06, + "loss": 0.3511, + "step": 12134 + }, + { + "epoch": 0.5492192803801765, + "grad_norm": 0.6922051336111326, + "learning_rate": 4.447773530231225e-06, + "loss": 0.3179, + "step": 12135 + }, + { + "epoch": 0.5492645394885721, + "grad_norm": 0.7139447536673934, + "learning_rate": 4.447045099008863e-06, + "loss": 0.3338, + "step": 12136 + }, + { + "epoch": 0.5493097985969676, + "grad_norm": 0.6248896889526092, + "learning_rate": 4.446316679667805e-06, + "loss": 0.3448, + "step": 12137 + }, + { + "epoch": 0.5493550577053632, + "grad_norm": 0.6580828432429198, + "learning_rate": 4.445588272223701e-06, + "loss": 0.3224, + "step": 12138 + }, + { + "epoch": 0.5494003168137588, + "grad_norm": 0.6255330996021665, + "learning_rate": 4.4448598766922005e-06, + "loss": 0.3264, + "step": 12139 + }, + { + "epoch": 0.5494455759221544, + "grad_norm": 0.655788026482158, + "learning_rate": 4.444131493088956e-06, + "loss": 0.3109, + "step": 12140 + }, + { + "epoch": 0.5494908350305499, + "grad_norm": 0.33868090853487126, + "learning_rate": 4.443403121429621e-06, + "loss": 0.4838, + "step": 12141 + }, + { + "epoch": 0.5495360941389454, + "grad_norm": 0.6138962693984958, + "learning_rate": 4.442674761729843e-06, + "loss": 0.2962, + "step": 12142 + }, + { + "epoch": 0.549581353247341, + "grad_norm": 0.6139537752754014, + "learning_rate": 4.441946414005272e-06, + "loss": 0.3167, + "step": 12143 + }, + { + "epoch": 0.5496266123557366, + "grad_norm": 0.5965832383615192, + "learning_rate": 4.44121807827156e-06, + "loss": 0.2908, + "step": 12144 + }, + { + "epoch": 0.5496718714641322, + "grad_norm": 0.6017878823620227, + "learning_rate": 4.4404897545443525e-06, + "loss": 0.2979, + "step": 12145 + }, + { + "epoch": 0.5497171305725277, + "grad_norm": 0.6192189887678029, + "learning_rate": 4.439761442839303e-06, + "loss": 0.2946, + "step": 12146 + }, + { + "epoch": 0.5497623896809233, + "grad_norm": 0.6770136591794844, + "learning_rate": 4.439033143172061e-06, + "loss": 0.3149, + "step": 12147 + }, + { + "epoch": 0.5498076487893189, + "grad_norm": 0.6328717534830032, + "learning_rate": 4.4383048555582725e-06, + "loss": 0.2928, + "step": 12148 + }, + { + "epoch": 0.5498529078977145, + "grad_norm": 0.6284658393596169, + "learning_rate": 4.437576580013587e-06, + "loss": 0.3373, + "step": 12149 + }, + { + "epoch": 0.5498981670061099, + "grad_norm": 0.6863147237521107, + "learning_rate": 4.436848316553655e-06, + "loss": 0.352, + "step": 12150 + }, + { + "epoch": 0.5499434261145055, + "grad_norm": 0.6818944988875426, + "learning_rate": 4.436120065194121e-06, + "loss": 0.3245, + "step": 12151 + }, + { + "epoch": 0.5499886852229011, + "grad_norm": 0.3969587015992509, + "learning_rate": 4.435391825950637e-06, + "loss": 0.4963, + "step": 12152 + }, + { + "epoch": 0.5500339443312967, + "grad_norm": 0.6994559752644615, + "learning_rate": 4.434663598838847e-06, + "loss": 0.2811, + "step": 12153 + }, + { + "epoch": 0.5500792034396922, + "grad_norm": 0.5782646329030336, + "learning_rate": 4.4339353838744024e-06, + "loss": 0.2816, + "step": 12154 + }, + { + "epoch": 0.5501244625480878, + "grad_norm": 0.6333818254219354, + "learning_rate": 4.433207181072945e-06, + "loss": 0.3159, + "step": 12155 + }, + { + "epoch": 0.5501697216564834, + "grad_norm": 0.6194011645499826, + "learning_rate": 4.432478990450126e-06, + "loss": 0.3255, + "step": 12156 + }, + { + "epoch": 0.550214980764879, + "grad_norm": 0.680557849521606, + "learning_rate": 4.431750812021591e-06, + "loss": 0.2947, + "step": 12157 + }, + { + "epoch": 0.5502602398732745, + "grad_norm": 0.6530503515211541, + "learning_rate": 4.431022645802985e-06, + "loss": 0.3302, + "step": 12158 + }, + { + "epoch": 0.55030549898167, + "grad_norm": 0.6781429736989625, + "learning_rate": 4.430294491809954e-06, + "loss": 0.3135, + "step": 12159 + }, + { + "epoch": 0.5503507580900656, + "grad_norm": 0.6310181148735358, + "learning_rate": 4.429566350058146e-06, + "loss": 0.3066, + "step": 12160 + }, + { + "epoch": 0.5503960171984612, + "grad_norm": 0.6180237070682488, + "learning_rate": 4.428838220563205e-06, + "loss": 0.3766, + "step": 12161 + }, + { + "epoch": 0.5504412763068568, + "grad_norm": 0.6238922783779193, + "learning_rate": 4.428110103340776e-06, + "loss": 0.2911, + "step": 12162 + }, + { + "epoch": 0.5504865354152523, + "grad_norm": 0.627010311697926, + "learning_rate": 4.427381998406506e-06, + "loss": 0.3012, + "step": 12163 + }, + { + "epoch": 0.5505317945236479, + "grad_norm": 0.3007742329014419, + "learning_rate": 4.426653905776035e-06, + "loss": 0.458, + "step": 12164 + }, + { + "epoch": 0.5505770536320435, + "grad_norm": 0.2872082070907317, + "learning_rate": 4.425925825465013e-06, + "loss": 0.4722, + "step": 12165 + }, + { + "epoch": 0.550622312740439, + "grad_norm": 0.6197356908953288, + "learning_rate": 4.425197757489082e-06, + "loss": 0.3196, + "step": 12166 + }, + { + "epoch": 0.5506675718488345, + "grad_norm": 0.6987742922291102, + "learning_rate": 4.4244697018638845e-06, + "loss": 0.3273, + "step": 12167 + }, + { + "epoch": 0.5507128309572301, + "grad_norm": 0.6004808697850207, + "learning_rate": 4.423741658605066e-06, + "loss": 0.2972, + "step": 12168 + }, + { + "epoch": 0.5507580900656257, + "grad_norm": 0.6460250550153468, + "learning_rate": 4.423013627728269e-06, + "loss": 0.3355, + "step": 12169 + }, + { + "epoch": 0.5508033491740213, + "grad_norm": 0.6257553030413675, + "learning_rate": 4.422285609249139e-06, + "loss": 0.3276, + "step": 12170 + }, + { + "epoch": 0.5508486082824169, + "grad_norm": 0.6327753745132836, + "learning_rate": 4.4215576031833155e-06, + "loss": 0.3384, + "step": 12171 + }, + { + "epoch": 0.5508938673908124, + "grad_norm": 0.31764744050962074, + "learning_rate": 4.420829609546442e-06, + "loss": 0.4806, + "step": 12172 + }, + { + "epoch": 0.550939126499208, + "grad_norm": 0.6285208469040747, + "learning_rate": 4.420101628354164e-06, + "loss": 0.3133, + "step": 12173 + }, + { + "epoch": 0.5509843856076035, + "grad_norm": 0.5957967636997421, + "learning_rate": 4.419373659622117e-06, + "loss": 0.3227, + "step": 12174 + }, + { + "epoch": 0.5510296447159991, + "grad_norm": 0.6882976139091561, + "learning_rate": 4.418645703365949e-06, + "loss": 0.3315, + "step": 12175 + }, + { + "epoch": 0.5510749038243946, + "grad_norm": 0.7370220979424742, + "learning_rate": 4.4179177596013005e-06, + "loss": 0.3295, + "step": 12176 + }, + { + "epoch": 0.5511201629327902, + "grad_norm": 0.30805795099873884, + "learning_rate": 4.4171898283438104e-06, + "loss": 0.4769, + "step": 12177 + }, + { + "epoch": 0.5511654220411858, + "grad_norm": 0.6003256657053226, + "learning_rate": 4.416461909609119e-06, + "loss": 0.3468, + "step": 12178 + }, + { + "epoch": 0.5512106811495814, + "grad_norm": 0.6286414600373732, + "learning_rate": 4.415734003412873e-06, + "loss": 0.2972, + "step": 12179 + }, + { + "epoch": 0.551255940257977, + "grad_norm": 0.6598533584898012, + "learning_rate": 4.415006109770706e-06, + "loss": 0.3631, + "step": 12180 + }, + { + "epoch": 0.5513011993663725, + "grad_norm": 0.2659973937123042, + "learning_rate": 4.414278228698261e-06, + "loss": 0.4777, + "step": 12181 + }, + { + "epoch": 0.551346458474768, + "grad_norm": 2.6385411080370003, + "learning_rate": 4.413550360211177e-06, + "loss": 0.3176, + "step": 12182 + }, + { + "epoch": 0.5513917175831636, + "grad_norm": 0.6190630777781309, + "learning_rate": 4.412822504325099e-06, + "loss": 0.3038, + "step": 12183 + }, + { + "epoch": 0.5514369766915592, + "grad_norm": 0.6594277253803893, + "learning_rate": 4.412094661055658e-06, + "loss": 0.3428, + "step": 12184 + }, + { + "epoch": 0.5514822357999547, + "grad_norm": 0.6978013596534337, + "learning_rate": 4.411366830418498e-06, + "loss": 0.3288, + "step": 12185 + }, + { + "epoch": 0.5515274949083503, + "grad_norm": 0.6448886516709758, + "learning_rate": 4.410639012429259e-06, + "loss": 0.3011, + "step": 12186 + }, + { + "epoch": 0.5515727540167459, + "grad_norm": 0.6144338416530524, + "learning_rate": 4.409911207103576e-06, + "loss": 0.3257, + "step": 12187 + }, + { + "epoch": 0.5516180131251415, + "grad_norm": 0.6649078543814887, + "learning_rate": 4.409183414457086e-06, + "loss": 0.3444, + "step": 12188 + }, + { + "epoch": 0.551663272233537, + "grad_norm": 0.6712539994119634, + "learning_rate": 4.408455634505435e-06, + "loss": 0.3016, + "step": 12189 + }, + { + "epoch": 0.5517085313419325, + "grad_norm": 0.5761806251973871, + "learning_rate": 4.407727867264253e-06, + "loss": 0.3021, + "step": 12190 + }, + { + "epoch": 0.5517537904503281, + "grad_norm": 0.31372664794374205, + "learning_rate": 4.407000112749179e-06, + "loss": 0.474, + "step": 12191 + }, + { + "epoch": 0.5517990495587237, + "grad_norm": 0.7033017381026658, + "learning_rate": 4.406272370975854e-06, + "loss": 0.3507, + "step": 12192 + }, + { + "epoch": 0.5518443086671193, + "grad_norm": 0.8187659702589716, + "learning_rate": 4.40554464195991e-06, + "loss": 0.3767, + "step": 12193 + }, + { + "epoch": 0.5518895677755148, + "grad_norm": 0.7055026439228711, + "learning_rate": 4.404816925716987e-06, + "loss": 0.332, + "step": 12194 + }, + { + "epoch": 0.5519348268839104, + "grad_norm": 0.30227345454402327, + "learning_rate": 4.404089222262721e-06, + "loss": 0.4674, + "step": 12195 + }, + { + "epoch": 0.551980085992306, + "grad_norm": 0.6038295544645176, + "learning_rate": 4.4033615316127466e-06, + "loss": 0.2831, + "step": 12196 + }, + { + "epoch": 0.5520253451007016, + "grad_norm": 0.6109965842943273, + "learning_rate": 4.402633853782699e-06, + "loss": 0.312, + "step": 12197 + }, + { + "epoch": 0.552070604209097, + "grad_norm": 0.6117511805063619, + "learning_rate": 4.401906188788216e-06, + "loss": 0.2787, + "step": 12198 + }, + { + "epoch": 0.5521158633174926, + "grad_norm": 0.6615939112863005, + "learning_rate": 4.401178536644934e-06, + "loss": 0.2971, + "step": 12199 + }, + { + "epoch": 0.5521611224258882, + "grad_norm": 0.7905339578895, + "learning_rate": 4.4004508973684844e-06, + "loss": 0.3102, + "step": 12200 + }, + { + "epoch": 0.5522063815342838, + "grad_norm": 0.6298379390373061, + "learning_rate": 4.399723270974503e-06, + "loss": 0.2908, + "step": 12201 + }, + { + "epoch": 0.5522516406426793, + "grad_norm": 1.0137912656423052, + "learning_rate": 4.398995657478628e-06, + "loss": 0.3327, + "step": 12202 + }, + { + "epoch": 0.5522968997510749, + "grad_norm": 0.642412628400293, + "learning_rate": 4.398268056896488e-06, + "loss": 0.3398, + "step": 12203 + }, + { + "epoch": 0.5523421588594705, + "grad_norm": 0.6056875907816118, + "learning_rate": 4.397540469243719e-06, + "loss": 0.2802, + "step": 12204 + }, + { + "epoch": 0.5523874179678661, + "grad_norm": 0.623085870979715, + "learning_rate": 4.396812894535957e-06, + "loss": 0.3177, + "step": 12205 + }, + { + "epoch": 0.5524326770762616, + "grad_norm": 0.6998316003144387, + "learning_rate": 4.396085332788832e-06, + "loss": 0.3342, + "step": 12206 + }, + { + "epoch": 0.5524779361846571, + "grad_norm": 0.692349791540142, + "learning_rate": 4.395357784017977e-06, + "loss": 0.2944, + "step": 12207 + }, + { + "epoch": 0.5525231952930527, + "grad_norm": 0.5964649889517802, + "learning_rate": 4.394630248239029e-06, + "loss": 0.2867, + "step": 12208 + }, + { + "epoch": 0.5525684544014483, + "grad_norm": 0.36281497979131894, + "learning_rate": 4.393902725467616e-06, + "loss": 0.491, + "step": 12209 + }, + { + "epoch": 0.5526137135098439, + "grad_norm": 0.6925980131373757, + "learning_rate": 4.3931752157193725e-06, + "loss": 0.3744, + "step": 12210 + }, + { + "epoch": 0.5526589726182394, + "grad_norm": 0.5824848786527309, + "learning_rate": 4.3924477190099286e-06, + "loss": 0.3029, + "step": 12211 + }, + { + "epoch": 0.552704231726635, + "grad_norm": 0.6192125272134775, + "learning_rate": 4.391720235354921e-06, + "loss": 0.3234, + "step": 12212 + }, + { + "epoch": 0.5527494908350306, + "grad_norm": 0.6198155454511541, + "learning_rate": 4.390992764769974e-06, + "loss": 0.3189, + "step": 12213 + }, + { + "epoch": 0.5527947499434261, + "grad_norm": 0.6843133179389448, + "learning_rate": 4.390265307270722e-06, + "loss": 0.3609, + "step": 12214 + }, + { + "epoch": 0.5528400090518216, + "grad_norm": 0.6429985571485314, + "learning_rate": 4.389537862872798e-06, + "loss": 0.3014, + "step": 12215 + }, + { + "epoch": 0.5528852681602172, + "grad_norm": 0.6993967541463431, + "learning_rate": 4.388810431591829e-06, + "loss": 0.2934, + "step": 12216 + }, + { + "epoch": 0.5529305272686128, + "grad_norm": 0.5852124802333049, + "learning_rate": 4.388083013443445e-06, + "loss": 0.3322, + "step": 12217 + }, + { + "epoch": 0.5529757863770084, + "grad_norm": 0.6098756681057482, + "learning_rate": 4.387355608443281e-06, + "loss": 0.3433, + "step": 12218 + }, + { + "epoch": 0.553021045485404, + "grad_norm": 0.5960790982391869, + "learning_rate": 4.386628216606962e-06, + "loss": 0.3231, + "step": 12219 + }, + { + "epoch": 0.5530663045937995, + "grad_norm": 0.5563541033680087, + "learning_rate": 4.385900837950119e-06, + "loss": 0.2942, + "step": 12220 + }, + { + "epoch": 0.553111563702195, + "grad_norm": 0.6190208956368316, + "learning_rate": 4.385173472488382e-06, + "loss": 0.2754, + "step": 12221 + }, + { + "epoch": 0.5531568228105906, + "grad_norm": 0.6148730927860822, + "learning_rate": 4.384446120237375e-06, + "loss": 0.2881, + "step": 12222 + }, + { + "epoch": 0.5532020819189862, + "grad_norm": 0.6017200305214885, + "learning_rate": 4.3837187812127335e-06, + "loss": 0.2961, + "step": 12223 + }, + { + "epoch": 0.5532473410273817, + "grad_norm": 0.6199969656357945, + "learning_rate": 4.382991455430082e-06, + "loss": 0.3047, + "step": 12224 + }, + { + "epoch": 0.5532926001357773, + "grad_norm": 0.6884288422848379, + "learning_rate": 4.38226414290505e-06, + "loss": 0.3139, + "step": 12225 + }, + { + "epoch": 0.5533378592441729, + "grad_norm": 0.574458671702428, + "learning_rate": 4.381536843653262e-06, + "loss": 0.2918, + "step": 12226 + }, + { + "epoch": 0.5533831183525685, + "grad_norm": 0.6292783706079346, + "learning_rate": 4.380809557690349e-06, + "loss": 0.3011, + "step": 12227 + }, + { + "epoch": 0.5534283774609641, + "grad_norm": 0.3549734484788748, + "learning_rate": 4.380082285031938e-06, + "loss": 0.4775, + "step": 12228 + }, + { + "epoch": 0.5534736365693596, + "grad_norm": 0.3279537547182673, + "learning_rate": 4.379355025693654e-06, + "loss": 0.4714, + "step": 12229 + }, + { + "epoch": 0.5535188956777551, + "grad_norm": 0.6157546861625464, + "learning_rate": 4.378627779691123e-06, + "loss": 0.309, + "step": 12230 + }, + { + "epoch": 0.5535641547861507, + "grad_norm": 0.6530262504499489, + "learning_rate": 4.377900547039976e-06, + "loss": 0.2888, + "step": 12231 + }, + { + "epoch": 0.5536094138945463, + "grad_norm": 0.6055986541476903, + "learning_rate": 4.377173327755832e-06, + "loss": 0.3317, + "step": 12232 + }, + { + "epoch": 0.5536546730029418, + "grad_norm": 0.6338147844885054, + "learning_rate": 4.376446121854322e-06, + "loss": 0.3026, + "step": 12233 + }, + { + "epoch": 0.5536999321113374, + "grad_norm": 0.6080275418862028, + "learning_rate": 4.3757189293510696e-06, + "loss": 0.357, + "step": 12234 + }, + { + "epoch": 0.553745191219733, + "grad_norm": 0.5928751762361252, + "learning_rate": 4.3749917502617e-06, + "loss": 0.3525, + "step": 12235 + }, + { + "epoch": 0.5537904503281286, + "grad_norm": 0.698941683211696, + "learning_rate": 4.374264584601837e-06, + "loss": 0.3303, + "step": 12236 + }, + { + "epoch": 0.553835709436524, + "grad_norm": 0.6663961230327661, + "learning_rate": 4.3735374323871084e-06, + "loss": 0.3464, + "step": 12237 + }, + { + "epoch": 0.5538809685449196, + "grad_norm": 0.6377326823601144, + "learning_rate": 4.372810293633135e-06, + "loss": 0.2846, + "step": 12238 + }, + { + "epoch": 0.5539262276533152, + "grad_norm": 0.5987760173833161, + "learning_rate": 4.372083168355543e-06, + "loss": 0.2871, + "step": 12239 + }, + { + "epoch": 0.5539714867617108, + "grad_norm": 0.582142971841127, + "learning_rate": 4.371356056569953e-06, + "loss": 0.3175, + "step": 12240 + }, + { + "epoch": 0.5540167458701064, + "grad_norm": 0.6951289154150705, + "learning_rate": 4.370628958291993e-06, + "loss": 0.2885, + "step": 12241 + }, + { + "epoch": 0.5540620049785019, + "grad_norm": 0.691357756254213, + "learning_rate": 4.369901873537283e-06, + "loss": 0.3195, + "step": 12242 + }, + { + "epoch": 0.5541072640868975, + "grad_norm": 0.6281731913361734, + "learning_rate": 4.369174802321447e-06, + "loss": 0.3092, + "step": 12243 + }, + { + "epoch": 0.5541525231952931, + "grad_norm": 0.7427270951811668, + "learning_rate": 4.368447744660107e-06, + "loss": 0.3712, + "step": 12244 + }, + { + "epoch": 0.5541977823036887, + "grad_norm": 0.40689852775381286, + "learning_rate": 4.367720700568885e-06, + "loss": 0.487, + "step": 12245 + }, + { + "epoch": 0.5542430414120841, + "grad_norm": 0.7117771895581163, + "learning_rate": 4.366993670063402e-06, + "loss": 0.2849, + "step": 12246 + }, + { + "epoch": 0.5542883005204797, + "grad_norm": 0.6206158885079222, + "learning_rate": 4.366266653159283e-06, + "loss": 0.2953, + "step": 12247 + }, + { + "epoch": 0.5543335596288753, + "grad_norm": 0.6255779764365972, + "learning_rate": 4.365539649872146e-06, + "loss": 0.3379, + "step": 12248 + }, + { + "epoch": 0.5543788187372709, + "grad_norm": 0.6808302225014655, + "learning_rate": 4.364812660217614e-06, + "loss": 0.3351, + "step": 12249 + }, + { + "epoch": 0.5544240778456664, + "grad_norm": 0.6666912810231732, + "learning_rate": 4.364085684211307e-06, + "loss": 0.3615, + "step": 12250 + }, + { + "epoch": 0.554469336954062, + "grad_norm": 0.6315922128061487, + "learning_rate": 4.363358721868844e-06, + "loss": 0.3224, + "step": 12251 + }, + { + "epoch": 0.5545145960624576, + "grad_norm": 0.6674302959256082, + "learning_rate": 4.362631773205848e-06, + "loss": 0.354, + "step": 12252 + }, + { + "epoch": 0.5545598551708532, + "grad_norm": 0.6301333820021424, + "learning_rate": 4.361904838237938e-06, + "loss": 0.3141, + "step": 12253 + }, + { + "epoch": 0.5546051142792487, + "grad_norm": 0.6182147024876224, + "learning_rate": 4.3611779169807335e-06, + "loss": 0.3001, + "step": 12254 + }, + { + "epoch": 0.5546503733876442, + "grad_norm": 0.3287120848947762, + "learning_rate": 4.360451009449852e-06, + "loss": 0.5037, + "step": 12255 + }, + { + "epoch": 0.5546956324960398, + "grad_norm": 0.5805587844522027, + "learning_rate": 4.359724115660915e-06, + "loss": 0.3193, + "step": 12256 + }, + { + "epoch": 0.5547408916044354, + "grad_norm": 0.5753460065480046, + "learning_rate": 4.3589972356295415e-06, + "loss": 0.2737, + "step": 12257 + }, + { + "epoch": 0.554786150712831, + "grad_norm": 0.27834192808116914, + "learning_rate": 4.3582703693713475e-06, + "loss": 0.4724, + "step": 12258 + }, + { + "epoch": 0.5548314098212265, + "grad_norm": 0.5975988206476961, + "learning_rate": 4.357543516901951e-06, + "loss": 0.3138, + "step": 12259 + }, + { + "epoch": 0.5548766689296221, + "grad_norm": 0.66133209006719, + "learning_rate": 4.356816678236975e-06, + "loss": 0.3372, + "step": 12260 + }, + { + "epoch": 0.5549219280380177, + "grad_norm": 0.5894943538014618, + "learning_rate": 4.35608985339203e-06, + "loss": 0.3399, + "step": 12261 + }, + { + "epoch": 0.5549671871464132, + "grad_norm": 0.5781218574299265, + "learning_rate": 4.355363042382737e-06, + "loss": 0.3074, + "step": 12262 + }, + { + "epoch": 0.5550124462548088, + "grad_norm": 0.6363791850426851, + "learning_rate": 4.3546362452247135e-06, + "loss": 0.3386, + "step": 12263 + }, + { + "epoch": 0.5550577053632043, + "grad_norm": 0.6129669070303816, + "learning_rate": 4.3539094619335746e-06, + "loss": 0.3461, + "step": 12264 + }, + { + "epoch": 0.5551029644715999, + "grad_norm": 0.5914749739899136, + "learning_rate": 4.3531826925249355e-06, + "loss": 0.2904, + "step": 12265 + }, + { + "epoch": 0.5551482235799955, + "grad_norm": 0.6209594996658195, + "learning_rate": 4.352455937014414e-06, + "loss": 0.3281, + "step": 12266 + }, + { + "epoch": 0.5551934826883911, + "grad_norm": 0.6419748858543516, + "learning_rate": 4.351729195417627e-06, + "loss": 0.3305, + "step": 12267 + }, + { + "epoch": 0.5552387417967866, + "grad_norm": 0.6433578417970277, + "learning_rate": 4.351002467750189e-06, + "loss": 0.3338, + "step": 12268 + }, + { + "epoch": 0.5552840009051822, + "grad_norm": 0.6542378892446439, + "learning_rate": 4.350275754027713e-06, + "loss": 0.3219, + "step": 12269 + }, + { + "epoch": 0.5553292600135777, + "grad_norm": 0.6360447076266809, + "learning_rate": 4.349549054265817e-06, + "loss": 0.3169, + "step": 12270 + }, + { + "epoch": 0.5553745191219733, + "grad_norm": 0.5615509706467567, + "learning_rate": 4.348822368480113e-06, + "loss": 0.3268, + "step": 12271 + }, + { + "epoch": 0.5554197782303688, + "grad_norm": 0.3815671050635444, + "learning_rate": 4.348095696686217e-06, + "loss": 0.4908, + "step": 12272 + }, + { + "epoch": 0.5554650373387644, + "grad_norm": 0.6375360091018338, + "learning_rate": 4.347369038899744e-06, + "loss": 0.2881, + "step": 12273 + }, + { + "epoch": 0.55551029644716, + "grad_norm": 0.5722597760505286, + "learning_rate": 4.346642395136303e-06, + "loss": 0.31, + "step": 12274 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.633575672379051, + "learning_rate": 4.345915765411511e-06, + "loss": 0.337, + "step": 12275 + }, + { + "epoch": 0.5556008146639512, + "grad_norm": 0.6112490004503777, + "learning_rate": 4.345189149740982e-06, + "loss": 0.3364, + "step": 12276 + }, + { + "epoch": 0.5556460737723466, + "grad_norm": 0.5802223407166853, + "learning_rate": 4.344462548140325e-06, + "loss": 0.2906, + "step": 12277 + }, + { + "epoch": 0.5556913328807422, + "grad_norm": 0.5860357053241917, + "learning_rate": 4.343735960625156e-06, + "loss": 0.3308, + "step": 12278 + }, + { + "epoch": 0.5557365919891378, + "grad_norm": 0.3729925473292314, + "learning_rate": 4.343009387211086e-06, + "loss": 0.4983, + "step": 12279 + }, + { + "epoch": 0.5557818510975334, + "grad_norm": 0.5933383937464417, + "learning_rate": 4.3422828279137245e-06, + "loss": 0.3084, + "step": 12280 + }, + { + "epoch": 0.5558271102059289, + "grad_norm": 0.8911220942079052, + "learning_rate": 4.341556282748685e-06, + "loss": 0.349, + "step": 12281 + }, + { + "epoch": 0.5558723693143245, + "grad_norm": 0.6579449341515171, + "learning_rate": 4.34082975173158e-06, + "loss": 0.289, + "step": 12282 + }, + { + "epoch": 0.5559176284227201, + "grad_norm": 0.64297167758931, + "learning_rate": 4.34010323487802e-06, + "loss": 0.3467, + "step": 12283 + }, + { + "epoch": 0.5559628875311157, + "grad_norm": 0.6358433470685065, + "learning_rate": 4.3393767322036125e-06, + "loss": 0.3273, + "step": 12284 + }, + { + "epoch": 0.5560081466395111, + "grad_norm": 0.5989624689504114, + "learning_rate": 4.338650243723971e-06, + "loss": 0.2801, + "step": 12285 + }, + { + "epoch": 0.5560534057479067, + "grad_norm": 0.6361718365706649, + "learning_rate": 4.337923769454706e-06, + "loss": 0.383, + "step": 12286 + }, + { + "epoch": 0.5560986648563023, + "grad_norm": 0.5717929766259564, + "learning_rate": 4.337197309411424e-06, + "loss": 0.3281, + "step": 12287 + }, + { + "epoch": 0.5561439239646979, + "grad_norm": 0.31954508426963363, + "learning_rate": 4.336470863609736e-06, + "loss": 0.4839, + "step": 12288 + }, + { + "epoch": 0.5561891830730935, + "grad_norm": 0.7010522726012361, + "learning_rate": 4.335744432065254e-06, + "loss": 0.3484, + "step": 12289 + }, + { + "epoch": 0.556234442181489, + "grad_norm": 0.6413260103451215, + "learning_rate": 4.33501801479358e-06, + "loss": 0.3233, + "step": 12290 + }, + { + "epoch": 0.5562797012898846, + "grad_norm": 0.6260547273063286, + "learning_rate": 4.334291611810329e-06, + "loss": 0.26, + "step": 12291 + }, + { + "epoch": 0.5563249603982802, + "grad_norm": 0.5989004568803357, + "learning_rate": 4.333565223131107e-06, + "loss": 0.3036, + "step": 12292 + }, + { + "epoch": 0.5563702195066758, + "grad_norm": 0.6397236265829002, + "learning_rate": 4.332838848771521e-06, + "loss": 0.2733, + "step": 12293 + }, + { + "epoch": 0.5564154786150712, + "grad_norm": 0.6092452917225999, + "learning_rate": 4.332112488747178e-06, + "loss": 0.3241, + "step": 12294 + }, + { + "epoch": 0.5564607377234668, + "grad_norm": 0.6211793412226144, + "learning_rate": 4.331386143073687e-06, + "loss": 0.3309, + "step": 12295 + }, + { + "epoch": 0.5565059968318624, + "grad_norm": 0.2849571156764159, + "learning_rate": 4.330659811766655e-06, + "loss": 0.4651, + "step": 12296 + }, + { + "epoch": 0.556551255940258, + "grad_norm": 0.6398917116737811, + "learning_rate": 4.329933494841689e-06, + "loss": 0.3299, + "step": 12297 + }, + { + "epoch": 0.5565965150486536, + "grad_norm": 0.6752901353544469, + "learning_rate": 4.3292071923143905e-06, + "loss": 0.3257, + "step": 12298 + }, + { + "epoch": 0.5566417741570491, + "grad_norm": 0.7044712818301637, + "learning_rate": 4.328480904200373e-06, + "loss": 0.3306, + "step": 12299 + }, + { + "epoch": 0.5566870332654447, + "grad_norm": 0.6004103904041961, + "learning_rate": 4.327754630515236e-06, + "loss": 0.3237, + "step": 12300 + }, + { + "epoch": 0.5567322923738403, + "grad_norm": 0.6051455344287193, + "learning_rate": 4.3270283712745885e-06, + "loss": 0.2621, + "step": 12301 + }, + { + "epoch": 0.5567775514822358, + "grad_norm": 0.6325282147607654, + "learning_rate": 4.326302126494035e-06, + "loss": 0.3687, + "step": 12302 + }, + { + "epoch": 0.5568228105906313, + "grad_norm": 0.2778695822780776, + "learning_rate": 4.325575896189178e-06, + "loss": 0.4804, + "step": 12303 + }, + { + "epoch": 0.5568680696990269, + "grad_norm": 0.6901227211736138, + "learning_rate": 4.324849680375625e-06, + "loss": 0.3322, + "step": 12304 + }, + { + "epoch": 0.5569133288074225, + "grad_norm": 0.6373310885407559, + "learning_rate": 4.324123479068979e-06, + "loss": 0.3157, + "step": 12305 + }, + { + "epoch": 0.5569585879158181, + "grad_norm": 0.626480672047507, + "learning_rate": 4.3233972922848435e-06, + "loss": 0.3418, + "step": 12306 + }, + { + "epoch": 0.5570038470242136, + "grad_norm": 0.6081506214931938, + "learning_rate": 4.32267112003882e-06, + "loss": 0.3671, + "step": 12307 + }, + { + "epoch": 0.5570491061326092, + "grad_norm": 0.6349245263217774, + "learning_rate": 4.321944962346517e-06, + "loss": 0.3599, + "step": 12308 + }, + { + "epoch": 0.5570943652410048, + "grad_norm": 0.6028597309271571, + "learning_rate": 4.321218819223533e-06, + "loss": 0.279, + "step": 12309 + }, + { + "epoch": 0.5571396243494003, + "grad_norm": 0.6558489076897699, + "learning_rate": 4.320492690685471e-06, + "loss": 0.347, + "step": 12310 + }, + { + "epoch": 0.5571848834577959, + "grad_norm": 0.631884906983182, + "learning_rate": 4.319766576747934e-06, + "loss": 0.29, + "step": 12311 + }, + { + "epoch": 0.5572301425661914, + "grad_norm": 0.5963251037440932, + "learning_rate": 4.319040477426527e-06, + "loss": 0.2948, + "step": 12312 + }, + { + "epoch": 0.557275401674587, + "grad_norm": 0.3178597251797919, + "learning_rate": 4.318314392736845e-06, + "loss": 0.4583, + "step": 12313 + }, + { + "epoch": 0.5573206607829826, + "grad_norm": 0.5772619349251756, + "learning_rate": 4.317588322694495e-06, + "loss": 0.3433, + "step": 12314 + }, + { + "epoch": 0.5573659198913782, + "grad_norm": 0.29671798934162635, + "learning_rate": 4.3168622673150765e-06, + "loss": 0.4762, + "step": 12315 + }, + { + "epoch": 0.5574111789997737, + "grad_norm": 0.7101392642545054, + "learning_rate": 4.3161362266141895e-06, + "loss": 0.3671, + "step": 12316 + }, + { + "epoch": 0.5574564381081692, + "grad_norm": 0.5306534955505768, + "learning_rate": 4.315410200607433e-06, + "loss": 0.2922, + "step": 12317 + }, + { + "epoch": 0.5575016972165648, + "grad_norm": 0.6101963309651014, + "learning_rate": 4.314684189310412e-06, + "loss": 0.3151, + "step": 12318 + }, + { + "epoch": 0.5575469563249604, + "grad_norm": 0.6203140579627333, + "learning_rate": 4.31395819273872e-06, + "loss": 0.3394, + "step": 12319 + }, + { + "epoch": 0.5575922154333559, + "grad_norm": 0.6078518079041008, + "learning_rate": 4.313232210907959e-06, + "loss": 0.3391, + "step": 12320 + }, + { + "epoch": 0.5576374745417515, + "grad_norm": 0.3124391598086161, + "learning_rate": 4.312506243833732e-06, + "loss": 0.4811, + "step": 12321 + }, + { + "epoch": 0.5576827336501471, + "grad_norm": 0.31236298450559097, + "learning_rate": 4.311780291531632e-06, + "loss": 0.4722, + "step": 12322 + }, + { + "epoch": 0.5577279927585427, + "grad_norm": 0.2680955973781369, + "learning_rate": 4.311054354017259e-06, + "loss": 0.4677, + "step": 12323 + }, + { + "epoch": 0.5577732518669383, + "grad_norm": 0.6576465320357912, + "learning_rate": 4.310328431306213e-06, + "loss": 0.3133, + "step": 12324 + }, + { + "epoch": 0.5578185109753337, + "grad_norm": 0.6337519434030606, + "learning_rate": 4.309602523414092e-06, + "loss": 0.2899, + "step": 12325 + }, + { + "epoch": 0.5578637700837293, + "grad_norm": 0.6146377689137756, + "learning_rate": 4.308876630356491e-06, + "loss": 0.3153, + "step": 12326 + }, + { + "epoch": 0.5579090291921249, + "grad_norm": 0.6836536803399664, + "learning_rate": 4.308150752149007e-06, + "loss": 0.293, + "step": 12327 + }, + { + "epoch": 0.5579542883005205, + "grad_norm": 0.6250698035311567, + "learning_rate": 4.307424888807242e-06, + "loss": 0.3027, + "step": 12328 + }, + { + "epoch": 0.557999547408916, + "grad_norm": 0.7400318887479171, + "learning_rate": 4.306699040346788e-06, + "loss": 0.3148, + "step": 12329 + }, + { + "epoch": 0.5580448065173116, + "grad_norm": 0.5955651260196211, + "learning_rate": 4.305973206783241e-06, + "loss": 0.2921, + "step": 12330 + }, + { + "epoch": 0.5580900656257072, + "grad_norm": 0.6247834393219636, + "learning_rate": 4.3052473881322e-06, + "loss": 0.2934, + "step": 12331 + }, + { + "epoch": 0.5581353247341028, + "grad_norm": 1.2878975281808585, + "learning_rate": 4.304521584409257e-06, + "loss": 0.2905, + "step": 12332 + }, + { + "epoch": 0.5581805838424984, + "grad_norm": 0.6963145804773342, + "learning_rate": 4.30379579563001e-06, + "loss": 0.3457, + "step": 12333 + }, + { + "epoch": 0.5582258429508938, + "grad_norm": 0.6439684461235223, + "learning_rate": 4.303070021810053e-06, + "loss": 0.3477, + "step": 12334 + }, + { + "epoch": 0.5582711020592894, + "grad_norm": 0.37819610638538015, + "learning_rate": 4.3023442629649816e-06, + "loss": 0.4662, + "step": 12335 + }, + { + "epoch": 0.558316361167685, + "grad_norm": 0.6274598277275636, + "learning_rate": 4.3016185191103874e-06, + "loss": 0.3422, + "step": 12336 + }, + { + "epoch": 0.5583616202760806, + "grad_norm": 0.6752885359439768, + "learning_rate": 4.300892790261867e-06, + "loss": 0.3476, + "step": 12337 + }, + { + "epoch": 0.5584068793844761, + "grad_norm": 0.6359580258984199, + "learning_rate": 4.300167076435015e-06, + "loss": 0.3122, + "step": 12338 + }, + { + "epoch": 0.5584521384928717, + "grad_norm": 0.6038278153379546, + "learning_rate": 4.2994413776454225e-06, + "loss": 0.3011, + "step": 12339 + }, + { + "epoch": 0.5584973976012673, + "grad_norm": 0.6408495486894391, + "learning_rate": 4.298715693908682e-06, + "loss": 0.3211, + "step": 12340 + }, + { + "epoch": 0.5585426567096629, + "grad_norm": 0.6594204681087569, + "learning_rate": 4.2979900252403895e-06, + "loss": 0.2956, + "step": 12341 + }, + { + "epoch": 0.5585879158180583, + "grad_norm": 0.8247731950449819, + "learning_rate": 4.297264371656133e-06, + "loss": 0.2894, + "step": 12342 + }, + { + "epoch": 0.5586331749264539, + "grad_norm": 0.6941235288007112, + "learning_rate": 4.296538733171507e-06, + "loss": 0.3281, + "step": 12343 + }, + { + "epoch": 0.5586784340348495, + "grad_norm": 0.6599637003374355, + "learning_rate": 4.295813109802106e-06, + "loss": 0.2901, + "step": 12344 + }, + { + "epoch": 0.5587236931432451, + "grad_norm": 0.29949819318411175, + "learning_rate": 4.295087501563516e-06, + "loss": 0.4677, + "step": 12345 + }, + { + "epoch": 0.5587689522516407, + "grad_norm": 0.6169216710706968, + "learning_rate": 4.294361908471329e-06, + "loss": 0.2863, + "step": 12346 + }, + { + "epoch": 0.5588142113600362, + "grad_norm": 0.27206602819963843, + "learning_rate": 4.293636330541141e-06, + "loss": 0.4689, + "step": 12347 + }, + { + "epoch": 0.5588594704684318, + "grad_norm": 0.7291025964436351, + "learning_rate": 4.2929107677885375e-06, + "loss": 0.3434, + "step": 12348 + }, + { + "epoch": 0.5589047295768274, + "grad_norm": 0.5961323828297462, + "learning_rate": 4.29218522022911e-06, + "loss": 0.3006, + "step": 12349 + }, + { + "epoch": 0.5589499886852229, + "grad_norm": 0.6081864008471584, + "learning_rate": 4.291459687878449e-06, + "loss": 0.3232, + "step": 12350 + }, + { + "epoch": 0.5589952477936184, + "grad_norm": 0.8359323755964424, + "learning_rate": 4.29073417075214e-06, + "loss": 0.3078, + "step": 12351 + }, + { + "epoch": 0.559040506902014, + "grad_norm": 0.7135344530168528, + "learning_rate": 4.290008668865778e-06, + "loss": 0.3308, + "step": 12352 + }, + { + "epoch": 0.5590857660104096, + "grad_norm": 0.6491425822680523, + "learning_rate": 4.289283182234948e-06, + "loss": 0.2947, + "step": 12353 + }, + { + "epoch": 0.5591310251188052, + "grad_norm": 0.6778886453996668, + "learning_rate": 4.288557710875242e-06, + "loss": 0.3435, + "step": 12354 + }, + { + "epoch": 0.5591762842272007, + "grad_norm": 0.6056368355836651, + "learning_rate": 4.287832254802244e-06, + "loss": 0.3024, + "step": 12355 + }, + { + "epoch": 0.5592215433355963, + "grad_norm": 0.5971191283513166, + "learning_rate": 4.287106814031542e-06, + "loss": 0.2883, + "step": 12356 + }, + { + "epoch": 0.5592668024439919, + "grad_norm": 0.6408366890253473, + "learning_rate": 4.286381388578728e-06, + "loss": 0.3402, + "step": 12357 + }, + { + "epoch": 0.5593120615523874, + "grad_norm": 0.614472336181477, + "learning_rate": 4.285655978459385e-06, + "loss": 0.3338, + "step": 12358 + }, + { + "epoch": 0.559357320660783, + "grad_norm": 0.645675843719086, + "learning_rate": 4.2849305836891e-06, + "loss": 0.3243, + "step": 12359 + }, + { + "epoch": 0.5594025797691785, + "grad_norm": 0.6961527371204147, + "learning_rate": 4.284205204283463e-06, + "loss": 0.3116, + "step": 12360 + }, + { + "epoch": 0.5594478388775741, + "grad_norm": 0.37595319417062534, + "learning_rate": 4.283479840258055e-06, + "loss": 0.4632, + "step": 12361 + }, + { + "epoch": 0.5594930979859697, + "grad_norm": 0.6705977459084392, + "learning_rate": 4.2827544916284655e-06, + "loss": 0.3192, + "step": 12362 + }, + { + "epoch": 0.5595383570943653, + "grad_norm": 0.6716578120239428, + "learning_rate": 4.2820291584102815e-06, + "loss": 0.3134, + "step": 12363 + }, + { + "epoch": 0.5595836162027608, + "grad_norm": 0.6678896667477587, + "learning_rate": 4.281303840619083e-06, + "loss": 0.2788, + "step": 12364 + }, + { + "epoch": 0.5596288753111563, + "grad_norm": 0.6762181244429644, + "learning_rate": 4.280578538270458e-06, + "loss": 0.3372, + "step": 12365 + }, + { + "epoch": 0.5596741344195519, + "grad_norm": 0.30584590139664086, + "learning_rate": 4.27985325137999e-06, + "loss": 0.4595, + "step": 12366 + }, + { + "epoch": 0.5597193935279475, + "grad_norm": 0.6381422081849868, + "learning_rate": 4.279127979963266e-06, + "loss": 0.3272, + "step": 12367 + }, + { + "epoch": 0.5597646526363431, + "grad_norm": 0.7344560788607514, + "learning_rate": 4.278402724035868e-06, + "loss": 0.3583, + "step": 12368 + }, + { + "epoch": 0.5598099117447386, + "grad_norm": 0.6260833716254307, + "learning_rate": 4.277677483613377e-06, + "loss": 0.3134, + "step": 12369 + }, + { + "epoch": 0.5598551708531342, + "grad_norm": 0.7521739392430035, + "learning_rate": 4.276952258711381e-06, + "loss": 0.3312, + "step": 12370 + }, + { + "epoch": 0.5599004299615298, + "grad_norm": 0.599559188266718, + "learning_rate": 4.276227049345458e-06, + "loss": 0.2971, + "step": 12371 + }, + { + "epoch": 0.5599456890699254, + "grad_norm": 1.1519106873598228, + "learning_rate": 4.2755018555311935e-06, + "loss": 0.326, + "step": 12372 + }, + { + "epoch": 0.5599909481783208, + "grad_norm": 1.1096769841546963, + "learning_rate": 4.2747766772841695e-06, + "loss": 0.3232, + "step": 12373 + }, + { + "epoch": 0.5600362072867164, + "grad_norm": 0.628112357109904, + "learning_rate": 4.2740515146199675e-06, + "loss": 0.3061, + "step": 12374 + }, + { + "epoch": 0.560081466395112, + "grad_norm": 0.6317378580340778, + "learning_rate": 4.273326367554167e-06, + "loss": 0.3176, + "step": 12375 + }, + { + "epoch": 0.5601267255035076, + "grad_norm": 0.6372257992411345, + "learning_rate": 4.272601236102353e-06, + "loss": 0.3638, + "step": 12376 + }, + { + "epoch": 0.5601719846119031, + "grad_norm": 0.32587176034152876, + "learning_rate": 4.271876120280104e-06, + "loss": 0.4667, + "step": 12377 + }, + { + "epoch": 0.5602172437202987, + "grad_norm": 0.6291645665666001, + "learning_rate": 4.2711510201030005e-06, + "loss": 0.3214, + "step": 12378 + }, + { + "epoch": 0.5602625028286943, + "grad_norm": 0.6139529937097798, + "learning_rate": 4.270425935586624e-06, + "loss": 0.2826, + "step": 12379 + }, + { + "epoch": 0.5603077619370899, + "grad_norm": 0.7866626209421049, + "learning_rate": 4.2697008667465515e-06, + "loss": 0.3625, + "step": 12380 + }, + { + "epoch": 0.5603530210454855, + "grad_norm": 0.6575234379812429, + "learning_rate": 4.268975813598366e-06, + "loss": 0.3586, + "step": 12381 + }, + { + "epoch": 0.5603982801538809, + "grad_norm": 0.6294270608967876, + "learning_rate": 4.268250776157644e-06, + "loss": 0.3114, + "step": 12382 + }, + { + "epoch": 0.5604435392622765, + "grad_norm": 0.6302772266526723, + "learning_rate": 4.267525754439967e-06, + "loss": 0.3194, + "step": 12383 + }, + { + "epoch": 0.5604887983706721, + "grad_norm": 0.6089326567380705, + "learning_rate": 4.2668007484609106e-06, + "loss": 0.3264, + "step": 12384 + }, + { + "epoch": 0.5605340574790677, + "grad_norm": 0.5959077220631366, + "learning_rate": 4.266075758236055e-06, + "loss": 0.2781, + "step": 12385 + }, + { + "epoch": 0.5605793165874632, + "grad_norm": 0.6175674183428848, + "learning_rate": 4.265350783780977e-06, + "loss": 0.2962, + "step": 12386 + }, + { + "epoch": 0.5606245756958588, + "grad_norm": 0.544309122649723, + "learning_rate": 4.264625825111255e-06, + "loss": 0.2856, + "step": 12387 + }, + { + "epoch": 0.5606698348042544, + "grad_norm": 0.6570475858920106, + "learning_rate": 4.2639008822424644e-06, + "loss": 0.2918, + "step": 12388 + }, + { + "epoch": 0.56071509391265, + "grad_norm": 0.5978688068020497, + "learning_rate": 4.2631759551901845e-06, + "loss": 0.2824, + "step": 12389 + }, + { + "epoch": 0.5607603530210454, + "grad_norm": 0.689402321038741, + "learning_rate": 4.262451043969988e-06, + "loss": 0.3136, + "step": 12390 + }, + { + "epoch": 0.560805612129441, + "grad_norm": 0.3426201622095635, + "learning_rate": 4.2617261485974545e-06, + "loss": 0.4657, + "step": 12391 + }, + { + "epoch": 0.5608508712378366, + "grad_norm": 0.6249086540005816, + "learning_rate": 4.261001269088161e-06, + "loss": 0.3229, + "step": 12392 + }, + { + "epoch": 0.5608961303462322, + "grad_norm": 0.6209909771694748, + "learning_rate": 4.260276405457678e-06, + "loss": 0.2768, + "step": 12393 + }, + { + "epoch": 0.5609413894546278, + "grad_norm": 0.7335251266441353, + "learning_rate": 4.259551557721582e-06, + "loss": 0.3645, + "step": 12394 + }, + { + "epoch": 0.5609866485630233, + "grad_norm": 0.6449609075121965, + "learning_rate": 4.25882672589545e-06, + "loss": 0.3363, + "step": 12395 + }, + { + "epoch": 0.5610319076714189, + "grad_norm": 0.9284828798571106, + "learning_rate": 4.258101909994857e-06, + "loss": 0.2566, + "step": 12396 + }, + { + "epoch": 0.5610771667798145, + "grad_norm": 0.8104997105300392, + "learning_rate": 4.257377110035374e-06, + "loss": 0.3242, + "step": 12397 + }, + { + "epoch": 0.56112242588821, + "grad_norm": 0.5587361776941873, + "learning_rate": 4.2566523260325755e-06, + "loss": 0.294, + "step": 12398 + }, + { + "epoch": 0.5611676849966055, + "grad_norm": 0.8574056365982415, + "learning_rate": 4.255927558002038e-06, + "loss": 0.317, + "step": 12399 + }, + { + "epoch": 0.5612129441050011, + "grad_norm": 0.6338371685512539, + "learning_rate": 4.2552028059593294e-06, + "loss": 0.2914, + "step": 12400 + }, + { + "epoch": 0.5612582032133967, + "grad_norm": 0.6167283818031284, + "learning_rate": 4.2544780699200265e-06, + "loss": 0.32, + "step": 12401 + }, + { + "epoch": 0.5613034623217923, + "grad_norm": 0.5741515398590784, + "learning_rate": 4.2537533498997005e-06, + "loss": 0.3176, + "step": 12402 + }, + { + "epoch": 0.5613487214301879, + "grad_norm": 0.6307609620005522, + "learning_rate": 4.253028645913922e-06, + "loss": 0.3117, + "step": 12403 + }, + { + "epoch": 0.5613939805385834, + "grad_norm": 0.3266962933317707, + "learning_rate": 4.252303957978263e-06, + "loss": 0.4565, + "step": 12404 + }, + { + "epoch": 0.561439239646979, + "grad_norm": 0.9995670900758218, + "learning_rate": 4.251579286108297e-06, + "loss": 0.4909, + "step": 12405 + }, + { + "epoch": 0.5614844987553745, + "grad_norm": 0.8186606301570003, + "learning_rate": 4.250854630319593e-06, + "loss": 0.3343, + "step": 12406 + }, + { + "epoch": 0.5615297578637701, + "grad_norm": 0.6546198346426051, + "learning_rate": 4.2501299906277225e-06, + "loss": 0.3144, + "step": 12407 + }, + { + "epoch": 0.5615750169721656, + "grad_norm": 0.661691131812228, + "learning_rate": 4.249405367048254e-06, + "loss": 0.3132, + "step": 12408 + }, + { + "epoch": 0.5616202760805612, + "grad_norm": 0.6055607608378684, + "learning_rate": 4.248680759596761e-06, + "loss": 0.3229, + "step": 12409 + }, + { + "epoch": 0.5616655351889568, + "grad_norm": 0.33701630670332994, + "learning_rate": 4.24795616828881e-06, + "loss": 0.4666, + "step": 12410 + }, + { + "epoch": 0.5617107942973524, + "grad_norm": 0.5820937489912922, + "learning_rate": 4.247231593139971e-06, + "loss": 0.3406, + "step": 12411 + }, + { + "epoch": 0.5617560534057479, + "grad_norm": 0.6162890639808882, + "learning_rate": 4.246507034165815e-06, + "loss": 0.2933, + "step": 12412 + }, + { + "epoch": 0.5618013125141434, + "grad_norm": 0.3381713375818819, + "learning_rate": 4.245782491381905e-06, + "loss": 0.4581, + "step": 12413 + }, + { + "epoch": 0.561846571622539, + "grad_norm": 0.6219759160425906, + "learning_rate": 4.245057964803815e-06, + "loss": 0.252, + "step": 12414 + }, + { + "epoch": 0.5618918307309346, + "grad_norm": 0.6374850105878566, + "learning_rate": 4.244333454447112e-06, + "loss": 0.2949, + "step": 12415 + }, + { + "epoch": 0.5619370898393302, + "grad_norm": 0.2702414442270348, + "learning_rate": 4.243608960327361e-06, + "loss": 0.4669, + "step": 12416 + }, + { + "epoch": 0.5619823489477257, + "grad_norm": 0.2660859641437605, + "learning_rate": 4.242884482460129e-06, + "loss": 0.4573, + "step": 12417 + }, + { + "epoch": 0.5620276080561213, + "grad_norm": 0.3057110766984944, + "learning_rate": 4.242160020860988e-06, + "loss": 0.4678, + "step": 12418 + }, + { + "epoch": 0.5620728671645169, + "grad_norm": 0.6044778611836339, + "learning_rate": 4.241435575545496e-06, + "loss": 0.3115, + "step": 12419 + }, + { + "epoch": 0.5621181262729125, + "grad_norm": 0.6265272349973912, + "learning_rate": 4.2407111465292265e-06, + "loss": 0.331, + "step": 12420 + }, + { + "epoch": 0.562163385381308, + "grad_norm": 0.2717995782592673, + "learning_rate": 4.239986733827742e-06, + "loss": 0.4752, + "step": 12421 + }, + { + "epoch": 0.5622086444897035, + "grad_norm": 0.586424601557017, + "learning_rate": 4.239262337456609e-06, + "loss": 0.3366, + "step": 12422 + }, + { + "epoch": 0.5622539035980991, + "grad_norm": 0.30407532265471443, + "learning_rate": 4.238537957431389e-06, + "loss": 0.4845, + "step": 12423 + }, + { + "epoch": 0.5622991627064947, + "grad_norm": 0.28007290194860684, + "learning_rate": 4.2378135937676515e-06, + "loss": 0.467, + "step": 12424 + }, + { + "epoch": 0.5623444218148902, + "grad_norm": 0.6812849363923967, + "learning_rate": 4.23708924648096e-06, + "loss": 0.3326, + "step": 12425 + }, + { + "epoch": 0.5623896809232858, + "grad_norm": 0.6931167864204915, + "learning_rate": 4.236364915586877e-06, + "loss": 0.3533, + "step": 12426 + }, + { + "epoch": 0.5624349400316814, + "grad_norm": 0.616717908059227, + "learning_rate": 4.2356406011009654e-06, + "loss": 0.3113, + "step": 12427 + }, + { + "epoch": 0.562480199140077, + "grad_norm": 0.6588738954730928, + "learning_rate": 4.234916303038793e-06, + "loss": 0.3134, + "step": 12428 + }, + { + "epoch": 0.5625254582484726, + "grad_norm": 0.6045150831513721, + "learning_rate": 4.234192021415916e-06, + "loss": 0.3124, + "step": 12429 + }, + { + "epoch": 0.562570717356868, + "grad_norm": 0.6866858585460003, + "learning_rate": 4.233467756247901e-06, + "loss": 0.3182, + "step": 12430 + }, + { + "epoch": 0.5626159764652636, + "grad_norm": 0.7354770339037802, + "learning_rate": 4.232743507550311e-06, + "loss": 0.3195, + "step": 12431 + }, + { + "epoch": 0.5626612355736592, + "grad_norm": 0.5548344294691757, + "learning_rate": 4.232019275338706e-06, + "loss": 0.2722, + "step": 12432 + }, + { + "epoch": 0.5627064946820548, + "grad_norm": 0.6686252850257488, + "learning_rate": 4.231295059628647e-06, + "loss": 0.3577, + "step": 12433 + }, + { + "epoch": 0.5627517537904503, + "grad_norm": 0.6753077640957464, + "learning_rate": 4.230570860435698e-06, + "loss": 0.3318, + "step": 12434 + }, + { + "epoch": 0.5627970128988459, + "grad_norm": 0.37310213050581825, + "learning_rate": 4.2298466777754175e-06, + "loss": 0.4465, + "step": 12435 + }, + { + "epoch": 0.5628422720072415, + "grad_norm": 0.5783839882048719, + "learning_rate": 4.2291225116633665e-06, + "loss": 0.3255, + "step": 12436 + }, + { + "epoch": 0.562887531115637, + "grad_norm": 0.6434521887200615, + "learning_rate": 4.228398362115103e-06, + "loss": 0.3002, + "step": 12437 + }, + { + "epoch": 0.5629327902240326, + "grad_norm": 0.3290184769604561, + "learning_rate": 4.227674229146193e-06, + "loss": 0.4682, + "step": 12438 + }, + { + "epoch": 0.5629780493324281, + "grad_norm": 0.27603873378163185, + "learning_rate": 4.226950112772189e-06, + "loss": 0.4763, + "step": 12439 + }, + { + "epoch": 0.5630233084408237, + "grad_norm": 0.6221420328633019, + "learning_rate": 4.226226013008654e-06, + "loss": 0.2841, + "step": 12440 + }, + { + "epoch": 0.5630685675492193, + "grad_norm": 0.5667063179695143, + "learning_rate": 4.225501929871146e-06, + "loss": 0.2746, + "step": 12441 + }, + { + "epoch": 0.5631138266576149, + "grad_norm": 0.33589286969250104, + "learning_rate": 4.22477786337522e-06, + "loss": 0.4623, + "step": 12442 + }, + { + "epoch": 0.5631590857660104, + "grad_norm": 0.6587862966320408, + "learning_rate": 4.224053813536439e-06, + "loss": 0.3203, + "step": 12443 + }, + { + "epoch": 0.563204344874406, + "grad_norm": 0.6754237354060931, + "learning_rate": 4.223329780370359e-06, + "loss": 0.2977, + "step": 12444 + }, + { + "epoch": 0.5632496039828015, + "grad_norm": 0.663692464836983, + "learning_rate": 4.222605763892535e-06, + "loss": 0.3681, + "step": 12445 + }, + { + "epoch": 0.5632948630911971, + "grad_norm": 0.33810651602626374, + "learning_rate": 4.221881764118526e-06, + "loss": 0.4469, + "step": 12446 + }, + { + "epoch": 0.5633401221995926, + "grad_norm": 0.6857029521910759, + "learning_rate": 4.22115778106389e-06, + "loss": 0.2916, + "step": 12447 + }, + { + "epoch": 0.5633853813079882, + "grad_norm": 0.6359154186799292, + "learning_rate": 4.220433814744179e-06, + "loss": 0.3617, + "step": 12448 + }, + { + "epoch": 0.5634306404163838, + "grad_norm": 0.29578040102195857, + "learning_rate": 4.219709865174951e-06, + "loss": 0.4948, + "step": 12449 + }, + { + "epoch": 0.5634758995247794, + "grad_norm": 0.6621635262913025, + "learning_rate": 4.218985932371764e-06, + "loss": 0.3066, + "step": 12450 + }, + { + "epoch": 0.563521158633175, + "grad_norm": 0.5902543749544588, + "learning_rate": 4.218262016350169e-06, + "loss": 0.2919, + "step": 12451 + }, + { + "epoch": 0.5635664177415705, + "grad_norm": 0.8094348894797204, + "learning_rate": 4.21753811712572e-06, + "loss": 0.3323, + "step": 12452 + }, + { + "epoch": 0.563611676849966, + "grad_norm": 0.6149692212115275, + "learning_rate": 4.2168142347139765e-06, + "loss": 0.3319, + "step": 12453 + }, + { + "epoch": 0.5636569359583616, + "grad_norm": 0.7002479435493842, + "learning_rate": 4.21609036913049e-06, + "loss": 0.2577, + "step": 12454 + }, + { + "epoch": 0.5637021950667572, + "grad_norm": 0.6264439152159017, + "learning_rate": 4.2153665203908125e-06, + "loss": 0.2823, + "step": 12455 + }, + { + "epoch": 0.5637474541751527, + "grad_norm": 0.6442835838787396, + "learning_rate": 4.214642688510498e-06, + "loss": 0.3785, + "step": 12456 + }, + { + "epoch": 0.5637927132835483, + "grad_norm": 0.5754049656613867, + "learning_rate": 4.213918873505103e-06, + "loss": 0.3356, + "step": 12457 + }, + { + "epoch": 0.5638379723919439, + "grad_norm": 0.6206389488426615, + "learning_rate": 4.213195075390175e-06, + "loss": 0.2787, + "step": 12458 + }, + { + "epoch": 0.5638832315003395, + "grad_norm": 0.6025820884307962, + "learning_rate": 4.212471294181269e-06, + "loss": 0.3161, + "step": 12459 + }, + { + "epoch": 0.563928490608735, + "grad_norm": 0.6599164073600439, + "learning_rate": 4.211747529893936e-06, + "loss": 0.3361, + "step": 12460 + }, + { + "epoch": 0.5639737497171305, + "grad_norm": 0.4259348750726592, + "learning_rate": 4.2110237825437275e-06, + "loss": 0.4476, + "step": 12461 + }, + { + "epoch": 0.5640190088255261, + "grad_norm": 0.6265420256719098, + "learning_rate": 4.210300052146194e-06, + "loss": 0.3217, + "step": 12462 + }, + { + "epoch": 0.5640642679339217, + "grad_norm": 0.6012447982413059, + "learning_rate": 4.2095763387168895e-06, + "loss": 0.3401, + "step": 12463 + }, + { + "epoch": 0.5641095270423173, + "grad_norm": 0.6426222003181654, + "learning_rate": 4.208852642271359e-06, + "loss": 0.2803, + "step": 12464 + }, + { + "epoch": 0.5641547861507128, + "grad_norm": 2.3473227350320647, + "learning_rate": 4.208128962825157e-06, + "loss": 0.2666, + "step": 12465 + }, + { + "epoch": 0.5642000452591084, + "grad_norm": 0.6419177385904847, + "learning_rate": 4.2074053003938296e-06, + "loss": 0.3239, + "step": 12466 + }, + { + "epoch": 0.564245304367504, + "grad_norm": 0.640389380164466, + "learning_rate": 4.2066816549929315e-06, + "loss": 0.2964, + "step": 12467 + }, + { + "epoch": 0.5642905634758996, + "grad_norm": 0.5871885642771933, + "learning_rate": 4.205958026638006e-06, + "loss": 0.3068, + "step": 12468 + }, + { + "epoch": 0.564335822584295, + "grad_norm": 0.36427456424647253, + "learning_rate": 4.2052344153446035e-06, + "loss": 0.4818, + "step": 12469 + }, + { + "epoch": 0.5643810816926906, + "grad_norm": 0.3353419505539499, + "learning_rate": 4.204510821128274e-06, + "loss": 0.5034, + "step": 12470 + }, + { + "epoch": 0.5644263408010862, + "grad_norm": 0.629458993351915, + "learning_rate": 4.2037872440045615e-06, + "loss": 0.2994, + "step": 12471 + }, + { + "epoch": 0.5644715999094818, + "grad_norm": 0.7561757437539882, + "learning_rate": 4.203063683989017e-06, + "loss": 0.3031, + "step": 12472 + }, + { + "epoch": 0.5645168590178773, + "grad_norm": 0.6085330300726697, + "learning_rate": 4.202340141097188e-06, + "loss": 0.2994, + "step": 12473 + }, + { + "epoch": 0.5645621181262729, + "grad_norm": 0.5898340763903122, + "learning_rate": 4.2016166153446174e-06, + "loss": 0.3275, + "step": 12474 + }, + { + "epoch": 0.5646073772346685, + "grad_norm": 0.3621647513055581, + "learning_rate": 4.200893106746853e-06, + "loss": 0.5033, + "step": 12475 + }, + { + "epoch": 0.5646526363430641, + "grad_norm": 0.6416906160818326, + "learning_rate": 4.2001696153194445e-06, + "loss": 0.3175, + "step": 12476 + }, + { + "epoch": 0.5646978954514597, + "grad_norm": 0.6275430528310015, + "learning_rate": 4.199446141077932e-06, + "loss": 0.3422, + "step": 12477 + }, + { + "epoch": 0.5647431545598551, + "grad_norm": 0.66606714845945, + "learning_rate": 4.198722684037864e-06, + "loss": 0.2949, + "step": 12478 + }, + { + "epoch": 0.5647884136682507, + "grad_norm": 0.6124342570104664, + "learning_rate": 4.197999244214783e-06, + "loss": 0.3036, + "step": 12479 + }, + { + "epoch": 0.5648336727766463, + "grad_norm": 0.8367497963384407, + "learning_rate": 4.197275821624239e-06, + "loss": 0.3299, + "step": 12480 + }, + { + "epoch": 0.5648789318850419, + "grad_norm": 0.3321969996370139, + "learning_rate": 4.196552416281768e-06, + "loss": 0.482, + "step": 12481 + }, + { + "epoch": 0.5649241909934374, + "grad_norm": 0.5723278264837929, + "learning_rate": 4.19582902820292e-06, + "loss": 0.3089, + "step": 12482 + }, + { + "epoch": 0.564969450101833, + "grad_norm": 0.6631719342411454, + "learning_rate": 4.195105657403236e-06, + "loss": 0.3272, + "step": 12483 + }, + { + "epoch": 0.5650147092102286, + "grad_norm": 0.2974743436661958, + "learning_rate": 4.19438230389826e-06, + "loss": 0.4605, + "step": 12484 + }, + { + "epoch": 0.5650599683186242, + "grad_norm": 2.4532523574610137, + "learning_rate": 4.193658967703532e-06, + "loss": 0.2911, + "step": 12485 + }, + { + "epoch": 0.5651052274270197, + "grad_norm": 0.6637453590331593, + "learning_rate": 4.192935648834599e-06, + "loss": 0.3498, + "step": 12486 + }, + { + "epoch": 0.5651504865354152, + "grad_norm": 0.2940291784258796, + "learning_rate": 4.192212347306999e-06, + "loss": 0.4973, + "step": 12487 + }, + { + "epoch": 0.5651957456438108, + "grad_norm": 0.5755812778287472, + "learning_rate": 4.191489063136274e-06, + "loss": 0.3016, + "step": 12488 + }, + { + "epoch": 0.5652410047522064, + "grad_norm": 0.6427057193532089, + "learning_rate": 4.190765796337968e-06, + "loss": 0.3179, + "step": 12489 + }, + { + "epoch": 0.565286263860602, + "grad_norm": 0.6591412378938449, + "learning_rate": 4.190042546927618e-06, + "loss": 0.3303, + "step": 12490 + }, + { + "epoch": 0.5653315229689975, + "grad_norm": 0.32428203390183824, + "learning_rate": 4.189319314920766e-06, + "loss": 0.4698, + "step": 12491 + }, + { + "epoch": 0.5653767820773931, + "grad_norm": 0.6346047768285362, + "learning_rate": 4.188596100332953e-06, + "loss": 0.3052, + "step": 12492 + }, + { + "epoch": 0.5654220411857886, + "grad_norm": 0.5467035608516998, + "learning_rate": 4.1878729031797165e-06, + "loss": 0.4703, + "step": 12493 + }, + { + "epoch": 0.5654673002941842, + "grad_norm": 0.6546999103912576, + "learning_rate": 4.187149723476597e-06, + "loss": 0.3133, + "step": 12494 + }, + { + "epoch": 0.5655125594025797, + "grad_norm": 0.6745645431237851, + "learning_rate": 4.186426561239134e-06, + "loss": 0.3387, + "step": 12495 + }, + { + "epoch": 0.5655578185109753, + "grad_norm": 0.6720264368361668, + "learning_rate": 4.185703416482867e-06, + "loss": 0.2982, + "step": 12496 + }, + { + "epoch": 0.5656030776193709, + "grad_norm": 0.5966460736841118, + "learning_rate": 4.184980289223331e-06, + "loss": 0.3212, + "step": 12497 + }, + { + "epoch": 0.5656483367277665, + "grad_norm": 0.5941486030441155, + "learning_rate": 4.184257179476065e-06, + "loss": 0.4905, + "step": 12498 + }, + { + "epoch": 0.5656935958361621, + "grad_norm": 0.6087554097990314, + "learning_rate": 4.183534087256609e-06, + "loss": 0.283, + "step": 12499 + }, + { + "epoch": 0.5657388549445576, + "grad_norm": 0.6923120727230525, + "learning_rate": 4.182811012580495e-06, + "loss": 0.3618, + "step": 12500 + }, + { + "epoch": 0.5657841140529531, + "grad_norm": 0.6157486806061441, + "learning_rate": 4.182087955463264e-06, + "loss": 0.3, + "step": 12501 + }, + { + "epoch": 0.5658293731613487, + "grad_norm": 0.5838262062234353, + "learning_rate": 4.181364915920453e-06, + "loss": 0.316, + "step": 12502 + }, + { + "epoch": 0.5658746322697443, + "grad_norm": 0.7193124084515455, + "learning_rate": 4.180641893967593e-06, + "loss": 0.3127, + "step": 12503 + }, + { + "epoch": 0.5659198913781398, + "grad_norm": 0.655315477496645, + "learning_rate": 4.179918889620221e-06, + "loss": 0.3122, + "step": 12504 + }, + { + "epoch": 0.5659651504865354, + "grad_norm": 0.645451805585538, + "learning_rate": 4.179195902893878e-06, + "loss": 0.3616, + "step": 12505 + }, + { + "epoch": 0.566010409594931, + "grad_norm": 0.6778970618048709, + "learning_rate": 4.17847293380409e-06, + "loss": 0.2893, + "step": 12506 + }, + { + "epoch": 0.5660556687033266, + "grad_norm": 0.5948215804243057, + "learning_rate": 4.177749982366397e-06, + "loss": 0.3205, + "step": 12507 + }, + { + "epoch": 0.566100927811722, + "grad_norm": 0.6467076040360541, + "learning_rate": 4.17702704859633e-06, + "loss": 0.3061, + "step": 12508 + }, + { + "epoch": 0.5661461869201176, + "grad_norm": 0.5704107704943451, + "learning_rate": 4.176304132509428e-06, + "loss": 0.3304, + "step": 12509 + }, + { + "epoch": 0.5661914460285132, + "grad_norm": 0.6390073058823326, + "learning_rate": 4.175581234121216e-06, + "loss": 0.3336, + "step": 12510 + }, + { + "epoch": 0.5662367051369088, + "grad_norm": 0.6960136454544098, + "learning_rate": 4.174858353447234e-06, + "loss": 0.311, + "step": 12511 + }, + { + "epoch": 0.5662819642453044, + "grad_norm": 0.6104350462181477, + "learning_rate": 4.1741354905030115e-06, + "loss": 0.3133, + "step": 12512 + }, + { + "epoch": 0.5663272233536999, + "grad_norm": 0.2917322371481019, + "learning_rate": 4.17341264530408e-06, + "loss": 0.4926, + "step": 12513 + }, + { + "epoch": 0.5663724824620955, + "grad_norm": 0.7144016324699435, + "learning_rate": 4.1726898178659714e-06, + "loss": 0.3293, + "step": 12514 + }, + { + "epoch": 0.5664177415704911, + "grad_norm": 0.7909838933014116, + "learning_rate": 4.1719670082042194e-06, + "loss": 0.2988, + "step": 12515 + }, + { + "epoch": 0.5664630006788867, + "grad_norm": 0.5857732475977157, + "learning_rate": 4.171244216334353e-06, + "loss": 0.2753, + "step": 12516 + }, + { + "epoch": 0.5665082597872821, + "grad_norm": 0.7735522212373134, + "learning_rate": 4.1705214422719024e-06, + "loss": 0.2952, + "step": 12517 + }, + { + "epoch": 0.5665535188956777, + "grad_norm": 0.5934958361037524, + "learning_rate": 4.1697986860324e-06, + "loss": 0.2912, + "step": 12518 + }, + { + "epoch": 0.5665987780040733, + "grad_norm": 0.5350188760957395, + "learning_rate": 4.169075947631371e-06, + "loss": 0.2762, + "step": 12519 + }, + { + "epoch": 0.5666440371124689, + "grad_norm": 0.86593002964982, + "learning_rate": 4.1683532270843505e-06, + "loss": 0.3131, + "step": 12520 + }, + { + "epoch": 0.5666892962208645, + "grad_norm": 0.8766304309226985, + "learning_rate": 4.1676305244068645e-06, + "loss": 0.2886, + "step": 12521 + }, + { + "epoch": 0.56673455532926, + "grad_norm": 0.6122471541271316, + "learning_rate": 4.166907839614442e-06, + "loss": 0.2739, + "step": 12522 + }, + { + "epoch": 0.5667798144376556, + "grad_norm": 0.6411446012105898, + "learning_rate": 4.16618517272261e-06, + "loss": 0.3459, + "step": 12523 + }, + { + "epoch": 0.5668250735460512, + "grad_norm": 0.6081462210575242, + "learning_rate": 4.165462523746899e-06, + "loss": 0.3422, + "step": 12524 + }, + { + "epoch": 0.5668703326544468, + "grad_norm": 0.5875629432466725, + "learning_rate": 4.164739892702836e-06, + "loss": 0.2996, + "step": 12525 + }, + { + "epoch": 0.5669155917628422, + "grad_norm": 0.3287040692234207, + "learning_rate": 4.164017279605946e-06, + "loss": 0.4936, + "step": 12526 + }, + { + "epoch": 0.5669608508712378, + "grad_norm": 0.6252197324600116, + "learning_rate": 4.163294684471757e-06, + "loss": 0.3036, + "step": 12527 + }, + { + "epoch": 0.5670061099796334, + "grad_norm": 0.301561350521929, + "learning_rate": 4.162572107315798e-06, + "loss": 0.475, + "step": 12528 + }, + { + "epoch": 0.567051369088029, + "grad_norm": 0.6671373326093292, + "learning_rate": 4.161849548153589e-06, + "loss": 0.2777, + "step": 12529 + }, + { + "epoch": 0.5670966281964245, + "grad_norm": 0.6620647807278043, + "learning_rate": 4.161127007000662e-06, + "loss": 0.324, + "step": 12530 + }, + { + "epoch": 0.5671418873048201, + "grad_norm": 0.6390365926440836, + "learning_rate": 4.160404483872538e-06, + "loss": 0.3413, + "step": 12531 + }, + { + "epoch": 0.5671871464132157, + "grad_norm": 0.6515037937373498, + "learning_rate": 4.159681978784743e-06, + "loss": 0.3346, + "step": 12532 + }, + { + "epoch": 0.5672324055216112, + "grad_norm": 0.70976713212594, + "learning_rate": 4.1589594917528006e-06, + "loss": 0.2838, + "step": 12533 + }, + { + "epoch": 0.5672776646300068, + "grad_norm": 0.6151749329986993, + "learning_rate": 4.158237022792237e-06, + "loss": 0.3601, + "step": 12534 + }, + { + "epoch": 0.5673229237384023, + "grad_norm": 0.5838365082606324, + "learning_rate": 4.157514571918574e-06, + "loss": 0.3204, + "step": 12535 + }, + { + "epoch": 0.5673681828467979, + "grad_norm": 0.6441399562644541, + "learning_rate": 4.156792139147336e-06, + "loss": 0.3476, + "step": 12536 + }, + { + "epoch": 0.5674134419551935, + "grad_norm": 0.5794311590688679, + "learning_rate": 4.156069724494043e-06, + "loss": 0.3006, + "step": 12537 + }, + { + "epoch": 0.5674587010635891, + "grad_norm": 0.6359896469226998, + "learning_rate": 4.155347327974223e-06, + "loss": 0.3577, + "step": 12538 + }, + { + "epoch": 0.5675039601719846, + "grad_norm": 0.9274568004702104, + "learning_rate": 4.154624949603391e-06, + "loss": 0.3332, + "step": 12539 + }, + { + "epoch": 0.5675492192803802, + "grad_norm": 0.6045661204640542, + "learning_rate": 4.153902589397075e-06, + "loss": 0.3121, + "step": 12540 + }, + { + "epoch": 0.5675944783887757, + "grad_norm": 0.5885564599166756, + "learning_rate": 4.153180247370794e-06, + "loss": 0.2814, + "step": 12541 + }, + { + "epoch": 0.5676397374971713, + "grad_norm": 0.6470512546656807, + "learning_rate": 4.152457923540068e-06, + "loss": 0.3642, + "step": 12542 + }, + { + "epoch": 0.5676849966055668, + "grad_norm": 0.6405301017985353, + "learning_rate": 4.151735617920417e-06, + "loss": 0.3252, + "step": 12543 + }, + { + "epoch": 0.5677302557139624, + "grad_norm": 0.44958018256330395, + "learning_rate": 4.151013330527364e-06, + "loss": 0.5038, + "step": 12544 + }, + { + "epoch": 0.567775514822358, + "grad_norm": 0.6490291798847689, + "learning_rate": 4.150291061376426e-06, + "loss": 0.2978, + "step": 12545 + }, + { + "epoch": 0.5678207739307536, + "grad_norm": 0.7109462139773439, + "learning_rate": 4.149568810483124e-06, + "loss": 0.3462, + "step": 12546 + }, + { + "epoch": 0.5678660330391492, + "grad_norm": 0.6038537660170973, + "learning_rate": 4.148846577862977e-06, + "loss": 0.3077, + "step": 12547 + }, + { + "epoch": 0.5679112921475447, + "grad_norm": 0.628410020280818, + "learning_rate": 4.148124363531501e-06, + "loss": 0.324, + "step": 12548 + }, + { + "epoch": 0.5679565512559402, + "grad_norm": 0.6446328523395594, + "learning_rate": 4.147402167504218e-06, + "loss": 0.3144, + "step": 12549 + }, + { + "epoch": 0.5680018103643358, + "grad_norm": 0.5789304471638873, + "learning_rate": 4.146679989796643e-06, + "loss": 0.3268, + "step": 12550 + }, + { + "epoch": 0.5680470694727314, + "grad_norm": 0.2817358866934816, + "learning_rate": 4.145957830424294e-06, + "loss": 0.486, + "step": 12551 + }, + { + "epoch": 0.5680923285811269, + "grad_norm": 0.6622630583860681, + "learning_rate": 4.145235689402688e-06, + "loss": 0.3289, + "step": 12552 + }, + { + "epoch": 0.5681375876895225, + "grad_norm": 0.3054074301809164, + "learning_rate": 4.144513566747342e-06, + "loss": 0.4641, + "step": 12553 + }, + { + "epoch": 0.5681828467979181, + "grad_norm": 0.6051067622157187, + "learning_rate": 4.143791462473774e-06, + "loss": 0.2911, + "step": 12554 + }, + { + "epoch": 0.5682281059063137, + "grad_norm": 0.27926213530322874, + "learning_rate": 4.143069376597496e-06, + "loss": 0.4848, + "step": 12555 + }, + { + "epoch": 0.5682733650147093, + "grad_norm": 0.6382366524036927, + "learning_rate": 4.142347309134024e-06, + "loss": 0.3453, + "step": 12556 + }, + { + "epoch": 0.5683186241231047, + "grad_norm": 0.6275300895206359, + "learning_rate": 4.141625260098878e-06, + "loss": 0.2998, + "step": 12557 + }, + { + "epoch": 0.5683638832315003, + "grad_norm": 0.5958461166824709, + "learning_rate": 4.140903229507566e-06, + "loss": 0.2994, + "step": 12558 + }, + { + "epoch": 0.5684091423398959, + "grad_norm": 0.6190751677743475, + "learning_rate": 4.1401812173756055e-06, + "loss": 0.2883, + "step": 12559 + }, + { + "epoch": 0.5684544014482915, + "grad_norm": 0.7678343018745986, + "learning_rate": 4.139459223718511e-06, + "loss": 0.2983, + "step": 12560 + }, + { + "epoch": 0.568499660556687, + "grad_norm": 0.3230824047445227, + "learning_rate": 4.138737248551793e-06, + "loss": 0.4877, + "step": 12561 + }, + { + "epoch": 0.5685449196650826, + "grad_norm": 0.6874374038803182, + "learning_rate": 4.1380152918909665e-06, + "loss": 0.295, + "step": 12562 + }, + { + "epoch": 0.5685901787734782, + "grad_norm": 0.6884284518616037, + "learning_rate": 4.137293353751546e-06, + "loss": 0.3186, + "step": 12563 + }, + { + "epoch": 0.5686354378818738, + "grad_norm": 0.5863243128631148, + "learning_rate": 4.13657143414904e-06, + "loss": 0.3564, + "step": 12564 + }, + { + "epoch": 0.5686806969902692, + "grad_norm": 0.627551766252621, + "learning_rate": 4.1358495330989625e-06, + "loss": 0.294, + "step": 12565 + }, + { + "epoch": 0.5687259560986648, + "grad_norm": 0.27480827574499894, + "learning_rate": 4.1351276506168235e-06, + "loss": 0.4816, + "step": 12566 + }, + { + "epoch": 0.5687712152070604, + "grad_norm": 0.6537997783814505, + "learning_rate": 4.134405786718138e-06, + "loss": 0.3285, + "step": 12567 + }, + { + "epoch": 0.568816474315456, + "grad_norm": 0.6606229899697229, + "learning_rate": 4.133683941418411e-06, + "loss": 0.3106, + "step": 12568 + }, + { + "epoch": 0.5688617334238516, + "grad_norm": 0.6383340830097654, + "learning_rate": 4.132962114733156e-06, + "loss": 0.3025, + "step": 12569 + }, + { + "epoch": 0.5689069925322471, + "grad_norm": 0.6340031462442014, + "learning_rate": 4.132240306677883e-06, + "loss": 0.2986, + "step": 12570 + }, + { + "epoch": 0.5689522516406427, + "grad_norm": 0.7355684662490959, + "learning_rate": 4.1315185172681e-06, + "loss": 0.3112, + "step": 12571 + }, + { + "epoch": 0.5689975107490383, + "grad_norm": 0.6237544027549345, + "learning_rate": 4.130796746519316e-06, + "loss": 0.3361, + "step": 12572 + }, + { + "epoch": 0.5690427698574338, + "grad_norm": 0.628615082490157, + "learning_rate": 4.130074994447042e-06, + "loss": 0.3462, + "step": 12573 + }, + { + "epoch": 0.5690880289658293, + "grad_norm": 0.3102732059637665, + "learning_rate": 4.129353261066784e-06, + "loss": 0.4807, + "step": 12574 + }, + { + "epoch": 0.5691332880742249, + "grad_norm": 0.5653306933787046, + "learning_rate": 4.12863154639405e-06, + "loss": 0.287, + "step": 12575 + }, + { + "epoch": 0.5691785471826205, + "grad_norm": 0.6455550118701715, + "learning_rate": 4.127909850444349e-06, + "loss": 0.2981, + "step": 12576 + }, + { + "epoch": 0.5692238062910161, + "grad_norm": 0.6443727050940745, + "learning_rate": 4.127188173233185e-06, + "loss": 0.3185, + "step": 12577 + }, + { + "epoch": 0.5692690653994116, + "grad_norm": 0.6621726582166643, + "learning_rate": 4.126466514776067e-06, + "loss": 0.3418, + "step": 12578 + }, + { + "epoch": 0.5693143245078072, + "grad_norm": 0.6077376379773018, + "learning_rate": 4.125744875088502e-06, + "loss": 0.3396, + "step": 12579 + }, + { + "epoch": 0.5693595836162028, + "grad_norm": 0.28837605872185323, + "learning_rate": 4.125023254185995e-06, + "loss": 0.4544, + "step": 12580 + }, + { + "epoch": 0.5694048427245983, + "grad_norm": 0.5750559154445537, + "learning_rate": 4.124301652084049e-06, + "loss": 0.3333, + "step": 12581 + }, + { + "epoch": 0.5694501018329939, + "grad_norm": 0.6627604804141155, + "learning_rate": 4.123580068798171e-06, + "loss": 0.2996, + "step": 12582 + }, + { + "epoch": 0.5694953609413894, + "grad_norm": 0.7667761592962654, + "learning_rate": 4.122858504343868e-06, + "loss": 0.3424, + "step": 12583 + }, + { + "epoch": 0.569540620049785, + "grad_norm": 0.7305511014596607, + "learning_rate": 4.1221369587366395e-06, + "loss": 0.3573, + "step": 12584 + }, + { + "epoch": 0.5695858791581806, + "grad_norm": 0.6430158353794494, + "learning_rate": 4.121415431991991e-06, + "loss": 0.3094, + "step": 12585 + }, + { + "epoch": 0.5696311382665762, + "grad_norm": 0.7005562455378558, + "learning_rate": 4.12069392412543e-06, + "loss": 0.3755, + "step": 12586 + }, + { + "epoch": 0.5696763973749717, + "grad_norm": 0.5827775448153438, + "learning_rate": 4.119972435152453e-06, + "loss": 0.3079, + "step": 12587 + }, + { + "epoch": 0.5697216564833673, + "grad_norm": 0.7272577668064656, + "learning_rate": 4.119250965088566e-06, + "loss": 0.367, + "step": 12588 + }, + { + "epoch": 0.5697669155917628, + "grad_norm": 0.6866287625304002, + "learning_rate": 4.118529513949272e-06, + "loss": 0.32, + "step": 12589 + }, + { + "epoch": 0.5698121747001584, + "grad_norm": 0.6199082633100993, + "learning_rate": 4.11780808175007e-06, + "loss": 0.3211, + "step": 12590 + }, + { + "epoch": 0.569857433808554, + "grad_norm": 0.3132223313660452, + "learning_rate": 4.1170866685064625e-06, + "loss": 0.4791, + "step": 12591 + }, + { + "epoch": 0.5699026929169495, + "grad_norm": 0.6191028735269731, + "learning_rate": 4.116365274233952e-06, + "loss": 0.3395, + "step": 12592 + }, + { + "epoch": 0.5699479520253451, + "grad_norm": 0.3177343220045825, + "learning_rate": 4.115643898948039e-06, + "loss": 0.4736, + "step": 12593 + }, + { + "epoch": 0.5699932111337407, + "grad_norm": 0.662381067071977, + "learning_rate": 4.114922542664221e-06, + "loss": 0.3509, + "step": 12594 + }, + { + "epoch": 0.5700384702421363, + "grad_norm": 0.6767235362430225, + "learning_rate": 4.114201205397998e-06, + "loss": 0.3642, + "step": 12595 + }, + { + "epoch": 0.5700837293505318, + "grad_norm": 0.6522887209274827, + "learning_rate": 4.113479887164873e-06, + "loss": 0.3442, + "step": 12596 + }, + { + "epoch": 0.5701289884589273, + "grad_norm": 0.6629478414681544, + "learning_rate": 4.112758587980342e-06, + "loss": 0.3241, + "step": 12597 + }, + { + "epoch": 0.5701742475673229, + "grad_norm": 0.5835930500962448, + "learning_rate": 4.112037307859903e-06, + "loss": 0.3091, + "step": 12598 + }, + { + "epoch": 0.5702195066757185, + "grad_norm": 0.6259458536357286, + "learning_rate": 4.111316046819057e-06, + "loss": 0.3226, + "step": 12599 + }, + { + "epoch": 0.570264765784114, + "grad_norm": 0.6550185899599036, + "learning_rate": 4.110594804873297e-06, + "loss": 0.2821, + "step": 12600 + }, + { + "epoch": 0.5703100248925096, + "grad_norm": 0.6371420170637637, + "learning_rate": 4.1098735820381244e-06, + "loss": 0.3065, + "step": 12601 + }, + { + "epoch": 0.5703552840009052, + "grad_norm": 0.7330960754239171, + "learning_rate": 4.109152378329036e-06, + "loss": 0.3473, + "step": 12602 + }, + { + "epoch": 0.5704005431093008, + "grad_norm": 0.662731391985183, + "learning_rate": 4.108431193761525e-06, + "loss": 0.3019, + "step": 12603 + }, + { + "epoch": 0.5704458022176964, + "grad_norm": 0.5955175335506525, + "learning_rate": 4.107710028351089e-06, + "loss": 0.3158, + "step": 12604 + }, + { + "epoch": 0.5704910613260918, + "grad_norm": 0.5912044747365863, + "learning_rate": 4.106988882113228e-06, + "loss": 0.3779, + "step": 12605 + }, + { + "epoch": 0.5705363204344874, + "grad_norm": 0.7742260903363052, + "learning_rate": 4.106267755063429e-06, + "loss": 0.3616, + "step": 12606 + }, + { + "epoch": 0.570581579542883, + "grad_norm": 0.6187984756118377, + "learning_rate": 4.105546647217192e-06, + "loss": 0.3191, + "step": 12607 + }, + { + "epoch": 0.5706268386512786, + "grad_norm": 0.6393811844148454, + "learning_rate": 4.104825558590011e-06, + "loss": 0.2965, + "step": 12608 + }, + { + "epoch": 0.5706720977596741, + "grad_norm": 0.7597578452839173, + "learning_rate": 4.104104489197381e-06, + "loss": 0.3399, + "step": 12609 + }, + { + "epoch": 0.5707173568680697, + "grad_norm": 0.5876493739667094, + "learning_rate": 4.1033834390547905e-06, + "loss": 0.2856, + "step": 12610 + }, + { + "epoch": 0.5707626159764653, + "grad_norm": 0.6053375291576475, + "learning_rate": 4.102662408177738e-06, + "loss": 0.3119, + "step": 12611 + }, + { + "epoch": 0.5708078750848609, + "grad_norm": 0.6556433596751344, + "learning_rate": 4.1019413965817154e-06, + "loss": 0.3265, + "step": 12612 + }, + { + "epoch": 0.5708531341932563, + "grad_norm": 0.4186174147926591, + "learning_rate": 4.101220404282213e-06, + "loss": 0.4635, + "step": 12613 + }, + { + "epoch": 0.5708983933016519, + "grad_norm": 0.3760300246390405, + "learning_rate": 4.100499431294722e-06, + "loss": 0.4867, + "step": 12614 + }, + { + "epoch": 0.5709436524100475, + "grad_norm": 0.6868071595945288, + "learning_rate": 4.099778477634739e-06, + "loss": 0.3504, + "step": 12615 + }, + { + "epoch": 0.5709889115184431, + "grad_norm": 0.6522276286304554, + "learning_rate": 4.099057543317749e-06, + "loss": 0.2872, + "step": 12616 + }, + { + "epoch": 0.5710341706268387, + "grad_norm": 0.611826672633969, + "learning_rate": 4.098336628359247e-06, + "loss": 0.3311, + "step": 12617 + }, + { + "epoch": 0.5710794297352342, + "grad_norm": 0.5757073653381102, + "learning_rate": 4.097615732774722e-06, + "loss": 0.3015, + "step": 12618 + }, + { + "epoch": 0.5711246888436298, + "grad_norm": 0.6543449155667355, + "learning_rate": 4.096894856579662e-06, + "loss": 0.3205, + "step": 12619 + }, + { + "epoch": 0.5711699479520254, + "grad_norm": 0.6499685402810728, + "learning_rate": 4.096173999789558e-06, + "loss": 0.3432, + "step": 12620 + }, + { + "epoch": 0.571215207060421, + "grad_norm": 0.6168195605009977, + "learning_rate": 4.095453162419898e-06, + "loss": 0.286, + "step": 12621 + }, + { + "epoch": 0.5712604661688164, + "grad_norm": 0.6798988113354032, + "learning_rate": 4.094732344486174e-06, + "loss": 0.2918, + "step": 12622 + }, + { + "epoch": 0.571305725277212, + "grad_norm": 0.6054918063775061, + "learning_rate": 4.0940115460038695e-06, + "loss": 0.3076, + "step": 12623 + }, + { + "epoch": 0.5713509843856076, + "grad_norm": 0.6838615401522805, + "learning_rate": 4.093290766988474e-06, + "loss": 0.3144, + "step": 12624 + }, + { + "epoch": 0.5713962434940032, + "grad_norm": 0.6405000541011384, + "learning_rate": 4.092570007455477e-06, + "loss": 0.3133, + "step": 12625 + }, + { + "epoch": 0.5714415026023988, + "grad_norm": 0.6336666469050273, + "learning_rate": 4.0918492674203634e-06, + "loss": 0.3394, + "step": 12626 + }, + { + "epoch": 0.5714867617107943, + "grad_norm": 0.6549913219931577, + "learning_rate": 4.091128546898619e-06, + "loss": 0.2751, + "step": 12627 + }, + { + "epoch": 0.5715320208191899, + "grad_norm": 0.6524068949012286, + "learning_rate": 4.090407845905732e-06, + "loss": 0.3164, + "step": 12628 + }, + { + "epoch": 0.5715772799275854, + "grad_norm": 0.6281430874277494, + "learning_rate": 4.089687164457184e-06, + "loss": 0.3177, + "step": 12629 + }, + { + "epoch": 0.571622539035981, + "grad_norm": 0.6065271029200794, + "learning_rate": 4.088966502568465e-06, + "loss": 0.2746, + "step": 12630 + }, + { + "epoch": 0.5716677981443765, + "grad_norm": 0.5427963923567827, + "learning_rate": 4.0882458602550586e-06, + "loss": 0.4791, + "step": 12631 + }, + { + "epoch": 0.5717130572527721, + "grad_norm": 0.6819911887506003, + "learning_rate": 4.087525237532447e-06, + "loss": 0.3448, + "step": 12632 + }, + { + "epoch": 0.5717583163611677, + "grad_norm": 0.655494482755223, + "learning_rate": 4.086804634416115e-06, + "loss": 0.3352, + "step": 12633 + }, + { + "epoch": 0.5718035754695633, + "grad_norm": 0.6626172355189169, + "learning_rate": 4.08608405092155e-06, + "loss": 0.3025, + "step": 12634 + }, + { + "epoch": 0.5718488345779588, + "grad_norm": 0.6266465106621113, + "learning_rate": 4.085363487064228e-06, + "loss": 0.3052, + "step": 12635 + }, + { + "epoch": 0.5718940936863544, + "grad_norm": 0.6048553709958095, + "learning_rate": 4.084642942859638e-06, + "loss": 0.287, + "step": 12636 + }, + { + "epoch": 0.5719393527947499, + "grad_norm": 0.6391854257326505, + "learning_rate": 4.083922418323257e-06, + "loss": 0.3331, + "step": 12637 + }, + { + "epoch": 0.5719846119031455, + "grad_norm": 0.6385617384850278, + "learning_rate": 4.083201913470574e-06, + "loss": 0.2752, + "step": 12638 + }, + { + "epoch": 0.5720298710115411, + "grad_norm": 0.6224515114391435, + "learning_rate": 4.082481428317063e-06, + "loss": 0.2886, + "step": 12639 + }, + { + "epoch": 0.5720751301199366, + "grad_norm": 0.31548837794382356, + "learning_rate": 4.081760962878209e-06, + "loss": 0.4714, + "step": 12640 + }, + { + "epoch": 0.5721203892283322, + "grad_norm": 0.6290456500635456, + "learning_rate": 4.081040517169493e-06, + "loss": 0.3219, + "step": 12641 + }, + { + "epoch": 0.5721656483367278, + "grad_norm": 0.6123838167027538, + "learning_rate": 4.080320091206392e-06, + "loss": 0.3539, + "step": 12642 + }, + { + "epoch": 0.5722109074451234, + "grad_norm": 0.7026155094801178, + "learning_rate": 4.079599685004388e-06, + "loss": 0.2924, + "step": 12643 + }, + { + "epoch": 0.5722561665535189, + "grad_norm": 0.6244310496732987, + "learning_rate": 4.078879298578961e-06, + "loss": 0.311, + "step": 12644 + }, + { + "epoch": 0.5723014256619144, + "grad_norm": 0.3315480626927533, + "learning_rate": 4.078158931945588e-06, + "loss": 0.4704, + "step": 12645 + }, + { + "epoch": 0.57234668477031, + "grad_norm": 0.28464159746166984, + "learning_rate": 4.077438585119748e-06, + "loss": 0.485, + "step": 12646 + }, + { + "epoch": 0.5723919438787056, + "grad_norm": 0.6443006669304278, + "learning_rate": 4.076718258116922e-06, + "loss": 0.3401, + "step": 12647 + }, + { + "epoch": 0.5724372029871011, + "grad_norm": 0.3044131293795776, + "learning_rate": 4.0759979509525826e-06, + "loss": 0.4848, + "step": 12648 + }, + { + "epoch": 0.5724824620954967, + "grad_norm": 0.630328672719413, + "learning_rate": 4.075277663642208e-06, + "loss": 0.3524, + "step": 12649 + }, + { + "epoch": 0.5725277212038923, + "grad_norm": 0.6414834546317076, + "learning_rate": 4.074557396201279e-06, + "loss": 0.317, + "step": 12650 + }, + { + "epoch": 0.5725729803122879, + "grad_norm": 0.5825287557417055, + "learning_rate": 4.073837148645269e-06, + "loss": 0.2713, + "step": 12651 + }, + { + "epoch": 0.5726182394206835, + "grad_norm": 0.5882568662766171, + "learning_rate": 4.073116920989653e-06, + "loss": 0.2846, + "step": 12652 + }, + { + "epoch": 0.5726634985290789, + "grad_norm": 0.3948042084638724, + "learning_rate": 4.072396713249907e-06, + "loss": 0.4546, + "step": 12653 + }, + { + "epoch": 0.5727087576374745, + "grad_norm": 0.6078405818525381, + "learning_rate": 4.071676525441509e-06, + "loss": 0.3553, + "step": 12654 + }, + { + "epoch": 0.5727540167458701, + "grad_norm": 0.7194445472363838, + "learning_rate": 4.07095635757993e-06, + "loss": 0.308, + "step": 12655 + }, + { + "epoch": 0.5727992758542657, + "grad_norm": 0.3168071964095616, + "learning_rate": 4.070236209680646e-06, + "loss": 0.4775, + "step": 12656 + }, + { + "epoch": 0.5728445349626612, + "grad_norm": 0.6062929801973241, + "learning_rate": 4.069516081759131e-06, + "loss": 0.2968, + "step": 12657 + }, + { + "epoch": 0.5728897940710568, + "grad_norm": 0.6174877429131435, + "learning_rate": 4.068795973830856e-06, + "loss": 0.3192, + "step": 12658 + }, + { + "epoch": 0.5729350531794524, + "grad_norm": 0.614924387600183, + "learning_rate": 4.068075885911295e-06, + "loss": 0.3188, + "step": 12659 + }, + { + "epoch": 0.572980312287848, + "grad_norm": 0.7104822249231385, + "learning_rate": 4.067355818015925e-06, + "loss": 0.3344, + "step": 12660 + }, + { + "epoch": 0.5730255713962435, + "grad_norm": 0.7277396852893528, + "learning_rate": 4.0666357701602105e-06, + "loss": 0.2727, + "step": 12661 + }, + { + "epoch": 0.573070830504639, + "grad_norm": 0.5984528852078446, + "learning_rate": 4.0659157423596265e-06, + "loss": 0.3058, + "step": 12662 + }, + { + "epoch": 0.5731160896130346, + "grad_norm": 0.5822935757880945, + "learning_rate": 4.065195734629646e-06, + "loss": 0.3135, + "step": 12663 + }, + { + "epoch": 0.5731613487214302, + "grad_norm": 0.7260328271628357, + "learning_rate": 4.064475746985738e-06, + "loss": 0.3307, + "step": 12664 + }, + { + "epoch": 0.5732066078298258, + "grad_norm": 0.6039575429945476, + "learning_rate": 4.063755779443372e-06, + "loss": 0.3185, + "step": 12665 + }, + { + "epoch": 0.5732518669382213, + "grad_norm": 0.6447887050870809, + "learning_rate": 4.063035832018018e-06, + "loss": 0.3056, + "step": 12666 + }, + { + "epoch": 0.5732971260466169, + "grad_norm": 0.636991093146034, + "learning_rate": 4.06231590472515e-06, + "loss": 0.3142, + "step": 12667 + }, + { + "epoch": 0.5733423851550125, + "grad_norm": 0.6648637483903794, + "learning_rate": 4.06159599758023e-06, + "loss": 0.3491, + "step": 12668 + }, + { + "epoch": 0.573387644263408, + "grad_norm": 0.3602955931210812, + "learning_rate": 4.060876110598731e-06, + "loss": 0.4871, + "step": 12669 + }, + { + "epoch": 0.5734329033718035, + "grad_norm": 0.5996667931514984, + "learning_rate": 4.0601562437961215e-06, + "loss": 0.3092, + "step": 12670 + }, + { + "epoch": 0.5734781624801991, + "grad_norm": 0.591590722739638, + "learning_rate": 4.059436397187866e-06, + "loss": 0.3432, + "step": 12671 + }, + { + "epoch": 0.5735234215885947, + "grad_norm": 0.42572110447408806, + "learning_rate": 4.0587165707894326e-06, + "loss": 0.4563, + "step": 12672 + }, + { + "epoch": 0.5735686806969903, + "grad_norm": 0.6530519024731599, + "learning_rate": 4.0579967646162915e-06, + "loss": 0.3394, + "step": 12673 + }, + { + "epoch": 0.5736139398053859, + "grad_norm": 0.29068368801514916, + "learning_rate": 4.057276978683906e-06, + "loss": 0.4838, + "step": 12674 + }, + { + "epoch": 0.5736591989137814, + "grad_norm": 0.6793392061769342, + "learning_rate": 4.056557213007743e-06, + "loss": 0.2792, + "step": 12675 + }, + { + "epoch": 0.573704458022177, + "grad_norm": 0.6032629284496717, + "learning_rate": 4.055837467603268e-06, + "loss": 0.3201, + "step": 12676 + }, + { + "epoch": 0.5737497171305725, + "grad_norm": 0.6227563593182059, + "learning_rate": 4.055117742485944e-06, + "loss": 0.3243, + "step": 12677 + }, + { + "epoch": 0.5737949762389681, + "grad_norm": 0.6140441774350899, + "learning_rate": 4.05439803767124e-06, + "loss": 0.3283, + "step": 12678 + }, + { + "epoch": 0.5738402353473636, + "grad_norm": 0.6493695835053662, + "learning_rate": 4.053678353174616e-06, + "loss": 0.3038, + "step": 12679 + }, + { + "epoch": 0.5738854944557592, + "grad_norm": 0.674659064650729, + "learning_rate": 4.05295868901154e-06, + "loss": 0.2927, + "step": 12680 + }, + { + "epoch": 0.5739307535641548, + "grad_norm": 0.3355983023504748, + "learning_rate": 4.052239045197472e-06, + "loss": 0.4449, + "step": 12681 + }, + { + "epoch": 0.5739760126725504, + "grad_norm": 0.6013667689517775, + "learning_rate": 4.051519421747876e-06, + "loss": 0.2783, + "step": 12682 + }, + { + "epoch": 0.5740212717809459, + "grad_norm": 0.6710995569903319, + "learning_rate": 4.050799818678216e-06, + "loss": 0.3296, + "step": 12683 + }, + { + "epoch": 0.5740665308893415, + "grad_norm": 0.6503624710261977, + "learning_rate": 4.050080236003952e-06, + "loss": 0.3149, + "step": 12684 + }, + { + "epoch": 0.574111789997737, + "grad_norm": 0.29002163267180253, + "learning_rate": 4.049360673740545e-06, + "loss": 0.4615, + "step": 12685 + }, + { + "epoch": 0.5741570491061326, + "grad_norm": 0.6306557530330095, + "learning_rate": 4.04864113190346e-06, + "loss": 0.357, + "step": 12686 + }, + { + "epoch": 0.5742023082145282, + "grad_norm": 0.2837338449412986, + "learning_rate": 4.047921610508152e-06, + "loss": 0.4879, + "step": 12687 + }, + { + "epoch": 0.5742475673229237, + "grad_norm": 0.6031911633754032, + "learning_rate": 4.047202109570086e-06, + "loss": 0.2996, + "step": 12688 + }, + { + "epoch": 0.5742928264313193, + "grad_norm": 0.7921926294430719, + "learning_rate": 4.046482629104722e-06, + "loss": 0.3194, + "step": 12689 + }, + { + "epoch": 0.5743380855397149, + "grad_norm": 0.6333884473496525, + "learning_rate": 4.045763169127516e-06, + "loss": 0.3046, + "step": 12690 + }, + { + "epoch": 0.5743833446481105, + "grad_norm": 0.2641626700740687, + "learning_rate": 4.045043729653927e-06, + "loss": 0.4862, + "step": 12691 + }, + { + "epoch": 0.574428603756506, + "grad_norm": 0.6023775403806684, + "learning_rate": 4.044324310699418e-06, + "loss": 0.3217, + "step": 12692 + }, + { + "epoch": 0.5744738628649015, + "grad_norm": 0.6682577862608416, + "learning_rate": 4.043604912279444e-06, + "loss": 0.3261, + "step": 12693 + }, + { + "epoch": 0.5745191219732971, + "grad_norm": 0.6677978891771651, + "learning_rate": 4.0428855344094635e-06, + "loss": 0.3143, + "step": 12694 + }, + { + "epoch": 0.5745643810816927, + "grad_norm": 0.5927057380531942, + "learning_rate": 4.042166177104932e-06, + "loss": 0.3123, + "step": 12695 + }, + { + "epoch": 0.5746096401900882, + "grad_norm": 0.6226581838813324, + "learning_rate": 4.041446840381309e-06, + "loss": 0.3114, + "step": 12696 + }, + { + "epoch": 0.5746548992984838, + "grad_norm": 0.6481231373661983, + "learning_rate": 4.040727524254048e-06, + "loss": 0.3143, + "step": 12697 + }, + { + "epoch": 0.5747001584068794, + "grad_norm": 0.6117393568444289, + "learning_rate": 4.040008228738607e-06, + "loss": 0.3287, + "step": 12698 + }, + { + "epoch": 0.574745417515275, + "grad_norm": 0.6422235183124816, + "learning_rate": 4.039288953850442e-06, + "loss": 0.3346, + "step": 12699 + }, + { + "epoch": 0.5747906766236706, + "grad_norm": 0.6381619210328913, + "learning_rate": 4.038569699605005e-06, + "loss": 0.2951, + "step": 12700 + }, + { + "epoch": 0.574835935732066, + "grad_norm": 0.6660502247869946, + "learning_rate": 4.037850466017752e-06, + "loss": 0.3187, + "step": 12701 + }, + { + "epoch": 0.5748811948404616, + "grad_norm": 0.6603447028048267, + "learning_rate": 4.03713125310414e-06, + "loss": 0.3356, + "step": 12702 + }, + { + "epoch": 0.5749264539488572, + "grad_norm": 0.5892533315864354, + "learning_rate": 4.036412060879618e-06, + "loss": 0.3152, + "step": 12703 + }, + { + "epoch": 0.5749717130572528, + "grad_norm": 0.6621800751010352, + "learning_rate": 4.035692889359642e-06, + "loss": 0.346, + "step": 12704 + }, + { + "epoch": 0.5750169721656483, + "grad_norm": 0.6373051961133144, + "learning_rate": 4.034973738559664e-06, + "loss": 0.3314, + "step": 12705 + }, + { + "epoch": 0.5750622312740439, + "grad_norm": 0.6217914074195996, + "learning_rate": 4.034254608495136e-06, + "loss": 0.325, + "step": 12706 + }, + { + "epoch": 0.5751074903824395, + "grad_norm": 0.6660384422033511, + "learning_rate": 4.03353549918151e-06, + "loss": 0.3181, + "step": 12707 + }, + { + "epoch": 0.5751527494908351, + "grad_norm": 0.6239159549463238, + "learning_rate": 4.032816410634239e-06, + "loss": 0.3084, + "step": 12708 + }, + { + "epoch": 0.5751980085992306, + "grad_norm": 0.37417081939858304, + "learning_rate": 4.032097342868774e-06, + "loss": 0.4744, + "step": 12709 + }, + { + "epoch": 0.5752432677076261, + "grad_norm": 0.6254837344720027, + "learning_rate": 4.031378295900562e-06, + "loss": 0.2774, + "step": 12710 + }, + { + "epoch": 0.5752885268160217, + "grad_norm": 0.7112272484871678, + "learning_rate": 4.030659269745057e-06, + "loss": 0.3565, + "step": 12711 + }, + { + "epoch": 0.5753337859244173, + "grad_norm": 0.5642715316156801, + "learning_rate": 4.029940264417708e-06, + "loss": 0.3082, + "step": 12712 + }, + { + "epoch": 0.5753790450328129, + "grad_norm": 0.918628603221368, + "learning_rate": 4.0292212799339615e-06, + "loss": 0.3061, + "step": 12713 + }, + { + "epoch": 0.5754243041412084, + "grad_norm": 0.6357959836387956, + "learning_rate": 4.028502316309268e-06, + "loss": 0.3714, + "step": 12714 + }, + { + "epoch": 0.575469563249604, + "grad_norm": 0.2789898995520453, + "learning_rate": 4.0277833735590785e-06, + "loss": 0.4591, + "step": 12715 + }, + { + "epoch": 0.5755148223579996, + "grad_norm": 0.6074162663412183, + "learning_rate": 4.027064451698836e-06, + "loss": 0.3302, + "step": 12716 + }, + { + "epoch": 0.5755600814663951, + "grad_norm": 0.6881224733338933, + "learning_rate": 4.026345550743991e-06, + "loss": 0.2989, + "step": 12717 + }, + { + "epoch": 0.5756053405747906, + "grad_norm": 0.6765735227591337, + "learning_rate": 4.02562667070999e-06, + "loss": 0.3411, + "step": 12718 + }, + { + "epoch": 0.5756505996831862, + "grad_norm": 0.28841045085364697, + "learning_rate": 4.024907811612279e-06, + "loss": 0.4767, + "step": 12719 + }, + { + "epoch": 0.5756958587915818, + "grad_norm": 0.6353278290575984, + "learning_rate": 4.024188973466304e-06, + "loss": 0.3265, + "step": 12720 + }, + { + "epoch": 0.5757411178999774, + "grad_norm": 0.26528212661842693, + "learning_rate": 4.023470156287511e-06, + "loss": 0.4648, + "step": 12721 + }, + { + "epoch": 0.575786377008373, + "grad_norm": 0.7287600120998405, + "learning_rate": 4.022751360091347e-06, + "loss": 0.3239, + "step": 12722 + }, + { + "epoch": 0.5758316361167685, + "grad_norm": 0.7113149968104155, + "learning_rate": 4.022032584893253e-06, + "loss": 0.308, + "step": 12723 + }, + { + "epoch": 0.575876895225164, + "grad_norm": 0.581184386254656, + "learning_rate": 4.021313830708675e-06, + "loss": 0.3078, + "step": 12724 + }, + { + "epoch": 0.5759221543335596, + "grad_norm": 0.2746754520286916, + "learning_rate": 4.0205950975530596e-06, + "loss": 0.4655, + "step": 12725 + }, + { + "epoch": 0.5759674134419552, + "grad_norm": 0.5908541244857715, + "learning_rate": 4.019876385441844e-06, + "loss": 0.272, + "step": 12726 + }, + { + "epoch": 0.5760126725503507, + "grad_norm": 0.2643314381459348, + "learning_rate": 4.019157694390477e-06, + "loss": 0.4895, + "step": 12727 + }, + { + "epoch": 0.5760579316587463, + "grad_norm": 0.650686614819478, + "learning_rate": 4.018439024414399e-06, + "loss": 0.3155, + "step": 12728 + }, + { + "epoch": 0.5761031907671419, + "grad_norm": 0.6030405524006986, + "learning_rate": 4.0177203755290496e-06, + "loss": 0.3354, + "step": 12729 + }, + { + "epoch": 0.5761484498755375, + "grad_norm": 0.6695106687422865, + "learning_rate": 4.017001747749873e-06, + "loss": 0.3462, + "step": 12730 + }, + { + "epoch": 0.576193708983933, + "grad_norm": 0.5929157274011767, + "learning_rate": 4.016283141092311e-06, + "loss": 0.2662, + "step": 12731 + }, + { + "epoch": 0.5762389680923286, + "grad_norm": 0.669110331844809, + "learning_rate": 4.015564555571802e-06, + "loss": 0.3397, + "step": 12732 + }, + { + "epoch": 0.5762842272007241, + "grad_norm": 0.6443987887172116, + "learning_rate": 4.014845991203787e-06, + "loss": 0.3736, + "step": 12733 + }, + { + "epoch": 0.5763294863091197, + "grad_norm": 0.5731768790903239, + "learning_rate": 4.0141274480037065e-06, + "loss": 0.3354, + "step": 12734 + }, + { + "epoch": 0.5763747454175153, + "grad_norm": 0.6561540012298605, + "learning_rate": 4.0134089259870005e-06, + "loss": 0.3162, + "step": 12735 + }, + { + "epoch": 0.5764200045259108, + "grad_norm": 0.6221184811404515, + "learning_rate": 4.012690425169104e-06, + "loss": 0.2947, + "step": 12736 + }, + { + "epoch": 0.5764652636343064, + "grad_norm": 0.32495705128941155, + "learning_rate": 4.011971945565461e-06, + "loss": 0.459, + "step": 12737 + }, + { + "epoch": 0.576510522742702, + "grad_norm": 0.2992483251964859, + "learning_rate": 4.011253487191505e-06, + "loss": 0.4768, + "step": 12738 + }, + { + "epoch": 0.5765557818510976, + "grad_norm": 0.2760061214392678, + "learning_rate": 4.0105350500626735e-06, + "loss": 0.4937, + "step": 12739 + }, + { + "epoch": 0.576601040959493, + "grad_norm": 0.3164653998547672, + "learning_rate": 4.009816634194405e-06, + "loss": 0.476, + "step": 12740 + }, + { + "epoch": 0.5766463000678886, + "grad_norm": 0.6132393217954801, + "learning_rate": 4.009098239602139e-06, + "loss": 0.2865, + "step": 12741 + }, + { + "epoch": 0.5766915591762842, + "grad_norm": 0.6912522265863241, + "learning_rate": 4.008379866301307e-06, + "loss": 0.3452, + "step": 12742 + }, + { + "epoch": 0.5767368182846798, + "grad_norm": 0.688102334677657, + "learning_rate": 4.007661514307344e-06, + "loss": 0.3405, + "step": 12743 + }, + { + "epoch": 0.5767820773930754, + "grad_norm": 0.5962739726272968, + "learning_rate": 4.006943183635691e-06, + "loss": 0.2986, + "step": 12744 + }, + { + "epoch": 0.5768273365014709, + "grad_norm": 0.6144464910937142, + "learning_rate": 4.006224874301776e-06, + "loss": 0.3043, + "step": 12745 + }, + { + "epoch": 0.5768725956098665, + "grad_norm": 0.34017827111284143, + "learning_rate": 4.0055065863210365e-06, + "loss": 0.4581, + "step": 12746 + }, + { + "epoch": 0.5769178547182621, + "grad_norm": 0.6265824965179838, + "learning_rate": 4.004788319708908e-06, + "loss": 0.3251, + "step": 12747 + }, + { + "epoch": 0.5769631138266577, + "grad_norm": 0.7435574824893756, + "learning_rate": 4.004070074480821e-06, + "loss": 0.3459, + "step": 12748 + }, + { + "epoch": 0.5770083729350531, + "grad_norm": 0.31812262154286036, + "learning_rate": 4.003351850652208e-06, + "loss": 0.4624, + "step": 12749 + }, + { + "epoch": 0.5770536320434487, + "grad_norm": 0.7157534177543335, + "learning_rate": 4.002633648238504e-06, + "loss": 0.3289, + "step": 12750 + }, + { + "epoch": 0.5770988911518443, + "grad_norm": 0.5694676418437684, + "learning_rate": 4.00191546725514e-06, + "loss": 0.2835, + "step": 12751 + }, + { + "epoch": 0.5771441502602399, + "grad_norm": 0.6275407189140688, + "learning_rate": 4.001197307717547e-06, + "loss": 0.3197, + "step": 12752 + }, + { + "epoch": 0.5771894093686354, + "grad_norm": 0.6405241813082845, + "learning_rate": 4.000479169641155e-06, + "loss": 0.2917, + "step": 12753 + }, + { + "epoch": 0.577234668477031, + "grad_norm": 0.30821742585046996, + "learning_rate": 3.999761053041398e-06, + "loss": 0.485, + "step": 12754 + }, + { + "epoch": 0.5772799275854266, + "grad_norm": 0.5630410441773159, + "learning_rate": 3.999042957933703e-06, + "loss": 0.2894, + "step": 12755 + }, + { + "epoch": 0.5773251866938222, + "grad_norm": 0.6669897014151894, + "learning_rate": 3.9983248843335e-06, + "loss": 0.3302, + "step": 12756 + }, + { + "epoch": 0.5773704458022177, + "grad_norm": 0.7217996992980149, + "learning_rate": 3.997606832256221e-06, + "loss": 0.3323, + "step": 12757 + }, + { + "epoch": 0.5774157049106132, + "grad_norm": 0.7234170943206248, + "learning_rate": 3.9968888017172905e-06, + "loss": 0.3612, + "step": 12758 + }, + { + "epoch": 0.5774609640190088, + "grad_norm": 0.6280443839476121, + "learning_rate": 3.996170792732139e-06, + "loss": 0.3308, + "step": 12759 + }, + { + "epoch": 0.5775062231274044, + "grad_norm": 0.6410420830552802, + "learning_rate": 3.995452805316195e-06, + "loss": 0.3524, + "step": 12760 + }, + { + "epoch": 0.5775514822358, + "grad_norm": 0.6175844970036328, + "learning_rate": 3.994734839484884e-06, + "loss": 0.3039, + "step": 12761 + }, + { + "epoch": 0.5775967413441955, + "grad_norm": 0.6249105358043234, + "learning_rate": 3.994016895253635e-06, + "loss": 0.3257, + "step": 12762 + }, + { + "epoch": 0.5776420004525911, + "grad_norm": 0.6208124682102306, + "learning_rate": 3.9932989726378705e-06, + "loss": 0.349, + "step": 12763 + }, + { + "epoch": 0.5776872595609867, + "grad_norm": 0.6246253614348443, + "learning_rate": 3.992581071653023e-06, + "loss": 0.3263, + "step": 12764 + }, + { + "epoch": 0.5777325186693822, + "grad_norm": 0.35917458120348167, + "learning_rate": 3.991863192314512e-06, + "loss": 0.4966, + "step": 12765 + }, + { + "epoch": 0.5777777777777777, + "grad_norm": 0.5729151681896888, + "learning_rate": 3.991145334637765e-06, + "loss": 0.3089, + "step": 12766 + }, + { + "epoch": 0.5778230368861733, + "grad_norm": 0.6572528307452108, + "learning_rate": 3.990427498638208e-06, + "loss": 0.2974, + "step": 12767 + }, + { + "epoch": 0.5778682959945689, + "grad_norm": 0.8428472380687402, + "learning_rate": 3.98970968433126e-06, + "loss": 0.264, + "step": 12768 + }, + { + "epoch": 0.5779135551029645, + "grad_norm": 0.6736949965479246, + "learning_rate": 3.98899189173235e-06, + "loss": 0.3117, + "step": 12769 + }, + { + "epoch": 0.5779588142113601, + "grad_norm": 0.6581459249727647, + "learning_rate": 3.988274120856901e-06, + "loss": 0.3536, + "step": 12770 + }, + { + "epoch": 0.5780040733197556, + "grad_norm": 0.28315764861161824, + "learning_rate": 3.987556371720331e-06, + "loss": 0.482, + "step": 12771 + }, + { + "epoch": 0.5780493324281512, + "grad_norm": 0.28487596963205375, + "learning_rate": 3.986838644338066e-06, + "loss": 0.4758, + "step": 12772 + }, + { + "epoch": 0.5780945915365467, + "grad_norm": 0.2829952282231626, + "learning_rate": 3.986120938725529e-06, + "loss": 0.4713, + "step": 12773 + }, + { + "epoch": 0.5781398506449423, + "grad_norm": 0.621195865422692, + "learning_rate": 3.9854032548981354e-06, + "loss": 0.313, + "step": 12774 + }, + { + "epoch": 0.5781851097533378, + "grad_norm": 0.611176870304872, + "learning_rate": 3.984685592871311e-06, + "loss": 0.3479, + "step": 12775 + }, + { + "epoch": 0.5782303688617334, + "grad_norm": 0.6438316659957177, + "learning_rate": 3.983967952660477e-06, + "loss": 0.3335, + "step": 12776 + }, + { + "epoch": 0.578275627970129, + "grad_norm": 0.29361965736641604, + "learning_rate": 3.983250334281049e-06, + "loss": 0.4994, + "step": 12777 + }, + { + "epoch": 0.5783208870785246, + "grad_norm": 0.7576573224162063, + "learning_rate": 3.982532737748448e-06, + "loss": 0.303, + "step": 12778 + }, + { + "epoch": 0.5783661461869202, + "grad_norm": 0.5893669556988376, + "learning_rate": 3.9818151630780945e-06, + "loss": 0.2755, + "step": 12779 + }, + { + "epoch": 0.5784114052953157, + "grad_norm": 0.6434397060345102, + "learning_rate": 3.981097610285407e-06, + "loss": 0.2798, + "step": 12780 + }, + { + "epoch": 0.5784566644037112, + "grad_norm": 0.6010668767369041, + "learning_rate": 3.980380079385802e-06, + "loss": 0.3416, + "step": 12781 + }, + { + "epoch": 0.5785019235121068, + "grad_norm": 0.6356394138357916, + "learning_rate": 3.979662570394696e-06, + "loss": 0.3412, + "step": 12782 + }, + { + "epoch": 0.5785471826205024, + "grad_norm": 0.5965268149709056, + "learning_rate": 3.97894508332751e-06, + "loss": 0.3152, + "step": 12783 + }, + { + "epoch": 0.5785924417288979, + "grad_norm": 0.6397490509031601, + "learning_rate": 3.978227618199657e-06, + "loss": 0.3035, + "step": 12784 + }, + { + "epoch": 0.5786377008372935, + "grad_norm": 0.7026293236806275, + "learning_rate": 3.977510175026555e-06, + "loss": 0.3658, + "step": 12785 + }, + { + "epoch": 0.5786829599456891, + "grad_norm": 0.647604276207456, + "learning_rate": 3.976792753823619e-06, + "loss": 0.3371, + "step": 12786 + }, + { + "epoch": 0.5787282190540847, + "grad_norm": 1.547699319518536, + "learning_rate": 3.976075354606263e-06, + "loss": 0.2776, + "step": 12787 + }, + { + "epoch": 0.5787734781624801, + "grad_norm": 0.5939460075902443, + "learning_rate": 3.975357977389903e-06, + "loss": 0.3261, + "step": 12788 + }, + { + "epoch": 0.5788187372708757, + "grad_norm": 0.6238851371558091, + "learning_rate": 3.974640622189955e-06, + "loss": 0.2983, + "step": 12789 + }, + { + "epoch": 0.5788639963792713, + "grad_norm": 0.6823620323587973, + "learning_rate": 3.973923289021829e-06, + "loss": 0.3346, + "step": 12790 + }, + { + "epoch": 0.5789092554876669, + "grad_norm": 0.6663588433491106, + "learning_rate": 3.97320597790094e-06, + "loss": 0.3021, + "step": 12791 + }, + { + "epoch": 0.5789545145960625, + "grad_norm": 0.7458479916275186, + "learning_rate": 3.972488688842701e-06, + "loss": 0.3425, + "step": 12792 + }, + { + "epoch": 0.578999773704458, + "grad_norm": 0.6802867122290038, + "learning_rate": 3.971771421862527e-06, + "loss": 0.3797, + "step": 12793 + }, + { + "epoch": 0.5790450328128536, + "grad_norm": 0.6712055248027057, + "learning_rate": 3.971054176975825e-06, + "loss": 0.3342, + "step": 12794 + }, + { + "epoch": 0.5790902919212492, + "grad_norm": 0.7196814387097701, + "learning_rate": 3.970336954198008e-06, + "loss": 0.3158, + "step": 12795 + }, + { + "epoch": 0.5791355510296448, + "grad_norm": 0.6351901479408457, + "learning_rate": 3.969619753544491e-06, + "loss": 0.2616, + "step": 12796 + }, + { + "epoch": 0.5791808101380402, + "grad_norm": 0.655342732680527, + "learning_rate": 3.968902575030676e-06, + "loss": 0.3328, + "step": 12797 + }, + { + "epoch": 0.5792260692464358, + "grad_norm": 0.770406743505147, + "learning_rate": 3.968185418671981e-06, + "loss": 0.3492, + "step": 12798 + }, + { + "epoch": 0.5792713283548314, + "grad_norm": 0.3541400721543433, + "learning_rate": 3.967468284483812e-06, + "loss": 0.468, + "step": 12799 + }, + { + "epoch": 0.579316587463227, + "grad_norm": 0.3190653247968186, + "learning_rate": 3.966751172481577e-06, + "loss": 0.4574, + "step": 12800 + }, + { + "epoch": 0.5793618465716225, + "grad_norm": 0.6770977156420795, + "learning_rate": 3.966034082680686e-06, + "loss": 0.2742, + "step": 12801 + }, + { + "epoch": 0.5794071056800181, + "grad_norm": 0.28384315591545206, + "learning_rate": 3.9653170150965494e-06, + "loss": 0.4658, + "step": 12802 + }, + { + "epoch": 0.5794523647884137, + "grad_norm": 0.6288247634755264, + "learning_rate": 3.96459996974457e-06, + "loss": 0.3507, + "step": 12803 + }, + { + "epoch": 0.5794976238968093, + "grad_norm": 0.6554554918173014, + "learning_rate": 3.963882946640158e-06, + "loss": 0.3339, + "step": 12804 + }, + { + "epoch": 0.5795428830052048, + "grad_norm": 0.6005424677853866, + "learning_rate": 3.963165945798718e-06, + "loss": 0.3093, + "step": 12805 + }, + { + "epoch": 0.5795881421136003, + "grad_norm": 0.6177179456099513, + "learning_rate": 3.9624489672356605e-06, + "loss": 0.3165, + "step": 12806 + }, + { + "epoch": 0.5796334012219959, + "grad_norm": 0.623730570206981, + "learning_rate": 3.961732010966385e-06, + "loss": 0.3249, + "step": 12807 + }, + { + "epoch": 0.5796786603303915, + "grad_norm": 0.5890721069191882, + "learning_rate": 3.961015077006301e-06, + "loss": 0.3376, + "step": 12808 + }, + { + "epoch": 0.5797239194387871, + "grad_norm": 0.5774276192360328, + "learning_rate": 3.960298165370814e-06, + "loss": 0.2701, + "step": 12809 + }, + { + "epoch": 0.5797691785471826, + "grad_norm": 0.6945883497703088, + "learning_rate": 3.959581276075324e-06, + "loss": 0.3298, + "step": 12810 + }, + { + "epoch": 0.5798144376555782, + "grad_norm": 0.5855370075421497, + "learning_rate": 3.958864409135236e-06, + "loss": 0.3123, + "step": 12811 + }, + { + "epoch": 0.5798596967639738, + "grad_norm": 0.6266850936855771, + "learning_rate": 3.9581475645659565e-06, + "loss": 0.3212, + "step": 12812 + }, + { + "epoch": 0.5799049558723693, + "grad_norm": 0.6205072222388495, + "learning_rate": 3.957430742382885e-06, + "loss": 0.2849, + "step": 12813 + }, + { + "epoch": 0.5799502149807649, + "grad_norm": 0.6942275103026728, + "learning_rate": 3.956713942601425e-06, + "loss": 0.2991, + "step": 12814 + }, + { + "epoch": 0.5799954740891604, + "grad_norm": 0.4025010941475067, + "learning_rate": 3.955997165236979e-06, + "loss": 0.4859, + "step": 12815 + }, + { + "epoch": 0.580040733197556, + "grad_norm": 0.6681984766651937, + "learning_rate": 3.955280410304945e-06, + "loss": 0.3484, + "step": 12816 + }, + { + "epoch": 0.5800859923059516, + "grad_norm": 0.6516308948976135, + "learning_rate": 3.954563677820729e-06, + "loss": 0.3124, + "step": 12817 + }, + { + "epoch": 0.5801312514143472, + "grad_norm": 0.31077237574827565, + "learning_rate": 3.953846967799728e-06, + "loss": 0.4557, + "step": 12818 + }, + { + "epoch": 0.5801765105227427, + "grad_norm": 0.2932590904326027, + "learning_rate": 3.953130280257342e-06, + "loss": 0.4849, + "step": 12819 + }, + { + "epoch": 0.5802217696311383, + "grad_norm": 0.6527721169835451, + "learning_rate": 3.95241361520897e-06, + "loss": 0.2986, + "step": 12820 + }, + { + "epoch": 0.5802670287395338, + "grad_norm": 0.6206449598878321, + "learning_rate": 3.9516969726700135e-06, + "loss": 0.2966, + "step": 12821 + }, + { + "epoch": 0.5803122878479294, + "grad_norm": 0.6256445807776387, + "learning_rate": 3.950980352655871e-06, + "loss": 0.3191, + "step": 12822 + }, + { + "epoch": 0.5803575469563249, + "grad_norm": 0.32108889209316294, + "learning_rate": 3.950263755181937e-06, + "loss": 0.459, + "step": 12823 + }, + { + "epoch": 0.5804028060647205, + "grad_norm": 0.6527273268980939, + "learning_rate": 3.94954718026361e-06, + "loss": 0.3327, + "step": 12824 + }, + { + "epoch": 0.5804480651731161, + "grad_norm": 0.6044083467629512, + "learning_rate": 3.948830627916291e-06, + "loss": 0.3324, + "step": 12825 + }, + { + "epoch": 0.5804933242815117, + "grad_norm": 0.4489799411707293, + "learning_rate": 3.94811409815537e-06, + "loss": 0.4796, + "step": 12826 + }, + { + "epoch": 0.5805385833899073, + "grad_norm": 0.6309711058364544, + "learning_rate": 3.9473975909962484e-06, + "loss": 0.3054, + "step": 12827 + }, + { + "epoch": 0.5805838424983027, + "grad_norm": 0.33958261127758466, + "learning_rate": 3.946681106454319e-06, + "loss": 0.4489, + "step": 12828 + }, + { + "epoch": 0.5806291016066983, + "grad_norm": 0.6527249012356833, + "learning_rate": 3.9459646445449785e-06, + "loss": 0.318, + "step": 12829 + }, + { + "epoch": 0.5806743607150939, + "grad_norm": 0.5899912082073602, + "learning_rate": 3.945248205283618e-06, + "loss": 0.2796, + "step": 12830 + }, + { + "epoch": 0.5807196198234895, + "grad_norm": 0.6648150974363372, + "learning_rate": 3.944531788685637e-06, + "loss": 0.3251, + "step": 12831 + }, + { + "epoch": 0.580764878931885, + "grad_norm": 0.33089472321988717, + "learning_rate": 3.943815394766426e-06, + "loss": 0.4743, + "step": 12832 + }, + { + "epoch": 0.5808101380402806, + "grad_norm": 0.5800850848571503, + "learning_rate": 3.943099023541377e-06, + "loss": 0.2991, + "step": 12833 + }, + { + "epoch": 0.5808553971486762, + "grad_norm": 0.33315107133424154, + "learning_rate": 3.942382675025883e-06, + "loss": 0.4845, + "step": 12834 + }, + { + "epoch": 0.5809006562570718, + "grad_norm": 0.598203060435861, + "learning_rate": 3.941666349235341e-06, + "loss": 0.3057, + "step": 12835 + }, + { + "epoch": 0.5809459153654672, + "grad_norm": 0.6589915175799442, + "learning_rate": 3.9409500461851355e-06, + "loss": 0.3694, + "step": 12836 + }, + { + "epoch": 0.5809911744738628, + "grad_norm": 0.7464556060783643, + "learning_rate": 3.9402337658906615e-06, + "loss": 0.3176, + "step": 12837 + }, + { + "epoch": 0.5810364335822584, + "grad_norm": 0.38661431178489153, + "learning_rate": 3.93951750836731e-06, + "loss": 0.4881, + "step": 12838 + }, + { + "epoch": 0.581081692690654, + "grad_norm": 0.6059876360330815, + "learning_rate": 3.93880127363047e-06, + "loss": 0.3421, + "step": 12839 + }, + { + "epoch": 0.5811269517990496, + "grad_norm": 0.3021531130840247, + "learning_rate": 3.938085061695529e-06, + "loss": 0.489, + "step": 12840 + }, + { + "epoch": 0.5811722109074451, + "grad_norm": 0.27781242396514616, + "learning_rate": 3.937368872577882e-06, + "loss": 0.4935, + "step": 12841 + }, + { + "epoch": 0.5812174700158407, + "grad_norm": 0.44269264539934433, + "learning_rate": 3.9366527062929126e-06, + "loss": 0.4707, + "step": 12842 + }, + { + "epoch": 0.5812627291242363, + "grad_norm": 0.6714753451841458, + "learning_rate": 3.935936562856011e-06, + "loss": 0.3245, + "step": 12843 + }, + { + "epoch": 0.5813079882326319, + "grad_norm": 0.5905751013141842, + "learning_rate": 3.935220442282565e-06, + "loss": 0.3028, + "step": 12844 + }, + { + "epoch": 0.5813532473410273, + "grad_norm": 0.5738321032246363, + "learning_rate": 3.93450434458796e-06, + "loss": 0.3371, + "step": 12845 + }, + { + "epoch": 0.5813985064494229, + "grad_norm": 0.6601455637263614, + "learning_rate": 3.933788269787585e-06, + "loss": 0.3246, + "step": 12846 + }, + { + "epoch": 0.5814437655578185, + "grad_norm": 0.5894179831252712, + "learning_rate": 3.9330722178968275e-06, + "loss": 0.2964, + "step": 12847 + }, + { + "epoch": 0.5814890246662141, + "grad_norm": 0.6887398419556391, + "learning_rate": 3.932356188931069e-06, + "loss": 0.2994, + "step": 12848 + }, + { + "epoch": 0.5815342837746097, + "grad_norm": 0.5871548930199265, + "learning_rate": 3.931640182905696e-06, + "loss": 0.3156, + "step": 12849 + }, + { + "epoch": 0.5815795428830052, + "grad_norm": 0.37197516532945846, + "learning_rate": 3.930924199836096e-06, + "loss": 0.4892, + "step": 12850 + }, + { + "epoch": 0.5816248019914008, + "grad_norm": 0.572034524958866, + "learning_rate": 3.930208239737651e-06, + "loss": 0.3032, + "step": 12851 + }, + { + "epoch": 0.5816700610997964, + "grad_norm": 1.2407798640954382, + "learning_rate": 3.929492302625746e-06, + "loss": 0.285, + "step": 12852 + }, + { + "epoch": 0.5817153202081919, + "grad_norm": 0.6219601274131884, + "learning_rate": 3.9287763885157625e-06, + "loss": 0.3773, + "step": 12853 + }, + { + "epoch": 0.5817605793165874, + "grad_norm": 0.3062327908374255, + "learning_rate": 3.928060497423087e-06, + "loss": 0.5083, + "step": 12854 + }, + { + "epoch": 0.581805838424983, + "grad_norm": 0.6209752052130844, + "learning_rate": 3.9273446293630956e-06, + "loss": 0.3089, + "step": 12855 + }, + { + "epoch": 0.5818510975333786, + "grad_norm": 0.6137163222857616, + "learning_rate": 3.926628784351175e-06, + "loss": 0.3641, + "step": 12856 + }, + { + "epoch": 0.5818963566417742, + "grad_norm": 0.27066161094848945, + "learning_rate": 3.925912962402707e-06, + "loss": 0.4472, + "step": 12857 + }, + { + "epoch": 0.5819416157501697, + "grad_norm": 0.2924520462926132, + "learning_rate": 3.925197163533069e-06, + "loss": 0.4903, + "step": 12858 + }, + { + "epoch": 0.5819868748585653, + "grad_norm": 0.5878082240172025, + "learning_rate": 3.924481387757642e-06, + "loss": 0.2943, + "step": 12859 + }, + { + "epoch": 0.5820321339669609, + "grad_norm": 0.28326281618130894, + "learning_rate": 3.9237656350918095e-06, + "loss": 0.4897, + "step": 12860 + }, + { + "epoch": 0.5820773930753564, + "grad_norm": 0.655266197542611, + "learning_rate": 3.9230499055509454e-06, + "loss": 0.2985, + "step": 12861 + }, + { + "epoch": 0.582122652183752, + "grad_norm": 0.6480727118851123, + "learning_rate": 3.922334199150433e-06, + "loss": 0.2734, + "step": 12862 + }, + { + "epoch": 0.5821679112921475, + "grad_norm": 0.6601708874608262, + "learning_rate": 3.921618515905647e-06, + "loss": 0.3413, + "step": 12863 + }, + { + "epoch": 0.5822131704005431, + "grad_norm": 0.895556337999452, + "learning_rate": 3.920902855831969e-06, + "loss": 0.2701, + "step": 12864 + }, + { + "epoch": 0.5822584295089387, + "grad_norm": 0.6330166938868974, + "learning_rate": 3.920187218944774e-06, + "loss": 0.318, + "step": 12865 + }, + { + "epoch": 0.5823036886173343, + "grad_norm": 0.6601310669210659, + "learning_rate": 3.919471605259438e-06, + "loss": 0.2955, + "step": 12866 + }, + { + "epoch": 0.5823489477257298, + "grad_norm": 0.30908864221676563, + "learning_rate": 3.918756014791341e-06, + "loss": 0.4634, + "step": 12867 + }, + { + "epoch": 0.5823942068341253, + "grad_norm": 0.5976452518286259, + "learning_rate": 3.9180404475558555e-06, + "loss": 0.3537, + "step": 12868 + }, + { + "epoch": 0.5824394659425209, + "grad_norm": 0.5953767557162067, + "learning_rate": 3.917324903568356e-06, + "loss": 0.3309, + "step": 12869 + }, + { + "epoch": 0.5824847250509165, + "grad_norm": 0.6054511531383094, + "learning_rate": 3.916609382844221e-06, + "loss": 0.3396, + "step": 12870 + }, + { + "epoch": 0.582529984159312, + "grad_norm": 0.2822725519781029, + "learning_rate": 3.915893885398823e-06, + "loss": 0.4907, + "step": 12871 + }, + { + "epoch": 0.5825752432677076, + "grad_norm": 0.6860614582918673, + "learning_rate": 3.915178411247535e-06, + "loss": 0.2795, + "step": 12872 + }, + { + "epoch": 0.5826205023761032, + "grad_norm": 0.6228482677721467, + "learning_rate": 3.914462960405733e-06, + "loss": 0.3569, + "step": 12873 + }, + { + "epoch": 0.5826657614844988, + "grad_norm": 0.6258446491336329, + "learning_rate": 3.913747532888784e-06, + "loss": 0.3243, + "step": 12874 + }, + { + "epoch": 0.5827110205928944, + "grad_norm": 0.6210560291668852, + "learning_rate": 3.913032128712068e-06, + "loss": 0.3357, + "step": 12875 + }, + { + "epoch": 0.5827562797012898, + "grad_norm": 0.6804612819642828, + "learning_rate": 3.912316747890951e-06, + "loss": 0.307, + "step": 12876 + }, + { + "epoch": 0.5828015388096854, + "grad_norm": 0.2943025412568137, + "learning_rate": 3.911601390440809e-06, + "loss": 0.4564, + "step": 12877 + }, + { + "epoch": 0.582846797918081, + "grad_norm": 0.6165984329019698, + "learning_rate": 3.910886056377008e-06, + "loss": 0.3168, + "step": 12878 + }, + { + "epoch": 0.5828920570264766, + "grad_norm": 0.6612618568342109, + "learning_rate": 3.9101707457149216e-06, + "loss": 0.3258, + "step": 12879 + }, + { + "epoch": 0.5829373161348721, + "grad_norm": 0.6777851209953912, + "learning_rate": 3.90945545846992e-06, + "loss": 0.292, + "step": 12880 + }, + { + "epoch": 0.5829825752432677, + "grad_norm": 0.6028725455461451, + "learning_rate": 3.908740194657369e-06, + "loss": 0.3352, + "step": 12881 + }, + { + "epoch": 0.5830278343516633, + "grad_norm": 0.6581191427684837, + "learning_rate": 3.90802495429264e-06, + "loss": 0.3178, + "step": 12882 + }, + { + "epoch": 0.5830730934600589, + "grad_norm": 0.29210513084469514, + "learning_rate": 3.907309737391104e-06, + "loss": 0.483, + "step": 12883 + }, + { + "epoch": 0.5831183525684545, + "grad_norm": 0.6627943810879815, + "learning_rate": 3.906594543968122e-06, + "loss": 0.3101, + "step": 12884 + }, + { + "epoch": 0.5831636116768499, + "grad_norm": 0.7161743237685878, + "learning_rate": 3.905879374039066e-06, + "loss": 0.3404, + "step": 12885 + }, + { + "epoch": 0.5832088707852455, + "grad_norm": 0.6400180015090664, + "learning_rate": 3.905164227619303e-06, + "loss": 0.3235, + "step": 12886 + }, + { + "epoch": 0.5832541298936411, + "grad_norm": 0.266768614980608, + "learning_rate": 3.904449104724198e-06, + "loss": 0.4719, + "step": 12887 + }, + { + "epoch": 0.5832993890020367, + "grad_norm": 0.6990613540113876, + "learning_rate": 3.903734005369115e-06, + "loss": 0.3302, + "step": 12888 + }, + { + "epoch": 0.5833446481104322, + "grad_norm": 0.5934812759285606, + "learning_rate": 3.903018929569424e-06, + "loss": 0.3261, + "step": 12889 + }, + { + "epoch": 0.5833899072188278, + "grad_norm": 0.6164190942026797, + "learning_rate": 3.902303877340486e-06, + "loss": 0.3165, + "step": 12890 + }, + { + "epoch": 0.5834351663272234, + "grad_norm": 0.7572763337769141, + "learning_rate": 3.9015888486976666e-06, + "loss": 0.3159, + "step": 12891 + }, + { + "epoch": 0.583480425435619, + "grad_norm": 0.31349991781540076, + "learning_rate": 3.900873843656328e-06, + "loss": 0.4659, + "step": 12892 + }, + { + "epoch": 0.5835256845440144, + "grad_norm": 0.6034155507082287, + "learning_rate": 3.900158862231837e-06, + "loss": 0.3018, + "step": 12893 + }, + { + "epoch": 0.58357094365241, + "grad_norm": 0.27460524405924486, + "learning_rate": 3.899443904439553e-06, + "loss": 0.4543, + "step": 12894 + }, + { + "epoch": 0.5836162027608056, + "grad_norm": 0.5654824623125475, + "learning_rate": 3.89872897029484e-06, + "loss": 0.3148, + "step": 12895 + }, + { + "epoch": 0.5836614618692012, + "grad_norm": 0.6280443259816898, + "learning_rate": 3.8980140598130585e-06, + "loss": 0.3318, + "step": 12896 + }, + { + "epoch": 0.5837067209775968, + "grad_norm": 0.6714072933785529, + "learning_rate": 3.89729917300957e-06, + "loss": 0.3363, + "step": 12897 + }, + { + "epoch": 0.5837519800859923, + "grad_norm": 2.789699193483099, + "learning_rate": 3.896584309899736e-06, + "loss": 0.2579, + "step": 12898 + }, + { + "epoch": 0.5837972391943879, + "grad_norm": 0.6012598147773093, + "learning_rate": 3.895869470498917e-06, + "loss": 0.2702, + "step": 12899 + }, + { + "epoch": 0.5838424983027835, + "grad_norm": 0.6348915755057556, + "learning_rate": 3.895154654822471e-06, + "loss": 0.3081, + "step": 12900 + }, + { + "epoch": 0.583887757411179, + "grad_norm": 0.5786267882485973, + "learning_rate": 3.894439862885758e-06, + "loss": 0.2654, + "step": 12901 + }, + { + "epoch": 0.5839330165195745, + "grad_norm": 0.5707728278791753, + "learning_rate": 3.89372509470414e-06, + "loss": 0.2948, + "step": 12902 + }, + { + "epoch": 0.5839782756279701, + "grad_norm": 0.2980866435477156, + "learning_rate": 3.893010350292967e-06, + "loss": 0.4875, + "step": 12903 + }, + { + "epoch": 0.5840235347363657, + "grad_norm": 0.272228408390971, + "learning_rate": 3.892295629667604e-06, + "loss": 0.4832, + "step": 12904 + }, + { + "epoch": 0.5840687938447613, + "grad_norm": 0.6108062446836691, + "learning_rate": 3.891580932843406e-06, + "loss": 0.2819, + "step": 12905 + }, + { + "epoch": 0.5841140529531568, + "grad_norm": 0.7200864095677929, + "learning_rate": 3.890866259835731e-06, + "loss": 0.3065, + "step": 12906 + }, + { + "epoch": 0.5841593120615524, + "grad_norm": 0.6191814565649314, + "learning_rate": 3.890151610659931e-06, + "loss": 0.3127, + "step": 12907 + }, + { + "epoch": 0.584204571169948, + "grad_norm": 0.6345450883334448, + "learning_rate": 3.8894369853313654e-06, + "loss": 0.3008, + "step": 12908 + }, + { + "epoch": 0.5842498302783435, + "grad_norm": 0.6054599253457945, + "learning_rate": 3.888722383865389e-06, + "loss": 0.3201, + "step": 12909 + }, + { + "epoch": 0.5842950893867391, + "grad_norm": 0.6019468470687604, + "learning_rate": 3.888007806277355e-06, + "loss": 0.2845, + "step": 12910 + }, + { + "epoch": 0.5843403484951346, + "grad_norm": 0.6457389851076811, + "learning_rate": 3.887293252582616e-06, + "loss": 0.282, + "step": 12911 + }, + { + "epoch": 0.5843856076035302, + "grad_norm": 0.666848565280595, + "learning_rate": 3.886578722796532e-06, + "loss": 0.3555, + "step": 12912 + }, + { + "epoch": 0.5844308667119258, + "grad_norm": 0.7169589320217262, + "learning_rate": 3.885864216934448e-06, + "loss": 0.2958, + "step": 12913 + }, + { + "epoch": 0.5844761258203214, + "grad_norm": 0.7171063512367375, + "learning_rate": 3.88514973501172e-06, + "loss": 0.302, + "step": 12914 + }, + { + "epoch": 0.5845213849287169, + "grad_norm": 0.35810858882672103, + "learning_rate": 3.884435277043703e-06, + "loss": 0.4629, + "step": 12915 + }, + { + "epoch": 0.5845666440371124, + "grad_norm": 0.3523811905750544, + "learning_rate": 3.883720843045744e-06, + "loss": 0.4891, + "step": 12916 + }, + { + "epoch": 0.584611903145508, + "grad_norm": 0.6418237068424264, + "learning_rate": 3.883006433033194e-06, + "loss": 0.3221, + "step": 12917 + }, + { + "epoch": 0.5846571622539036, + "grad_norm": 0.6018725955131646, + "learning_rate": 3.882292047021407e-06, + "loss": 0.3118, + "step": 12918 + }, + { + "epoch": 0.5847024213622991, + "grad_norm": 0.6411292128169545, + "learning_rate": 3.8815776850257325e-06, + "loss": 0.2973, + "step": 12919 + }, + { + "epoch": 0.5847476804706947, + "grad_norm": 0.6211094756032005, + "learning_rate": 3.880863347061516e-06, + "loss": 0.329, + "step": 12920 + }, + { + "epoch": 0.5847929395790903, + "grad_norm": 0.620245134177958, + "learning_rate": 3.88014903314411e-06, + "loss": 0.3365, + "step": 12921 + }, + { + "epoch": 0.5848381986874859, + "grad_norm": 0.6868312250884809, + "learning_rate": 3.879434743288863e-06, + "loss": 0.322, + "step": 12922 + }, + { + "epoch": 0.5848834577958815, + "grad_norm": 0.6462710705303839, + "learning_rate": 3.87872047751112e-06, + "loss": 0.303, + "step": 12923 + }, + { + "epoch": 0.584928716904277, + "grad_norm": 0.6655929839190928, + "learning_rate": 3.878006235826231e-06, + "loss": 0.2956, + "step": 12924 + }, + { + "epoch": 0.5849739760126725, + "grad_norm": 0.6818995713564633, + "learning_rate": 3.877292018249543e-06, + "loss": 0.3132, + "step": 12925 + }, + { + "epoch": 0.5850192351210681, + "grad_norm": 0.38136703826778795, + "learning_rate": 3.8765778247964e-06, + "loss": 0.4698, + "step": 12926 + }, + { + "epoch": 0.5850644942294637, + "grad_norm": 0.3412002803460328, + "learning_rate": 3.875863655482149e-06, + "loss": 0.4696, + "step": 12927 + }, + { + "epoch": 0.5851097533378592, + "grad_norm": 0.5931125850967812, + "learning_rate": 3.875149510322137e-06, + "loss": 0.343, + "step": 12928 + }, + { + "epoch": 0.5851550124462548, + "grad_norm": 0.6466884623426743, + "learning_rate": 3.8744353893317075e-06, + "loss": 0.3706, + "step": 12929 + }, + { + "epoch": 0.5852002715546504, + "grad_norm": 0.6434405477840357, + "learning_rate": 3.873721292526202e-06, + "loss": 0.2481, + "step": 12930 + }, + { + "epoch": 0.585245530663046, + "grad_norm": 0.7373483580330585, + "learning_rate": 3.8730072199209705e-06, + "loss": 0.2998, + "step": 12931 + }, + { + "epoch": 0.5852907897714416, + "grad_norm": 0.9587743096576387, + "learning_rate": 3.87229317153135e-06, + "loss": 0.3174, + "step": 12932 + }, + { + "epoch": 0.585336048879837, + "grad_norm": 0.6359927738078779, + "learning_rate": 3.871579147372685e-06, + "loss": 0.3463, + "step": 12933 + }, + { + "epoch": 0.5853813079882326, + "grad_norm": 0.34809681752623584, + "learning_rate": 3.870865147460319e-06, + "loss": 0.4735, + "step": 12934 + }, + { + "epoch": 0.5854265670966282, + "grad_norm": 0.6740694498628458, + "learning_rate": 3.870151171809596e-06, + "loss": 0.3279, + "step": 12935 + }, + { + "epoch": 0.5854718262050238, + "grad_norm": 0.6666175522244245, + "learning_rate": 3.869437220435851e-06, + "loss": 0.3176, + "step": 12936 + }, + { + "epoch": 0.5855170853134193, + "grad_norm": 0.31852509391543643, + "learning_rate": 3.868723293354429e-06, + "loss": 0.474, + "step": 12937 + }, + { + "epoch": 0.5855623444218149, + "grad_norm": 0.6416405830095192, + "learning_rate": 3.8680093905806695e-06, + "loss": 0.3437, + "step": 12938 + }, + { + "epoch": 0.5856076035302105, + "grad_norm": 0.608559978948162, + "learning_rate": 3.86729551212991e-06, + "loss": 0.2698, + "step": 12939 + }, + { + "epoch": 0.585652862638606, + "grad_norm": 0.6460179817947529, + "learning_rate": 3.866581658017492e-06, + "loss": 0.3077, + "step": 12940 + }, + { + "epoch": 0.5856981217470015, + "grad_norm": 0.6021138283075794, + "learning_rate": 3.865867828258754e-06, + "loss": 0.3642, + "step": 12941 + }, + { + "epoch": 0.5857433808553971, + "grad_norm": 0.3031943483218063, + "learning_rate": 3.865154022869032e-06, + "loss": 0.5104, + "step": 12942 + }, + { + "epoch": 0.5857886399637927, + "grad_norm": 0.32523217420743983, + "learning_rate": 3.864440241863665e-06, + "loss": 0.4606, + "step": 12943 + }, + { + "epoch": 0.5858338990721883, + "grad_norm": 0.6740941885338018, + "learning_rate": 3.86372648525799e-06, + "loss": 0.337, + "step": 12944 + }, + { + "epoch": 0.5858791581805839, + "grad_norm": 0.7082381442153999, + "learning_rate": 3.863012753067343e-06, + "loss": 0.307, + "step": 12945 + }, + { + "epoch": 0.5859244172889794, + "grad_norm": 0.3069995352713124, + "learning_rate": 3.862299045307058e-06, + "loss": 0.4816, + "step": 12946 + }, + { + "epoch": 0.585969676397375, + "grad_norm": 0.6359988599338807, + "learning_rate": 3.861585361992474e-06, + "loss": 0.3188, + "step": 12947 + }, + { + "epoch": 0.5860149355057706, + "grad_norm": 0.6044620031315169, + "learning_rate": 3.860871703138925e-06, + "loss": 0.2627, + "step": 12948 + }, + { + "epoch": 0.5860601946141661, + "grad_norm": 0.28995056119402646, + "learning_rate": 3.860158068761743e-06, + "loss": 0.4874, + "step": 12949 + }, + { + "epoch": 0.5861054537225616, + "grad_norm": 0.6203997084408709, + "learning_rate": 3.859444458876264e-06, + "loss": 0.3463, + "step": 12950 + }, + { + "epoch": 0.5861507128309572, + "grad_norm": 0.5766424550001963, + "learning_rate": 3.85873087349782e-06, + "loss": 0.2999, + "step": 12951 + }, + { + "epoch": 0.5861959719393528, + "grad_norm": 0.6454390235375199, + "learning_rate": 3.8580173126417455e-06, + "loss": 0.3003, + "step": 12952 + }, + { + "epoch": 0.5862412310477484, + "grad_norm": 0.6459639638157862, + "learning_rate": 3.857303776323371e-06, + "loss": 0.3141, + "step": 12953 + }, + { + "epoch": 0.5862864901561439, + "grad_norm": 0.674173137586515, + "learning_rate": 3.85659026455803e-06, + "loss": 0.3233, + "step": 12954 + }, + { + "epoch": 0.5863317492645395, + "grad_norm": 0.6182253537794538, + "learning_rate": 3.855876777361051e-06, + "loss": 0.3477, + "step": 12955 + }, + { + "epoch": 0.586377008372935, + "grad_norm": 0.30382783121496, + "learning_rate": 3.855163314747765e-06, + "loss": 0.4931, + "step": 12956 + }, + { + "epoch": 0.5864222674813306, + "grad_norm": 0.6468381071406778, + "learning_rate": 3.854449876733507e-06, + "loss": 0.3316, + "step": 12957 + }, + { + "epoch": 0.5864675265897262, + "grad_norm": 0.6063408189941445, + "learning_rate": 3.8537364633336e-06, + "loss": 0.3195, + "step": 12958 + }, + { + "epoch": 0.5865127856981217, + "grad_norm": 0.5842726588714356, + "learning_rate": 3.853023074563376e-06, + "loss": 0.2697, + "step": 12959 + }, + { + "epoch": 0.5865580448065173, + "grad_norm": 0.5781391825709464, + "learning_rate": 3.852309710438165e-06, + "loss": 0.3199, + "step": 12960 + }, + { + "epoch": 0.5866033039149129, + "grad_norm": 0.6755210754771175, + "learning_rate": 3.851596370973292e-06, + "loss": 0.333, + "step": 12961 + }, + { + "epoch": 0.5866485630233085, + "grad_norm": 0.6235477776992845, + "learning_rate": 3.850883056184087e-06, + "loss": 0.3779, + "step": 12962 + }, + { + "epoch": 0.586693822131704, + "grad_norm": 0.5812073917967391, + "learning_rate": 3.850169766085874e-06, + "loss": 0.2543, + "step": 12963 + }, + { + "epoch": 0.5867390812400995, + "grad_norm": 0.6138726206796495, + "learning_rate": 3.849456500693985e-06, + "loss": 0.3029, + "step": 12964 + }, + { + "epoch": 0.5867843403484951, + "grad_norm": 0.6533874774901375, + "learning_rate": 3.848743260023739e-06, + "loss": 0.2815, + "step": 12965 + }, + { + "epoch": 0.5868295994568907, + "grad_norm": 1.647690739910162, + "learning_rate": 3.848030044090464e-06, + "loss": 0.3062, + "step": 12966 + }, + { + "epoch": 0.5868748585652863, + "grad_norm": 0.751645176452105, + "learning_rate": 3.847316852909488e-06, + "loss": 0.2889, + "step": 12967 + }, + { + "epoch": 0.5869201176736818, + "grad_norm": 0.6473090293190665, + "learning_rate": 3.8466036864961315e-06, + "loss": 0.3171, + "step": 12968 + }, + { + "epoch": 0.5869653767820774, + "grad_norm": 0.6242772006515882, + "learning_rate": 3.845890544865718e-06, + "loss": 0.3188, + "step": 12969 + }, + { + "epoch": 0.587010635890473, + "grad_norm": 0.5576862503082809, + "learning_rate": 3.845177428033574e-06, + "loss": 0.3178, + "step": 12970 + }, + { + "epoch": 0.5870558949988686, + "grad_norm": 0.6225941302204959, + "learning_rate": 3.84446433601502e-06, + "loss": 0.3154, + "step": 12971 + }, + { + "epoch": 0.587101154107264, + "grad_norm": 0.7071677053660017, + "learning_rate": 3.843751268825378e-06, + "loss": 0.355, + "step": 12972 + }, + { + "epoch": 0.5871464132156596, + "grad_norm": 0.6348094595951862, + "learning_rate": 3.843038226479971e-06, + "loss": 0.2623, + "step": 12973 + }, + { + "epoch": 0.5871916723240552, + "grad_norm": 0.6802360931954542, + "learning_rate": 3.842325208994117e-06, + "loss": 0.318, + "step": 12974 + }, + { + "epoch": 0.5872369314324508, + "grad_norm": 0.3432497848458775, + "learning_rate": 3.84161221638314e-06, + "loss": 0.474, + "step": 12975 + }, + { + "epoch": 0.5872821905408463, + "grad_norm": 0.3179869040291361, + "learning_rate": 3.840899248662358e-06, + "loss": 0.4543, + "step": 12976 + }, + { + "epoch": 0.5873274496492419, + "grad_norm": 0.30280561510928294, + "learning_rate": 3.840186305847094e-06, + "loss": 0.4646, + "step": 12977 + }, + { + "epoch": 0.5873727087576375, + "grad_norm": 0.588527362911705, + "learning_rate": 3.839473387952662e-06, + "loss": 0.2744, + "step": 12978 + }, + { + "epoch": 0.5874179678660331, + "grad_norm": 0.6391647492314123, + "learning_rate": 3.8387604949943816e-06, + "loss": 0.3408, + "step": 12979 + }, + { + "epoch": 0.5874632269744287, + "grad_norm": 0.6678100391164818, + "learning_rate": 3.8380476269875745e-06, + "loss": 0.3547, + "step": 12980 + }, + { + "epoch": 0.5875084860828241, + "grad_norm": 0.6755665912320579, + "learning_rate": 3.837334783947553e-06, + "loss": 0.3443, + "step": 12981 + }, + { + "epoch": 0.5875537451912197, + "grad_norm": 0.6287999602413568, + "learning_rate": 3.836621965889637e-06, + "loss": 0.2746, + "step": 12982 + }, + { + "epoch": 0.5875990042996153, + "grad_norm": 1.2565627186637056, + "learning_rate": 3.8359091728291426e-06, + "loss": 0.3053, + "step": 12983 + }, + { + "epoch": 0.5876442634080109, + "grad_norm": 0.6786037263914724, + "learning_rate": 3.835196404781383e-06, + "loss": 0.3133, + "step": 12984 + }, + { + "epoch": 0.5876895225164064, + "grad_norm": 0.6310320695975805, + "learning_rate": 3.834483661761676e-06, + "loss": 0.3334, + "step": 12985 + }, + { + "epoch": 0.587734781624802, + "grad_norm": 0.6925419971381628, + "learning_rate": 3.8337709437853365e-06, + "loss": 0.3574, + "step": 12986 + }, + { + "epoch": 0.5877800407331976, + "grad_norm": 0.4543710896067492, + "learning_rate": 3.833058250867677e-06, + "loss": 0.4888, + "step": 12987 + }, + { + "epoch": 0.5878252998415932, + "grad_norm": 0.6252709175360854, + "learning_rate": 3.83234558302401e-06, + "loss": 0.3251, + "step": 12988 + }, + { + "epoch": 0.5878705589499886, + "grad_norm": 0.35321906291413185, + "learning_rate": 3.8316329402696524e-06, + "loss": 0.4555, + "step": 12989 + }, + { + "epoch": 0.5879158180583842, + "grad_norm": 0.8580523387406522, + "learning_rate": 3.8309203226199145e-06, + "loss": 0.354, + "step": 12990 + }, + { + "epoch": 0.5879610771667798, + "grad_norm": 0.648674379215099, + "learning_rate": 3.830207730090108e-06, + "loss": 0.3289, + "step": 12991 + }, + { + "epoch": 0.5880063362751754, + "grad_norm": 0.6421814724828729, + "learning_rate": 3.829495162695543e-06, + "loss": 0.3033, + "step": 12992 + }, + { + "epoch": 0.588051595383571, + "grad_norm": 0.6463585445701963, + "learning_rate": 3.828782620451535e-06, + "loss": 0.3507, + "step": 12993 + }, + { + "epoch": 0.5880968544919665, + "grad_norm": 0.5912068879481209, + "learning_rate": 3.828070103373389e-06, + "loss": 0.3406, + "step": 12994 + }, + { + "epoch": 0.5881421136003621, + "grad_norm": 0.6465651509972019, + "learning_rate": 3.8273576114764176e-06, + "loss": 0.2994, + "step": 12995 + }, + { + "epoch": 0.5881873727087576, + "grad_norm": 0.6432592889234297, + "learning_rate": 3.8266451447759315e-06, + "loss": 0.2897, + "step": 12996 + }, + { + "epoch": 0.5882326318171532, + "grad_norm": 0.640912875516796, + "learning_rate": 3.825932703287236e-06, + "loss": 0.3524, + "step": 12997 + }, + { + "epoch": 0.5882778909255487, + "grad_norm": 0.441436437662703, + "learning_rate": 3.8252202870256395e-06, + "loss": 0.4835, + "step": 12998 + }, + { + "epoch": 0.5883231500339443, + "grad_norm": 0.39010100191764097, + "learning_rate": 3.824507896006454e-06, + "loss": 0.4494, + "step": 12999 + }, + { + "epoch": 0.5883684091423399, + "grad_norm": 0.6478115035019769, + "learning_rate": 3.823795530244982e-06, + "loss": 0.2936, + "step": 13000 + }, + { + "epoch": 0.5884136682507355, + "grad_norm": 0.5908031580738624, + "learning_rate": 3.823083189756531e-06, + "loss": 0.3461, + "step": 13001 + }, + { + "epoch": 0.5884589273591311, + "grad_norm": 0.6325161225022814, + "learning_rate": 3.822370874556408e-06, + "loss": 0.3281, + "step": 13002 + }, + { + "epoch": 0.5885041864675266, + "grad_norm": 0.6765632817653675, + "learning_rate": 3.821658584659918e-06, + "loss": 0.3767, + "step": 13003 + }, + { + "epoch": 0.5885494455759221, + "grad_norm": 0.3610735045231856, + "learning_rate": 3.820946320082366e-06, + "loss": 0.4932, + "step": 13004 + }, + { + "epoch": 0.5885947046843177, + "grad_norm": 0.7040019211616559, + "learning_rate": 3.820234080839057e-06, + "loss": 0.3134, + "step": 13005 + }, + { + "epoch": 0.5886399637927133, + "grad_norm": 0.6711299751775798, + "learning_rate": 3.819521866945295e-06, + "loss": 0.3553, + "step": 13006 + }, + { + "epoch": 0.5886852229011088, + "grad_norm": 0.5931720760249749, + "learning_rate": 3.81880967841638e-06, + "loss": 0.3059, + "step": 13007 + }, + { + "epoch": 0.5887304820095044, + "grad_norm": 0.36097307950893653, + "learning_rate": 3.818097515267618e-06, + "loss": 0.4693, + "step": 13008 + }, + { + "epoch": 0.5887757411179, + "grad_norm": 0.772885550432615, + "learning_rate": 3.817385377514312e-06, + "loss": 0.2972, + "step": 13009 + }, + { + "epoch": 0.5888210002262956, + "grad_norm": 0.6133863749155195, + "learning_rate": 3.816673265171762e-06, + "loss": 0.3071, + "step": 13010 + }, + { + "epoch": 0.5888662593346911, + "grad_norm": 0.6605504314425381, + "learning_rate": 3.815961178255267e-06, + "loss": 0.3196, + "step": 13011 + }, + { + "epoch": 0.5889115184430866, + "grad_norm": 0.5994473366414182, + "learning_rate": 3.815249116780133e-06, + "loss": 0.297, + "step": 13012 + }, + { + "epoch": 0.5889567775514822, + "grad_norm": 0.31147458714389, + "learning_rate": 3.8145370807616545e-06, + "loss": 0.4696, + "step": 13013 + }, + { + "epoch": 0.5890020366598778, + "grad_norm": 0.27633811696298244, + "learning_rate": 3.8138250702151336e-06, + "loss": 0.4779, + "step": 13014 + }, + { + "epoch": 0.5890472957682734, + "grad_norm": 0.6898232002811877, + "learning_rate": 3.8131130851558696e-06, + "loss": 0.3148, + "step": 13015 + }, + { + "epoch": 0.5890925548766689, + "grad_norm": 0.6506935341637908, + "learning_rate": 3.81240112559916e-06, + "loss": 0.3316, + "step": 13016 + }, + { + "epoch": 0.5891378139850645, + "grad_norm": 0.6508169215201881, + "learning_rate": 3.811689191560301e-06, + "loss": 0.2769, + "step": 13017 + }, + { + "epoch": 0.5891830730934601, + "grad_norm": 0.659463492837663, + "learning_rate": 3.8109772830545933e-06, + "loss": 0.2914, + "step": 13018 + }, + { + "epoch": 0.5892283322018557, + "grad_norm": 0.6711464673967386, + "learning_rate": 3.8102654000973326e-06, + "loss": 0.3229, + "step": 13019 + }, + { + "epoch": 0.5892735913102511, + "grad_norm": 0.6483412067025784, + "learning_rate": 3.8095535427038134e-06, + "loss": 0.3426, + "step": 13020 + }, + { + "epoch": 0.5893188504186467, + "grad_norm": 0.5998729560447539, + "learning_rate": 3.808841710889332e-06, + "loss": 0.2988, + "step": 13021 + }, + { + "epoch": 0.5893641095270423, + "grad_norm": 0.6400881001941805, + "learning_rate": 3.808129904669186e-06, + "loss": 0.3056, + "step": 13022 + }, + { + "epoch": 0.5894093686354379, + "grad_norm": 0.7228179397581488, + "learning_rate": 3.807418124058665e-06, + "loss": 0.2728, + "step": 13023 + }, + { + "epoch": 0.5894546277438334, + "grad_norm": 0.646531452310446, + "learning_rate": 3.8067063690730672e-06, + "loss": 0.318, + "step": 13024 + }, + { + "epoch": 0.589499886852229, + "grad_norm": 0.6952376992340696, + "learning_rate": 3.8059946397276854e-06, + "loss": 0.3524, + "step": 13025 + }, + { + "epoch": 0.5895451459606246, + "grad_norm": 0.4455158133737101, + "learning_rate": 3.805282936037811e-06, + "loss": 0.5017, + "step": 13026 + }, + { + "epoch": 0.5895904050690202, + "grad_norm": 0.3768748833292996, + "learning_rate": 3.8045712580187356e-06, + "loss": 0.4685, + "step": 13027 + }, + { + "epoch": 0.5896356641774158, + "grad_norm": 0.2977798946241966, + "learning_rate": 3.803859605685754e-06, + "loss": 0.4487, + "step": 13028 + }, + { + "epoch": 0.5896809232858112, + "grad_norm": 0.7536806509562007, + "learning_rate": 3.803147979054155e-06, + "loss": 0.3178, + "step": 13029 + }, + { + "epoch": 0.5897261823942068, + "grad_norm": 0.34640164397571077, + "learning_rate": 3.8024363781392304e-06, + "loss": 0.4742, + "step": 13030 + }, + { + "epoch": 0.5897714415026024, + "grad_norm": 0.6354442806532212, + "learning_rate": 3.8017248029562713e-06, + "loss": 0.2978, + "step": 13031 + }, + { + "epoch": 0.589816700610998, + "grad_norm": 0.6308339653666732, + "learning_rate": 3.8010132535205634e-06, + "loss": 0.3283, + "step": 13032 + }, + { + "epoch": 0.5898619597193935, + "grad_norm": 0.43622553607956044, + "learning_rate": 3.8003017298474e-06, + "loss": 0.4627, + "step": 13033 + }, + { + "epoch": 0.5899072188277891, + "grad_norm": 0.5934010485802537, + "learning_rate": 3.7995902319520674e-06, + "loss": 0.2918, + "step": 13034 + }, + { + "epoch": 0.5899524779361847, + "grad_norm": 0.6677173047396981, + "learning_rate": 3.7988787598498543e-06, + "loss": 0.2955, + "step": 13035 + }, + { + "epoch": 0.5899977370445803, + "grad_norm": 0.38851060414369165, + "learning_rate": 3.7981673135560464e-06, + "loss": 0.5038, + "step": 13036 + }, + { + "epoch": 0.5900429961529758, + "grad_norm": 0.657627358931422, + "learning_rate": 3.797455893085933e-06, + "loss": 0.3106, + "step": 13037 + }, + { + "epoch": 0.5900882552613713, + "grad_norm": 0.6333835119681968, + "learning_rate": 3.7967444984548e-06, + "loss": 0.3411, + "step": 13038 + }, + { + "epoch": 0.5901335143697669, + "grad_norm": 0.5793072590899354, + "learning_rate": 3.796033129677931e-06, + "loss": 0.3323, + "step": 13039 + }, + { + "epoch": 0.5901787734781625, + "grad_norm": 0.6453468287225503, + "learning_rate": 3.7953217867706106e-06, + "loss": 0.3203, + "step": 13040 + }, + { + "epoch": 0.5902240325865581, + "grad_norm": 0.6391441798115033, + "learning_rate": 3.794610469748129e-06, + "loss": 0.2964, + "step": 13041 + }, + { + "epoch": 0.5902692916949536, + "grad_norm": 0.621375675956063, + "learning_rate": 3.793899178625763e-06, + "loss": 0.3161, + "step": 13042 + }, + { + "epoch": 0.5903145508033492, + "grad_norm": 0.6274388766689246, + "learning_rate": 3.7931879134188002e-06, + "loss": 0.2955, + "step": 13043 + }, + { + "epoch": 0.5903598099117447, + "grad_norm": 0.6190550008897789, + "learning_rate": 3.7924766741425247e-06, + "loss": 0.3132, + "step": 13044 + }, + { + "epoch": 0.5904050690201403, + "grad_norm": 0.616290704979202, + "learning_rate": 3.791765460812215e-06, + "loss": 0.3291, + "step": 13045 + }, + { + "epoch": 0.5904503281285358, + "grad_norm": 0.6969178418384471, + "learning_rate": 3.7910542734431537e-06, + "loss": 0.3059, + "step": 13046 + }, + { + "epoch": 0.5904955872369314, + "grad_norm": 0.6856688562365002, + "learning_rate": 3.7903431120506247e-06, + "loss": 0.2953, + "step": 13047 + }, + { + "epoch": 0.590540846345327, + "grad_norm": 0.4143437058883918, + "learning_rate": 3.7896319766499073e-06, + "loss": 0.4756, + "step": 13048 + }, + { + "epoch": 0.5905861054537226, + "grad_norm": 0.6607130200060778, + "learning_rate": 3.788920867256281e-06, + "loss": 0.2771, + "step": 13049 + }, + { + "epoch": 0.5906313645621182, + "grad_norm": 0.5988983320355563, + "learning_rate": 3.788209783885024e-06, + "loss": 0.3414, + "step": 13050 + }, + { + "epoch": 0.5906766236705137, + "grad_norm": 0.6226333656842887, + "learning_rate": 3.7874987265514197e-06, + "loss": 0.2825, + "step": 13051 + }, + { + "epoch": 0.5907218827789092, + "grad_norm": 0.6485822725381329, + "learning_rate": 3.786787695270743e-06, + "loss": 0.2941, + "step": 13052 + }, + { + "epoch": 0.5907671418873048, + "grad_norm": 0.6526722775186214, + "learning_rate": 3.7860766900582716e-06, + "loss": 0.3147, + "step": 13053 + }, + { + "epoch": 0.5908124009957004, + "grad_norm": 0.2947319369351773, + "learning_rate": 3.785365710929286e-06, + "loss": 0.4555, + "step": 13054 + }, + { + "epoch": 0.5908576601040959, + "grad_norm": 0.6537057936992485, + "learning_rate": 3.784654757899059e-06, + "loss": 0.3248, + "step": 13055 + }, + { + "epoch": 0.5909029192124915, + "grad_norm": 0.6367926569809269, + "learning_rate": 3.783943830982868e-06, + "loss": 0.3591, + "step": 13056 + }, + { + "epoch": 0.5909481783208871, + "grad_norm": 0.6043301757986551, + "learning_rate": 3.7832329301959914e-06, + "loss": 0.2841, + "step": 13057 + }, + { + "epoch": 0.5909934374292827, + "grad_norm": 0.6727643037951684, + "learning_rate": 3.7825220555537006e-06, + "loss": 0.3573, + "step": 13058 + }, + { + "epoch": 0.5910386965376782, + "grad_norm": 0.590524442701709, + "learning_rate": 3.781811207071272e-06, + "loss": 0.3215, + "step": 13059 + }, + { + "epoch": 0.5910839556460737, + "grad_norm": 0.6354132649387176, + "learning_rate": 3.781100384763978e-06, + "loss": 0.3183, + "step": 13060 + }, + { + "epoch": 0.5911292147544693, + "grad_norm": 0.6075769280663038, + "learning_rate": 3.7803895886470952e-06, + "loss": 0.2922, + "step": 13061 + }, + { + "epoch": 0.5911744738628649, + "grad_norm": 0.592386100884859, + "learning_rate": 3.7796788187358934e-06, + "loss": 0.3243, + "step": 13062 + }, + { + "epoch": 0.5912197329712605, + "grad_norm": 0.3625514389963376, + "learning_rate": 3.778968075045646e-06, + "loss": 0.4747, + "step": 13063 + }, + { + "epoch": 0.591264992079656, + "grad_norm": 0.6902941165882858, + "learning_rate": 3.7782573575916255e-06, + "loss": 0.3174, + "step": 13064 + }, + { + "epoch": 0.5913102511880516, + "grad_norm": 0.6402809000377081, + "learning_rate": 3.7775466663890997e-06, + "loss": 0.3421, + "step": 13065 + }, + { + "epoch": 0.5913555102964472, + "grad_norm": 0.30137486000398755, + "learning_rate": 3.7768360014533427e-06, + "loss": 0.4765, + "step": 13066 + }, + { + "epoch": 0.5914007694048428, + "grad_norm": 0.5848318681138999, + "learning_rate": 3.7761253627996245e-06, + "loss": 0.3262, + "step": 13067 + }, + { + "epoch": 0.5914460285132382, + "grad_norm": 0.6320255325803715, + "learning_rate": 3.7754147504432128e-06, + "loss": 0.2678, + "step": 13068 + }, + { + "epoch": 0.5914912876216338, + "grad_norm": 0.6638000769349245, + "learning_rate": 3.7747041643993755e-06, + "loss": 0.3337, + "step": 13069 + }, + { + "epoch": 0.5915365467300294, + "grad_norm": 0.6680733135489192, + "learning_rate": 3.7739936046833856e-06, + "loss": 0.3151, + "step": 13070 + }, + { + "epoch": 0.591581805838425, + "grad_norm": 0.6145310273808362, + "learning_rate": 3.773283071310505e-06, + "loss": 0.2493, + "step": 13071 + }, + { + "epoch": 0.5916270649468206, + "grad_norm": 0.5646272596608798, + "learning_rate": 3.7725725642960047e-06, + "loss": 0.3029, + "step": 13072 + }, + { + "epoch": 0.5916723240552161, + "grad_norm": 0.5684147601436456, + "learning_rate": 3.7718620836551512e-06, + "loss": 0.3276, + "step": 13073 + }, + { + "epoch": 0.5917175831636117, + "grad_norm": 0.6525397914151486, + "learning_rate": 3.7711516294032086e-06, + "loss": 0.3275, + "step": 13074 + }, + { + "epoch": 0.5917628422720073, + "grad_norm": 0.6030261523641939, + "learning_rate": 3.770441201555442e-06, + "loss": 0.3035, + "step": 13075 + }, + { + "epoch": 0.5918081013804029, + "grad_norm": 0.658476768697074, + "learning_rate": 3.769730800127119e-06, + "loss": 0.3092, + "step": 13076 + }, + { + "epoch": 0.5918533604887983, + "grad_norm": 0.752410239829877, + "learning_rate": 3.769020425133503e-06, + "loss": 0.3108, + "step": 13077 + }, + { + "epoch": 0.5918986195971939, + "grad_norm": 0.8037543463094624, + "learning_rate": 3.7683100765898573e-06, + "loss": 0.3165, + "step": 13078 + }, + { + "epoch": 0.5919438787055895, + "grad_norm": 0.7767337488831968, + "learning_rate": 3.7675997545114435e-06, + "loss": 0.3113, + "step": 13079 + }, + { + "epoch": 0.5919891378139851, + "grad_norm": 0.6177110144688015, + "learning_rate": 3.7668894589135284e-06, + "loss": 0.3304, + "step": 13080 + }, + { + "epoch": 0.5920343969223806, + "grad_norm": 0.6972389230091802, + "learning_rate": 3.76617918981137e-06, + "loss": 0.3054, + "step": 13081 + }, + { + "epoch": 0.5920796560307762, + "grad_norm": 0.594758697082108, + "learning_rate": 3.7654689472202323e-06, + "loss": 0.3142, + "step": 13082 + }, + { + "epoch": 0.5921249151391718, + "grad_norm": 0.6565473637710484, + "learning_rate": 3.7647587311553758e-06, + "loss": 0.3388, + "step": 13083 + }, + { + "epoch": 0.5921701742475673, + "grad_norm": 0.3632395504641611, + "learning_rate": 3.7640485416320586e-06, + "loss": 0.4423, + "step": 13084 + }, + { + "epoch": 0.5922154333559629, + "grad_norm": 0.6618035706270688, + "learning_rate": 3.763338378665543e-06, + "loss": 0.2714, + "step": 13085 + }, + { + "epoch": 0.5922606924643584, + "grad_norm": 0.9727610599227895, + "learning_rate": 3.762628242271089e-06, + "loss": 0.315, + "step": 13086 + }, + { + "epoch": 0.592305951572754, + "grad_norm": 0.6265858065970411, + "learning_rate": 3.7619181324639526e-06, + "loss": 0.2623, + "step": 13087 + }, + { + "epoch": 0.5923512106811496, + "grad_norm": 0.658143947875074, + "learning_rate": 3.761208049259393e-06, + "loss": 0.3221, + "step": 13088 + }, + { + "epoch": 0.5923964697895452, + "grad_norm": 0.2794168498838751, + "learning_rate": 3.760497992672667e-06, + "loss": 0.4734, + "step": 13089 + }, + { + "epoch": 0.5924417288979407, + "grad_norm": 0.29591340671507, + "learning_rate": 3.7597879627190337e-06, + "loss": 0.4726, + "step": 13090 + }, + { + "epoch": 0.5924869880063363, + "grad_norm": 0.2957711298433703, + "learning_rate": 3.7590779594137476e-06, + "loss": 0.4824, + "step": 13091 + }, + { + "epoch": 0.5925322471147318, + "grad_norm": 0.29424958786733124, + "learning_rate": 3.758367982772065e-06, + "loss": 0.51, + "step": 13092 + }, + { + "epoch": 0.5925775062231274, + "grad_norm": 0.6516448084403705, + "learning_rate": 3.7576580328092416e-06, + "loss": 0.322, + "step": 13093 + }, + { + "epoch": 0.5926227653315229, + "grad_norm": 0.6188868863340756, + "learning_rate": 3.7569481095405297e-06, + "loss": 0.3076, + "step": 13094 + }, + { + "epoch": 0.5926680244399185, + "grad_norm": 0.643871355009258, + "learning_rate": 3.7562382129811863e-06, + "loss": 0.3304, + "step": 13095 + }, + { + "epoch": 0.5927132835483141, + "grad_norm": 0.613491725606984, + "learning_rate": 3.755528343146465e-06, + "loss": 0.3142, + "step": 13096 + }, + { + "epoch": 0.5927585426567097, + "grad_norm": 0.5718218562017449, + "learning_rate": 3.7548185000516163e-06, + "loss": 0.3098, + "step": 13097 + }, + { + "epoch": 0.5928038017651053, + "grad_norm": 0.32929129744003455, + "learning_rate": 3.7541086837118923e-06, + "loss": 0.4664, + "step": 13098 + }, + { + "epoch": 0.5928490608735008, + "grad_norm": 0.7171791450318256, + "learning_rate": 3.7533988941425497e-06, + "loss": 0.3681, + "step": 13099 + }, + { + "epoch": 0.5928943199818963, + "grad_norm": 0.7508003949385689, + "learning_rate": 3.7526891313588334e-06, + "loss": 0.2816, + "step": 13100 + }, + { + "epoch": 0.5929395790902919, + "grad_norm": 0.6463903925300547, + "learning_rate": 3.7519793953759976e-06, + "loss": 0.342, + "step": 13101 + }, + { + "epoch": 0.5929848381986875, + "grad_norm": 0.6034850507917721, + "learning_rate": 3.7512696862092924e-06, + "loss": 0.3186, + "step": 13102 + }, + { + "epoch": 0.593030097307083, + "grad_norm": 0.323643335339662, + "learning_rate": 3.750560003873965e-06, + "loss": 0.4677, + "step": 13103 + }, + { + "epoch": 0.5930753564154786, + "grad_norm": 0.5797391881618859, + "learning_rate": 3.7498503483852655e-06, + "loss": 0.3061, + "step": 13104 + }, + { + "epoch": 0.5931206155238742, + "grad_norm": 0.6533239166183596, + "learning_rate": 3.749140719758444e-06, + "loss": 0.2891, + "step": 13105 + }, + { + "epoch": 0.5931658746322698, + "grad_norm": 0.28435596156172427, + "learning_rate": 3.748431118008747e-06, + "loss": 0.4748, + "step": 13106 + }, + { + "epoch": 0.5932111337406654, + "grad_norm": 0.6077688964257816, + "learning_rate": 3.7477215431514203e-06, + "loss": 0.3061, + "step": 13107 + }, + { + "epoch": 0.5932563928490608, + "grad_norm": 0.8999032205253109, + "learning_rate": 3.74701199520171e-06, + "loss": 0.3221, + "step": 13108 + }, + { + "epoch": 0.5933016519574564, + "grad_norm": 0.34773585807513885, + "learning_rate": 3.7463024741748665e-06, + "loss": 0.4725, + "step": 13109 + }, + { + "epoch": 0.593346911065852, + "grad_norm": 0.6586318888584578, + "learning_rate": 3.745592980086132e-06, + "loss": 0.3044, + "step": 13110 + }, + { + "epoch": 0.5933921701742476, + "grad_norm": 0.26404863142989515, + "learning_rate": 3.744883512950751e-06, + "loss": 0.4663, + "step": 13111 + }, + { + "epoch": 0.5934374292826431, + "grad_norm": 0.6557620068116499, + "learning_rate": 3.7441740727839693e-06, + "loss": 0.3381, + "step": 13112 + }, + { + "epoch": 0.5934826883910387, + "grad_norm": 0.72527557830192, + "learning_rate": 3.7434646596010284e-06, + "loss": 0.3182, + "step": 13113 + }, + { + "epoch": 0.5935279474994343, + "grad_norm": 0.6732815876045888, + "learning_rate": 3.742755273417173e-06, + "loss": 0.3369, + "step": 13114 + }, + { + "epoch": 0.5935732066078299, + "grad_norm": 0.6230884754824806, + "learning_rate": 3.742045914247647e-06, + "loss": 0.3187, + "step": 13115 + }, + { + "epoch": 0.5936184657162253, + "grad_norm": 0.6512486014953773, + "learning_rate": 3.7413365821076897e-06, + "loss": 0.3313, + "step": 13116 + }, + { + "epoch": 0.5936637248246209, + "grad_norm": 0.6264029046891488, + "learning_rate": 3.740627277012542e-06, + "loss": 0.2908, + "step": 13117 + }, + { + "epoch": 0.5937089839330165, + "grad_norm": 0.8811085700085609, + "learning_rate": 3.7399179989774483e-06, + "loss": 0.2991, + "step": 13118 + }, + { + "epoch": 0.5937542430414121, + "grad_norm": 0.6397706143848809, + "learning_rate": 3.739208748017647e-06, + "loss": 0.3208, + "step": 13119 + }, + { + "epoch": 0.5937995021498077, + "grad_norm": 0.6302304728471242, + "learning_rate": 3.7384995241483767e-06, + "loss": 0.3121, + "step": 13120 + }, + { + "epoch": 0.5938447612582032, + "grad_norm": 0.6752519831545459, + "learning_rate": 3.737790327384876e-06, + "loss": 0.3062, + "step": 13121 + }, + { + "epoch": 0.5938900203665988, + "grad_norm": 0.6138243669102184, + "learning_rate": 3.7370811577423883e-06, + "loss": 0.3445, + "step": 13122 + }, + { + "epoch": 0.5939352794749944, + "grad_norm": 0.6637397709643503, + "learning_rate": 3.7363720152361436e-06, + "loss": 0.3125, + "step": 13123 + }, + { + "epoch": 0.59398053858339, + "grad_norm": 0.5809437749619315, + "learning_rate": 3.735662899881385e-06, + "loss": 0.2955, + "step": 13124 + }, + { + "epoch": 0.5940257976917854, + "grad_norm": 0.6544272698464104, + "learning_rate": 3.734953811693349e-06, + "loss": 0.3139, + "step": 13125 + }, + { + "epoch": 0.594071056800181, + "grad_norm": 1.2643189481680515, + "learning_rate": 3.7342447506872686e-06, + "loss": 0.3094, + "step": 13126 + }, + { + "epoch": 0.5941163159085766, + "grad_norm": 0.6409913588796115, + "learning_rate": 3.7335357168783802e-06, + "loss": 0.2845, + "step": 13127 + }, + { + "epoch": 0.5941615750169722, + "grad_norm": 0.7650230050867153, + "learning_rate": 3.732826710281923e-06, + "loss": 0.3311, + "step": 13128 + }, + { + "epoch": 0.5942068341253677, + "grad_norm": 0.6220906941303066, + "learning_rate": 3.7321177309131248e-06, + "loss": 0.2978, + "step": 13129 + }, + { + "epoch": 0.5942520932337633, + "grad_norm": 0.6174783888824636, + "learning_rate": 3.7314087787872234e-06, + "loss": 0.3028, + "step": 13130 + }, + { + "epoch": 0.5942973523421589, + "grad_norm": 0.5795536682728678, + "learning_rate": 3.73069985391945e-06, + "loss": 0.2791, + "step": 13131 + }, + { + "epoch": 0.5943426114505544, + "grad_norm": 0.5777705970662388, + "learning_rate": 3.7299909563250414e-06, + "loss": 0.2955, + "step": 13132 + }, + { + "epoch": 0.59438787055895, + "grad_norm": 0.3486003160911572, + "learning_rate": 3.7292820860192235e-06, + "loss": 0.4909, + "step": 13133 + }, + { + "epoch": 0.5944331296673455, + "grad_norm": 0.6233231953624803, + "learning_rate": 3.7285732430172327e-06, + "loss": 0.2864, + "step": 13134 + }, + { + "epoch": 0.5944783887757411, + "grad_norm": 0.6304934226609963, + "learning_rate": 3.7278644273342982e-06, + "loss": 0.3719, + "step": 13135 + }, + { + "epoch": 0.5945236478841367, + "grad_norm": 0.30752683520412194, + "learning_rate": 3.7271556389856493e-06, + "loss": 0.4767, + "step": 13136 + }, + { + "epoch": 0.5945689069925323, + "grad_norm": 0.6703948879065504, + "learning_rate": 3.726446877986516e-06, + "loss": 0.3436, + "step": 13137 + }, + { + "epoch": 0.5946141661009278, + "grad_norm": 0.29394166362208135, + "learning_rate": 3.725738144352129e-06, + "loss": 0.4707, + "step": 13138 + }, + { + "epoch": 0.5946594252093234, + "grad_norm": 0.6364131387899856, + "learning_rate": 3.725029438097715e-06, + "loss": 0.3421, + "step": 13139 + }, + { + "epoch": 0.594704684317719, + "grad_norm": 0.3072016357761098, + "learning_rate": 3.7243207592385034e-06, + "loss": 0.4662, + "step": 13140 + }, + { + "epoch": 0.5947499434261145, + "grad_norm": 1.065624468877734, + "learning_rate": 3.7236121077897208e-06, + "loss": 0.3032, + "step": 13141 + }, + { + "epoch": 0.59479520253451, + "grad_norm": 0.6361755726188255, + "learning_rate": 3.7229034837665923e-06, + "loss": 0.329, + "step": 13142 + }, + { + "epoch": 0.5948404616429056, + "grad_norm": 0.6633173196046636, + "learning_rate": 3.722194887184346e-06, + "loss": 0.323, + "step": 13143 + }, + { + "epoch": 0.5948857207513012, + "grad_norm": 0.39576080171503325, + "learning_rate": 3.7214863180582085e-06, + "loss": 0.4561, + "step": 13144 + }, + { + "epoch": 0.5949309798596968, + "grad_norm": 1.0278169141323898, + "learning_rate": 3.7207777764034027e-06, + "loss": 0.2974, + "step": 13145 + }, + { + "epoch": 0.5949762389680924, + "grad_norm": 0.6570809526656647, + "learning_rate": 3.720069262235152e-06, + "loss": 0.3285, + "step": 13146 + }, + { + "epoch": 0.5950214980764879, + "grad_norm": 0.6269859847151987, + "learning_rate": 3.7193607755686836e-06, + "loss": 0.3014, + "step": 13147 + }, + { + "epoch": 0.5950667571848834, + "grad_norm": 0.657802326899779, + "learning_rate": 3.718652316419219e-06, + "loss": 0.3585, + "step": 13148 + }, + { + "epoch": 0.595112016293279, + "grad_norm": 0.5563274901862401, + "learning_rate": 3.7179438848019805e-06, + "loss": 0.3145, + "step": 13149 + }, + { + "epoch": 0.5951572754016746, + "grad_norm": 0.3098645111637703, + "learning_rate": 3.7172354807321894e-06, + "loss": 0.4512, + "step": 13150 + }, + { + "epoch": 0.5952025345100701, + "grad_norm": 0.3088313873530433, + "learning_rate": 3.7165271042250706e-06, + "loss": 0.4798, + "step": 13151 + }, + { + "epoch": 0.5952477936184657, + "grad_norm": 0.6343676769371877, + "learning_rate": 3.7158187552958403e-06, + "loss": 0.2714, + "step": 13152 + }, + { + "epoch": 0.5952930527268613, + "grad_norm": 0.6710306240092611, + "learning_rate": 3.7151104339597212e-06, + "loss": 0.2857, + "step": 13153 + }, + { + "epoch": 0.5953383118352569, + "grad_norm": 0.27054839637529265, + "learning_rate": 3.7144021402319334e-06, + "loss": 0.4489, + "step": 13154 + }, + { + "epoch": 0.5953835709436525, + "grad_norm": 0.6816851121029694, + "learning_rate": 3.713693874127695e-06, + "loss": 0.2968, + "step": 13155 + }, + { + "epoch": 0.5954288300520479, + "grad_norm": 0.6553860649813443, + "learning_rate": 3.712985635662223e-06, + "loss": 0.3018, + "step": 13156 + }, + { + "epoch": 0.5954740891604435, + "grad_norm": 0.6782676736421867, + "learning_rate": 3.7122774248507386e-06, + "loss": 0.304, + "step": 13157 + }, + { + "epoch": 0.5955193482688391, + "grad_norm": 0.2797039203100852, + "learning_rate": 3.7115692417084574e-06, + "loss": 0.4485, + "step": 13158 + }, + { + "epoch": 0.5955646073772347, + "grad_norm": 0.6062200687096395, + "learning_rate": 3.7108610862505955e-06, + "loss": 0.291, + "step": 13159 + }, + { + "epoch": 0.5956098664856302, + "grad_norm": 0.5928153101844938, + "learning_rate": 3.710152958492369e-06, + "loss": 0.3194, + "step": 13160 + }, + { + "epoch": 0.5956551255940258, + "grad_norm": 0.6381956679356376, + "learning_rate": 3.7094448584489955e-06, + "loss": 0.3465, + "step": 13161 + }, + { + "epoch": 0.5957003847024214, + "grad_norm": 0.6051622048176114, + "learning_rate": 3.708736786135687e-06, + "loss": 0.2943, + "step": 13162 + }, + { + "epoch": 0.595745643810817, + "grad_norm": 0.7120118076247374, + "learning_rate": 3.70802874156766e-06, + "loss": 0.2923, + "step": 13163 + }, + { + "epoch": 0.5957909029192124, + "grad_norm": 0.3312549098297892, + "learning_rate": 3.7073207247601285e-06, + "loss": 0.4615, + "step": 13164 + }, + { + "epoch": 0.595836162027608, + "grad_norm": 0.29657193989846825, + "learning_rate": 3.7066127357283026e-06, + "loss": 0.4679, + "step": 13165 + }, + { + "epoch": 0.5958814211360036, + "grad_norm": 0.5841555972131169, + "learning_rate": 3.705904774487396e-06, + "loss": 0.3242, + "step": 13166 + }, + { + "epoch": 0.5959266802443992, + "grad_norm": 0.28481066257332377, + "learning_rate": 3.7051968410526236e-06, + "loss": 0.4993, + "step": 13167 + }, + { + "epoch": 0.5959719393527948, + "grad_norm": 0.6477331403565503, + "learning_rate": 3.7044889354391934e-06, + "loss": 0.3452, + "step": 13168 + }, + { + "epoch": 0.5960171984611903, + "grad_norm": 0.6739577213503118, + "learning_rate": 3.703781057662317e-06, + "loss": 0.2652, + "step": 13169 + }, + { + "epoch": 0.5960624575695859, + "grad_norm": 0.6052741040937766, + "learning_rate": 3.703073207737205e-06, + "loss": 0.3061, + "step": 13170 + }, + { + "epoch": 0.5961077166779815, + "grad_norm": 0.5757953144746631, + "learning_rate": 3.7023653856790655e-06, + "loss": 0.2465, + "step": 13171 + }, + { + "epoch": 0.596152975786377, + "grad_norm": 0.6197451940727606, + "learning_rate": 3.7016575915031084e-06, + "loss": 0.3131, + "step": 13172 + }, + { + "epoch": 0.5961982348947725, + "grad_norm": 0.6442250708090359, + "learning_rate": 3.700949825224544e-06, + "loss": 0.3386, + "step": 13173 + }, + { + "epoch": 0.5962434940031681, + "grad_norm": 0.6828610388809717, + "learning_rate": 3.700242086858577e-06, + "loss": 0.3504, + "step": 13174 + }, + { + "epoch": 0.5962887531115637, + "grad_norm": 0.6245904625773707, + "learning_rate": 3.6995343764204143e-06, + "loss": 0.3109, + "step": 13175 + }, + { + "epoch": 0.5963340122199593, + "grad_norm": 0.29816155391876076, + "learning_rate": 3.6988266939252647e-06, + "loss": 0.4575, + "step": 13176 + }, + { + "epoch": 0.5963792713283548, + "grad_norm": 0.6163325957250416, + "learning_rate": 3.698119039388335e-06, + "loss": 0.3007, + "step": 13177 + }, + { + "epoch": 0.5964245304367504, + "grad_norm": 0.5790691504496822, + "learning_rate": 3.6974114128248274e-06, + "loss": 0.2607, + "step": 13178 + }, + { + "epoch": 0.596469789545146, + "grad_norm": 0.7598184284764053, + "learning_rate": 3.696703814249947e-06, + "loss": 0.3196, + "step": 13179 + }, + { + "epoch": 0.5965150486535415, + "grad_norm": 0.2825882685277589, + "learning_rate": 3.695996243678901e-06, + "loss": 0.4382, + "step": 13180 + }, + { + "epoch": 0.5965603077619371, + "grad_norm": 0.6359353171429746, + "learning_rate": 3.6952887011268885e-06, + "loss": 0.3261, + "step": 13181 + }, + { + "epoch": 0.5966055668703326, + "grad_norm": 0.6116551042236428, + "learning_rate": 3.6945811866091153e-06, + "loss": 0.3117, + "step": 13182 + }, + { + "epoch": 0.5966508259787282, + "grad_norm": 0.7821988598175718, + "learning_rate": 3.6938737001407847e-06, + "loss": 0.2932, + "step": 13183 + }, + { + "epoch": 0.5966960850871238, + "grad_norm": 0.621388684245905, + "learning_rate": 3.6931662417370956e-06, + "loss": 0.3058, + "step": 13184 + }, + { + "epoch": 0.5967413441955194, + "grad_norm": 0.5882159349528706, + "learning_rate": 3.692458811413249e-06, + "loss": 0.2793, + "step": 13185 + }, + { + "epoch": 0.5967866033039149, + "grad_norm": 0.5827495491993245, + "learning_rate": 3.6917514091844497e-06, + "loss": 0.3099, + "step": 13186 + }, + { + "epoch": 0.5968318624123105, + "grad_norm": 0.6505631041125741, + "learning_rate": 3.691044035065893e-06, + "loss": 0.3516, + "step": 13187 + }, + { + "epoch": 0.596877121520706, + "grad_norm": 0.6166883268007635, + "learning_rate": 3.6903366890727792e-06, + "loss": 0.298, + "step": 13188 + }, + { + "epoch": 0.5969223806291016, + "grad_norm": 0.6128453820552738, + "learning_rate": 3.6896293712203075e-06, + "loss": 0.2935, + "step": 13189 + }, + { + "epoch": 0.5969676397374972, + "grad_norm": 0.6593897806724689, + "learning_rate": 3.6889220815236776e-06, + "loss": 0.3052, + "step": 13190 + }, + { + "epoch": 0.5970128988458927, + "grad_norm": 0.6467674494601752, + "learning_rate": 3.688214819998085e-06, + "loss": 0.2967, + "step": 13191 + }, + { + "epoch": 0.5970581579542883, + "grad_norm": 0.9026493624411818, + "learning_rate": 3.687507586658726e-06, + "loss": 0.3223, + "step": 13192 + }, + { + "epoch": 0.5971034170626839, + "grad_norm": 0.35026014317474924, + "learning_rate": 3.6868003815208003e-06, + "loss": 0.4837, + "step": 13193 + }, + { + "epoch": 0.5971486761710795, + "grad_norm": 0.6215915010677973, + "learning_rate": 3.686093204599499e-06, + "loss": 0.3092, + "step": 13194 + }, + { + "epoch": 0.597193935279475, + "grad_norm": 0.6232982673485767, + "learning_rate": 3.68538605591002e-06, + "loss": 0.2798, + "step": 13195 + }, + { + "epoch": 0.5972391943878705, + "grad_norm": 0.6129914831636445, + "learning_rate": 3.6846789354675584e-06, + "loss": 0.3021, + "step": 13196 + }, + { + "epoch": 0.5972844534962661, + "grad_norm": 0.6216919708011899, + "learning_rate": 3.683971843287305e-06, + "loss": 0.2759, + "step": 13197 + }, + { + "epoch": 0.5973297126046617, + "grad_norm": 0.6521665296020159, + "learning_rate": 3.6832647793844557e-06, + "loss": 0.2936, + "step": 13198 + }, + { + "epoch": 0.5973749717130572, + "grad_norm": 0.2537563545628852, + "learning_rate": 3.6825577437742028e-06, + "loss": 0.479, + "step": 13199 + }, + { + "epoch": 0.5974202308214528, + "grad_norm": 0.5903977732511272, + "learning_rate": 3.681850736471736e-06, + "loss": 0.2845, + "step": 13200 + }, + { + "epoch": 0.5974654899298484, + "grad_norm": 0.639447725295969, + "learning_rate": 3.6811437574922494e-06, + "loss": 0.326, + "step": 13201 + }, + { + "epoch": 0.597510749038244, + "grad_norm": 0.6507427678934059, + "learning_rate": 3.680436806850933e-06, + "loss": 0.36, + "step": 13202 + }, + { + "epoch": 0.5975560081466396, + "grad_norm": 0.6147854529236003, + "learning_rate": 3.6797298845629776e-06, + "loss": 0.3502, + "step": 13203 + }, + { + "epoch": 0.597601267255035, + "grad_norm": 0.6453083421397205, + "learning_rate": 3.6790229906435706e-06, + "loss": 0.3185, + "step": 13204 + }, + { + "epoch": 0.5976465263634306, + "grad_norm": 0.2641602153652307, + "learning_rate": 3.6783161251079026e-06, + "loss": 0.4633, + "step": 13205 + }, + { + "epoch": 0.5976917854718262, + "grad_norm": 0.6993737385876693, + "learning_rate": 3.677609287971163e-06, + "loss": 0.3133, + "step": 13206 + }, + { + "epoch": 0.5977370445802218, + "grad_norm": 0.6268767172580938, + "learning_rate": 3.676902479248538e-06, + "loss": 0.3749, + "step": 13207 + }, + { + "epoch": 0.5977823036886173, + "grad_norm": 0.27262484330790376, + "learning_rate": 3.6761956989552138e-06, + "loss": 0.4625, + "step": 13208 + }, + { + "epoch": 0.5978275627970129, + "grad_norm": 0.8495143981164707, + "learning_rate": 3.6754889471063814e-06, + "loss": 0.2837, + "step": 13209 + }, + { + "epoch": 0.5978728219054085, + "grad_norm": 0.33892822582460136, + "learning_rate": 3.6747822237172204e-06, + "loss": 0.4842, + "step": 13210 + }, + { + "epoch": 0.5979180810138041, + "grad_norm": 0.6494562517896424, + "learning_rate": 3.6740755288029206e-06, + "loss": 0.3122, + "step": 13211 + }, + { + "epoch": 0.5979633401221995, + "grad_norm": 0.6489746897269669, + "learning_rate": 3.6733688623786667e-06, + "loss": 0.2886, + "step": 13212 + }, + { + "epoch": 0.5980085992305951, + "grad_norm": 0.27422493756120675, + "learning_rate": 3.67266222445964e-06, + "loss": 0.4729, + "step": 13213 + }, + { + "epoch": 0.5980538583389907, + "grad_norm": 0.5409356168569753, + "learning_rate": 3.6719556150610243e-06, + "loss": 0.2931, + "step": 13214 + }, + { + "epoch": 0.5980991174473863, + "grad_norm": 0.6618889061564469, + "learning_rate": 3.6712490341980057e-06, + "loss": 0.306, + "step": 13215 + }, + { + "epoch": 0.5981443765557819, + "grad_norm": 0.27558100516395273, + "learning_rate": 3.6705424818857636e-06, + "loss": 0.4711, + "step": 13216 + }, + { + "epoch": 0.5981896356641774, + "grad_norm": 0.26551145333875725, + "learning_rate": 3.6698359581394803e-06, + "loss": 0.4569, + "step": 13217 + }, + { + "epoch": 0.598234894772573, + "grad_norm": 0.6406255430648299, + "learning_rate": 3.669129462974337e-06, + "loss": 0.309, + "step": 13218 + }, + { + "epoch": 0.5982801538809686, + "grad_norm": 0.6131297048044994, + "learning_rate": 3.668422996405515e-06, + "loss": 0.3046, + "step": 13219 + }, + { + "epoch": 0.5983254129893641, + "grad_norm": 0.7123061004163291, + "learning_rate": 3.667716558448192e-06, + "loss": 0.3683, + "step": 13220 + }, + { + "epoch": 0.5983706720977596, + "grad_norm": 0.4038625913971144, + "learning_rate": 3.667010149117549e-06, + "loss": 0.4673, + "step": 13221 + }, + { + "epoch": 0.5984159312061552, + "grad_norm": 0.635090127562506, + "learning_rate": 3.666303768428765e-06, + "loss": 0.2671, + "step": 13222 + }, + { + "epoch": 0.5984611903145508, + "grad_norm": 0.8239338249393553, + "learning_rate": 3.665597416397014e-06, + "loss": 0.3068, + "step": 13223 + }, + { + "epoch": 0.5985064494229464, + "grad_norm": 0.6299322564454998, + "learning_rate": 3.6648910930374783e-06, + "loss": 0.3097, + "step": 13224 + }, + { + "epoch": 0.598551708531342, + "grad_norm": 0.28881756200221953, + "learning_rate": 3.6641847983653326e-06, + "loss": 0.4702, + "step": 13225 + }, + { + "epoch": 0.5985969676397375, + "grad_norm": 0.5770655353848592, + "learning_rate": 3.6634785323957522e-06, + "loss": 0.3299, + "step": 13226 + }, + { + "epoch": 0.598642226748133, + "grad_norm": 0.5883840900394046, + "learning_rate": 3.6627722951439125e-06, + "loss": 0.2902, + "step": 13227 + }, + { + "epoch": 0.5986874858565286, + "grad_norm": 0.6533817489703067, + "learning_rate": 3.6620660866249922e-06, + "loss": 0.2909, + "step": 13228 + }, + { + "epoch": 0.5987327449649242, + "grad_norm": 0.6342454950558009, + "learning_rate": 3.66135990685416e-06, + "loss": 0.3526, + "step": 13229 + }, + { + "epoch": 0.5987780040733197, + "grad_norm": 0.6688804477128425, + "learning_rate": 3.6606537558465925e-06, + "loss": 0.3083, + "step": 13230 + }, + { + "epoch": 0.5988232631817153, + "grad_norm": 0.6621555789786193, + "learning_rate": 3.6599476336174622e-06, + "loss": 0.3435, + "step": 13231 + }, + { + "epoch": 0.5988685222901109, + "grad_norm": 0.614671513931057, + "learning_rate": 3.659241540181943e-06, + "loss": 0.3667, + "step": 13232 + }, + { + "epoch": 0.5989137813985065, + "grad_norm": 0.5872663880952672, + "learning_rate": 3.6585354755552032e-06, + "loss": 0.3283, + "step": 13233 + }, + { + "epoch": 0.598959040506902, + "grad_norm": 0.6158303410100913, + "learning_rate": 3.6578294397524174e-06, + "loss": 0.3528, + "step": 13234 + }, + { + "epoch": 0.5990042996152976, + "grad_norm": 0.6203668634959106, + "learning_rate": 3.657123432788755e-06, + "loss": 0.3133, + "step": 13235 + }, + { + "epoch": 0.5990495587236931, + "grad_norm": 0.6394831186748366, + "learning_rate": 3.656417454679385e-06, + "loss": 0.3182, + "step": 13236 + }, + { + "epoch": 0.5990948178320887, + "grad_norm": 0.30639042230507724, + "learning_rate": 3.6557115054394764e-06, + "loss": 0.4717, + "step": 13237 + }, + { + "epoch": 0.5991400769404843, + "grad_norm": 0.29415688444257554, + "learning_rate": 3.655005585084202e-06, + "loss": 0.4776, + "step": 13238 + }, + { + "epoch": 0.5991853360488798, + "grad_norm": 0.8168793453305554, + "learning_rate": 3.6542996936287233e-06, + "loss": 0.3387, + "step": 13239 + }, + { + "epoch": 0.5992305951572754, + "grad_norm": 0.6441091079639482, + "learning_rate": 3.6535938310882124e-06, + "loss": 0.3146, + "step": 13240 + }, + { + "epoch": 0.599275854265671, + "grad_norm": 0.6282743875406009, + "learning_rate": 3.6528879974778365e-06, + "loss": 0.3021, + "step": 13241 + }, + { + "epoch": 0.5993211133740666, + "grad_norm": 0.6778664796167693, + "learning_rate": 3.6521821928127588e-06, + "loss": 0.3358, + "step": 13242 + }, + { + "epoch": 0.599366372482462, + "grad_norm": 0.27633099760908875, + "learning_rate": 3.6514764171081454e-06, + "loss": 0.4482, + "step": 13243 + }, + { + "epoch": 0.5994116315908576, + "grad_norm": 0.6270164059202556, + "learning_rate": 3.6507706703791624e-06, + "loss": 0.2861, + "step": 13244 + }, + { + "epoch": 0.5994568906992532, + "grad_norm": 0.5866021898261772, + "learning_rate": 3.650064952640976e-06, + "loss": 0.3334, + "step": 13245 + }, + { + "epoch": 0.5995021498076488, + "grad_norm": 0.627212468249608, + "learning_rate": 3.649359263908746e-06, + "loss": 0.2756, + "step": 13246 + }, + { + "epoch": 0.5995474089160443, + "grad_norm": 0.6358470020069243, + "learning_rate": 3.6486536041976362e-06, + "loss": 0.2926, + "step": 13247 + }, + { + "epoch": 0.5995926680244399, + "grad_norm": 0.6168356613463569, + "learning_rate": 3.6479479735228117e-06, + "loss": 0.2641, + "step": 13248 + }, + { + "epoch": 0.5996379271328355, + "grad_norm": 0.3012527809032249, + "learning_rate": 3.6472423718994326e-06, + "loss": 0.4825, + "step": 13249 + }, + { + "epoch": 0.5996831862412311, + "grad_norm": 0.6151714677223123, + "learning_rate": 3.6465367993426603e-06, + "loss": 0.3112, + "step": 13250 + }, + { + "epoch": 0.5997284453496267, + "grad_norm": 0.6661811042765926, + "learning_rate": 3.6458312558676555e-06, + "loss": 0.2875, + "step": 13251 + }, + { + "epoch": 0.5997737044580221, + "grad_norm": 0.6218647748077784, + "learning_rate": 3.6451257414895767e-06, + "loss": 0.3149, + "step": 13252 + }, + { + "epoch": 0.5998189635664177, + "grad_norm": 0.5646580107053886, + "learning_rate": 3.6444202562235854e-06, + "loss": 0.3269, + "step": 13253 + }, + { + "epoch": 0.5998642226748133, + "grad_norm": 0.6234519410493783, + "learning_rate": 3.6437148000848404e-06, + "loss": 0.3071, + "step": 13254 + }, + { + "epoch": 0.5999094817832089, + "grad_norm": 0.6258900034740645, + "learning_rate": 3.6430093730884973e-06, + "loss": 0.3069, + "step": 13255 + }, + { + "epoch": 0.5999547408916044, + "grad_norm": 0.30236203249057025, + "learning_rate": 3.6423039752497146e-06, + "loss": 0.4885, + "step": 13256 + }, + { + "epoch": 0.6, + "grad_norm": 0.8056969703403314, + "learning_rate": 3.641598606583653e-06, + "loss": 0.3121, + "step": 13257 + }, + { + "epoch": 0.6000452591083956, + "grad_norm": 0.623021541836843, + "learning_rate": 3.640893267105462e-06, + "loss": 0.2839, + "step": 13258 + }, + { + "epoch": 0.6000905182167912, + "grad_norm": 0.654267609693474, + "learning_rate": 3.6401879568303013e-06, + "loss": 0.2896, + "step": 13259 + }, + { + "epoch": 0.6001357773251867, + "grad_norm": 0.7279223216549868, + "learning_rate": 3.639482675773324e-06, + "loss": 0.2611, + "step": 13260 + }, + { + "epoch": 0.6001810364335822, + "grad_norm": 0.7553416821907419, + "learning_rate": 3.6387774239496893e-06, + "loss": 0.3153, + "step": 13261 + }, + { + "epoch": 0.6002262955419778, + "grad_norm": 0.6802481482404504, + "learning_rate": 3.6380722013745434e-06, + "loss": 0.3622, + "step": 13262 + }, + { + "epoch": 0.6002715546503734, + "grad_norm": 0.6082365759513287, + "learning_rate": 3.637367008063044e-06, + "loss": 0.3007, + "step": 13263 + }, + { + "epoch": 0.600316813758769, + "grad_norm": 0.5913283821044162, + "learning_rate": 3.6366618440303436e-06, + "loss": 0.2381, + "step": 13264 + }, + { + "epoch": 0.6003620728671645, + "grad_norm": 0.7278794020979359, + "learning_rate": 3.6359567092915928e-06, + "loss": 0.3526, + "step": 13265 + }, + { + "epoch": 0.6004073319755601, + "grad_norm": 0.785940979506602, + "learning_rate": 3.635251603861941e-06, + "loss": 0.3553, + "step": 13266 + }, + { + "epoch": 0.6004525910839557, + "grad_norm": 0.6008767130405239, + "learning_rate": 3.6345465277565427e-06, + "loss": 0.3146, + "step": 13267 + }, + { + "epoch": 0.6004978501923512, + "grad_norm": 0.967970368322063, + "learning_rate": 3.6338414809905453e-06, + "loss": 0.3412, + "step": 13268 + }, + { + "epoch": 0.6005431093007467, + "grad_norm": 0.6307483799109639, + "learning_rate": 3.633136463579099e-06, + "loss": 0.287, + "step": 13269 + }, + { + "epoch": 0.6005883684091423, + "grad_norm": 0.600574822035914, + "learning_rate": 3.6324314755373523e-06, + "loss": 0.3575, + "step": 13270 + }, + { + "epoch": 0.6006336275175379, + "grad_norm": 0.29013622581752974, + "learning_rate": 3.6317265168804526e-06, + "loss": 0.4642, + "step": 13271 + }, + { + "epoch": 0.6006788866259335, + "grad_norm": 0.6462805277268697, + "learning_rate": 3.631021587623547e-06, + "loss": 0.3476, + "step": 13272 + }, + { + "epoch": 0.6007241457343291, + "grad_norm": 0.6454512431625602, + "learning_rate": 3.630316687781783e-06, + "loss": 0.33, + "step": 13273 + }, + { + "epoch": 0.6007694048427246, + "grad_norm": 0.6181042514441442, + "learning_rate": 3.6296118173703075e-06, + "loss": 0.3078, + "step": 13274 + }, + { + "epoch": 0.6008146639511202, + "grad_norm": 0.6651883769771438, + "learning_rate": 3.628906976404265e-06, + "loss": 0.3061, + "step": 13275 + }, + { + "epoch": 0.6008599230595157, + "grad_norm": 0.6182735386264937, + "learning_rate": 3.6282021648988e-06, + "loss": 0.3268, + "step": 13276 + }, + { + "epoch": 0.6009051821679113, + "grad_norm": 0.7468317605762024, + "learning_rate": 3.6274973828690584e-06, + "loss": 0.3354, + "step": 13277 + }, + { + "epoch": 0.6009504412763068, + "grad_norm": 0.6376219336618065, + "learning_rate": 3.6267926303301827e-06, + "loss": 0.3057, + "step": 13278 + }, + { + "epoch": 0.6009957003847024, + "grad_norm": 0.5961662944166821, + "learning_rate": 3.6260879072973155e-06, + "loss": 0.3278, + "step": 13279 + }, + { + "epoch": 0.601040959493098, + "grad_norm": 0.590256658116722, + "learning_rate": 3.6253832137856e-06, + "loss": 0.3189, + "step": 13280 + }, + { + "epoch": 0.6010862186014936, + "grad_norm": 0.6627626694142456, + "learning_rate": 3.6246785498101754e-06, + "loss": 0.3412, + "step": 13281 + }, + { + "epoch": 0.6011314777098891, + "grad_norm": 0.7990817075726195, + "learning_rate": 3.6239739153861863e-06, + "loss": 0.3421, + "step": 13282 + }, + { + "epoch": 0.6011767368182847, + "grad_norm": 0.6661198485629423, + "learning_rate": 3.623269310528773e-06, + "loss": 0.2819, + "step": 13283 + }, + { + "epoch": 0.6012219959266802, + "grad_norm": 0.3169460283991881, + "learning_rate": 3.622564735253072e-06, + "loss": 0.5014, + "step": 13284 + }, + { + "epoch": 0.6012672550350758, + "grad_norm": 0.6367798706528928, + "learning_rate": 3.6218601895742234e-06, + "loss": 0.3107, + "step": 13285 + }, + { + "epoch": 0.6013125141434714, + "grad_norm": 0.6489505146493503, + "learning_rate": 3.6211556735073704e-06, + "loss": 0.3321, + "step": 13286 + }, + { + "epoch": 0.6013577732518669, + "grad_norm": 0.30221069429949665, + "learning_rate": 3.620451187067644e-06, + "loss": 0.4626, + "step": 13287 + }, + { + "epoch": 0.6014030323602625, + "grad_norm": 0.6338490368445091, + "learning_rate": 3.619746730270185e-06, + "loss": 0.2908, + "step": 13288 + }, + { + "epoch": 0.6014482914686581, + "grad_norm": 0.6348180019316083, + "learning_rate": 3.619042303130129e-06, + "loss": 0.3412, + "step": 13289 + }, + { + "epoch": 0.6014935505770537, + "grad_norm": 0.2727124763310069, + "learning_rate": 3.618337905662616e-06, + "loss": 0.4547, + "step": 13290 + }, + { + "epoch": 0.6015388096854491, + "grad_norm": 0.6508143398730285, + "learning_rate": 3.6176335378827747e-06, + "loss": 0.3109, + "step": 13291 + }, + { + "epoch": 0.6015840687938447, + "grad_norm": 0.8292016353801743, + "learning_rate": 3.616929199805744e-06, + "loss": 0.4804, + "step": 13292 + }, + { + "epoch": 0.6016293279022403, + "grad_norm": 0.6238778648347993, + "learning_rate": 3.616224891446658e-06, + "loss": 0.2883, + "step": 13293 + }, + { + "epoch": 0.6016745870106359, + "grad_norm": 0.6587510313119597, + "learning_rate": 3.615520612820649e-06, + "loss": 0.354, + "step": 13294 + }, + { + "epoch": 0.6017198461190315, + "grad_norm": 0.6311257520628122, + "learning_rate": 3.6148163639428475e-06, + "loss": 0.3445, + "step": 13295 + }, + { + "epoch": 0.601765105227427, + "grad_norm": 0.6216525729643326, + "learning_rate": 3.6141121448283904e-06, + "loss": 0.3234, + "step": 13296 + }, + { + "epoch": 0.6018103643358226, + "grad_norm": 0.7357679569482551, + "learning_rate": 3.6134079554924062e-06, + "loss": 0.3193, + "step": 13297 + }, + { + "epoch": 0.6018556234442182, + "grad_norm": 0.3342103094602393, + "learning_rate": 3.6127037959500267e-06, + "loss": 0.4836, + "step": 13298 + }, + { + "epoch": 0.6019008825526138, + "grad_norm": 0.6043645496196589, + "learning_rate": 3.6119996662163824e-06, + "loss": 0.3104, + "step": 13299 + }, + { + "epoch": 0.6019461416610092, + "grad_norm": 0.32463802624158095, + "learning_rate": 3.6112955663066008e-06, + "loss": 0.4925, + "step": 13300 + }, + { + "epoch": 0.6019914007694048, + "grad_norm": 0.6827651046445397, + "learning_rate": 3.610591496235813e-06, + "loss": 0.3298, + "step": 13301 + }, + { + "epoch": 0.6020366598778004, + "grad_norm": 0.6881451561163642, + "learning_rate": 3.6098874560191465e-06, + "loss": 0.3104, + "step": 13302 + }, + { + "epoch": 0.602081918986196, + "grad_norm": 0.2740287878315835, + "learning_rate": 3.609183445671731e-06, + "loss": 0.4674, + "step": 13303 + }, + { + "epoch": 0.6021271780945915, + "grad_norm": 0.6265971913851386, + "learning_rate": 3.6084794652086892e-06, + "loss": 0.2934, + "step": 13304 + }, + { + "epoch": 0.6021724372029871, + "grad_norm": 0.27654597975083506, + "learning_rate": 3.607775514645151e-06, + "loss": 0.4862, + "step": 13305 + }, + { + "epoch": 0.6022176963113827, + "grad_norm": 0.6190244001689355, + "learning_rate": 3.607071593996242e-06, + "loss": 0.3091, + "step": 13306 + }, + { + "epoch": 0.6022629554197783, + "grad_norm": 0.6018265444032312, + "learning_rate": 3.606367703277085e-06, + "loss": 0.3189, + "step": 13307 + }, + { + "epoch": 0.6023082145281738, + "grad_norm": 0.6326827208686144, + "learning_rate": 3.6056638425028068e-06, + "loss": 0.2785, + "step": 13308 + }, + { + "epoch": 0.6023534736365693, + "grad_norm": 1.0066003674078408, + "learning_rate": 3.6049600116885307e-06, + "loss": 0.3024, + "step": 13309 + }, + { + "epoch": 0.6023987327449649, + "grad_norm": 0.3089674950974842, + "learning_rate": 3.6042562108493772e-06, + "loss": 0.4698, + "step": 13310 + }, + { + "epoch": 0.6024439918533605, + "grad_norm": 0.6178521392338734, + "learning_rate": 3.603552440000472e-06, + "loss": 0.2933, + "step": 13311 + }, + { + "epoch": 0.6024892509617561, + "grad_norm": 0.6770493087166252, + "learning_rate": 3.6028486991569376e-06, + "loss": 0.3329, + "step": 13312 + }, + { + "epoch": 0.6025345100701516, + "grad_norm": 0.6601070659367079, + "learning_rate": 3.6021449883338923e-06, + "loss": 0.3046, + "step": 13313 + }, + { + "epoch": 0.6025797691785472, + "grad_norm": 0.6320469520340588, + "learning_rate": 3.6014413075464573e-06, + "loss": 0.3644, + "step": 13314 + }, + { + "epoch": 0.6026250282869428, + "grad_norm": 0.6056737659973819, + "learning_rate": 3.600737656809754e-06, + "loss": 0.2946, + "step": 13315 + }, + { + "epoch": 0.6026702873953383, + "grad_norm": 0.5849266824543456, + "learning_rate": 3.600034036138902e-06, + "loss": 0.2944, + "step": 13316 + }, + { + "epoch": 0.6027155465037338, + "grad_norm": 0.30346221504831905, + "learning_rate": 3.5993304455490173e-06, + "loss": 0.4546, + "step": 13317 + }, + { + "epoch": 0.6027608056121294, + "grad_norm": 0.6478262489689017, + "learning_rate": 3.598626885055219e-06, + "loss": 0.3263, + "step": 13318 + }, + { + "epoch": 0.602806064720525, + "grad_norm": 0.6031657905281659, + "learning_rate": 3.597923354672628e-06, + "loss": 0.2907, + "step": 13319 + }, + { + "epoch": 0.6028513238289206, + "grad_norm": 0.6356314317536731, + "learning_rate": 3.597219854416355e-06, + "loss": 0.3628, + "step": 13320 + }, + { + "epoch": 0.6028965829373162, + "grad_norm": 0.2929946520224333, + "learning_rate": 3.59651638430152e-06, + "loss": 0.4826, + "step": 13321 + }, + { + "epoch": 0.6029418420457117, + "grad_norm": 0.2812105966859869, + "learning_rate": 3.595812944343239e-06, + "loss": 0.4851, + "step": 13322 + }, + { + "epoch": 0.6029871011541073, + "grad_norm": 0.6210192622739172, + "learning_rate": 3.5951095345566232e-06, + "loss": 0.3125, + "step": 13323 + }, + { + "epoch": 0.6030323602625028, + "grad_norm": 0.6331813274922533, + "learning_rate": 3.5944061549567876e-06, + "loss": 0.2914, + "step": 13324 + }, + { + "epoch": 0.6030776193708984, + "grad_norm": 0.6672025406942507, + "learning_rate": 3.59370280555885e-06, + "loss": 0.2978, + "step": 13325 + }, + { + "epoch": 0.6031228784792939, + "grad_norm": 0.2833025363843245, + "learning_rate": 3.592999486377918e-06, + "loss": 0.4624, + "step": 13326 + }, + { + "epoch": 0.6031681375876895, + "grad_norm": 0.6844554810860389, + "learning_rate": 3.592296197429106e-06, + "loss": 0.3058, + "step": 13327 + }, + { + "epoch": 0.6032133966960851, + "grad_norm": 0.2904387392592274, + "learning_rate": 3.591592938727526e-06, + "loss": 0.5005, + "step": 13328 + }, + { + "epoch": 0.6032586558044807, + "grad_norm": 0.6018361904817027, + "learning_rate": 3.5908897102882868e-06, + "loss": 0.2839, + "step": 13329 + }, + { + "epoch": 0.6033039149128763, + "grad_norm": 0.2586940970741543, + "learning_rate": 3.5901865121265e-06, + "loss": 0.4737, + "step": 13330 + }, + { + "epoch": 0.6033491740212718, + "grad_norm": 0.27398799440128146, + "learning_rate": 3.5894833442572763e-06, + "loss": 0.4526, + "step": 13331 + }, + { + "epoch": 0.6033944331296673, + "grad_norm": 0.622998206410414, + "learning_rate": 3.588780206695724e-06, + "loss": 0.3301, + "step": 13332 + }, + { + "epoch": 0.6034396922380629, + "grad_norm": 0.6808638843999142, + "learning_rate": 3.5880770994569485e-06, + "loss": 0.313, + "step": 13333 + }, + { + "epoch": 0.6034849513464585, + "grad_norm": 0.6318067758791139, + "learning_rate": 3.587374022556061e-06, + "loss": 0.2803, + "step": 13334 + }, + { + "epoch": 0.603530210454854, + "grad_norm": 0.6496949493400798, + "learning_rate": 3.5866709760081684e-06, + "loss": 0.3366, + "step": 13335 + }, + { + "epoch": 0.6035754695632496, + "grad_norm": 0.6469781095008104, + "learning_rate": 3.585967959828375e-06, + "loss": 0.33, + "step": 13336 + }, + { + "epoch": 0.6036207286716452, + "grad_norm": 0.6061078384711173, + "learning_rate": 3.5852649740317858e-06, + "loss": 0.3142, + "step": 13337 + }, + { + "epoch": 0.6036659877800408, + "grad_norm": 0.6063965955355926, + "learning_rate": 3.58456201863351e-06, + "loss": 0.3582, + "step": 13338 + }, + { + "epoch": 0.6037112468884362, + "grad_norm": 0.6221814436406296, + "learning_rate": 3.5838590936486467e-06, + "loss": 0.2983, + "step": 13339 + }, + { + "epoch": 0.6037565059968318, + "grad_norm": 0.6344451011991756, + "learning_rate": 3.583156199092303e-06, + "loss": 0.3154, + "step": 13340 + }, + { + "epoch": 0.6038017651052274, + "grad_norm": 0.5959380289884578, + "learning_rate": 3.582453334979582e-06, + "loss": 0.3203, + "step": 13341 + }, + { + "epoch": 0.603847024213623, + "grad_norm": 0.6284294962048357, + "learning_rate": 3.5817505013255847e-06, + "loss": 0.316, + "step": 13342 + }, + { + "epoch": 0.6038922833220186, + "grad_norm": 0.6991259807827724, + "learning_rate": 3.581047698145412e-06, + "loss": 0.3196, + "step": 13343 + }, + { + "epoch": 0.6039375424304141, + "grad_norm": 0.6036458234344553, + "learning_rate": 3.580344925454167e-06, + "loss": 0.2832, + "step": 13344 + }, + { + "epoch": 0.6039828015388097, + "grad_norm": 0.6094023062667671, + "learning_rate": 3.5796421832669503e-06, + "loss": 0.313, + "step": 13345 + }, + { + "epoch": 0.6040280606472053, + "grad_norm": 0.6116601881054674, + "learning_rate": 3.5789394715988602e-06, + "loss": 0.358, + "step": 13346 + }, + { + "epoch": 0.6040733197556009, + "grad_norm": 0.33519227315021916, + "learning_rate": 3.578236790464995e-06, + "loss": 0.4588, + "step": 13347 + }, + { + "epoch": 0.6041185788639963, + "grad_norm": 1.018762962468953, + "learning_rate": 3.5775341398804585e-06, + "loss": 0.3414, + "step": 13348 + }, + { + "epoch": 0.6041638379723919, + "grad_norm": 0.6323299652027151, + "learning_rate": 3.576831519860341e-06, + "loss": 0.3244, + "step": 13349 + }, + { + "epoch": 0.6042090970807875, + "grad_norm": 0.6310873421400982, + "learning_rate": 3.576128930419744e-06, + "loss": 0.2797, + "step": 13350 + }, + { + "epoch": 0.6042543561891831, + "grad_norm": 0.2841091768586345, + "learning_rate": 3.575426371573764e-06, + "loss": 0.4488, + "step": 13351 + }, + { + "epoch": 0.6042996152975786, + "grad_norm": 0.6497447780283677, + "learning_rate": 3.5747238433374952e-06, + "loss": 0.2842, + "step": 13352 + }, + { + "epoch": 0.6043448744059742, + "grad_norm": 0.6442050756032109, + "learning_rate": 3.5740213457260333e-06, + "loss": 0.3004, + "step": 13353 + }, + { + "epoch": 0.6043901335143698, + "grad_norm": 0.6022216934016352, + "learning_rate": 3.573318878754475e-06, + "loss": 0.2854, + "step": 13354 + }, + { + "epoch": 0.6044353926227654, + "grad_norm": 0.6266066483654207, + "learning_rate": 3.5726164424379106e-06, + "loss": 0.3581, + "step": 13355 + }, + { + "epoch": 0.604480651731161, + "grad_norm": 0.2971858300753773, + "learning_rate": 3.571914036791435e-06, + "loss": 0.4978, + "step": 13356 + }, + { + "epoch": 0.6045259108395564, + "grad_norm": 0.6109185868269074, + "learning_rate": 3.571211661830142e-06, + "loss": 0.316, + "step": 13357 + }, + { + "epoch": 0.604571169947952, + "grad_norm": 0.6521278981812285, + "learning_rate": 3.5705093175691195e-06, + "loss": 0.2798, + "step": 13358 + }, + { + "epoch": 0.6046164290563476, + "grad_norm": 0.6130984616975004, + "learning_rate": 3.5698070040234633e-06, + "loss": 0.3216, + "step": 13359 + }, + { + "epoch": 0.6046616881647432, + "grad_norm": 0.6458711888618369, + "learning_rate": 3.569104721208262e-06, + "loss": 0.3053, + "step": 13360 + }, + { + "epoch": 0.6047069472731387, + "grad_norm": 0.585594980145394, + "learning_rate": 3.5684024691386067e-06, + "loss": 0.3134, + "step": 13361 + }, + { + "epoch": 0.6047522063815343, + "grad_norm": 0.6469463908589563, + "learning_rate": 3.567700247829583e-06, + "loss": 0.3302, + "step": 13362 + }, + { + "epoch": 0.6047974654899299, + "grad_norm": 0.6154644667480161, + "learning_rate": 3.5669980572962836e-06, + "loss": 0.2925, + "step": 13363 + }, + { + "epoch": 0.6048427245983254, + "grad_norm": 0.6104621738919574, + "learning_rate": 3.5662958975537955e-06, + "loss": 0.2968, + "step": 13364 + }, + { + "epoch": 0.604887983706721, + "grad_norm": 0.6360070530683783, + "learning_rate": 3.5655937686172037e-06, + "loss": 0.3088, + "step": 13365 + }, + { + "epoch": 0.6049332428151165, + "grad_norm": 0.5661342734041364, + "learning_rate": 3.5648916705015964e-06, + "loss": 0.3785, + "step": 13366 + }, + { + "epoch": 0.6049785019235121, + "grad_norm": 0.5702960130273277, + "learning_rate": 3.5641896032220626e-06, + "loss": 0.3104, + "step": 13367 + }, + { + "epoch": 0.6050237610319077, + "grad_norm": 0.6131253656760342, + "learning_rate": 3.5634875667936803e-06, + "loss": 0.3048, + "step": 13368 + }, + { + "epoch": 0.6050690201403033, + "grad_norm": 0.6174377700558944, + "learning_rate": 3.56278556123154e-06, + "loss": 0.3033, + "step": 13369 + }, + { + "epoch": 0.6051142792486988, + "grad_norm": 0.6471682654144832, + "learning_rate": 3.562083586550725e-06, + "loss": 0.3133, + "step": 13370 + }, + { + "epoch": 0.6051595383570944, + "grad_norm": 0.36123504228980124, + "learning_rate": 3.5613816427663162e-06, + "loss": 0.4485, + "step": 13371 + }, + { + "epoch": 0.6052047974654899, + "grad_norm": 0.6044077284848175, + "learning_rate": 3.5606797298933967e-06, + "loss": 0.2823, + "step": 13372 + }, + { + "epoch": 0.6052500565738855, + "grad_norm": 0.6937280255969518, + "learning_rate": 3.5599778479470498e-06, + "loss": 0.295, + "step": 13373 + }, + { + "epoch": 0.605295315682281, + "grad_norm": 0.6140668102863763, + "learning_rate": 3.5592759969423573e-06, + "loss": 0.2912, + "step": 13374 + }, + { + "epoch": 0.6053405747906766, + "grad_norm": 0.6584099771068982, + "learning_rate": 3.5585741768943982e-06, + "loss": 0.3286, + "step": 13375 + }, + { + "epoch": 0.6053858338990722, + "grad_norm": 0.613803210549905, + "learning_rate": 3.5578723878182518e-06, + "loss": 0.3716, + "step": 13376 + }, + { + "epoch": 0.6054310930074678, + "grad_norm": 0.7060930148845469, + "learning_rate": 3.557170629729001e-06, + "loss": 0.3128, + "step": 13377 + }, + { + "epoch": 0.6054763521158634, + "grad_norm": 0.6792705093090348, + "learning_rate": 3.556468902641721e-06, + "loss": 0.3283, + "step": 13378 + }, + { + "epoch": 0.6055216112242588, + "grad_norm": 0.8428630717752048, + "learning_rate": 3.555767206571491e-06, + "loss": 0.3075, + "step": 13379 + }, + { + "epoch": 0.6055668703326544, + "grad_norm": 0.5752766645066538, + "learning_rate": 3.555065541533389e-06, + "loss": 0.3179, + "step": 13380 + }, + { + "epoch": 0.60561212944105, + "grad_norm": 0.3290549372923084, + "learning_rate": 3.5543639075424897e-06, + "loss": 0.4824, + "step": 13381 + }, + { + "epoch": 0.6056573885494456, + "grad_norm": 0.6225255968894837, + "learning_rate": 3.5536623046138685e-06, + "loss": 0.3344, + "step": 13382 + }, + { + "epoch": 0.6057026476578411, + "grad_norm": 0.5772962530922928, + "learning_rate": 3.552960732762605e-06, + "loss": 0.2937, + "step": 13383 + }, + { + "epoch": 0.6057479067662367, + "grad_norm": 0.28994310594926126, + "learning_rate": 3.5522591920037698e-06, + "loss": 0.4464, + "step": 13384 + }, + { + "epoch": 0.6057931658746323, + "grad_norm": 0.7948689428509049, + "learning_rate": 3.5515576823524377e-06, + "loss": 0.3062, + "step": 13385 + }, + { + "epoch": 0.6058384249830279, + "grad_norm": 0.6432547158435971, + "learning_rate": 3.5508562038236817e-06, + "loss": 0.3117, + "step": 13386 + }, + { + "epoch": 0.6058836840914233, + "grad_norm": 0.713155852051197, + "learning_rate": 3.5501547564325777e-06, + "loss": 0.3015, + "step": 13387 + }, + { + "epoch": 0.6059289431998189, + "grad_norm": 0.5769908367074335, + "learning_rate": 3.549453340194194e-06, + "loss": 0.3255, + "step": 13388 + }, + { + "epoch": 0.6059742023082145, + "grad_norm": 0.5942258901578376, + "learning_rate": 3.5487519551236025e-06, + "loss": 0.3209, + "step": 13389 + }, + { + "epoch": 0.6060194614166101, + "grad_norm": 0.29522903900146297, + "learning_rate": 3.548050601235876e-06, + "loss": 0.4456, + "step": 13390 + }, + { + "epoch": 0.6060647205250057, + "grad_norm": 0.599766040405492, + "learning_rate": 3.54734927854608e-06, + "loss": 0.3175, + "step": 13391 + }, + { + "epoch": 0.6061099796334012, + "grad_norm": 0.6533666125281052, + "learning_rate": 3.5466479870692883e-06, + "loss": 0.3344, + "step": 13392 + }, + { + "epoch": 0.6061552387417968, + "grad_norm": 0.3529903275260208, + "learning_rate": 3.5459467268205683e-06, + "loss": 0.4758, + "step": 13393 + }, + { + "epoch": 0.6062004978501924, + "grad_norm": 0.3486681973081686, + "learning_rate": 3.5452454978149864e-06, + "loss": 0.4692, + "step": 13394 + }, + { + "epoch": 0.606245756958588, + "grad_norm": 0.6403340211996071, + "learning_rate": 3.5445443000676096e-06, + "loss": 0.3866, + "step": 13395 + }, + { + "epoch": 0.6062910160669834, + "grad_norm": 0.6369882531039904, + "learning_rate": 3.543843133593509e-06, + "loss": 0.2986, + "step": 13396 + }, + { + "epoch": 0.606336275175379, + "grad_norm": 0.6608157083343831, + "learning_rate": 3.5431419984077444e-06, + "loss": 0.3006, + "step": 13397 + }, + { + "epoch": 0.6063815342837746, + "grad_norm": 0.6016818949787632, + "learning_rate": 3.542440894525384e-06, + "loss": 0.3066, + "step": 13398 + }, + { + "epoch": 0.6064267933921702, + "grad_norm": 0.6261206840159689, + "learning_rate": 3.541739821961494e-06, + "loss": 0.3113, + "step": 13399 + }, + { + "epoch": 0.6064720525005657, + "grad_norm": 0.3348048032808948, + "learning_rate": 3.5410387807311353e-06, + "loss": 0.4883, + "step": 13400 + }, + { + "epoch": 0.6065173116089613, + "grad_norm": 0.6042801979473782, + "learning_rate": 3.5403377708493714e-06, + "loss": 0.3508, + "step": 13401 + }, + { + "epoch": 0.6065625707173569, + "grad_norm": 0.30132407700194175, + "learning_rate": 3.539636792331267e-06, + "loss": 0.4753, + "step": 13402 + }, + { + "epoch": 0.6066078298257525, + "grad_norm": 0.645422373017156, + "learning_rate": 3.538935845191884e-06, + "loss": 0.3028, + "step": 13403 + }, + { + "epoch": 0.606653088934148, + "grad_norm": 0.6487120512379727, + "learning_rate": 3.5382349294462803e-06, + "loss": 0.3165, + "step": 13404 + }, + { + "epoch": 0.6066983480425435, + "grad_norm": 0.6076635547412993, + "learning_rate": 3.5375340451095186e-06, + "loss": 0.3097, + "step": 13405 + }, + { + "epoch": 0.6067436071509391, + "grad_norm": 0.6358029989813431, + "learning_rate": 3.53683319219666e-06, + "loss": 0.2996, + "step": 13406 + }, + { + "epoch": 0.6067888662593347, + "grad_norm": 0.2926919659262302, + "learning_rate": 3.536132370722761e-06, + "loss": 0.5306, + "step": 13407 + }, + { + "epoch": 0.6068341253677303, + "grad_norm": 0.2741018832720857, + "learning_rate": 3.5354315807028826e-06, + "loss": 0.4834, + "step": 13408 + }, + { + "epoch": 0.6068793844761258, + "grad_norm": 0.6091839935589203, + "learning_rate": 3.5347308221520814e-06, + "loss": 0.2883, + "step": 13409 + }, + { + "epoch": 0.6069246435845214, + "grad_norm": 0.6006493393550284, + "learning_rate": 3.5340300950854135e-06, + "loss": 0.2632, + "step": 13410 + }, + { + "epoch": 0.606969902692917, + "grad_norm": 0.5984574567811332, + "learning_rate": 3.5333293995179362e-06, + "loss": 0.281, + "step": 13411 + }, + { + "epoch": 0.6070151618013125, + "grad_norm": 0.5765612346379179, + "learning_rate": 3.5326287354647077e-06, + "loss": 0.3004, + "step": 13412 + }, + { + "epoch": 0.6070604209097081, + "grad_norm": 0.5907166678593426, + "learning_rate": 3.5319281029407793e-06, + "loss": 0.3161, + "step": 13413 + }, + { + "epoch": 0.6071056800181036, + "grad_norm": 0.6486670915591076, + "learning_rate": 3.5312275019612065e-06, + "loss": 0.3119, + "step": 13414 + }, + { + "epoch": 0.6071509391264992, + "grad_norm": 0.2855136328850759, + "learning_rate": 3.530526932541045e-06, + "loss": 0.4739, + "step": 13415 + }, + { + "epoch": 0.6071961982348948, + "grad_norm": 0.28702072041527865, + "learning_rate": 3.529826394695347e-06, + "loss": 0.4689, + "step": 13416 + }, + { + "epoch": 0.6072414573432904, + "grad_norm": 1.0163123512406775, + "learning_rate": 3.529125888439164e-06, + "loss": 0.2641, + "step": 13417 + }, + { + "epoch": 0.6072867164516859, + "grad_norm": 0.3417303228950468, + "learning_rate": 3.5284254137875472e-06, + "loss": 0.4725, + "step": 13418 + }, + { + "epoch": 0.6073319755600814, + "grad_norm": 0.6336573660997611, + "learning_rate": 3.5277249707555507e-06, + "loss": 0.3404, + "step": 13419 + }, + { + "epoch": 0.607377234668477, + "grad_norm": 0.27356633073337205, + "learning_rate": 3.527024559358221e-06, + "loss": 0.4588, + "step": 13420 + }, + { + "epoch": 0.6074224937768726, + "grad_norm": 2.292413323718286, + "learning_rate": 3.5263241796106097e-06, + "loss": 0.3182, + "step": 13421 + }, + { + "epoch": 0.6074677528852681, + "grad_norm": 0.6645893638135631, + "learning_rate": 3.525623831527767e-06, + "loss": 0.294, + "step": 13422 + }, + { + "epoch": 0.6075130119936637, + "grad_norm": 0.6186388894118936, + "learning_rate": 3.5249235151247398e-06, + "loss": 0.3234, + "step": 13423 + }, + { + "epoch": 0.6075582711020593, + "grad_norm": 0.633921444031, + "learning_rate": 3.5242232304165736e-06, + "loss": 0.3798, + "step": 13424 + }, + { + "epoch": 0.6076035302104549, + "grad_norm": 0.6255478093227299, + "learning_rate": 3.5235229774183217e-06, + "loss": 0.3376, + "step": 13425 + }, + { + "epoch": 0.6076487893188505, + "grad_norm": 0.6235753491898307, + "learning_rate": 3.522822756145022e-06, + "loss": 0.298, + "step": 13426 + }, + { + "epoch": 0.607694048427246, + "grad_norm": 0.6644382198804875, + "learning_rate": 3.5221225666117272e-06, + "loss": 0.3151, + "step": 13427 + }, + { + "epoch": 0.6077393075356415, + "grad_norm": 0.6755344353780085, + "learning_rate": 3.52142240883348e-06, + "loss": 0.2818, + "step": 13428 + }, + { + "epoch": 0.6077845666440371, + "grad_norm": 0.6135967249537492, + "learning_rate": 3.520722282825323e-06, + "loss": 0.319, + "step": 13429 + }, + { + "epoch": 0.6078298257524327, + "grad_norm": 0.6180751455692161, + "learning_rate": 3.520022188602299e-06, + "loss": 0.314, + "step": 13430 + }, + { + "epoch": 0.6078750848608282, + "grad_norm": 0.6419498870555718, + "learning_rate": 3.519322126179455e-06, + "loss": 0.3285, + "step": 13431 + }, + { + "epoch": 0.6079203439692238, + "grad_norm": 0.34575186377873107, + "learning_rate": 3.518622095571831e-06, + "loss": 0.4893, + "step": 13432 + }, + { + "epoch": 0.6079656030776194, + "grad_norm": 0.6326462699830524, + "learning_rate": 3.517922096794468e-06, + "loss": 0.3333, + "step": 13433 + }, + { + "epoch": 0.608010862186015, + "grad_norm": 0.6098311111428941, + "learning_rate": 3.5172221298624067e-06, + "loss": 0.3326, + "step": 13434 + }, + { + "epoch": 0.6080561212944104, + "grad_norm": 0.2651308468352901, + "learning_rate": 3.516522194790689e-06, + "loss": 0.4515, + "step": 13435 + }, + { + "epoch": 0.608101380402806, + "grad_norm": 0.27729441427110796, + "learning_rate": 3.5158222915943524e-06, + "loss": 0.456, + "step": 13436 + }, + { + "epoch": 0.6081466395112016, + "grad_norm": 0.6091409290588269, + "learning_rate": 3.5151224202884364e-06, + "loss": 0.3203, + "step": 13437 + }, + { + "epoch": 0.6081918986195972, + "grad_norm": 0.2652121404589406, + "learning_rate": 3.5144225808879806e-06, + "loss": 0.4623, + "step": 13438 + }, + { + "epoch": 0.6082371577279928, + "grad_norm": 0.6086578023770722, + "learning_rate": 3.513722773408018e-06, + "loss": 0.2576, + "step": 13439 + }, + { + "epoch": 0.6082824168363883, + "grad_norm": 0.7042375101393927, + "learning_rate": 3.51302299786359e-06, + "loss": 0.3113, + "step": 13440 + }, + { + "epoch": 0.6083276759447839, + "grad_norm": 0.6347651648407661, + "learning_rate": 3.512323254269732e-06, + "loss": 0.3279, + "step": 13441 + }, + { + "epoch": 0.6083729350531795, + "grad_norm": 0.6096456656656934, + "learning_rate": 3.5116235426414767e-06, + "loss": 0.2872, + "step": 13442 + }, + { + "epoch": 0.608418194161575, + "grad_norm": 0.6177301460681318, + "learning_rate": 3.51092386299386e-06, + "loss": 0.2631, + "step": 13443 + }, + { + "epoch": 0.6084634532699705, + "grad_norm": 0.6562325905779547, + "learning_rate": 3.5102242153419164e-06, + "loss": 0.3088, + "step": 13444 + }, + { + "epoch": 0.6085087123783661, + "grad_norm": 0.3103744888178165, + "learning_rate": 3.50952459970068e-06, + "loss": 0.4631, + "step": 13445 + }, + { + "epoch": 0.6085539714867617, + "grad_norm": 0.6626718082982688, + "learning_rate": 3.5088250160851817e-06, + "loss": 0.3588, + "step": 13446 + }, + { + "epoch": 0.6085992305951573, + "grad_norm": 0.6568108014435411, + "learning_rate": 3.5081254645104525e-06, + "loss": 0.3407, + "step": 13447 + }, + { + "epoch": 0.6086444897035529, + "grad_norm": 0.27975503482648345, + "learning_rate": 3.507425944991529e-06, + "loss": 0.4779, + "step": 13448 + }, + { + "epoch": 0.6086897488119484, + "grad_norm": 0.6176299986228942, + "learning_rate": 3.506726457543434e-06, + "loss": 0.3334, + "step": 13449 + }, + { + "epoch": 0.608735007920344, + "grad_norm": 0.701602134769069, + "learning_rate": 3.5060270021812027e-06, + "loss": 0.2849, + "step": 13450 + }, + { + "epoch": 0.6087802670287396, + "grad_norm": 0.28129388479860296, + "learning_rate": 3.5053275789198634e-06, + "loss": 0.4584, + "step": 13451 + }, + { + "epoch": 0.6088255261371351, + "grad_norm": 0.7571593717790948, + "learning_rate": 3.5046281877744424e-06, + "loss": 0.3203, + "step": 13452 + }, + { + "epoch": 0.6088707852455306, + "grad_norm": 0.6840506216804946, + "learning_rate": 3.503928828759969e-06, + "loss": 0.2742, + "step": 13453 + }, + { + "epoch": 0.6089160443539262, + "grad_norm": 0.6572465584720832, + "learning_rate": 3.503229501891472e-06, + "loss": 0.3317, + "step": 13454 + }, + { + "epoch": 0.6089613034623218, + "grad_norm": 0.6389350839603637, + "learning_rate": 3.5025302071839746e-06, + "loss": 0.3169, + "step": 13455 + }, + { + "epoch": 0.6090065625707174, + "grad_norm": 0.5981348804029151, + "learning_rate": 3.501830944652504e-06, + "loss": 0.3028, + "step": 13456 + }, + { + "epoch": 0.6090518216791129, + "grad_norm": 0.26849945214336984, + "learning_rate": 3.5011317143120845e-06, + "loss": 0.4633, + "step": 13457 + }, + { + "epoch": 0.6090970807875085, + "grad_norm": 0.7653660823165149, + "learning_rate": 3.5004325161777437e-06, + "loss": 0.3132, + "step": 13458 + }, + { + "epoch": 0.609142339895904, + "grad_norm": 0.6098125186287409, + "learning_rate": 3.4997333502644994e-06, + "loss": 0.3269, + "step": 13459 + }, + { + "epoch": 0.6091875990042996, + "grad_norm": 0.5955342692604938, + "learning_rate": 3.499034216587379e-06, + "loss": 0.3019, + "step": 13460 + }, + { + "epoch": 0.6092328581126952, + "grad_norm": 0.28986142679869115, + "learning_rate": 3.4983351151614043e-06, + "loss": 0.4498, + "step": 13461 + }, + { + "epoch": 0.6092781172210907, + "grad_norm": 0.6228379080550507, + "learning_rate": 3.4976360460015953e-06, + "loss": 0.304, + "step": 13462 + }, + { + "epoch": 0.6093233763294863, + "grad_norm": 0.6021659146296512, + "learning_rate": 3.496937009122972e-06, + "loss": 0.3011, + "step": 13463 + }, + { + "epoch": 0.6093686354378819, + "grad_norm": 1.0426986399623595, + "learning_rate": 3.4962380045405585e-06, + "loss": 0.3529, + "step": 13464 + }, + { + "epoch": 0.6094138945462775, + "grad_norm": 0.7277310854494984, + "learning_rate": 3.4955390322693704e-06, + "loss": 0.3446, + "step": 13465 + }, + { + "epoch": 0.609459153654673, + "grad_norm": 0.6511462469867378, + "learning_rate": 3.4948400923244286e-06, + "loss": 0.2836, + "step": 13466 + }, + { + "epoch": 0.6095044127630685, + "grad_norm": 0.6410223484258175, + "learning_rate": 3.4941411847207505e-06, + "loss": 0.3008, + "step": 13467 + }, + { + "epoch": 0.6095496718714641, + "grad_norm": 0.6357391756745651, + "learning_rate": 3.4934423094733516e-06, + "loss": 0.3549, + "step": 13468 + }, + { + "epoch": 0.6095949309798597, + "grad_norm": 0.8127095590421107, + "learning_rate": 3.492743466597252e-06, + "loss": 0.3126, + "step": 13469 + }, + { + "epoch": 0.6096401900882552, + "grad_norm": 0.6054784960538481, + "learning_rate": 3.4920446561074673e-06, + "loss": 0.3239, + "step": 13470 + }, + { + "epoch": 0.6096854491966508, + "grad_norm": 0.6112478540856312, + "learning_rate": 3.49134587801901e-06, + "loss": 0.3085, + "step": 13471 + }, + { + "epoch": 0.6097307083050464, + "grad_norm": 0.655204505443951, + "learning_rate": 3.4906471323468955e-06, + "loss": 0.2751, + "step": 13472 + }, + { + "epoch": 0.609775967413442, + "grad_norm": 0.6170945340887124, + "learning_rate": 3.4899484191061394e-06, + "loss": 0.2883, + "step": 13473 + }, + { + "epoch": 0.6098212265218376, + "grad_norm": 0.3074866732977803, + "learning_rate": 3.4892497383117553e-06, + "loss": 0.4773, + "step": 13474 + }, + { + "epoch": 0.609866485630233, + "grad_norm": 0.6455642516141017, + "learning_rate": 3.488551089978753e-06, + "loss": 0.3611, + "step": 13475 + }, + { + "epoch": 0.6099117447386286, + "grad_norm": 0.6488672160053081, + "learning_rate": 3.487852474122145e-06, + "loss": 0.3316, + "step": 13476 + }, + { + "epoch": 0.6099570038470242, + "grad_norm": 0.5809931280865803, + "learning_rate": 3.487153890756946e-06, + "loss": 0.3207, + "step": 13477 + }, + { + "epoch": 0.6100022629554198, + "grad_norm": 0.6430702923168584, + "learning_rate": 3.4864553398981606e-06, + "loss": 0.3279, + "step": 13478 + }, + { + "epoch": 0.6100475220638153, + "grad_norm": 0.6790153937287134, + "learning_rate": 3.4857568215608024e-06, + "loss": 0.2798, + "step": 13479 + }, + { + "epoch": 0.6100927811722109, + "grad_norm": 0.5692761648336583, + "learning_rate": 3.4850583357598805e-06, + "loss": 0.2876, + "step": 13480 + }, + { + "epoch": 0.6101380402806065, + "grad_norm": 2.1181254113615373, + "learning_rate": 3.4843598825104013e-06, + "loss": 0.2788, + "step": 13481 + }, + { + "epoch": 0.6101832993890021, + "grad_norm": 0.6191110811683334, + "learning_rate": 3.483661461827372e-06, + "loss": 0.3434, + "step": 13482 + }, + { + "epoch": 0.6102285584973977, + "grad_norm": 0.6159076449318271, + "learning_rate": 3.482963073725803e-06, + "loss": 0.3268, + "step": 13483 + }, + { + "epoch": 0.6102738176057931, + "grad_norm": 0.29597245858900834, + "learning_rate": 3.482264718220697e-06, + "loss": 0.457, + "step": 13484 + }, + { + "epoch": 0.6103190767141887, + "grad_norm": 0.6516268351726187, + "learning_rate": 3.481566395327062e-06, + "loss": 0.2979, + "step": 13485 + }, + { + "epoch": 0.6103643358225843, + "grad_norm": 0.5816022423269239, + "learning_rate": 3.480868105059899e-06, + "loss": 0.3135, + "step": 13486 + }, + { + "epoch": 0.6104095949309799, + "grad_norm": 0.5999535442164554, + "learning_rate": 3.4801698474342176e-06, + "loss": 0.2927, + "step": 13487 + }, + { + "epoch": 0.6104548540393754, + "grad_norm": 0.7022729924679137, + "learning_rate": 3.479471622465017e-06, + "loss": 0.316, + "step": 13488 + }, + { + "epoch": 0.610500113147771, + "grad_norm": 0.6212432678891955, + "learning_rate": 3.478773430167302e-06, + "loss": 0.3041, + "step": 13489 + }, + { + "epoch": 0.6105453722561666, + "grad_norm": 0.6285515117991046, + "learning_rate": 3.478075270556075e-06, + "loss": 0.2661, + "step": 13490 + }, + { + "epoch": 0.6105906313645622, + "grad_norm": 0.26767567328925884, + "learning_rate": 3.4773771436463346e-06, + "loss": 0.4593, + "step": 13491 + }, + { + "epoch": 0.6106358904729576, + "grad_norm": 0.6986766791328546, + "learning_rate": 3.4766790494530824e-06, + "loss": 0.3232, + "step": 13492 + }, + { + "epoch": 0.6106811495813532, + "grad_norm": 0.6301950786058406, + "learning_rate": 3.47598098799132e-06, + "loss": 0.3202, + "step": 13493 + }, + { + "epoch": 0.6107264086897488, + "grad_norm": 0.6723265814968592, + "learning_rate": 3.475282959276045e-06, + "loss": 0.3065, + "step": 13494 + }, + { + "epoch": 0.6107716677981444, + "grad_norm": 0.7080890105682609, + "learning_rate": 3.4745849633222566e-06, + "loss": 0.3619, + "step": 13495 + }, + { + "epoch": 0.61081692690654, + "grad_norm": 0.2851715591834395, + "learning_rate": 3.4738870001449533e-06, + "loss": 0.4856, + "step": 13496 + }, + { + "epoch": 0.6108621860149355, + "grad_norm": 0.6202568147808254, + "learning_rate": 3.4731890697591297e-06, + "loss": 0.3401, + "step": 13497 + }, + { + "epoch": 0.6109074451233311, + "grad_norm": 0.6091828279663052, + "learning_rate": 3.472491172179784e-06, + "loss": 0.2923, + "step": 13498 + }, + { + "epoch": 0.6109527042317267, + "grad_norm": 0.6171615737326103, + "learning_rate": 3.471793307421913e-06, + "loss": 0.324, + "step": 13499 + }, + { + "epoch": 0.6109979633401222, + "grad_norm": 0.2717845790814612, + "learning_rate": 3.4710954755005087e-06, + "loss": 0.4887, + "step": 13500 + }, + { + "epoch": 0.6110432224485177, + "grad_norm": 0.2640603581855504, + "learning_rate": 3.470397676430567e-06, + "loss": 0.48, + "step": 13501 + }, + { + "epoch": 0.6110884815569133, + "grad_norm": 0.6644426756295154, + "learning_rate": 3.469699910227082e-06, + "loss": 0.3468, + "step": 13502 + }, + { + "epoch": 0.6111337406653089, + "grad_norm": 0.25364988424384577, + "learning_rate": 3.4690021769050462e-06, + "loss": 0.467, + "step": 13503 + }, + { + "epoch": 0.6111789997737045, + "grad_norm": 0.6486677182552096, + "learning_rate": 3.4683044764794516e-06, + "loss": 0.2898, + "step": 13504 + }, + { + "epoch": 0.6112242588821, + "grad_norm": 0.26813038137037226, + "learning_rate": 3.4676068089652883e-06, + "loss": 0.4651, + "step": 13505 + }, + { + "epoch": 0.6112695179904956, + "grad_norm": 0.671097206847934, + "learning_rate": 3.466909174377551e-06, + "loss": 0.3387, + "step": 13506 + }, + { + "epoch": 0.6113147770988911, + "grad_norm": 0.6126280949793477, + "learning_rate": 3.466211572731224e-06, + "loss": 0.2821, + "step": 13507 + }, + { + "epoch": 0.6113600362072867, + "grad_norm": 0.5705294614602041, + "learning_rate": 3.465514004041301e-06, + "loss": 0.3244, + "step": 13508 + }, + { + "epoch": 0.6114052953156823, + "grad_norm": 0.5805375561356275, + "learning_rate": 3.4648164683227702e-06, + "loss": 0.3043, + "step": 13509 + }, + { + "epoch": 0.6114505544240778, + "grad_norm": 0.6295482157076951, + "learning_rate": 3.464118965590617e-06, + "loss": 0.2985, + "step": 13510 + }, + { + "epoch": 0.6114958135324734, + "grad_norm": 0.3022925327555956, + "learning_rate": 3.46342149585983e-06, + "loss": 0.4726, + "step": 13511 + }, + { + "epoch": 0.611541072640869, + "grad_norm": 0.27754398433310995, + "learning_rate": 3.462724059145397e-06, + "loss": 0.4703, + "step": 13512 + }, + { + "epoch": 0.6115863317492646, + "grad_norm": 0.6260772278890104, + "learning_rate": 3.4620266554623016e-06, + "loss": 0.3484, + "step": 13513 + }, + { + "epoch": 0.6116315908576601, + "grad_norm": 0.32133429856181145, + "learning_rate": 3.4613292848255307e-06, + "loss": 0.4927, + "step": 13514 + }, + { + "epoch": 0.6116768499660556, + "grad_norm": 0.2931450993845942, + "learning_rate": 3.460631947250066e-06, + "loss": 0.4865, + "step": 13515 + }, + { + "epoch": 0.6117221090744512, + "grad_norm": 0.6189908854062645, + "learning_rate": 3.459934642750895e-06, + "loss": 0.326, + "step": 13516 + }, + { + "epoch": 0.6117673681828468, + "grad_norm": 0.6752200443544591, + "learning_rate": 3.4592373713429984e-06, + "loss": 0.2923, + "step": 13517 + }, + { + "epoch": 0.6118126272912424, + "grad_norm": 0.5882537092542902, + "learning_rate": 3.4585401330413574e-06, + "loss": 0.3386, + "step": 13518 + }, + { + "epoch": 0.6118578863996379, + "grad_norm": 0.936107982255461, + "learning_rate": 3.4578429278609566e-06, + "loss": 0.299, + "step": 13519 + }, + { + "epoch": 0.6119031455080335, + "grad_norm": 0.3321527606429436, + "learning_rate": 3.4571457558167727e-06, + "loss": 0.4628, + "step": 13520 + }, + { + "epoch": 0.6119484046164291, + "grad_norm": 0.7063148875935, + "learning_rate": 3.4564486169237888e-06, + "loss": 0.3115, + "step": 13521 + }, + { + "epoch": 0.6119936637248247, + "grad_norm": 0.6443386328125476, + "learning_rate": 3.4557515111969843e-06, + "loss": 0.3187, + "step": 13522 + }, + { + "epoch": 0.6120389228332201, + "grad_norm": 0.6155674561221551, + "learning_rate": 3.4550544386513364e-06, + "loss": 0.3011, + "step": 13523 + }, + { + "epoch": 0.6120841819416157, + "grad_norm": 0.6732227821436269, + "learning_rate": 3.4543573993018225e-06, + "loss": 0.3257, + "step": 13524 + }, + { + "epoch": 0.6121294410500113, + "grad_norm": 0.2799539037248683, + "learning_rate": 3.453660393163424e-06, + "loss": 0.4452, + "step": 13525 + }, + { + "epoch": 0.6121747001584069, + "grad_norm": 0.5963331242645898, + "learning_rate": 3.452963420251112e-06, + "loss": 0.3111, + "step": 13526 + }, + { + "epoch": 0.6122199592668024, + "grad_norm": 0.5948844068033212, + "learning_rate": 3.4522664805798643e-06, + "loss": 0.3555, + "step": 13527 + }, + { + "epoch": 0.612265218375198, + "grad_norm": 0.5394219952798536, + "learning_rate": 3.451569574164658e-06, + "loss": 0.3022, + "step": 13528 + }, + { + "epoch": 0.6123104774835936, + "grad_norm": 0.631473684109706, + "learning_rate": 3.4508727010204663e-06, + "loss": 0.2994, + "step": 13529 + }, + { + "epoch": 0.6123557365919892, + "grad_norm": 0.6854496656754867, + "learning_rate": 3.4501758611622606e-06, + "loss": 0.3709, + "step": 13530 + }, + { + "epoch": 0.6124009957003848, + "grad_norm": 0.6196837073329371, + "learning_rate": 3.449479054605016e-06, + "loss": 0.3225, + "step": 13531 + }, + { + "epoch": 0.6124462548087802, + "grad_norm": 0.2770645560779661, + "learning_rate": 3.448782281363706e-06, + "loss": 0.4733, + "step": 13532 + }, + { + "epoch": 0.6124915139171758, + "grad_norm": 0.5787548892775997, + "learning_rate": 3.4480855414533e-06, + "loss": 0.2879, + "step": 13533 + }, + { + "epoch": 0.6125367730255714, + "grad_norm": 0.5992493454284118, + "learning_rate": 3.4473888348887673e-06, + "loss": 0.292, + "step": 13534 + }, + { + "epoch": 0.612582032133967, + "grad_norm": 0.6407299956446944, + "learning_rate": 3.4466921616850847e-06, + "loss": 0.3218, + "step": 13535 + }, + { + "epoch": 0.6126272912423625, + "grad_norm": 0.6333286620983509, + "learning_rate": 3.445995521857213e-06, + "loss": 0.3027, + "step": 13536 + }, + { + "epoch": 0.6126725503507581, + "grad_norm": 0.662291156417707, + "learning_rate": 3.4452989154201256e-06, + "loss": 0.3287, + "step": 13537 + }, + { + "epoch": 0.6127178094591537, + "grad_norm": 0.7020675293337494, + "learning_rate": 3.4446023423887905e-06, + "loss": 0.3112, + "step": 13538 + }, + { + "epoch": 0.6127630685675493, + "grad_norm": 0.8046855346438109, + "learning_rate": 3.443905802778173e-06, + "loss": 0.3326, + "step": 13539 + }, + { + "epoch": 0.6128083276759447, + "grad_norm": 0.6657469550052136, + "learning_rate": 3.4432092966032397e-06, + "loss": 0.3219, + "step": 13540 + }, + { + "epoch": 0.6128535867843403, + "grad_norm": 0.3111574269257986, + "learning_rate": 3.4425128238789594e-06, + "loss": 0.4805, + "step": 13541 + }, + { + "epoch": 0.6128988458927359, + "grad_norm": 0.7803599969425479, + "learning_rate": 3.4418163846202945e-06, + "loss": 0.3149, + "step": 13542 + }, + { + "epoch": 0.6129441050011315, + "grad_norm": 0.6984892738731218, + "learning_rate": 3.4411199788422093e-06, + "loss": 0.2993, + "step": 13543 + }, + { + "epoch": 0.6129893641095271, + "grad_norm": 0.3298082981291895, + "learning_rate": 3.4404236065596673e-06, + "loss": 0.4713, + "step": 13544 + }, + { + "epoch": 0.6130346232179226, + "grad_norm": 0.641591555289859, + "learning_rate": 3.439727267787634e-06, + "loss": 0.3007, + "step": 13545 + }, + { + "epoch": 0.6130798823263182, + "grad_norm": 0.30014997903508966, + "learning_rate": 3.439030962541069e-06, + "loss": 0.4702, + "step": 13546 + }, + { + "epoch": 0.6131251414347137, + "grad_norm": 0.7192659293934158, + "learning_rate": 3.438334690834934e-06, + "loss": 0.2835, + "step": 13547 + }, + { + "epoch": 0.6131704005431093, + "grad_norm": 0.27457620479106704, + "learning_rate": 3.4376384526841918e-06, + "loss": 0.4918, + "step": 13548 + }, + { + "epoch": 0.6132156596515048, + "grad_norm": 0.6625082498934292, + "learning_rate": 3.4369422481037984e-06, + "loss": 0.3029, + "step": 13549 + }, + { + "epoch": 0.6132609187599004, + "grad_norm": 0.6179382329542554, + "learning_rate": 3.4362460771087162e-06, + "loss": 0.3097, + "step": 13550 + }, + { + "epoch": 0.613306177868296, + "grad_norm": 0.690846385935376, + "learning_rate": 3.4355499397139047e-06, + "loss": 0.3403, + "step": 13551 + }, + { + "epoch": 0.6133514369766916, + "grad_norm": 0.6691610028396701, + "learning_rate": 3.4348538359343187e-06, + "loss": 0.308, + "step": 13552 + }, + { + "epoch": 0.6133966960850872, + "grad_norm": 0.6484163841141876, + "learning_rate": 3.4341577657849163e-06, + "loss": 0.2863, + "step": 13553 + }, + { + "epoch": 0.6134419551934827, + "grad_norm": 0.6710404290800567, + "learning_rate": 3.433461729280657e-06, + "loss": 0.3299, + "step": 13554 + }, + { + "epoch": 0.6134872143018782, + "grad_norm": 0.662797078639147, + "learning_rate": 3.4327657264364913e-06, + "loss": 0.3115, + "step": 13555 + }, + { + "epoch": 0.6135324734102738, + "grad_norm": 0.331292948132087, + "learning_rate": 3.4320697572673774e-06, + "loss": 0.4806, + "step": 13556 + }, + { + "epoch": 0.6135777325186694, + "grad_norm": 0.6338532740399662, + "learning_rate": 3.4313738217882676e-06, + "loss": 0.3057, + "step": 13557 + }, + { + "epoch": 0.6136229916270649, + "grad_norm": 0.28146032818850647, + "learning_rate": 3.4306779200141204e-06, + "loss": 0.4512, + "step": 13558 + }, + { + "epoch": 0.6136682507354605, + "grad_norm": 0.28191321203490505, + "learning_rate": 3.4299820519598814e-06, + "loss": 0.4511, + "step": 13559 + }, + { + "epoch": 0.6137135098438561, + "grad_norm": 0.7828211842898387, + "learning_rate": 3.4292862176405075e-06, + "loss": 0.3293, + "step": 13560 + }, + { + "epoch": 0.6137587689522517, + "grad_norm": 0.6837348984558683, + "learning_rate": 3.4285904170709495e-06, + "loss": 0.3475, + "step": 13561 + }, + { + "epoch": 0.6138040280606472, + "grad_norm": 0.6027187377328831, + "learning_rate": 3.427894650266156e-06, + "loss": 0.3363, + "step": 13562 + }, + { + "epoch": 0.6138492871690427, + "grad_norm": 0.6826603664435904, + "learning_rate": 3.4271989172410768e-06, + "loss": 0.3355, + "step": 13563 + }, + { + "epoch": 0.6138945462774383, + "grad_norm": 0.6563556306672855, + "learning_rate": 3.4265032180106656e-06, + "loss": 0.3017, + "step": 13564 + }, + { + "epoch": 0.6139398053858339, + "grad_norm": 0.6249553420140989, + "learning_rate": 3.425807552589866e-06, + "loss": 0.3064, + "step": 13565 + }, + { + "epoch": 0.6139850644942295, + "grad_norm": 0.6720765103225914, + "learning_rate": 3.425111920993627e-06, + "loss": 0.3024, + "step": 13566 + }, + { + "epoch": 0.614030323602625, + "grad_norm": 0.6304178675344458, + "learning_rate": 3.424416323236897e-06, + "loss": 0.3224, + "step": 13567 + }, + { + "epoch": 0.6140755827110206, + "grad_norm": 0.6751915719397409, + "learning_rate": 3.4237207593346207e-06, + "loss": 0.3275, + "step": 13568 + }, + { + "epoch": 0.6141208418194162, + "grad_norm": 0.6877550526977185, + "learning_rate": 3.423025229301743e-06, + "loss": 0.3224, + "step": 13569 + }, + { + "epoch": 0.6141661009278118, + "grad_norm": 0.65181587448465, + "learning_rate": 3.42232973315321e-06, + "loss": 0.3017, + "step": 13570 + }, + { + "epoch": 0.6142113600362072, + "grad_norm": 0.39223110812417794, + "learning_rate": 3.4216342709039675e-06, + "loss": 0.4635, + "step": 13571 + }, + { + "epoch": 0.6142566191446028, + "grad_norm": 0.33188089678483657, + "learning_rate": 3.4209388425689556e-06, + "loss": 0.4698, + "step": 13572 + }, + { + "epoch": 0.6143018782529984, + "grad_norm": 0.6084418154167035, + "learning_rate": 3.420243448163117e-06, + "loss": 0.2885, + "step": 13573 + }, + { + "epoch": 0.614347137361394, + "grad_norm": 0.5984272589446169, + "learning_rate": 3.4195480877013976e-06, + "loss": 0.329, + "step": 13574 + }, + { + "epoch": 0.6143923964697895, + "grad_norm": 0.5804821195101811, + "learning_rate": 3.4188527611987343e-06, + "loss": 0.3345, + "step": 13575 + }, + { + "epoch": 0.6144376555781851, + "grad_norm": 0.31348491520577604, + "learning_rate": 3.4181574686700687e-06, + "loss": 0.4794, + "step": 13576 + }, + { + "epoch": 0.6144829146865807, + "grad_norm": 0.6456037759163459, + "learning_rate": 3.417462210130342e-06, + "loss": 0.3458, + "step": 13577 + }, + { + "epoch": 0.6145281737949763, + "grad_norm": 0.6486113186177295, + "learning_rate": 3.4167669855944905e-06, + "loss": 0.3037, + "step": 13578 + }, + { + "epoch": 0.6145734329033719, + "grad_norm": 0.6203561762672279, + "learning_rate": 3.416071795077455e-06, + "loss": 0.3292, + "step": 13579 + }, + { + "epoch": 0.6146186920117673, + "grad_norm": 0.6063378791570977, + "learning_rate": 3.415376638594172e-06, + "loss": 0.3388, + "step": 13580 + }, + { + "epoch": 0.6146639511201629, + "grad_norm": 0.6807597534346058, + "learning_rate": 3.414681516159578e-06, + "loss": 0.2982, + "step": 13581 + }, + { + "epoch": 0.6147092102285585, + "grad_norm": 1.0380220289205693, + "learning_rate": 3.4139864277886083e-06, + "loss": 0.3341, + "step": 13582 + }, + { + "epoch": 0.6147544693369541, + "grad_norm": 0.3318394658340282, + "learning_rate": 3.413291373496202e-06, + "loss": 0.4856, + "step": 13583 + }, + { + "epoch": 0.6147997284453496, + "grad_norm": 0.6834381237273527, + "learning_rate": 3.4125963532972878e-06, + "loss": 0.3554, + "step": 13584 + }, + { + "epoch": 0.6148449875537452, + "grad_norm": 0.661222968496695, + "learning_rate": 3.4119013672068034e-06, + "loss": 0.3444, + "step": 13585 + }, + { + "epoch": 0.6148902466621408, + "grad_norm": 0.6150340736437391, + "learning_rate": 3.411206415239681e-06, + "loss": 0.2932, + "step": 13586 + }, + { + "epoch": 0.6149355057705364, + "grad_norm": 0.5982665467514687, + "learning_rate": 3.4105114974108553e-06, + "loss": 0.3131, + "step": 13587 + }, + { + "epoch": 0.6149807648789319, + "grad_norm": 0.6062719537081716, + "learning_rate": 3.4098166137352534e-06, + "loss": 0.3097, + "step": 13588 + }, + { + "epoch": 0.6150260239873274, + "grad_norm": 0.5801537581376082, + "learning_rate": 3.409121764227809e-06, + "loss": 0.3013, + "step": 13589 + }, + { + "epoch": 0.615071283095723, + "grad_norm": 0.6342492363047709, + "learning_rate": 3.408426948903453e-06, + "loss": 0.3208, + "step": 13590 + }, + { + "epoch": 0.6151165422041186, + "grad_norm": 0.618670713571157, + "learning_rate": 3.4077321677771137e-06, + "loss": 0.2913, + "step": 13591 + }, + { + "epoch": 0.6151618013125142, + "grad_norm": 0.6074155228706901, + "learning_rate": 3.4070374208637173e-06, + "loss": 0.3249, + "step": 13592 + }, + { + "epoch": 0.6152070604209097, + "grad_norm": 0.6094925796063878, + "learning_rate": 3.4063427081781973e-06, + "loss": 0.2985, + "step": 13593 + }, + { + "epoch": 0.6152523195293053, + "grad_norm": 0.6737245100747968, + "learning_rate": 3.4056480297354767e-06, + "loss": 0.336, + "step": 13594 + }, + { + "epoch": 0.6152975786377008, + "grad_norm": 0.3467582044192305, + "learning_rate": 3.4049533855504835e-06, + "loss": 0.486, + "step": 13595 + }, + { + "epoch": 0.6153428377460964, + "grad_norm": 0.6309695090828856, + "learning_rate": 3.404258775638144e-06, + "loss": 0.321, + "step": 13596 + }, + { + "epoch": 0.6153880968544919, + "grad_norm": 0.5930313711457792, + "learning_rate": 3.4035642000133806e-06, + "loss": 0.2694, + "step": 13597 + }, + { + "epoch": 0.6154333559628875, + "grad_norm": 0.5859657562645595, + "learning_rate": 3.4028696586911203e-06, + "loss": 0.3151, + "step": 13598 + }, + { + "epoch": 0.6154786150712831, + "grad_norm": 0.6732209596085635, + "learning_rate": 3.4021751516862856e-06, + "loss": 0.2716, + "step": 13599 + }, + { + "epoch": 0.6155238741796787, + "grad_norm": 0.3069295366174163, + "learning_rate": 3.401480679013801e-06, + "loss": 0.4739, + "step": 13600 + }, + { + "epoch": 0.6155691332880743, + "grad_norm": 0.7672622217379496, + "learning_rate": 3.4007862406885863e-06, + "loss": 0.2899, + "step": 13601 + }, + { + "epoch": 0.6156143923964698, + "grad_norm": 0.6369540019262062, + "learning_rate": 3.400091836725562e-06, + "loss": 0.3028, + "step": 13602 + }, + { + "epoch": 0.6156596515048653, + "grad_norm": 0.5712373454878116, + "learning_rate": 3.3993974671396523e-06, + "loss": 0.2963, + "step": 13603 + }, + { + "epoch": 0.6157049106132609, + "grad_norm": 0.30381427668797734, + "learning_rate": 3.3987031319457747e-06, + "loss": 0.4829, + "step": 13604 + }, + { + "epoch": 0.6157501697216565, + "grad_norm": 0.8279645261570301, + "learning_rate": 3.398008831158849e-06, + "loss": 0.3064, + "step": 13605 + }, + { + "epoch": 0.615795428830052, + "grad_norm": 0.6295116827920167, + "learning_rate": 3.3973145647937935e-06, + "loss": 0.3673, + "step": 13606 + }, + { + "epoch": 0.6158406879384476, + "grad_norm": 0.6245798726321056, + "learning_rate": 3.3966203328655244e-06, + "loss": 0.2947, + "step": 13607 + }, + { + "epoch": 0.6158859470468432, + "grad_norm": 0.5876837841079967, + "learning_rate": 3.3959261353889605e-06, + "loss": 0.3124, + "step": 13608 + }, + { + "epoch": 0.6159312061552388, + "grad_norm": 0.6718860068837423, + "learning_rate": 3.395231972379019e-06, + "loss": 0.3449, + "step": 13609 + }, + { + "epoch": 0.6159764652636343, + "grad_norm": 0.327732997048033, + "learning_rate": 3.3945378438506125e-06, + "loss": 0.4554, + "step": 13610 + }, + { + "epoch": 0.6160217243720298, + "grad_norm": 0.6726952775245555, + "learning_rate": 3.393843749818656e-06, + "loss": 0.2966, + "step": 13611 + }, + { + "epoch": 0.6160669834804254, + "grad_norm": 0.6939323728620533, + "learning_rate": 3.393149690298067e-06, + "loss": 0.2989, + "step": 13612 + }, + { + "epoch": 0.616112242588821, + "grad_norm": 0.7009716702689694, + "learning_rate": 3.3924556653037533e-06, + "loss": 0.3446, + "step": 13613 + }, + { + "epoch": 0.6161575016972166, + "grad_norm": 0.6311392257799732, + "learning_rate": 3.391761674850631e-06, + "loss": 0.3226, + "step": 13614 + }, + { + "epoch": 0.6162027608056121, + "grad_norm": 0.6330456115405911, + "learning_rate": 3.39106771895361e-06, + "loss": 0.2801, + "step": 13615 + }, + { + "epoch": 0.6162480199140077, + "grad_norm": 0.5927001608548688, + "learning_rate": 3.3903737976276064e-06, + "loss": 0.3049, + "step": 13616 + }, + { + "epoch": 0.6162932790224033, + "grad_norm": 0.6139805996534979, + "learning_rate": 3.389679910887522e-06, + "loss": 0.3208, + "step": 13617 + }, + { + "epoch": 0.6163385381307989, + "grad_norm": 0.7454624269731916, + "learning_rate": 3.3889860587482716e-06, + "loss": 0.2736, + "step": 13618 + }, + { + "epoch": 0.6163837972391943, + "grad_norm": 0.5839197225597309, + "learning_rate": 3.3882922412247644e-06, + "loss": 0.3121, + "step": 13619 + }, + { + "epoch": 0.6164290563475899, + "grad_norm": 0.32865271198484763, + "learning_rate": 3.387598458331906e-06, + "loss": 0.4687, + "step": 13620 + }, + { + "epoch": 0.6164743154559855, + "grad_norm": 0.6346643957182696, + "learning_rate": 3.386904710084603e-06, + "loss": 0.3113, + "step": 13621 + }, + { + "epoch": 0.6165195745643811, + "grad_norm": 0.5804358646950067, + "learning_rate": 3.3862109964977665e-06, + "loss": 0.2919, + "step": 13622 + }, + { + "epoch": 0.6165648336727766, + "grad_norm": 0.6657660824615055, + "learning_rate": 3.3855173175862976e-06, + "loss": 0.3376, + "step": 13623 + }, + { + "epoch": 0.6166100927811722, + "grad_norm": 0.6726518102362474, + "learning_rate": 3.3848236733651034e-06, + "loss": 0.3341, + "step": 13624 + }, + { + "epoch": 0.6166553518895678, + "grad_norm": 0.6619560109542502, + "learning_rate": 3.3841300638490885e-06, + "loss": 0.2905, + "step": 13625 + }, + { + "epoch": 0.6167006109979634, + "grad_norm": 0.4164067724363868, + "learning_rate": 3.383436489053154e-06, + "loss": 0.4705, + "step": 13626 + }, + { + "epoch": 0.616745870106359, + "grad_norm": 0.630389271345759, + "learning_rate": 3.3827429489922053e-06, + "loss": 0.3422, + "step": 13627 + }, + { + "epoch": 0.6167911292147544, + "grad_norm": 0.59583288899598, + "learning_rate": 3.3820494436811435e-06, + "loss": 0.2897, + "step": 13628 + }, + { + "epoch": 0.61683638832315, + "grad_norm": 0.5847224930970383, + "learning_rate": 3.3813559731348716e-06, + "loss": 0.2902, + "step": 13629 + }, + { + "epoch": 0.6168816474315456, + "grad_norm": 0.6896455432590854, + "learning_rate": 3.380662537368286e-06, + "loss": 0.3259, + "step": 13630 + }, + { + "epoch": 0.6169269065399412, + "grad_norm": 0.5948295445386829, + "learning_rate": 3.3799691363962904e-06, + "loss": 0.2756, + "step": 13631 + }, + { + "epoch": 0.6169721656483367, + "grad_norm": 0.2790831913582214, + "learning_rate": 3.379275770233783e-06, + "loss": 0.4591, + "step": 13632 + }, + { + "epoch": 0.6170174247567323, + "grad_norm": 0.698686932276024, + "learning_rate": 3.3785824388956613e-06, + "loss": 0.32, + "step": 13633 + }, + { + "epoch": 0.6170626838651279, + "grad_norm": 0.7019383457076268, + "learning_rate": 3.377889142396822e-06, + "loss": 0.3113, + "step": 13634 + }, + { + "epoch": 0.6171079429735234, + "grad_norm": 0.2976661593405612, + "learning_rate": 3.3771958807521656e-06, + "loss": 0.4626, + "step": 13635 + }, + { + "epoch": 0.617153202081919, + "grad_norm": 0.3217511310855456, + "learning_rate": 3.3765026539765832e-06, + "loss": 0.464, + "step": 13636 + }, + { + "epoch": 0.6171984611903145, + "grad_norm": 0.623664016439237, + "learning_rate": 3.3758094620849737e-06, + "loss": 0.3252, + "step": 13637 + }, + { + "epoch": 0.6172437202987101, + "grad_norm": 0.5967522282609519, + "learning_rate": 3.3751163050922307e-06, + "loss": 0.3349, + "step": 13638 + }, + { + "epoch": 0.6172889794071057, + "grad_norm": 0.2962581492991003, + "learning_rate": 3.3744231830132473e-06, + "loss": 0.4745, + "step": 13639 + }, + { + "epoch": 0.6173342385155013, + "grad_norm": 0.6495079733390845, + "learning_rate": 3.373730095862916e-06, + "loss": 0.3707, + "step": 13640 + }, + { + "epoch": 0.6173794976238968, + "grad_norm": 0.6280898772506784, + "learning_rate": 3.3730370436561316e-06, + "loss": 0.295, + "step": 13641 + }, + { + "epoch": 0.6174247567322924, + "grad_norm": 0.604642646016628, + "learning_rate": 3.372344026407785e-06, + "loss": 0.3142, + "step": 13642 + }, + { + "epoch": 0.617470015840688, + "grad_norm": 0.6327627082802671, + "learning_rate": 3.3716510441327653e-06, + "loss": 0.3517, + "step": 13643 + }, + { + "epoch": 0.6175152749490835, + "grad_norm": 0.6490284830665943, + "learning_rate": 3.3709580968459628e-06, + "loss": 0.2857, + "step": 13644 + }, + { + "epoch": 0.617560534057479, + "grad_norm": 0.6287983386684702, + "learning_rate": 3.3702651845622703e-06, + "loss": 0.3027, + "step": 13645 + }, + { + "epoch": 0.6176057931658746, + "grad_norm": 0.6207686356181382, + "learning_rate": 3.3695723072965707e-06, + "loss": 0.3167, + "step": 13646 + }, + { + "epoch": 0.6176510522742702, + "grad_norm": 0.5933952431953913, + "learning_rate": 3.3688794650637557e-06, + "loss": 0.2965, + "step": 13647 + }, + { + "epoch": 0.6176963113826658, + "grad_norm": 0.6157345800940398, + "learning_rate": 3.3681866578787124e-06, + "loss": 0.3241, + "step": 13648 + }, + { + "epoch": 0.6177415704910614, + "grad_norm": 0.5934092362653312, + "learning_rate": 3.3674938857563256e-06, + "loss": 0.3288, + "step": 13649 + }, + { + "epoch": 0.6177868295994569, + "grad_norm": 0.6173483257897173, + "learning_rate": 3.3668011487114798e-06, + "loss": 0.2881, + "step": 13650 + }, + { + "epoch": 0.6178320887078524, + "grad_norm": 0.5965277040554047, + "learning_rate": 3.3661084467590637e-06, + "loss": 0.2957, + "step": 13651 + }, + { + "epoch": 0.617877347816248, + "grad_norm": 0.6436909221396913, + "learning_rate": 3.3654157799139576e-06, + "loss": 0.3309, + "step": 13652 + }, + { + "epoch": 0.6179226069246436, + "grad_norm": 0.6945090155316245, + "learning_rate": 3.3647231481910464e-06, + "loss": 0.2989, + "step": 13653 + }, + { + "epoch": 0.6179678660330391, + "grad_norm": 0.8550100311569186, + "learning_rate": 3.364030551605213e-06, + "loss": 0.3137, + "step": 13654 + }, + { + "epoch": 0.6180131251414347, + "grad_norm": 0.6315291386273997, + "learning_rate": 3.363337990171337e-06, + "loss": 0.3127, + "step": 13655 + }, + { + "epoch": 0.6180583842498303, + "grad_norm": 0.6369925272081775, + "learning_rate": 3.3626454639043018e-06, + "loss": 0.3174, + "step": 13656 + }, + { + "epoch": 0.6181036433582259, + "grad_norm": 0.6373841023921314, + "learning_rate": 3.361952972818987e-06, + "loss": 0.321, + "step": 13657 + }, + { + "epoch": 0.6181489024666214, + "grad_norm": 0.6862637267107752, + "learning_rate": 3.3612605169302724e-06, + "loss": 0.3059, + "step": 13658 + }, + { + "epoch": 0.6181941615750169, + "grad_norm": 0.3469364433723692, + "learning_rate": 3.360568096253035e-06, + "loss": 0.4704, + "step": 13659 + }, + { + "epoch": 0.6182394206834125, + "grad_norm": 0.33049897548744755, + "learning_rate": 3.3598757108021546e-06, + "loss": 0.4795, + "step": 13660 + }, + { + "epoch": 0.6182846797918081, + "grad_norm": 0.3221609743944461, + "learning_rate": 3.359183360592509e-06, + "loss": 0.4743, + "step": 13661 + }, + { + "epoch": 0.6183299389002037, + "grad_norm": 0.6382213127197119, + "learning_rate": 3.3584910456389726e-06, + "loss": 0.2902, + "step": 13662 + }, + { + "epoch": 0.6183751980085992, + "grad_norm": 0.5931313383666015, + "learning_rate": 3.357798765956421e-06, + "loss": 0.2928, + "step": 13663 + }, + { + "epoch": 0.6184204571169948, + "grad_norm": 0.6169420726404996, + "learning_rate": 3.357106521559733e-06, + "loss": 0.2874, + "step": 13664 + }, + { + "epoch": 0.6184657162253904, + "grad_norm": 0.6228570680089183, + "learning_rate": 3.356414312463778e-06, + "loss": 0.2929, + "step": 13665 + }, + { + "epoch": 0.618510975333786, + "grad_norm": 0.4429317158088941, + "learning_rate": 3.3557221386834323e-06, + "loss": 0.4908, + "step": 13666 + }, + { + "epoch": 0.6185562344421814, + "grad_norm": 0.339620418569209, + "learning_rate": 3.3550300002335685e-06, + "loss": 0.4649, + "step": 13667 + }, + { + "epoch": 0.618601493550577, + "grad_norm": 0.6310993574186863, + "learning_rate": 3.354337897129057e-06, + "loss": 0.3275, + "step": 13668 + }, + { + "epoch": 0.6186467526589726, + "grad_norm": 0.6359004375019375, + "learning_rate": 3.3536458293847686e-06, + "loss": 0.2751, + "step": 13669 + }, + { + "epoch": 0.6186920117673682, + "grad_norm": 0.6743555355132227, + "learning_rate": 3.3529537970155756e-06, + "loss": 0.3195, + "step": 13670 + }, + { + "epoch": 0.6187372708757638, + "grad_norm": 0.31263598762639505, + "learning_rate": 3.3522618000363487e-06, + "loss": 0.5041, + "step": 13671 + }, + { + "epoch": 0.6187825299841593, + "grad_norm": 0.6122582855146629, + "learning_rate": 3.3515698384619543e-06, + "loss": 0.3067, + "step": 13672 + }, + { + "epoch": 0.6188277890925549, + "grad_norm": 0.6522907156600588, + "learning_rate": 3.35087791230726e-06, + "loss": 0.3204, + "step": 13673 + }, + { + "epoch": 0.6188730482009505, + "grad_norm": 0.2912571970881983, + "learning_rate": 3.3501860215871363e-06, + "loss": 0.4722, + "step": 13674 + }, + { + "epoch": 0.618918307309346, + "grad_norm": 0.6105368967235455, + "learning_rate": 3.3494941663164465e-06, + "loss": 0.2829, + "step": 13675 + }, + { + "epoch": 0.6189635664177415, + "grad_norm": 0.2997500457463555, + "learning_rate": 3.348802346510058e-06, + "loss": 0.4621, + "step": 13676 + }, + { + "epoch": 0.6190088255261371, + "grad_norm": 0.6123374708352166, + "learning_rate": 3.348110562182838e-06, + "loss": 0.2748, + "step": 13677 + }, + { + "epoch": 0.6190540846345327, + "grad_norm": 0.6084595710799209, + "learning_rate": 3.3474188133496466e-06, + "loss": 0.3066, + "step": 13678 + }, + { + "epoch": 0.6190993437429283, + "grad_norm": 0.5729332026915518, + "learning_rate": 3.346727100025349e-06, + "loss": 0.2863, + "step": 13679 + }, + { + "epoch": 0.6191446028513238, + "grad_norm": 0.6368878796261367, + "learning_rate": 3.34603542222481e-06, + "loss": 0.3138, + "step": 13680 + }, + { + "epoch": 0.6191898619597194, + "grad_norm": 0.5823494292580653, + "learning_rate": 3.3453437799628885e-06, + "loss": 0.3202, + "step": 13681 + }, + { + "epoch": 0.619235121068115, + "grad_norm": 0.6531662241634709, + "learning_rate": 3.344652173254448e-06, + "loss": 0.3633, + "step": 13682 + }, + { + "epoch": 0.6192803801765105, + "grad_norm": 0.30126341048357147, + "learning_rate": 3.343960602114349e-06, + "loss": 0.4781, + "step": 13683 + }, + { + "epoch": 0.6193256392849061, + "grad_norm": 0.6433215004630558, + "learning_rate": 3.3432690665574485e-06, + "loss": 0.2774, + "step": 13684 + }, + { + "epoch": 0.6193708983933016, + "grad_norm": 0.29760997546260387, + "learning_rate": 3.3425775665986093e-06, + "loss": 0.4561, + "step": 13685 + }, + { + "epoch": 0.6194161575016972, + "grad_norm": 0.6400974170920216, + "learning_rate": 3.341886102252687e-06, + "loss": 0.2772, + "step": 13686 + }, + { + "epoch": 0.6194614166100928, + "grad_norm": 0.6834423109711578, + "learning_rate": 3.3411946735345412e-06, + "loss": 0.3839, + "step": 13687 + }, + { + "epoch": 0.6195066757184884, + "grad_norm": 0.6234401108842078, + "learning_rate": 3.340503280459024e-06, + "loss": 0.2889, + "step": 13688 + }, + { + "epoch": 0.6195519348268839, + "grad_norm": 0.2790971693531577, + "learning_rate": 3.3398119230409976e-06, + "loss": 0.4897, + "step": 13689 + }, + { + "epoch": 0.6195971939352795, + "grad_norm": 0.6686486132169249, + "learning_rate": 3.339120601295314e-06, + "loss": 0.3546, + "step": 13690 + }, + { + "epoch": 0.619642453043675, + "grad_norm": 0.6426563328291927, + "learning_rate": 3.3384293152368264e-06, + "loss": 0.322, + "step": 13691 + }, + { + "epoch": 0.6196877121520706, + "grad_norm": 0.6026238741279107, + "learning_rate": 3.3377380648803894e-06, + "loss": 0.2905, + "step": 13692 + }, + { + "epoch": 0.6197329712604661, + "grad_norm": 0.6161566790579164, + "learning_rate": 3.3370468502408584e-06, + "loss": 0.3009, + "step": 13693 + }, + { + "epoch": 0.6197782303688617, + "grad_norm": 0.6770974820021, + "learning_rate": 3.3363556713330806e-06, + "loss": 0.3089, + "step": 13694 + }, + { + "epoch": 0.6198234894772573, + "grad_norm": 0.5855209014504454, + "learning_rate": 3.3356645281719114e-06, + "loss": 0.3018, + "step": 13695 + }, + { + "epoch": 0.6198687485856529, + "grad_norm": 0.594537096198144, + "learning_rate": 3.3349734207722e-06, + "loss": 0.4007, + "step": 13696 + }, + { + "epoch": 0.6199140076940485, + "grad_norm": 0.575094066503019, + "learning_rate": 3.334282349148795e-06, + "loss": 0.2955, + "step": 13697 + }, + { + "epoch": 0.619959266802444, + "grad_norm": 0.6381673155404338, + "learning_rate": 3.3335913133165467e-06, + "loss": 0.3129, + "step": 13698 + }, + { + "epoch": 0.6200045259108395, + "grad_norm": 0.310952462490026, + "learning_rate": 3.332900313290303e-06, + "loss": 0.4803, + "step": 13699 + }, + { + "epoch": 0.6200497850192351, + "grad_norm": 0.6186094329233779, + "learning_rate": 3.332209349084913e-06, + "loss": 0.28, + "step": 13700 + }, + { + "epoch": 0.6200950441276307, + "grad_norm": 0.6440602505732157, + "learning_rate": 3.3315184207152208e-06, + "loss": 0.3011, + "step": 13701 + }, + { + "epoch": 0.6201403032360262, + "grad_norm": 0.29370766930924735, + "learning_rate": 3.330827528196072e-06, + "loss": 0.4786, + "step": 13702 + }, + { + "epoch": 0.6201855623444218, + "grad_norm": 0.5860643236849766, + "learning_rate": 3.330136671542315e-06, + "loss": 0.3187, + "step": 13703 + }, + { + "epoch": 0.6202308214528174, + "grad_norm": 0.6053177624397245, + "learning_rate": 3.329445850768792e-06, + "loss": 0.3108, + "step": 13704 + }, + { + "epoch": 0.620276080561213, + "grad_norm": 0.6287777257029172, + "learning_rate": 3.3287550658903466e-06, + "loss": 0.3194, + "step": 13705 + }, + { + "epoch": 0.6203213396696086, + "grad_norm": 0.6483596673339858, + "learning_rate": 3.328064316921823e-06, + "loss": 0.2893, + "step": 13706 + }, + { + "epoch": 0.620366598778004, + "grad_norm": 0.640477653434398, + "learning_rate": 3.3273736038780604e-06, + "loss": 0.3129, + "step": 13707 + }, + { + "epoch": 0.6204118578863996, + "grad_norm": 0.6681407434538018, + "learning_rate": 3.3266829267739026e-06, + "loss": 0.3462, + "step": 13708 + }, + { + "epoch": 0.6204571169947952, + "grad_norm": 0.29857528906145336, + "learning_rate": 3.325992285624191e-06, + "loss": 0.4843, + "step": 13709 + }, + { + "epoch": 0.6205023761031908, + "grad_norm": 0.6447706460657421, + "learning_rate": 3.325301680443762e-06, + "loss": 0.3636, + "step": 13710 + }, + { + "epoch": 0.6205476352115863, + "grad_norm": 0.6682698026092924, + "learning_rate": 3.3246111112474578e-06, + "loss": 0.3289, + "step": 13711 + }, + { + "epoch": 0.6205928943199819, + "grad_norm": 0.6591637616635906, + "learning_rate": 3.3239205780501134e-06, + "loss": 0.3497, + "step": 13712 + }, + { + "epoch": 0.6206381534283775, + "grad_norm": 0.6754149109862705, + "learning_rate": 3.3232300808665703e-06, + "loss": 0.2837, + "step": 13713 + }, + { + "epoch": 0.6206834125367731, + "grad_norm": 0.6193464763113535, + "learning_rate": 3.3225396197116616e-06, + "loss": 0.3218, + "step": 13714 + }, + { + "epoch": 0.6207286716451685, + "grad_norm": 0.6162007209511707, + "learning_rate": 3.321849194600225e-06, + "loss": 0.328, + "step": 13715 + }, + { + "epoch": 0.6207739307535641, + "grad_norm": 0.6253423668804438, + "learning_rate": 3.321158805547096e-06, + "loss": 0.2941, + "step": 13716 + }, + { + "epoch": 0.6208191898619597, + "grad_norm": 0.6661802237947486, + "learning_rate": 3.320468452567106e-06, + "loss": 0.3408, + "step": 13717 + }, + { + "epoch": 0.6208644489703553, + "grad_norm": 0.6118279427263077, + "learning_rate": 3.319778135675092e-06, + "loss": 0.3066, + "step": 13718 + }, + { + "epoch": 0.6209097080787509, + "grad_norm": 0.6382050988995399, + "learning_rate": 3.3190878548858862e-06, + "loss": 0.3464, + "step": 13719 + }, + { + "epoch": 0.6209549671871464, + "grad_norm": 0.6587494163288371, + "learning_rate": 3.318397610214319e-06, + "loss": 0.2996, + "step": 13720 + }, + { + "epoch": 0.621000226295542, + "grad_norm": 0.7209291907841151, + "learning_rate": 3.317707401675221e-06, + "loss": 0.2783, + "step": 13721 + }, + { + "epoch": 0.6210454854039376, + "grad_norm": 0.5451826261203058, + "learning_rate": 3.317017229283428e-06, + "loss": 0.2755, + "step": 13722 + }, + { + "epoch": 0.6210907445123331, + "grad_norm": 0.6646853793016391, + "learning_rate": 3.3163270930537623e-06, + "loss": 0.3281, + "step": 13723 + }, + { + "epoch": 0.6211360036207286, + "grad_norm": 0.7129622897575655, + "learning_rate": 3.3156369930010574e-06, + "loss": 0.2968, + "step": 13724 + }, + { + "epoch": 0.6211812627291242, + "grad_norm": 0.6001944567292407, + "learning_rate": 3.3149469291401413e-06, + "loss": 0.3496, + "step": 13725 + }, + { + "epoch": 0.6212265218375198, + "grad_norm": 0.31675565312106924, + "learning_rate": 3.3142569014858395e-06, + "loss": 0.4998, + "step": 13726 + }, + { + "epoch": 0.6212717809459154, + "grad_norm": 0.5826162656953073, + "learning_rate": 3.313566910052979e-06, + "loss": 0.2726, + "step": 13727 + }, + { + "epoch": 0.6213170400543109, + "grad_norm": 0.6090598312717707, + "learning_rate": 3.3128769548563864e-06, + "loss": 0.3287, + "step": 13728 + }, + { + "epoch": 0.6213622991627065, + "grad_norm": 0.6458666374249951, + "learning_rate": 3.312187035910888e-06, + "loss": 0.3374, + "step": 13729 + }, + { + "epoch": 0.6214075582711021, + "grad_norm": 0.583242363800884, + "learning_rate": 3.3114971532313058e-06, + "loss": 0.3267, + "step": 13730 + }, + { + "epoch": 0.6214528173794976, + "grad_norm": 0.6149879962998592, + "learning_rate": 3.310807306832462e-06, + "loss": 0.2917, + "step": 13731 + }, + { + "epoch": 0.6214980764878932, + "grad_norm": 0.622620677794301, + "learning_rate": 3.310117496729184e-06, + "loss": 0.3231, + "step": 13732 + }, + { + "epoch": 0.6215433355962887, + "grad_norm": 0.6379068039069801, + "learning_rate": 3.309427722936289e-06, + "loss": 0.3134, + "step": 13733 + }, + { + "epoch": 0.6215885947046843, + "grad_norm": 0.6829768054297026, + "learning_rate": 3.308737985468601e-06, + "loss": 0.3594, + "step": 13734 + }, + { + "epoch": 0.6216338538130799, + "grad_norm": 0.6276077247995031, + "learning_rate": 3.3080482843409402e-06, + "loss": 0.3395, + "step": 13735 + }, + { + "epoch": 0.6216791129214755, + "grad_norm": 0.5825313407520454, + "learning_rate": 3.307358619568123e-06, + "loss": 0.2992, + "step": 13736 + }, + { + "epoch": 0.621724372029871, + "grad_norm": 0.6154254435580296, + "learning_rate": 3.3066689911649714e-06, + "loss": 0.3046, + "step": 13737 + }, + { + "epoch": 0.6217696311382666, + "grad_norm": 0.30001794513072544, + "learning_rate": 3.305979399146304e-06, + "loss": 0.4916, + "step": 13738 + }, + { + "epoch": 0.6218148902466621, + "grad_norm": 0.29097572170197145, + "learning_rate": 3.305289843526935e-06, + "loss": 0.4988, + "step": 13739 + }, + { + "epoch": 0.6218601493550577, + "grad_norm": 0.6322721344768839, + "learning_rate": 3.304600324321682e-06, + "loss": 0.3225, + "step": 13740 + }, + { + "epoch": 0.6219054084634533, + "grad_norm": 0.6406997502751267, + "learning_rate": 3.3039108415453614e-06, + "loss": 0.2876, + "step": 13741 + }, + { + "epoch": 0.6219506675718488, + "grad_norm": 0.6260918992215052, + "learning_rate": 3.303221395212789e-06, + "loss": 0.3173, + "step": 13742 + }, + { + "epoch": 0.6219959266802444, + "grad_norm": 0.2673962411768744, + "learning_rate": 3.302531985338776e-06, + "loss": 0.4688, + "step": 13743 + }, + { + "epoch": 0.62204118578864, + "grad_norm": 0.6301571532531297, + "learning_rate": 3.3018426119381364e-06, + "loss": 0.3163, + "step": 13744 + }, + { + "epoch": 0.6220864448970356, + "grad_norm": 0.28869214485696515, + "learning_rate": 3.3011532750256874e-06, + "loss": 0.4791, + "step": 13745 + }, + { + "epoch": 0.622131704005431, + "grad_norm": 0.2968194690831498, + "learning_rate": 3.300463974616234e-06, + "loss": 0.5083, + "step": 13746 + }, + { + "epoch": 0.6221769631138266, + "grad_norm": 0.2750714673096283, + "learning_rate": 3.2997747107245898e-06, + "loss": 0.4796, + "step": 13747 + }, + { + "epoch": 0.6222222222222222, + "grad_norm": 0.6431241474261704, + "learning_rate": 3.2990854833655674e-06, + "loss": 0.3564, + "step": 13748 + }, + { + "epoch": 0.6222674813306178, + "grad_norm": 0.3403552256461183, + "learning_rate": 3.298396292553972e-06, + "loss": 0.5099, + "step": 13749 + }, + { + "epoch": 0.6223127404390133, + "grad_norm": 0.5901200168171835, + "learning_rate": 3.2977071383046134e-06, + "loss": 0.3121, + "step": 13750 + }, + { + "epoch": 0.6223579995474089, + "grad_norm": 0.5680916309917373, + "learning_rate": 3.297018020632304e-06, + "loss": 0.3034, + "step": 13751 + }, + { + "epoch": 0.6224032586558045, + "grad_norm": 0.2810511524918745, + "learning_rate": 3.2963289395518434e-06, + "loss": 0.4828, + "step": 13752 + }, + { + "epoch": 0.6224485177642001, + "grad_norm": 0.6593233991957315, + "learning_rate": 3.295639895078042e-06, + "loss": 0.3447, + "step": 13753 + }, + { + "epoch": 0.6224937768725957, + "grad_norm": 0.6151045949479557, + "learning_rate": 3.294950887225707e-06, + "loss": 0.2901, + "step": 13754 + }, + { + "epoch": 0.6225390359809911, + "grad_norm": 0.2867113598124026, + "learning_rate": 3.294261916009639e-06, + "loss": 0.478, + "step": 13755 + }, + { + "epoch": 0.6225842950893867, + "grad_norm": 0.6284771216660319, + "learning_rate": 3.2935729814446426e-06, + "loss": 0.3057, + "step": 13756 + }, + { + "epoch": 0.6226295541977823, + "grad_norm": 0.6213561568312568, + "learning_rate": 3.2928840835455233e-06, + "loss": 0.3069, + "step": 13757 + }, + { + "epoch": 0.6226748133061779, + "grad_norm": 0.5948747707066405, + "learning_rate": 3.2921952223270824e-06, + "loss": 0.3111, + "step": 13758 + }, + { + "epoch": 0.6227200724145734, + "grad_norm": 0.6322336320261707, + "learning_rate": 3.2915063978041205e-06, + "loss": 0.2965, + "step": 13759 + }, + { + "epoch": 0.622765331522969, + "grad_norm": 0.6610317478714497, + "learning_rate": 3.290817609991438e-06, + "loss": 0.3032, + "step": 13760 + }, + { + "epoch": 0.6228105906313646, + "grad_norm": 0.6406700715494363, + "learning_rate": 3.290128858903837e-06, + "loss": 0.3142, + "step": 13761 + }, + { + "epoch": 0.6228558497397602, + "grad_norm": 0.5882719121004318, + "learning_rate": 3.2894401445561154e-06, + "loss": 0.2807, + "step": 13762 + }, + { + "epoch": 0.6229011088481556, + "grad_norm": 0.6216161164157775, + "learning_rate": 3.2887514669630706e-06, + "loss": 0.2932, + "step": 13763 + }, + { + "epoch": 0.6229463679565512, + "grad_norm": 0.6124949720716838, + "learning_rate": 3.2880628261395033e-06, + "loss": 0.3113, + "step": 13764 + }, + { + "epoch": 0.6229916270649468, + "grad_norm": 0.5685852505759508, + "learning_rate": 3.287374222100205e-06, + "loss": 0.3131, + "step": 13765 + }, + { + "epoch": 0.6230368861733424, + "grad_norm": 0.5748963189048516, + "learning_rate": 3.2866856548599757e-06, + "loss": 0.2906, + "step": 13766 + }, + { + "epoch": 0.623082145281738, + "grad_norm": 0.6265152952502434, + "learning_rate": 3.2859971244336107e-06, + "loss": 0.313, + "step": 13767 + }, + { + "epoch": 0.6231274043901335, + "grad_norm": 0.6210138216125896, + "learning_rate": 3.285308630835903e-06, + "loss": 0.326, + "step": 13768 + }, + { + "epoch": 0.6231726634985291, + "grad_norm": 0.6093822225680078, + "learning_rate": 3.2846201740816446e-06, + "loss": 0.278, + "step": 13769 + }, + { + "epoch": 0.6232179226069247, + "grad_norm": 0.6318730141083124, + "learning_rate": 3.2839317541856317e-06, + "loss": 0.3086, + "step": 13770 + }, + { + "epoch": 0.6232631817153202, + "grad_norm": 0.6329790145727102, + "learning_rate": 3.2832433711626562e-06, + "loss": 0.3258, + "step": 13771 + }, + { + "epoch": 0.6233084408237157, + "grad_norm": 0.5543442506257924, + "learning_rate": 3.282555025027507e-06, + "loss": 0.3155, + "step": 13772 + }, + { + "epoch": 0.6233536999321113, + "grad_norm": 0.6117032449228192, + "learning_rate": 3.2818667157949742e-06, + "loss": 0.3133, + "step": 13773 + }, + { + "epoch": 0.6233989590405069, + "grad_norm": 0.6131769415951153, + "learning_rate": 3.281178443479852e-06, + "loss": 0.314, + "step": 13774 + }, + { + "epoch": 0.6234442181489025, + "grad_norm": 0.754090728019248, + "learning_rate": 3.2804902080969233e-06, + "loss": 0.3104, + "step": 13775 + }, + { + "epoch": 0.6234894772572981, + "grad_norm": 0.4622437492790766, + "learning_rate": 3.2798020096609795e-06, + "loss": 0.4743, + "step": 13776 + }, + { + "epoch": 0.6235347363656936, + "grad_norm": 0.5918545279223919, + "learning_rate": 3.2791138481868084e-06, + "loss": 0.3231, + "step": 13777 + }, + { + "epoch": 0.6235799954740892, + "grad_norm": 0.6049844320425433, + "learning_rate": 3.2784257236891948e-06, + "loss": 0.3062, + "step": 13778 + }, + { + "epoch": 0.6236252545824847, + "grad_norm": 0.6220890451444148, + "learning_rate": 3.2777376361829237e-06, + "loss": 0.2907, + "step": 13779 + }, + { + "epoch": 0.6236705136908803, + "grad_norm": 0.5917243166323288, + "learning_rate": 3.2770495856827834e-06, + "loss": 0.3416, + "step": 13780 + }, + { + "epoch": 0.6237157727992758, + "grad_norm": 0.3768603010876834, + "learning_rate": 3.2763615722035548e-06, + "loss": 0.4654, + "step": 13781 + }, + { + "epoch": 0.6237610319076714, + "grad_norm": 1.1432024816430995, + "learning_rate": 3.275673595760022e-06, + "loss": 0.3292, + "step": 13782 + }, + { + "epoch": 0.623806291016067, + "grad_norm": 0.6519811601739103, + "learning_rate": 3.274985656366967e-06, + "loss": 0.3377, + "step": 13783 + }, + { + "epoch": 0.6238515501244626, + "grad_norm": 0.6088029234300733, + "learning_rate": 3.2742977540391747e-06, + "loss": 0.284, + "step": 13784 + }, + { + "epoch": 0.6238968092328581, + "grad_norm": 0.6060950905983736, + "learning_rate": 3.273609888791422e-06, + "loss": 0.3033, + "step": 13785 + }, + { + "epoch": 0.6239420683412537, + "grad_norm": 1.1703658968985153, + "learning_rate": 3.2729220606384905e-06, + "loss": 0.2962, + "step": 13786 + }, + { + "epoch": 0.6239873274496492, + "grad_norm": 0.3148542397514561, + "learning_rate": 3.2722342695951612e-06, + "loss": 0.4635, + "step": 13787 + }, + { + "epoch": 0.6240325865580448, + "grad_norm": 0.5937671301737225, + "learning_rate": 3.2715465156762095e-06, + "loss": 0.3219, + "step": 13788 + }, + { + "epoch": 0.6240778456664404, + "grad_norm": 0.6898405827371933, + "learning_rate": 3.2708587988964134e-06, + "loss": 0.3268, + "step": 13789 + }, + { + "epoch": 0.6241231047748359, + "grad_norm": 0.6329187007223473, + "learning_rate": 3.270171119270554e-06, + "loss": 0.3403, + "step": 13790 + }, + { + "epoch": 0.6241683638832315, + "grad_norm": 0.6249047775111826, + "learning_rate": 3.269483476813403e-06, + "loss": 0.3126, + "step": 13791 + }, + { + "epoch": 0.6242136229916271, + "grad_norm": 0.287356452626235, + "learning_rate": 3.2687958715397373e-06, + "loss": 0.4665, + "step": 13792 + }, + { + "epoch": 0.6242588821000227, + "grad_norm": 0.30557783476155476, + "learning_rate": 3.2681083034643323e-06, + "loss": 0.4719, + "step": 13793 + }, + { + "epoch": 0.6243041412084182, + "grad_norm": 0.6428013543053296, + "learning_rate": 3.2674207726019586e-06, + "loss": 0.3011, + "step": 13794 + }, + { + "epoch": 0.6243494003168137, + "grad_norm": 0.6440078992859094, + "learning_rate": 3.2667332789673923e-06, + "loss": 0.3334, + "step": 13795 + }, + { + "epoch": 0.6243946594252093, + "grad_norm": 0.6103591359768376, + "learning_rate": 3.2660458225754053e-06, + "loss": 0.291, + "step": 13796 + }, + { + "epoch": 0.6244399185336049, + "grad_norm": 0.5711343505430136, + "learning_rate": 3.2653584034407677e-06, + "loss": 0.3366, + "step": 13797 + }, + { + "epoch": 0.6244851776420004, + "grad_norm": 0.6753677755634805, + "learning_rate": 3.264671021578249e-06, + "loss": 0.2947, + "step": 13798 + }, + { + "epoch": 0.624530436750396, + "grad_norm": 0.647574021925651, + "learning_rate": 3.2639836770026215e-06, + "loss": 0.3245, + "step": 13799 + }, + { + "epoch": 0.6245756958587916, + "grad_norm": 0.6507967950709432, + "learning_rate": 3.2632963697286546e-06, + "loss": 0.3375, + "step": 13800 + }, + { + "epoch": 0.6246209549671872, + "grad_norm": 0.7578310633233802, + "learning_rate": 3.262609099771113e-06, + "loss": 0.3414, + "step": 13801 + }, + { + "epoch": 0.6246662140755828, + "grad_norm": 0.7524822672827859, + "learning_rate": 3.261921867144765e-06, + "loss": 0.317, + "step": 13802 + }, + { + "epoch": 0.6247114731839782, + "grad_norm": 0.6484206522561464, + "learning_rate": 3.2612346718643818e-06, + "loss": 0.3302, + "step": 13803 + }, + { + "epoch": 0.6247567322923738, + "grad_norm": 0.6192293017632162, + "learning_rate": 3.2605475139447207e-06, + "loss": 0.2862, + "step": 13804 + }, + { + "epoch": 0.6248019914007694, + "grad_norm": 0.6352264745867553, + "learning_rate": 3.2598603934005535e-06, + "loss": 0.3266, + "step": 13805 + }, + { + "epoch": 0.624847250509165, + "grad_norm": 0.6771239055173687, + "learning_rate": 3.259173310246643e-06, + "loss": 0.3212, + "step": 13806 + }, + { + "epoch": 0.6248925096175605, + "grad_norm": 0.6036924888487291, + "learning_rate": 3.25848626449775e-06, + "loss": 0.3014, + "step": 13807 + }, + { + "epoch": 0.6249377687259561, + "grad_norm": 0.5917879949587903, + "learning_rate": 3.2577992561686377e-06, + "loss": 0.3092, + "step": 13808 + }, + { + "epoch": 0.6249830278343517, + "grad_norm": 0.6359248283898935, + "learning_rate": 3.2571122852740703e-06, + "loss": 0.3168, + "step": 13809 + }, + { + "epoch": 0.6250282869427473, + "grad_norm": 0.6815532910505805, + "learning_rate": 3.256425351828807e-06, + "loss": 0.3212, + "step": 13810 + }, + { + "epoch": 0.6250735460511428, + "grad_norm": 0.6523054381738229, + "learning_rate": 3.2557384558476067e-06, + "loss": 0.3324, + "step": 13811 + }, + { + "epoch": 0.6251188051595383, + "grad_norm": 0.36420794672238715, + "learning_rate": 3.2550515973452295e-06, + "loss": 0.4923, + "step": 13812 + }, + { + "epoch": 0.6251640642679339, + "grad_norm": 0.6536888411766718, + "learning_rate": 3.2543647763364362e-06, + "loss": 0.3078, + "step": 13813 + }, + { + "epoch": 0.6252093233763295, + "grad_norm": 0.6278223232996571, + "learning_rate": 3.2536779928359818e-06, + "loss": 0.2874, + "step": 13814 + }, + { + "epoch": 0.6252545824847251, + "grad_norm": 0.6031600414981914, + "learning_rate": 3.252991246858623e-06, + "loss": 0.3187, + "step": 13815 + }, + { + "epoch": 0.6252998415931206, + "grad_norm": 0.8775349594361912, + "learning_rate": 3.2523045384191186e-06, + "loss": 0.3119, + "step": 13816 + }, + { + "epoch": 0.6253451007015162, + "grad_norm": 0.6574777145007359, + "learning_rate": 3.25161786753222e-06, + "loss": 0.3461, + "step": 13817 + }, + { + "epoch": 0.6253903598099118, + "grad_norm": 0.6385496332359393, + "learning_rate": 3.2509312342126846e-06, + "loss": 0.3365, + "step": 13818 + }, + { + "epoch": 0.6254356189183073, + "grad_norm": 0.5592285709201028, + "learning_rate": 3.250244638475266e-06, + "loss": 0.319, + "step": 13819 + }, + { + "epoch": 0.6254808780267028, + "grad_norm": 0.3097990064128394, + "learning_rate": 3.249558080334716e-06, + "loss": 0.48, + "step": 13820 + }, + { + "epoch": 0.6255261371350984, + "grad_norm": 0.27962557719584863, + "learning_rate": 3.2488715598057856e-06, + "loss": 0.4853, + "step": 13821 + }, + { + "epoch": 0.625571396243494, + "grad_norm": 0.9387839473662496, + "learning_rate": 3.2481850769032287e-06, + "loss": 0.3368, + "step": 13822 + }, + { + "epoch": 0.6256166553518896, + "grad_norm": 0.6884232226794563, + "learning_rate": 3.2474986316417923e-06, + "loss": 0.317, + "step": 13823 + }, + { + "epoch": 0.6256619144602852, + "grad_norm": 0.2886797804451184, + "learning_rate": 3.2468122240362287e-06, + "loss": 0.4574, + "step": 13824 + }, + { + "epoch": 0.6257071735686807, + "grad_norm": 0.6355643364928295, + "learning_rate": 3.246125854101287e-06, + "loss": 0.3144, + "step": 13825 + }, + { + "epoch": 0.6257524326770763, + "grad_norm": 0.6015479752126067, + "learning_rate": 3.2454395218517132e-06, + "loss": 0.3257, + "step": 13826 + }, + { + "epoch": 0.6257976917854718, + "grad_norm": 0.6398553600797272, + "learning_rate": 3.2447532273022536e-06, + "loss": 0.2851, + "step": 13827 + }, + { + "epoch": 0.6258429508938674, + "grad_norm": 0.6120003353559114, + "learning_rate": 3.244066970467658e-06, + "loss": 0.2759, + "step": 13828 + }, + { + "epoch": 0.6258882100022629, + "grad_norm": 0.6475841047237043, + "learning_rate": 3.2433807513626714e-06, + "loss": 0.2943, + "step": 13829 + }, + { + "epoch": 0.6259334691106585, + "grad_norm": 0.6480570558579203, + "learning_rate": 3.242694570002036e-06, + "loss": 0.321, + "step": 13830 + }, + { + "epoch": 0.6259787282190541, + "grad_norm": 0.6847686675984392, + "learning_rate": 3.2420084264004966e-06, + "loss": 0.297, + "step": 13831 + }, + { + "epoch": 0.6260239873274497, + "grad_norm": 0.6184729580512613, + "learning_rate": 3.2413223205727995e-06, + "loss": 0.3233, + "step": 13832 + }, + { + "epoch": 0.6260692464358452, + "grad_norm": 0.63232383158112, + "learning_rate": 3.240636252533681e-06, + "loss": 0.285, + "step": 13833 + }, + { + "epoch": 0.6261145055442408, + "grad_norm": 1.6564727417893674, + "learning_rate": 3.2399502222978875e-06, + "loss": 0.328, + "step": 13834 + }, + { + "epoch": 0.6261597646526363, + "grad_norm": 0.6573502586519927, + "learning_rate": 3.239264229880159e-06, + "loss": 0.2903, + "step": 13835 + }, + { + "epoch": 0.6262050237610319, + "grad_norm": 0.6437835906829803, + "learning_rate": 3.2385782752952336e-06, + "loss": 0.2902, + "step": 13836 + }, + { + "epoch": 0.6262502828694275, + "grad_norm": 0.6287223521759137, + "learning_rate": 3.2378923585578504e-06, + "loss": 0.3426, + "step": 13837 + }, + { + "epoch": 0.626295541977823, + "grad_norm": 0.6936936100572169, + "learning_rate": 3.237206479682751e-06, + "loss": 0.3348, + "step": 13838 + }, + { + "epoch": 0.6263408010862186, + "grad_norm": 0.6140669836485039, + "learning_rate": 3.236520638684668e-06, + "loss": 0.3061, + "step": 13839 + }, + { + "epoch": 0.6263860601946142, + "grad_norm": 0.6145631143983877, + "learning_rate": 3.235834835578341e-06, + "loss": 0.3029, + "step": 13840 + }, + { + "epoch": 0.6264313193030098, + "grad_norm": 0.6446262287679247, + "learning_rate": 3.235149070378504e-06, + "loss": 0.3551, + "step": 13841 + }, + { + "epoch": 0.6264765784114052, + "grad_norm": 0.41859033443519317, + "learning_rate": 3.2344633430998955e-06, + "loss": 0.4625, + "step": 13842 + }, + { + "epoch": 0.6265218375198008, + "grad_norm": 0.6986814785352065, + "learning_rate": 3.233777653757246e-06, + "loss": 0.2925, + "step": 13843 + }, + { + "epoch": 0.6265670966281964, + "grad_norm": 0.6146056230805496, + "learning_rate": 3.2330920023652906e-06, + "loss": 0.2838, + "step": 13844 + }, + { + "epoch": 0.626612355736592, + "grad_norm": 0.6248541826618237, + "learning_rate": 3.2324063889387624e-06, + "loss": 0.2855, + "step": 13845 + }, + { + "epoch": 0.6266576148449875, + "grad_norm": 0.29127147352835037, + "learning_rate": 3.2317208134923895e-06, + "loss": 0.4494, + "step": 13846 + }, + { + "epoch": 0.6267028739533831, + "grad_norm": 0.5426443301129628, + "learning_rate": 3.2310352760409067e-06, + "loss": 0.4794, + "step": 13847 + }, + { + "epoch": 0.6267481330617787, + "grad_norm": 0.256103861392261, + "learning_rate": 3.2303497765990445e-06, + "loss": 0.4425, + "step": 13848 + }, + { + "epoch": 0.6267933921701743, + "grad_norm": 0.6424238921109408, + "learning_rate": 3.229664315181529e-06, + "loss": 0.3382, + "step": 13849 + }, + { + "epoch": 0.6268386512785699, + "grad_norm": 0.27190227539063644, + "learning_rate": 3.2289788918030894e-06, + "loss": 0.4543, + "step": 13850 + }, + { + "epoch": 0.6268839103869653, + "grad_norm": 0.6729867835394304, + "learning_rate": 3.228293506478457e-06, + "loss": 0.3145, + "step": 13851 + }, + { + "epoch": 0.6269291694953609, + "grad_norm": 0.6188356839840186, + "learning_rate": 3.227608159222353e-06, + "loss": 0.3033, + "step": 13852 + }, + { + "epoch": 0.6269744286037565, + "grad_norm": 0.5950567912039953, + "learning_rate": 3.2269228500495066e-06, + "loss": 0.2733, + "step": 13853 + }, + { + "epoch": 0.6270196877121521, + "grad_norm": 0.6284604076040228, + "learning_rate": 3.2262375789746426e-06, + "loss": 0.2744, + "step": 13854 + }, + { + "epoch": 0.6270649468205476, + "grad_norm": 0.726216221839223, + "learning_rate": 3.225552346012487e-06, + "loss": 0.3253, + "step": 13855 + }, + { + "epoch": 0.6271102059289432, + "grad_norm": 0.3194964820786272, + "learning_rate": 3.22486715117776e-06, + "loss": 0.4537, + "step": 13856 + }, + { + "epoch": 0.6271554650373388, + "grad_norm": 0.7452714533400248, + "learning_rate": 3.224181994485186e-06, + "loss": 0.2999, + "step": 13857 + }, + { + "epoch": 0.6272007241457344, + "grad_norm": 0.5865218640010116, + "learning_rate": 3.2234968759494883e-06, + "loss": 0.2673, + "step": 13858 + }, + { + "epoch": 0.62724598325413, + "grad_norm": 0.625521131109761, + "learning_rate": 3.2228117955853853e-06, + "loss": 0.2728, + "step": 13859 + }, + { + "epoch": 0.6272912423625254, + "grad_norm": 0.6312711646093929, + "learning_rate": 3.2221267534075986e-06, + "loss": 0.3171, + "step": 13860 + }, + { + "epoch": 0.627336501470921, + "grad_norm": 0.6587997158261353, + "learning_rate": 3.221441749430849e-06, + "loss": 0.3308, + "step": 13861 + }, + { + "epoch": 0.6273817605793166, + "grad_norm": 0.3269749147811726, + "learning_rate": 3.220756783669852e-06, + "loss": 0.4494, + "step": 13862 + }, + { + "epoch": 0.6274270196877122, + "grad_norm": 0.6300477574521686, + "learning_rate": 3.2200718561393283e-06, + "loss": 0.3255, + "step": 13863 + }, + { + "epoch": 0.6274722787961077, + "grad_norm": 0.7105608912769671, + "learning_rate": 3.2193869668539947e-06, + "loss": 0.3018, + "step": 13864 + }, + { + "epoch": 0.6275175379045033, + "grad_norm": 0.6095324079681443, + "learning_rate": 3.2187021158285646e-06, + "loss": 0.3009, + "step": 13865 + }, + { + "epoch": 0.6275627970128989, + "grad_norm": 0.585902388220184, + "learning_rate": 3.2180173030777552e-06, + "loss": 0.3255, + "step": 13866 + }, + { + "epoch": 0.6276080561212944, + "grad_norm": 0.6738114149519168, + "learning_rate": 3.2173325286162825e-06, + "loss": 0.324, + "step": 13867 + }, + { + "epoch": 0.6276533152296899, + "grad_norm": 0.6705044356390757, + "learning_rate": 3.216647792458858e-06, + "loss": 0.3191, + "step": 13868 + }, + { + "epoch": 0.6276985743380855, + "grad_norm": 0.6308960620317975, + "learning_rate": 3.215963094620195e-06, + "loss": 0.3322, + "step": 13869 + }, + { + "epoch": 0.6277438334464811, + "grad_norm": 0.6366572702729194, + "learning_rate": 3.215278435115005e-06, + "loss": 0.3152, + "step": 13870 + }, + { + "epoch": 0.6277890925548767, + "grad_norm": 0.6480624211144015, + "learning_rate": 3.2145938139580015e-06, + "loss": 0.2958, + "step": 13871 + }, + { + "epoch": 0.6278343516632723, + "grad_norm": 0.6289197414985261, + "learning_rate": 3.2139092311638932e-06, + "loss": 0.3296, + "step": 13872 + }, + { + "epoch": 0.6278796107716678, + "grad_norm": 0.5775930571137048, + "learning_rate": 3.2132246867473892e-06, + "loss": 0.3154, + "step": 13873 + }, + { + "epoch": 0.6279248698800634, + "grad_norm": 0.6611953191837328, + "learning_rate": 3.2125401807232008e-06, + "loss": 0.3109, + "step": 13874 + }, + { + "epoch": 0.6279701289884589, + "grad_norm": 0.6291090272339493, + "learning_rate": 3.2118557131060323e-06, + "loss": 0.3177, + "step": 13875 + }, + { + "epoch": 0.6280153880968545, + "grad_norm": 0.6368641840116436, + "learning_rate": 3.211171283910593e-06, + "loss": 0.3307, + "step": 13876 + }, + { + "epoch": 0.62806064720525, + "grad_norm": 0.2998450724408988, + "learning_rate": 3.21048689315159e-06, + "loss": 0.4719, + "step": 13877 + }, + { + "epoch": 0.6281059063136456, + "grad_norm": 0.3263249370296473, + "learning_rate": 3.209802540843727e-06, + "loss": 0.4801, + "step": 13878 + }, + { + "epoch": 0.6281511654220412, + "grad_norm": 0.6287527403460311, + "learning_rate": 3.2091182270017073e-06, + "loss": 0.3182, + "step": 13879 + }, + { + "epoch": 0.6281964245304368, + "grad_norm": 0.6882364507965004, + "learning_rate": 3.208433951640241e-06, + "loss": 0.2956, + "step": 13880 + }, + { + "epoch": 0.6282416836388323, + "grad_norm": 0.6499708052175333, + "learning_rate": 3.207749714774023e-06, + "loss": 0.2979, + "step": 13881 + }, + { + "epoch": 0.6282869427472279, + "grad_norm": 0.27344950178467453, + "learning_rate": 3.20706551641776e-06, + "loss": 0.4612, + "step": 13882 + }, + { + "epoch": 0.6283322018556234, + "grad_norm": 0.6161048425175149, + "learning_rate": 3.206381356586151e-06, + "loss": 0.3316, + "step": 13883 + }, + { + "epoch": 0.628377460964019, + "grad_norm": 0.5950934825653854, + "learning_rate": 3.205697235293902e-06, + "loss": 0.3029, + "step": 13884 + }, + { + "epoch": 0.6284227200724146, + "grad_norm": 0.5932586492807993, + "learning_rate": 3.205013152555705e-06, + "loss": 0.3429, + "step": 13885 + }, + { + "epoch": 0.6284679791808101, + "grad_norm": 0.666508282891372, + "learning_rate": 3.2043291083862636e-06, + "loss": 0.311, + "step": 13886 + }, + { + "epoch": 0.6285132382892057, + "grad_norm": 0.5829000266122752, + "learning_rate": 3.203645102800276e-06, + "loss": 0.2759, + "step": 13887 + }, + { + "epoch": 0.6285584973976013, + "grad_norm": 0.6251673579480885, + "learning_rate": 3.202961135812437e-06, + "loss": 0.3163, + "step": 13888 + }, + { + "epoch": 0.6286037565059969, + "grad_norm": 0.6107779886231521, + "learning_rate": 3.2022772074374424e-06, + "loss": 0.3294, + "step": 13889 + }, + { + "epoch": 0.6286490156143923, + "grad_norm": 0.6090162862844448, + "learning_rate": 3.2015933176899915e-06, + "loss": 0.3132, + "step": 13890 + }, + { + "epoch": 0.6286942747227879, + "grad_norm": 0.6906544995735248, + "learning_rate": 3.2009094665847763e-06, + "loss": 0.3301, + "step": 13891 + }, + { + "epoch": 0.6287395338311835, + "grad_norm": 0.6114037897338339, + "learning_rate": 3.200225654136491e-06, + "loss": 0.3585, + "step": 13892 + }, + { + "epoch": 0.6287847929395791, + "grad_norm": 0.6393766558008107, + "learning_rate": 3.19954188035983e-06, + "loss": 0.3176, + "step": 13893 + }, + { + "epoch": 0.6288300520479747, + "grad_norm": 0.7565660542118592, + "learning_rate": 3.1988581452694815e-06, + "loss": 0.3214, + "step": 13894 + }, + { + "epoch": 0.6288753111563702, + "grad_norm": 0.624572649998261, + "learning_rate": 3.1981744488801416e-06, + "loss": 0.3344, + "step": 13895 + }, + { + "epoch": 0.6289205702647658, + "grad_norm": 0.33261469621894185, + "learning_rate": 3.1974907912064986e-06, + "loss": 0.4628, + "step": 13896 + }, + { + "epoch": 0.6289658293731614, + "grad_norm": 0.7448596327261254, + "learning_rate": 3.1968071722632432e-06, + "loss": 0.3199, + "step": 13897 + }, + { + "epoch": 0.629011088481557, + "grad_norm": 0.5851809149186197, + "learning_rate": 3.196123592065063e-06, + "loss": 0.3175, + "step": 13898 + }, + { + "epoch": 0.6290563475899524, + "grad_norm": 0.6151664808855503, + "learning_rate": 3.1954400506266453e-06, + "loss": 0.3231, + "step": 13899 + }, + { + "epoch": 0.629101606698348, + "grad_norm": 0.61384899017604, + "learning_rate": 3.194756547962681e-06, + "loss": 0.3243, + "step": 13900 + }, + { + "epoch": 0.6291468658067436, + "grad_norm": 0.6155705262342234, + "learning_rate": 3.1940730840878532e-06, + "loss": 0.2846, + "step": 13901 + }, + { + "epoch": 0.6291921249151392, + "grad_norm": 0.6030867830465274, + "learning_rate": 3.193389659016848e-06, + "loss": 0.323, + "step": 13902 + }, + { + "epoch": 0.6292373840235347, + "grad_norm": 0.5851968191163726, + "learning_rate": 3.192706272764351e-06, + "loss": 0.2796, + "step": 13903 + }, + { + "epoch": 0.6292826431319303, + "grad_norm": 0.6389463259250212, + "learning_rate": 3.192022925345044e-06, + "loss": 0.33, + "step": 13904 + }, + { + "epoch": 0.6293279022403259, + "grad_norm": 0.6699153846748191, + "learning_rate": 3.191339616773612e-06, + "loss": 0.3088, + "step": 13905 + }, + { + "epoch": 0.6293731613487215, + "grad_norm": 0.6763927720203098, + "learning_rate": 3.190656347064739e-06, + "loss": 0.3375, + "step": 13906 + }, + { + "epoch": 0.629418420457117, + "grad_norm": 0.6410174157777444, + "learning_rate": 3.189973116233103e-06, + "loss": 0.3117, + "step": 13907 + }, + { + "epoch": 0.6294636795655125, + "grad_norm": 0.613551811374985, + "learning_rate": 3.1892899242933834e-06, + "loss": 0.3496, + "step": 13908 + }, + { + "epoch": 0.6295089386739081, + "grad_norm": 0.606707201595439, + "learning_rate": 3.1886067712602656e-06, + "loss": 0.3513, + "step": 13909 + }, + { + "epoch": 0.6295541977823037, + "grad_norm": 0.6494404568011635, + "learning_rate": 3.1879236571484224e-06, + "loss": 0.3551, + "step": 13910 + }, + { + "epoch": 0.6295994568906993, + "grad_norm": 0.5912183003606637, + "learning_rate": 3.1872405819725356e-06, + "loss": 0.2697, + "step": 13911 + }, + { + "epoch": 0.6296447159990948, + "grad_norm": 0.6521680921138964, + "learning_rate": 3.1865575457472797e-06, + "loss": 0.2802, + "step": 13912 + }, + { + "epoch": 0.6296899751074904, + "grad_norm": 0.5875091213842563, + "learning_rate": 3.1858745484873356e-06, + "loss": 0.2909, + "step": 13913 + }, + { + "epoch": 0.629735234215886, + "grad_norm": 0.6159229710750475, + "learning_rate": 3.1851915902073734e-06, + "loss": 0.3577, + "step": 13914 + }, + { + "epoch": 0.6297804933242815, + "grad_norm": 0.6312091121112716, + "learning_rate": 3.184508670922071e-06, + "loss": 0.3101, + "step": 13915 + }, + { + "epoch": 0.629825752432677, + "grad_norm": 0.6328397534853961, + "learning_rate": 3.1838257906461016e-06, + "loss": 0.3303, + "step": 13916 + }, + { + "epoch": 0.6298710115410726, + "grad_norm": 0.6220419513305495, + "learning_rate": 3.183142949394138e-06, + "loss": 0.2836, + "step": 13917 + }, + { + "epoch": 0.6299162706494682, + "grad_norm": 0.6483485814812089, + "learning_rate": 3.1824601471808504e-06, + "loss": 0.3122, + "step": 13918 + }, + { + "epoch": 0.6299615297578638, + "grad_norm": 0.6153026674344495, + "learning_rate": 3.181777384020915e-06, + "loss": 0.3569, + "step": 13919 + }, + { + "epoch": 0.6300067888662594, + "grad_norm": 1.1590618909167922, + "learning_rate": 3.1810946599289983e-06, + "loss": 0.3436, + "step": 13920 + }, + { + "epoch": 0.6300520479746549, + "grad_norm": 0.6600727506975453, + "learning_rate": 3.1804119749197703e-06, + "loss": 0.3378, + "step": 13921 + }, + { + "epoch": 0.6300973070830505, + "grad_norm": 0.570883949381704, + "learning_rate": 3.179729329007902e-06, + "loss": 0.2901, + "step": 13922 + }, + { + "epoch": 0.630142566191446, + "grad_norm": 0.6916262353759627, + "learning_rate": 3.179046722208058e-06, + "loss": 0.3714, + "step": 13923 + }, + { + "epoch": 0.6301878252998416, + "grad_norm": 0.580028197703851, + "learning_rate": 3.1783641545349074e-06, + "loss": 0.3072, + "step": 13924 + }, + { + "epoch": 0.6302330844082371, + "grad_norm": 0.2973455929085098, + "learning_rate": 3.1776816260031172e-06, + "loss": 0.4392, + "step": 13925 + }, + { + "epoch": 0.6302783435166327, + "grad_norm": 0.6103130810803539, + "learning_rate": 3.1769991366273533e-06, + "loss": 0.2949, + "step": 13926 + }, + { + "epoch": 0.6303236026250283, + "grad_norm": 0.6174746681694337, + "learning_rate": 3.1763166864222766e-06, + "loss": 0.3511, + "step": 13927 + }, + { + "epoch": 0.6303688617334239, + "grad_norm": 0.6284170932260028, + "learning_rate": 3.175634275402555e-06, + "loss": 0.309, + "step": 13928 + }, + { + "epoch": 0.6304141208418195, + "grad_norm": 0.6609520162027985, + "learning_rate": 3.1749519035828495e-06, + "loss": 0.3246, + "step": 13929 + }, + { + "epoch": 0.630459379950215, + "grad_norm": 0.7255600667588301, + "learning_rate": 3.1742695709778222e-06, + "loss": 0.325, + "step": 13930 + }, + { + "epoch": 0.6305046390586105, + "grad_norm": 0.5971870426282323, + "learning_rate": 3.1735872776021344e-06, + "loss": 0.2919, + "step": 13931 + }, + { + "epoch": 0.6305498981670061, + "grad_norm": 0.6410748277342618, + "learning_rate": 3.1729050234704474e-06, + "loss": 0.3205, + "step": 13932 + }, + { + "epoch": 0.6305951572754017, + "grad_norm": 0.3077461694511978, + "learning_rate": 3.1722228085974183e-06, + "loss": 0.4587, + "step": 13933 + }, + { + "epoch": 0.6306404163837972, + "grad_norm": 0.2873055720141124, + "learning_rate": 3.1715406329977083e-06, + "loss": 0.4757, + "step": 13934 + }, + { + "epoch": 0.6306856754921928, + "grad_norm": 0.6291234455503854, + "learning_rate": 3.1708584966859745e-06, + "loss": 0.2686, + "step": 13935 + }, + { + "epoch": 0.6307309346005884, + "grad_norm": 0.2676697398113157, + "learning_rate": 3.1701763996768744e-06, + "loss": 0.4715, + "step": 13936 + }, + { + "epoch": 0.630776193708984, + "grad_norm": 0.2841129202382441, + "learning_rate": 3.1694943419850616e-06, + "loss": 0.4649, + "step": 13937 + }, + { + "epoch": 0.6308214528173794, + "grad_norm": 0.5972951884939902, + "learning_rate": 3.1688123236251967e-06, + "loss": 0.2485, + "step": 13938 + }, + { + "epoch": 0.630866711925775, + "grad_norm": 0.6976197308832803, + "learning_rate": 3.1681303446119277e-06, + "loss": 0.3125, + "step": 13939 + }, + { + "epoch": 0.6309119710341706, + "grad_norm": 0.6712376562482888, + "learning_rate": 3.167448404959913e-06, + "loss": 0.3349, + "step": 13940 + }, + { + "epoch": 0.6309572301425662, + "grad_norm": 0.581681353721388, + "learning_rate": 3.166766504683802e-06, + "loss": 0.3106, + "step": 13941 + }, + { + "epoch": 0.6310024892509618, + "grad_norm": 0.3447944047961648, + "learning_rate": 3.166084643798252e-06, + "loss": 0.494, + "step": 13942 + }, + { + "epoch": 0.6310477483593573, + "grad_norm": 0.6397469634467333, + "learning_rate": 3.165402822317908e-06, + "loss": 0.3649, + "step": 13943 + }, + { + "epoch": 0.6310930074677529, + "grad_norm": 1.0218459586067286, + "learning_rate": 3.1647210402574223e-06, + "loss": 0.2909, + "step": 13944 + }, + { + "epoch": 0.6311382665761485, + "grad_norm": 0.5746655024858726, + "learning_rate": 3.1640392976314472e-06, + "loss": 0.3024, + "step": 13945 + }, + { + "epoch": 0.6311835256845441, + "grad_norm": 0.6052059717797788, + "learning_rate": 3.1633575944546273e-06, + "loss": 0.2883, + "step": 13946 + }, + { + "epoch": 0.6312287847929395, + "grad_norm": 0.6117422105639195, + "learning_rate": 3.162675930741611e-06, + "loss": 0.302, + "step": 13947 + }, + { + "epoch": 0.6312740439013351, + "grad_norm": 0.2782623106961924, + "learning_rate": 3.161994306507048e-06, + "loss": 0.457, + "step": 13948 + }, + { + "epoch": 0.6313193030097307, + "grad_norm": 0.6013180195685133, + "learning_rate": 3.1613127217655814e-06, + "loss": 0.309, + "step": 13949 + }, + { + "epoch": 0.6313645621181263, + "grad_norm": 0.6245021539144215, + "learning_rate": 3.160631176531858e-06, + "loss": 0.2948, + "step": 13950 + }, + { + "epoch": 0.6314098212265218, + "grad_norm": 0.6155792584363897, + "learning_rate": 3.1599496708205212e-06, + "loss": 0.2789, + "step": 13951 + }, + { + "epoch": 0.6314550803349174, + "grad_norm": 0.5400955697561185, + "learning_rate": 3.159268204646213e-06, + "loss": 0.4629, + "step": 13952 + }, + { + "epoch": 0.631500339443313, + "grad_norm": 0.6008466199890503, + "learning_rate": 3.158586778023579e-06, + "loss": 0.2852, + "step": 13953 + }, + { + "epoch": 0.6315455985517086, + "grad_norm": 0.6247523900052501, + "learning_rate": 3.1579053909672597e-06, + "loss": 0.2784, + "step": 13954 + }, + { + "epoch": 0.6315908576601041, + "grad_norm": 0.624934184912694, + "learning_rate": 3.1572240434918975e-06, + "loss": 0.3079, + "step": 13955 + }, + { + "epoch": 0.6316361167684996, + "grad_norm": 0.6361115372253076, + "learning_rate": 3.156542735612128e-06, + "loss": 0.3273, + "step": 13956 + }, + { + "epoch": 0.6316813758768952, + "grad_norm": 0.2770654450453919, + "learning_rate": 3.1558614673425946e-06, + "loss": 0.4658, + "step": 13957 + }, + { + "epoch": 0.6317266349852908, + "grad_norm": 0.26875663521487186, + "learning_rate": 3.1551802386979356e-06, + "loss": 0.4614, + "step": 13958 + }, + { + "epoch": 0.6317718940936864, + "grad_norm": 0.8911871688377059, + "learning_rate": 3.1544990496927864e-06, + "loss": 0.2884, + "step": 13959 + }, + { + "epoch": 0.6318171532020819, + "grad_norm": 0.6482525152061249, + "learning_rate": 3.1538179003417836e-06, + "loss": 0.3352, + "step": 13960 + }, + { + "epoch": 0.6318624123104775, + "grad_norm": 0.6406084975429104, + "learning_rate": 3.1531367906595665e-06, + "loss": 0.3274, + "step": 13961 + }, + { + "epoch": 0.631907671418873, + "grad_norm": 0.6568044044364002, + "learning_rate": 3.1524557206607655e-06, + "loss": 0.3397, + "step": 13962 + }, + { + "epoch": 0.6319529305272686, + "grad_norm": 0.6302651681053412, + "learning_rate": 3.1517746903600173e-06, + "loss": 0.3109, + "step": 13963 + }, + { + "epoch": 0.6319981896356642, + "grad_norm": 0.27192785144332376, + "learning_rate": 3.1510936997719557e-06, + "loss": 0.4441, + "step": 13964 + }, + { + "epoch": 0.6320434487440597, + "grad_norm": 0.7875442893777145, + "learning_rate": 3.1504127489112105e-06, + "loss": 0.3169, + "step": 13965 + }, + { + "epoch": 0.6320887078524553, + "grad_norm": 0.2886598711250437, + "learning_rate": 3.149731837792414e-06, + "loss": 0.4735, + "step": 13966 + }, + { + "epoch": 0.6321339669608509, + "grad_norm": 0.5776455071697102, + "learning_rate": 3.149050966430199e-06, + "loss": 0.3278, + "step": 13967 + }, + { + "epoch": 0.6321792260692465, + "grad_norm": 0.6388254430484523, + "learning_rate": 3.148370134839195e-06, + "loss": 0.3332, + "step": 13968 + }, + { + "epoch": 0.632224485177642, + "grad_norm": 0.6266131600997259, + "learning_rate": 3.1476893430340282e-06, + "loss": 0.326, + "step": 13969 + }, + { + "epoch": 0.6322697442860375, + "grad_norm": 0.6778133379330554, + "learning_rate": 3.147008591029328e-06, + "loss": 0.3019, + "step": 13970 + }, + { + "epoch": 0.6323150033944331, + "grad_norm": 0.5863638792175061, + "learning_rate": 3.1463278788397256e-06, + "loss": 0.3309, + "step": 13971 + }, + { + "epoch": 0.6323602625028287, + "grad_norm": 0.6172203148817229, + "learning_rate": 3.1456472064798403e-06, + "loss": 0.2714, + "step": 13972 + }, + { + "epoch": 0.6324055216112242, + "grad_norm": 0.2877354389046555, + "learning_rate": 3.144966573964302e-06, + "loss": 0.472, + "step": 13973 + }, + { + "epoch": 0.6324507807196198, + "grad_norm": 0.26938771291979924, + "learning_rate": 3.1442859813077364e-06, + "loss": 0.4476, + "step": 13974 + }, + { + "epoch": 0.6324960398280154, + "grad_norm": 0.6809283680606592, + "learning_rate": 3.1436054285247645e-06, + "loss": 0.3145, + "step": 13975 + }, + { + "epoch": 0.632541298936411, + "grad_norm": 0.6095048093758612, + "learning_rate": 3.1429249156300094e-06, + "loss": 0.3005, + "step": 13976 + }, + { + "epoch": 0.6325865580448066, + "grad_norm": 0.5992126505470327, + "learning_rate": 3.1422444426380964e-06, + "loss": 0.3093, + "step": 13977 + }, + { + "epoch": 0.632631817153202, + "grad_norm": 0.5853842538818939, + "learning_rate": 3.1415640095636436e-06, + "loss": 0.2985, + "step": 13978 + }, + { + "epoch": 0.6326770762615976, + "grad_norm": 0.5870067931295567, + "learning_rate": 3.1408836164212724e-06, + "loss": 0.3389, + "step": 13979 + }, + { + "epoch": 0.6327223353699932, + "grad_norm": 0.588258900006323, + "learning_rate": 3.140203263225604e-06, + "loss": 0.2815, + "step": 13980 + }, + { + "epoch": 0.6327675944783888, + "grad_norm": 0.644122902292735, + "learning_rate": 3.139522949991253e-06, + "loss": 0.2697, + "step": 13981 + }, + { + "epoch": 0.6328128535867843, + "grad_norm": 0.7828996185510263, + "learning_rate": 3.1388426767328408e-06, + "loss": 0.26, + "step": 13982 + }, + { + "epoch": 0.6328581126951799, + "grad_norm": 0.6284030766321109, + "learning_rate": 3.138162443464983e-06, + "loss": 0.3584, + "step": 13983 + }, + { + "epoch": 0.6329033718035755, + "grad_norm": 0.6178985167885237, + "learning_rate": 3.137482250202298e-06, + "loss": 0.3107, + "step": 13984 + }, + { + "epoch": 0.6329486309119711, + "grad_norm": 0.6510107780176263, + "learning_rate": 3.1368020969593967e-06, + "loss": 0.3128, + "step": 13985 + }, + { + "epoch": 0.6329938900203665, + "grad_norm": 0.7311425717620409, + "learning_rate": 3.136121983750897e-06, + "loss": 0.3402, + "step": 13986 + }, + { + "epoch": 0.6330391491287621, + "grad_norm": 0.6064276109335814, + "learning_rate": 3.1354419105914127e-06, + "loss": 0.3011, + "step": 13987 + }, + { + "epoch": 0.6330844082371577, + "grad_norm": 0.6193044912933445, + "learning_rate": 3.1347618774955534e-06, + "loss": 0.3333, + "step": 13988 + }, + { + "epoch": 0.6331296673455533, + "grad_norm": 0.6514901654955743, + "learning_rate": 3.134081884477932e-06, + "loss": 0.304, + "step": 13989 + }, + { + "epoch": 0.6331749264539489, + "grad_norm": 0.7694051174167692, + "learning_rate": 3.133401931553163e-06, + "loss": 0.3437, + "step": 13990 + }, + { + "epoch": 0.6332201855623444, + "grad_norm": 0.6834306358109133, + "learning_rate": 3.1327220187358515e-06, + "loss": 0.2673, + "step": 13991 + }, + { + "epoch": 0.63326544467074, + "grad_norm": 0.6285507208508668, + "learning_rate": 3.1320421460406093e-06, + "loss": 0.3151, + "step": 13992 + }, + { + "epoch": 0.6333107037791356, + "grad_norm": 0.29874642143798624, + "learning_rate": 3.1313623134820454e-06, + "loss": 0.4871, + "step": 13993 + }, + { + "epoch": 0.6333559628875312, + "grad_norm": 0.6455014552179699, + "learning_rate": 3.1306825210747654e-06, + "loss": 0.3333, + "step": 13994 + }, + { + "epoch": 0.6334012219959266, + "grad_norm": 0.6898874640279824, + "learning_rate": 3.130002768833376e-06, + "loss": 0.3577, + "step": 13995 + }, + { + "epoch": 0.6334464811043222, + "grad_norm": 0.6025726694594395, + "learning_rate": 3.1293230567724843e-06, + "loss": 0.3018, + "step": 13996 + }, + { + "epoch": 0.6334917402127178, + "grad_norm": 0.6278449707753837, + "learning_rate": 3.1286433849066965e-06, + "loss": 0.3003, + "step": 13997 + }, + { + "epoch": 0.6335369993211134, + "grad_norm": 0.6014718230979809, + "learning_rate": 3.1279637532506134e-06, + "loss": 0.2732, + "step": 13998 + }, + { + "epoch": 0.633582258429509, + "grad_norm": 0.2878032134378391, + "learning_rate": 3.1272841618188388e-06, + "loss": 0.478, + "step": 13999 + }, + { + "epoch": 0.6336275175379045, + "grad_norm": 0.38175351117616113, + "learning_rate": 3.1266046106259784e-06, + "loss": 0.4708, + "step": 14000 + }, + { + "epoch": 0.6336727766463001, + "grad_norm": 0.6449141988543913, + "learning_rate": 3.1259250996866296e-06, + "loss": 0.3235, + "step": 14001 + }, + { + "epoch": 0.6337180357546957, + "grad_norm": 0.7625685410368898, + "learning_rate": 3.1252456290153952e-06, + "loss": 0.2579, + "step": 14002 + }, + { + "epoch": 0.6337632948630912, + "grad_norm": 0.6300075430673482, + "learning_rate": 3.124566198626875e-06, + "loss": 0.2912, + "step": 14003 + }, + { + "epoch": 0.6338085539714867, + "grad_norm": 0.26417259947224064, + "learning_rate": 3.1238868085356656e-06, + "loss": 0.4758, + "step": 14004 + }, + { + "epoch": 0.6338538130798823, + "grad_norm": 0.5877373818985486, + "learning_rate": 3.1232074587563667e-06, + "loss": 0.3039, + "step": 14005 + }, + { + "epoch": 0.6338990721882779, + "grad_norm": 0.6570464396043647, + "learning_rate": 3.1225281493035776e-06, + "loss": 0.3232, + "step": 14006 + }, + { + "epoch": 0.6339443312966735, + "grad_norm": 0.6492915172805375, + "learning_rate": 3.12184888019189e-06, + "loss": 0.2726, + "step": 14007 + }, + { + "epoch": 0.633989590405069, + "grad_norm": 0.6045436003490696, + "learning_rate": 3.121169651435903e-06, + "loss": 0.319, + "step": 14008 + }, + { + "epoch": 0.6340348495134646, + "grad_norm": 0.6003389466902463, + "learning_rate": 3.12049046305021e-06, + "loss": 0.3086, + "step": 14009 + }, + { + "epoch": 0.6340801086218602, + "grad_norm": 0.6248635843347558, + "learning_rate": 3.1198113150494026e-06, + "loss": 0.3019, + "step": 14010 + }, + { + "epoch": 0.6341253677302557, + "grad_norm": 0.6040295162437355, + "learning_rate": 3.1191322074480766e-06, + "loss": 0.278, + "step": 14011 + }, + { + "epoch": 0.6341706268386513, + "grad_norm": 0.5699431480642985, + "learning_rate": 3.118453140260823e-06, + "loss": 0.2921, + "step": 14012 + }, + { + "epoch": 0.6342158859470468, + "grad_norm": 0.6460627176672942, + "learning_rate": 3.1177741135022334e-06, + "loss": 0.3305, + "step": 14013 + }, + { + "epoch": 0.6342611450554424, + "grad_norm": 0.6028203619114058, + "learning_rate": 3.1170951271868953e-06, + "loss": 0.318, + "step": 14014 + }, + { + "epoch": 0.634306404163838, + "grad_norm": 0.6705489517107884, + "learning_rate": 3.1164161813294014e-06, + "loss": 0.3098, + "step": 14015 + }, + { + "epoch": 0.6343516632722336, + "grad_norm": 0.6932377918562581, + "learning_rate": 3.1157372759443396e-06, + "loss": 0.2868, + "step": 14016 + }, + { + "epoch": 0.6343969223806291, + "grad_norm": 0.5981430645831732, + "learning_rate": 3.1150584110462955e-06, + "loss": 0.2307, + "step": 14017 + }, + { + "epoch": 0.6344421814890246, + "grad_norm": 0.6158121063213936, + "learning_rate": 3.114379586649856e-06, + "loss": 0.3063, + "step": 14018 + }, + { + "epoch": 0.6344874405974202, + "grad_norm": 0.6415675114557254, + "learning_rate": 3.1137008027696113e-06, + "loss": 0.3035, + "step": 14019 + }, + { + "epoch": 0.6345326997058158, + "grad_norm": 0.6342444158352719, + "learning_rate": 3.1130220594201395e-06, + "loss": 0.2814, + "step": 14020 + }, + { + "epoch": 0.6345779588142113, + "grad_norm": 0.6398484780157805, + "learning_rate": 3.1123433566160293e-06, + "loss": 0.368, + "step": 14021 + }, + { + "epoch": 0.6346232179226069, + "grad_norm": 0.6312930733263915, + "learning_rate": 3.1116646943718642e-06, + "loss": 0.351, + "step": 14022 + }, + { + "epoch": 0.6346684770310025, + "grad_norm": 0.606712973490306, + "learning_rate": 3.110986072702224e-06, + "loss": 0.2892, + "step": 14023 + }, + { + "epoch": 0.6347137361393981, + "grad_norm": 0.6732129537847016, + "learning_rate": 3.1103074916216903e-06, + "loss": 0.3021, + "step": 14024 + }, + { + "epoch": 0.6347589952477937, + "grad_norm": 0.5789095832572656, + "learning_rate": 3.1096289511448464e-06, + "loss": 0.32, + "step": 14025 + }, + { + "epoch": 0.6348042543561891, + "grad_norm": 0.5983393100348733, + "learning_rate": 3.108950451286271e-06, + "loss": 0.329, + "step": 14026 + }, + { + "epoch": 0.6348495134645847, + "grad_norm": 0.3569180718168922, + "learning_rate": 3.1082719920605413e-06, + "loss": 0.4729, + "step": 14027 + }, + { + "epoch": 0.6348947725729803, + "grad_norm": 0.6096092009852317, + "learning_rate": 3.107593573482236e-06, + "loss": 0.2957, + "step": 14028 + }, + { + "epoch": 0.6349400316813759, + "grad_norm": 0.5723108604017576, + "learning_rate": 3.106915195565935e-06, + "loss": 0.3194, + "step": 14029 + }, + { + "epoch": 0.6349852907897714, + "grad_norm": 0.29577567478017497, + "learning_rate": 3.1062368583262103e-06, + "loss": 0.4703, + "step": 14030 + }, + { + "epoch": 0.635030549898167, + "grad_norm": 0.6064904924551691, + "learning_rate": 3.1055585617776397e-06, + "loss": 0.3194, + "step": 14031 + }, + { + "epoch": 0.6350758090065626, + "grad_norm": 0.2820866429678342, + "learning_rate": 3.104880305934799e-06, + "loss": 0.4598, + "step": 14032 + }, + { + "epoch": 0.6351210681149582, + "grad_norm": 0.7375791329775107, + "learning_rate": 3.104202090812257e-06, + "loss": 0.2889, + "step": 14033 + }, + { + "epoch": 0.6351663272233538, + "grad_norm": 0.6573603364676751, + "learning_rate": 3.1035239164245913e-06, + "loss": 0.2739, + "step": 14034 + }, + { + "epoch": 0.6352115863317492, + "grad_norm": 0.6032982927863696, + "learning_rate": 3.1028457827863723e-06, + "loss": 0.2995, + "step": 14035 + }, + { + "epoch": 0.6352568454401448, + "grad_norm": 0.28255894706465395, + "learning_rate": 3.1021676899121703e-06, + "loss": 0.4676, + "step": 14036 + }, + { + "epoch": 0.6353021045485404, + "grad_norm": 0.6202280264903082, + "learning_rate": 3.101489637816555e-06, + "loss": 0.2935, + "step": 14037 + }, + { + "epoch": 0.635347363656936, + "grad_norm": 1.0338433813847048, + "learning_rate": 3.1008116265140974e-06, + "loss": 0.2959, + "step": 14038 + }, + { + "epoch": 0.6353926227653315, + "grad_norm": 0.6175401370396773, + "learning_rate": 3.100133656019366e-06, + "loss": 0.2817, + "step": 14039 + }, + { + "epoch": 0.6354378818737271, + "grad_norm": 0.28522691456631716, + "learning_rate": 3.0994557263469267e-06, + "loss": 0.4735, + "step": 14040 + }, + { + "epoch": 0.6354831409821227, + "grad_norm": 0.5962872012390195, + "learning_rate": 3.0987778375113464e-06, + "loss": 0.286, + "step": 14041 + }, + { + "epoch": 0.6355284000905183, + "grad_norm": 0.28706614162773536, + "learning_rate": 3.0980999895271923e-06, + "loss": 0.4837, + "step": 14042 + }, + { + "epoch": 0.6355736591989137, + "grad_norm": 0.6489585933566904, + "learning_rate": 3.0974221824090263e-06, + "loss": 0.3296, + "step": 14043 + }, + { + "epoch": 0.6356189183073093, + "grad_norm": 0.5938771744151462, + "learning_rate": 3.096744416171415e-06, + "loss": 0.31, + "step": 14044 + }, + { + "epoch": 0.6356641774157049, + "grad_norm": 0.295895117425673, + "learning_rate": 3.0960666908289217e-06, + "loss": 0.4782, + "step": 14045 + }, + { + "epoch": 0.6357094365241005, + "grad_norm": 0.7077041445755813, + "learning_rate": 3.095389006396107e-06, + "loss": 0.3508, + "step": 14046 + }, + { + "epoch": 0.6357546956324961, + "grad_norm": 0.6472413942804193, + "learning_rate": 3.0947113628875327e-06, + "loss": 0.3049, + "step": 14047 + }, + { + "epoch": 0.6357999547408916, + "grad_norm": 0.6015905003099128, + "learning_rate": 3.094033760317761e-06, + "loss": 0.2803, + "step": 14048 + }, + { + "epoch": 0.6358452138492872, + "grad_norm": 0.6162874944280102, + "learning_rate": 3.0933561987013484e-06, + "loss": 0.3106, + "step": 14049 + }, + { + "epoch": 0.6358904729576828, + "grad_norm": 0.5711546807757878, + "learning_rate": 3.092678678052855e-06, + "loss": 0.3219, + "step": 14050 + }, + { + "epoch": 0.6359357320660783, + "grad_norm": 0.6153270001605267, + "learning_rate": 3.0920011983868413e-06, + "loss": 0.3423, + "step": 14051 + }, + { + "epoch": 0.6359809911744738, + "grad_norm": 0.29725827368371055, + "learning_rate": 3.0913237597178603e-06, + "loss": 0.4676, + "step": 14052 + }, + { + "epoch": 0.6360262502828694, + "grad_norm": 0.5803809341027548, + "learning_rate": 3.0906463620604688e-06, + "loss": 0.3056, + "step": 14053 + }, + { + "epoch": 0.636071509391265, + "grad_norm": 0.5935044897307553, + "learning_rate": 3.089969005429223e-06, + "loss": 0.2788, + "step": 14054 + }, + { + "epoch": 0.6361167684996606, + "grad_norm": 0.7871930290907165, + "learning_rate": 3.089291689838679e-06, + "loss": 0.3317, + "step": 14055 + }, + { + "epoch": 0.6361620276080561, + "grad_norm": 0.5927085282311848, + "learning_rate": 3.088614415303387e-06, + "loss": 0.2705, + "step": 14056 + }, + { + "epoch": 0.6362072867164517, + "grad_norm": 0.5601225069918039, + "learning_rate": 3.0879371818379e-06, + "loss": 0.2861, + "step": 14057 + }, + { + "epoch": 0.6362525458248472, + "grad_norm": 0.6310018963259992, + "learning_rate": 3.0872599894567723e-06, + "loss": 0.2878, + "step": 14058 + }, + { + "epoch": 0.6362978049332428, + "grad_norm": 0.6657166888741248, + "learning_rate": 3.0865828381745515e-06, + "loss": 0.3544, + "step": 14059 + }, + { + "epoch": 0.6363430640416384, + "grad_norm": 0.6768852705295367, + "learning_rate": 3.08590572800579e-06, + "loss": 0.3219, + "step": 14060 + }, + { + "epoch": 0.6363883231500339, + "grad_norm": 0.30506889420259875, + "learning_rate": 3.085228658965036e-06, + "loss": 0.4713, + "step": 14061 + }, + { + "epoch": 0.6364335822584295, + "grad_norm": 0.5846094679846036, + "learning_rate": 3.0845516310668348e-06, + "loss": 0.3135, + "step": 14062 + }, + { + "epoch": 0.6364788413668251, + "grad_norm": 0.5972984714076979, + "learning_rate": 3.0838746443257385e-06, + "loss": 0.3101, + "step": 14063 + }, + { + "epoch": 0.6365241004752207, + "grad_norm": 0.6280799877614217, + "learning_rate": 3.0831976987562906e-06, + "loss": 0.3254, + "step": 14064 + }, + { + "epoch": 0.6365693595836162, + "grad_norm": 0.7375942101160714, + "learning_rate": 3.0825207943730375e-06, + "loss": 0.2823, + "step": 14065 + }, + { + "epoch": 0.6366146186920117, + "grad_norm": 0.6223337925347012, + "learning_rate": 3.081843931190522e-06, + "loss": 0.2988, + "step": 14066 + }, + { + "epoch": 0.6366598778004073, + "grad_norm": 0.5629945588989438, + "learning_rate": 3.0811671092232896e-06, + "loss": 0.2809, + "step": 14067 + }, + { + "epoch": 0.6367051369088029, + "grad_norm": 0.5995456341261228, + "learning_rate": 3.0804903284858844e-06, + "loss": 0.3316, + "step": 14068 + }, + { + "epoch": 0.6367503960171985, + "grad_norm": 0.6028071031391948, + "learning_rate": 3.079813588992846e-06, + "loss": 0.3203, + "step": 14069 + }, + { + "epoch": 0.636795655125594, + "grad_norm": 0.6773538683823238, + "learning_rate": 3.079136890758715e-06, + "loss": 0.3213, + "step": 14070 + }, + { + "epoch": 0.6368409142339896, + "grad_norm": 0.30538410810758704, + "learning_rate": 3.078460233798036e-06, + "loss": 0.4667, + "step": 14071 + }, + { + "epoch": 0.6368861733423852, + "grad_norm": 0.5734688367277772, + "learning_rate": 3.077783618125341e-06, + "loss": 0.306, + "step": 14072 + }, + { + "epoch": 0.6369314324507808, + "grad_norm": 0.647864019317746, + "learning_rate": 3.0771070437551743e-06, + "loss": 0.2836, + "step": 14073 + }, + { + "epoch": 0.6369766915591762, + "grad_norm": 0.6234597688592606, + "learning_rate": 3.076430510702072e-06, + "loss": 0.3498, + "step": 14074 + }, + { + "epoch": 0.6370219506675718, + "grad_norm": 0.5875273985255427, + "learning_rate": 3.0757540189805695e-06, + "loss": 0.2771, + "step": 14075 + }, + { + "epoch": 0.6370672097759674, + "grad_norm": 0.5434031054147186, + "learning_rate": 3.0750775686052024e-06, + "loss": 0.2913, + "step": 14076 + }, + { + "epoch": 0.637112468884363, + "grad_norm": 0.6519079447295152, + "learning_rate": 3.0744011595905084e-06, + "loss": 0.3241, + "step": 14077 + }, + { + "epoch": 0.6371577279927585, + "grad_norm": 0.6046272817777023, + "learning_rate": 3.0737247919510182e-06, + "loss": 0.316, + "step": 14078 + }, + { + "epoch": 0.6372029871011541, + "grad_norm": 0.5697051594331598, + "learning_rate": 3.073048465701266e-06, + "loss": 0.2697, + "step": 14079 + }, + { + "epoch": 0.6372482462095497, + "grad_norm": 0.6877517308747793, + "learning_rate": 3.0723721808557857e-06, + "loss": 0.3651, + "step": 14080 + }, + { + "epoch": 0.6372935053179453, + "grad_norm": 0.7527290715522306, + "learning_rate": 3.0716959374291053e-06, + "loss": 0.3316, + "step": 14081 + }, + { + "epoch": 0.6373387644263409, + "grad_norm": 0.6233015972507545, + "learning_rate": 3.071019735435756e-06, + "loss": 0.3241, + "step": 14082 + }, + { + "epoch": 0.6373840235347363, + "grad_norm": 0.6072325059612153, + "learning_rate": 3.0703435748902693e-06, + "loss": 0.3037, + "step": 14083 + }, + { + "epoch": 0.6374292826431319, + "grad_norm": 0.6523962092443791, + "learning_rate": 3.069667455807174e-06, + "loss": 0.3355, + "step": 14084 + }, + { + "epoch": 0.6374745417515275, + "grad_norm": 0.6225076419749288, + "learning_rate": 3.068991378200995e-06, + "loss": 0.2885, + "step": 14085 + }, + { + "epoch": 0.6375198008599231, + "grad_norm": 0.6509686915667399, + "learning_rate": 3.06831534208626e-06, + "loss": 0.3127, + "step": 14086 + }, + { + "epoch": 0.6375650599683186, + "grad_norm": 0.5662595934973575, + "learning_rate": 3.0676393474774972e-06, + "loss": 0.2725, + "step": 14087 + }, + { + "epoch": 0.6376103190767142, + "grad_norm": 0.29967031884905504, + "learning_rate": 3.0669633943892294e-06, + "loss": 0.4708, + "step": 14088 + }, + { + "epoch": 0.6376555781851098, + "grad_norm": 0.5931619888481149, + "learning_rate": 3.066287482835981e-06, + "loss": 0.313, + "step": 14089 + }, + { + "epoch": 0.6377008372935054, + "grad_norm": 0.6794717002108048, + "learning_rate": 3.0656116128322773e-06, + "loss": 0.3233, + "step": 14090 + }, + { + "epoch": 0.6377460964019008, + "grad_norm": 0.6069205047274574, + "learning_rate": 3.0649357843926365e-06, + "loss": 0.2784, + "step": 14091 + }, + { + "epoch": 0.6377913555102964, + "grad_norm": 0.5762227454928642, + "learning_rate": 3.0642599975315836e-06, + "loss": 0.2856, + "step": 14092 + }, + { + "epoch": 0.637836614618692, + "grad_norm": 0.8781504213385238, + "learning_rate": 3.0635842522636392e-06, + "loss": 0.2966, + "step": 14093 + }, + { + "epoch": 0.6378818737270876, + "grad_norm": 0.5917374303485107, + "learning_rate": 3.0629085486033217e-06, + "loss": 0.3098, + "step": 14094 + }, + { + "epoch": 0.6379271328354832, + "grad_norm": 0.6064820929771447, + "learning_rate": 3.0622328865651486e-06, + "loss": 0.2716, + "step": 14095 + }, + { + "epoch": 0.6379723919438787, + "grad_norm": 0.621100849610007, + "learning_rate": 3.06155726616364e-06, + "loss": 0.3389, + "step": 14096 + }, + { + "epoch": 0.6380176510522743, + "grad_norm": 0.6248818555128179, + "learning_rate": 3.0608816874133135e-06, + "loss": 0.2949, + "step": 14097 + }, + { + "epoch": 0.6380629101606698, + "grad_norm": 0.6350564177824697, + "learning_rate": 3.0602061503286827e-06, + "loss": 0.2765, + "step": 14098 + }, + { + "epoch": 0.6381081692690654, + "grad_norm": 0.6025741385211456, + "learning_rate": 3.0595306549242643e-06, + "loss": 0.2896, + "step": 14099 + }, + { + "epoch": 0.6381534283774609, + "grad_norm": 0.7198585599741678, + "learning_rate": 3.0588552012145743e-06, + "loss": 0.359, + "step": 14100 + }, + { + "epoch": 0.6381986874858565, + "grad_norm": 0.2987848378295094, + "learning_rate": 3.058179789214122e-06, + "loss": 0.4593, + "step": 14101 + }, + { + "epoch": 0.6382439465942521, + "grad_norm": 0.610061918764689, + "learning_rate": 3.0575044189374225e-06, + "loss": 0.3294, + "step": 14102 + }, + { + "epoch": 0.6382892057026477, + "grad_norm": 0.5692385277557372, + "learning_rate": 3.0568290903989885e-06, + "loss": 0.3345, + "step": 14103 + }, + { + "epoch": 0.6383344648110432, + "grad_norm": 0.6333484754121524, + "learning_rate": 3.0561538036133275e-06, + "loss": 0.2944, + "step": 14104 + }, + { + "epoch": 0.6383797239194388, + "grad_norm": 0.5669055540397957, + "learning_rate": 3.0554785585949514e-06, + "loss": 0.2976, + "step": 14105 + }, + { + "epoch": 0.6384249830278343, + "grad_norm": 0.6554879825600077, + "learning_rate": 3.0548033553583707e-06, + "loss": 0.312, + "step": 14106 + }, + { + "epoch": 0.6384702421362299, + "grad_norm": 0.5691376303708063, + "learning_rate": 3.05412819391809e-06, + "loss": 0.2654, + "step": 14107 + }, + { + "epoch": 0.6385155012446255, + "grad_norm": 0.28976649932335347, + "learning_rate": 3.0534530742886187e-06, + "loss": 0.4849, + "step": 14108 + }, + { + "epoch": 0.638560760353021, + "grad_norm": 0.6500680482864272, + "learning_rate": 3.052777996484462e-06, + "loss": 0.3153, + "step": 14109 + }, + { + "epoch": 0.6386060194614166, + "grad_norm": 0.6022213605742276, + "learning_rate": 3.052102960520126e-06, + "loss": 0.313, + "step": 14110 + }, + { + "epoch": 0.6386512785698122, + "grad_norm": 0.26796570792864116, + "learning_rate": 3.0514279664101153e-06, + "loss": 0.4745, + "step": 14111 + }, + { + "epoch": 0.6386965376782078, + "grad_norm": 0.5572222325190271, + "learning_rate": 3.0507530141689324e-06, + "loss": 0.2933, + "step": 14112 + }, + { + "epoch": 0.6387417967866033, + "grad_norm": 0.6644756722110994, + "learning_rate": 3.050078103811082e-06, + "loss": 0.3544, + "step": 14113 + }, + { + "epoch": 0.6387870558949988, + "grad_norm": 0.5716566187790919, + "learning_rate": 3.0494032353510634e-06, + "loss": 0.3171, + "step": 14114 + }, + { + "epoch": 0.6388323150033944, + "grad_norm": 0.6707308766695779, + "learning_rate": 3.0487284088033776e-06, + "loss": 0.3356, + "step": 14115 + }, + { + "epoch": 0.63887757411179, + "grad_norm": 0.27757105153482103, + "learning_rate": 3.0480536241825263e-06, + "loss": 0.4822, + "step": 14116 + }, + { + "epoch": 0.6389228332201856, + "grad_norm": 0.7721032612446138, + "learning_rate": 3.047378881503008e-06, + "loss": 0.3391, + "step": 14117 + }, + { + "epoch": 0.6389680923285811, + "grad_norm": 0.2996677332075844, + "learning_rate": 3.0467041807793198e-06, + "loss": 0.4815, + "step": 14118 + }, + { + "epoch": 0.6390133514369767, + "grad_norm": 0.633219965755037, + "learning_rate": 3.046029522025961e-06, + "loss": 0.3202, + "step": 14119 + }, + { + "epoch": 0.6390586105453723, + "grad_norm": 0.7161682752114296, + "learning_rate": 3.045354905257425e-06, + "loss": 0.2799, + "step": 14120 + }, + { + "epoch": 0.6391038696537679, + "grad_norm": 0.5959397462784317, + "learning_rate": 3.044680330488209e-06, + "loss": 0.3324, + "step": 14121 + }, + { + "epoch": 0.6391491287621633, + "grad_norm": 0.629765307972555, + "learning_rate": 3.0440057977328086e-06, + "loss": 0.324, + "step": 14122 + }, + { + "epoch": 0.6391943878705589, + "grad_norm": 0.6622370050010291, + "learning_rate": 3.0433313070057157e-06, + "loss": 0.3315, + "step": 14123 + }, + { + "epoch": 0.6392396469789545, + "grad_norm": 0.2798673125588423, + "learning_rate": 3.0426568583214224e-06, + "loss": 0.4666, + "step": 14124 + }, + { + "epoch": 0.6392849060873501, + "grad_norm": 0.6499396878375385, + "learning_rate": 3.041982451694422e-06, + "loss": 0.3218, + "step": 14125 + }, + { + "epoch": 0.6393301651957456, + "grad_norm": 0.5736196118655933, + "learning_rate": 3.0413080871392063e-06, + "loss": 0.3067, + "step": 14126 + }, + { + "epoch": 0.6393754243041412, + "grad_norm": 0.6391268496601686, + "learning_rate": 3.0406337646702638e-06, + "loss": 0.3067, + "step": 14127 + }, + { + "epoch": 0.6394206834125368, + "grad_norm": 1.020993104950133, + "learning_rate": 3.039959484302083e-06, + "loss": 0.3353, + "step": 14128 + }, + { + "epoch": 0.6394659425209324, + "grad_norm": 0.6174232971913675, + "learning_rate": 3.039285246049155e-06, + "loss": 0.2749, + "step": 14129 + }, + { + "epoch": 0.639511201629328, + "grad_norm": 0.6704099198505801, + "learning_rate": 3.0386110499259635e-06, + "loss": 0.3148, + "step": 14130 + }, + { + "epoch": 0.6395564607377234, + "grad_norm": 0.6262980876605436, + "learning_rate": 3.0379368959469967e-06, + "loss": 0.2975, + "step": 14131 + }, + { + "epoch": 0.639601719846119, + "grad_norm": 0.6167601797243967, + "learning_rate": 3.0372627841267418e-06, + "loss": 0.2947, + "step": 14132 + }, + { + "epoch": 0.6396469789545146, + "grad_norm": 0.6249766585899085, + "learning_rate": 3.0365887144796796e-06, + "loss": 0.3014, + "step": 14133 + }, + { + "epoch": 0.6396922380629102, + "grad_norm": 0.5942506404857038, + "learning_rate": 3.0359146870202954e-06, + "loss": 0.3278, + "step": 14134 + }, + { + "epoch": 0.6397374971713057, + "grad_norm": 0.5785357614177752, + "learning_rate": 3.035240701763074e-06, + "loss": 0.2704, + "step": 14135 + }, + { + "epoch": 0.6397827562797013, + "grad_norm": 0.5573281982727035, + "learning_rate": 3.0345667587224946e-06, + "loss": 0.2888, + "step": 14136 + }, + { + "epoch": 0.6398280153880969, + "grad_norm": 0.6203447435262367, + "learning_rate": 3.03389285791304e-06, + "loss": 0.2976, + "step": 14137 + }, + { + "epoch": 0.6398732744964925, + "grad_norm": 0.5662763294622898, + "learning_rate": 3.0332189993491877e-06, + "loss": 0.3066, + "step": 14138 + }, + { + "epoch": 0.6399185336048879, + "grad_norm": 0.6295862196549189, + "learning_rate": 3.0325451830454207e-06, + "loss": 0.3161, + "step": 14139 + }, + { + "epoch": 0.6399637927132835, + "grad_norm": 0.6594638994579315, + "learning_rate": 3.031871409016214e-06, + "loss": 0.3402, + "step": 14140 + }, + { + "epoch": 0.6400090518216791, + "grad_norm": 0.6413837854225898, + "learning_rate": 3.0311976772760466e-06, + "loss": 0.321, + "step": 14141 + }, + { + "epoch": 0.6400543109300747, + "grad_norm": 0.636807890867043, + "learning_rate": 3.0305239878393947e-06, + "loss": 0.3008, + "step": 14142 + }, + { + "epoch": 0.6400995700384703, + "grad_norm": 0.6562468927418301, + "learning_rate": 3.0298503407207317e-06, + "loss": 0.2723, + "step": 14143 + }, + { + "epoch": 0.6401448291468658, + "grad_norm": 0.31525091302289265, + "learning_rate": 3.029176735934536e-06, + "loss": 0.4841, + "step": 14144 + }, + { + "epoch": 0.6401900882552614, + "grad_norm": 0.5849769289677633, + "learning_rate": 3.028503173495279e-06, + "loss": 0.2699, + "step": 14145 + }, + { + "epoch": 0.640235347363657, + "grad_norm": 0.6111170056322482, + "learning_rate": 3.0278296534174334e-06, + "loss": 0.2883, + "step": 14146 + }, + { + "epoch": 0.6402806064720525, + "grad_norm": 0.700306479916621, + "learning_rate": 3.0271561757154705e-06, + "loss": 0.3315, + "step": 14147 + }, + { + "epoch": 0.640325865580448, + "grad_norm": 0.6045621286992134, + "learning_rate": 3.0264827404038655e-06, + "loss": 0.3392, + "step": 14148 + }, + { + "epoch": 0.6403711246888436, + "grad_norm": 0.613524506215485, + "learning_rate": 3.0258093474970817e-06, + "loss": 0.3427, + "step": 14149 + }, + { + "epoch": 0.6404163837972392, + "grad_norm": 0.6087397259443168, + "learning_rate": 3.0251359970095927e-06, + "loss": 0.3591, + "step": 14150 + }, + { + "epoch": 0.6404616429056348, + "grad_norm": 0.7792919719497309, + "learning_rate": 3.024462688955867e-06, + "loss": 0.3239, + "step": 14151 + }, + { + "epoch": 0.6405069020140304, + "grad_norm": 0.6220333276322899, + "learning_rate": 3.0237894233503697e-06, + "loss": 0.316, + "step": 14152 + }, + { + "epoch": 0.6405521611224259, + "grad_norm": 0.31641963417420904, + "learning_rate": 3.0231162002075678e-06, + "loss": 0.4724, + "step": 14153 + }, + { + "epoch": 0.6405974202308214, + "grad_norm": 0.6263995623866292, + "learning_rate": 3.0224430195419274e-06, + "loss": 0.2947, + "step": 14154 + }, + { + "epoch": 0.640642679339217, + "grad_norm": 0.6354504950160101, + "learning_rate": 3.021769881367914e-06, + "loss": 0.3623, + "step": 14155 + }, + { + "epoch": 0.6406879384476126, + "grad_norm": 0.6123559216921669, + "learning_rate": 3.0210967856999896e-06, + "loss": 0.2683, + "step": 14156 + }, + { + "epoch": 0.6407331975560081, + "grad_norm": 0.6038707224113978, + "learning_rate": 3.0204237325526166e-06, + "loss": 0.315, + "step": 14157 + }, + { + "epoch": 0.6407784566644037, + "grad_norm": 0.2684083420193887, + "learning_rate": 3.01975072194026e-06, + "loss": 0.4695, + "step": 14158 + }, + { + "epoch": 0.6408237157727993, + "grad_norm": 0.791181978079724, + "learning_rate": 3.0190777538773763e-06, + "loss": 0.2578, + "step": 14159 + }, + { + "epoch": 0.6408689748811949, + "grad_norm": 0.5665999544527709, + "learning_rate": 3.0184048283784284e-06, + "loss": 0.2635, + "step": 14160 + }, + { + "epoch": 0.6409142339895904, + "grad_norm": 0.6494721148390382, + "learning_rate": 3.0177319454578756e-06, + "loss": 0.3019, + "step": 14161 + }, + { + "epoch": 0.6409594930979859, + "grad_norm": 0.6559197261771749, + "learning_rate": 3.0170591051301746e-06, + "loss": 0.3231, + "step": 14162 + }, + { + "epoch": 0.6410047522063815, + "grad_norm": 0.29533722371071636, + "learning_rate": 3.0163863074097823e-06, + "loss": 0.4769, + "step": 14163 + }, + { + "epoch": 0.6410500113147771, + "grad_norm": 0.7705795692488842, + "learning_rate": 3.0157135523111574e-06, + "loss": 0.2935, + "step": 14164 + }, + { + "epoch": 0.6410952704231727, + "grad_norm": 0.8557306437058548, + "learning_rate": 3.0150408398487536e-06, + "loss": 0.3477, + "step": 14165 + }, + { + "epoch": 0.6411405295315682, + "grad_norm": 0.6391934999181982, + "learning_rate": 3.0143681700370253e-06, + "loss": 0.3246, + "step": 14166 + }, + { + "epoch": 0.6411857886399638, + "grad_norm": 0.27606869359092295, + "learning_rate": 3.013695542890426e-06, + "loss": 0.4821, + "step": 14167 + }, + { + "epoch": 0.6412310477483594, + "grad_norm": 0.7056753474223514, + "learning_rate": 3.0130229584234117e-06, + "loss": 0.2753, + "step": 14168 + }, + { + "epoch": 0.641276306856755, + "grad_norm": 0.6001043987094551, + "learning_rate": 3.0123504166504293e-06, + "loss": 0.313, + "step": 14169 + }, + { + "epoch": 0.6413215659651504, + "grad_norm": 0.6753891660126754, + "learning_rate": 3.0116779175859322e-06, + "loss": 0.3387, + "step": 14170 + }, + { + "epoch": 0.641366825073546, + "grad_norm": 0.6610764557175421, + "learning_rate": 3.011005461244372e-06, + "loss": 0.3401, + "step": 14171 + }, + { + "epoch": 0.6414120841819416, + "grad_norm": 0.6515044167069902, + "learning_rate": 3.010333047640192e-06, + "loss": 0.3199, + "step": 14172 + }, + { + "epoch": 0.6414573432903372, + "grad_norm": 0.6496929131103447, + "learning_rate": 3.009660676787846e-06, + "loss": 0.3511, + "step": 14173 + }, + { + "epoch": 0.6415026023987327, + "grad_norm": 0.3124187274189695, + "learning_rate": 3.0089883487017803e-06, + "loss": 0.4819, + "step": 14174 + }, + { + "epoch": 0.6415478615071283, + "grad_norm": 0.6471645470233871, + "learning_rate": 3.0083160633964385e-06, + "loss": 0.3403, + "step": 14175 + }, + { + "epoch": 0.6415931206155239, + "grad_norm": 0.30259448985732956, + "learning_rate": 3.007643820886267e-06, + "loss": 0.4696, + "step": 14176 + }, + { + "epoch": 0.6416383797239195, + "grad_norm": 0.6346368867233724, + "learning_rate": 3.0069716211857137e-06, + "loss": 0.2916, + "step": 14177 + }, + { + "epoch": 0.641683638832315, + "grad_norm": 0.8301621225683237, + "learning_rate": 3.006299464309216e-06, + "loss": 0.2947, + "step": 14178 + }, + { + "epoch": 0.6417288979407105, + "grad_norm": 0.6255210650197129, + "learning_rate": 3.0056273502712203e-06, + "loss": 0.329, + "step": 14179 + }, + { + "epoch": 0.6417741570491061, + "grad_norm": 0.5838680131848357, + "learning_rate": 3.004955279086167e-06, + "loss": 0.3265, + "step": 14180 + }, + { + "epoch": 0.6418194161575017, + "grad_norm": 0.31067328632448243, + "learning_rate": 3.0042832507685005e-06, + "loss": 0.4702, + "step": 14181 + }, + { + "epoch": 0.6418646752658973, + "grad_norm": 0.6004743983019462, + "learning_rate": 3.0036112653326544e-06, + "loss": 0.3028, + "step": 14182 + }, + { + "epoch": 0.6419099343742928, + "grad_norm": 0.551667150339416, + "learning_rate": 3.0029393227930712e-06, + "loss": 0.2985, + "step": 14183 + }, + { + "epoch": 0.6419551934826884, + "grad_norm": 0.2957063245086027, + "learning_rate": 3.0022674231641903e-06, + "loss": 0.4687, + "step": 14184 + }, + { + "epoch": 0.642000452591084, + "grad_norm": 0.624071202751652, + "learning_rate": 3.001595566460446e-06, + "loss": 0.3463, + "step": 14185 + }, + { + "epoch": 0.6420457116994795, + "grad_norm": 0.6038896558156434, + "learning_rate": 3.0009237526962735e-06, + "loss": 0.3186, + "step": 14186 + }, + { + "epoch": 0.6420909708078751, + "grad_norm": 0.5568839175147954, + "learning_rate": 3.0002519818861126e-06, + "loss": 0.3022, + "step": 14187 + }, + { + "epoch": 0.6421362299162706, + "grad_norm": 0.6025428150532423, + "learning_rate": 2.999580254044393e-06, + "loss": 0.3807, + "step": 14188 + }, + { + "epoch": 0.6421814890246662, + "grad_norm": 0.5803168353423076, + "learning_rate": 2.9989085691855513e-06, + "loss": 0.2849, + "step": 14189 + }, + { + "epoch": 0.6422267481330618, + "grad_norm": 0.2851186215276721, + "learning_rate": 2.9982369273240186e-06, + "loss": 0.4689, + "step": 14190 + }, + { + "epoch": 0.6422720072414574, + "grad_norm": 0.5989723155273211, + "learning_rate": 2.9975653284742257e-06, + "loss": 0.3065, + "step": 14191 + }, + { + "epoch": 0.6423172663498529, + "grad_norm": 0.26671375041777434, + "learning_rate": 2.996893772650602e-06, + "loss": 0.4629, + "step": 14192 + }, + { + "epoch": 0.6423625254582485, + "grad_norm": 0.6129745855779944, + "learning_rate": 2.996222259867582e-06, + "loss": 0.3266, + "step": 14193 + }, + { + "epoch": 0.642407784566644, + "grad_norm": 0.5675197444532076, + "learning_rate": 2.9955507901395908e-06, + "loss": 0.3478, + "step": 14194 + }, + { + "epoch": 0.6424530436750396, + "grad_norm": 0.28583493374778624, + "learning_rate": 2.994879363481056e-06, + "loss": 0.4871, + "step": 14195 + }, + { + "epoch": 0.6424983027834351, + "grad_norm": 0.60330637142904, + "learning_rate": 2.994207979906405e-06, + "loss": 0.3435, + "step": 14196 + }, + { + "epoch": 0.6425435618918307, + "grad_norm": 0.6031206315634002, + "learning_rate": 2.993536639430066e-06, + "loss": 0.3244, + "step": 14197 + }, + { + "epoch": 0.6425888210002263, + "grad_norm": 0.6196682127212395, + "learning_rate": 2.992865342066461e-06, + "loss": 0.2869, + "step": 14198 + }, + { + "epoch": 0.6426340801086219, + "grad_norm": 0.2655158263326608, + "learning_rate": 2.992194087830016e-06, + "loss": 0.4426, + "step": 14199 + }, + { + "epoch": 0.6426793392170175, + "grad_norm": 0.2960483703553002, + "learning_rate": 2.991522876735154e-06, + "loss": 0.4621, + "step": 14200 + }, + { + "epoch": 0.642724598325413, + "grad_norm": 0.6015521179132051, + "learning_rate": 2.990851708796295e-06, + "loss": 0.302, + "step": 14201 + }, + { + "epoch": 0.6427698574338085, + "grad_norm": 0.5963876428881998, + "learning_rate": 2.990180584027863e-06, + "loss": 0.3565, + "step": 14202 + }, + { + "epoch": 0.6428151165422041, + "grad_norm": 0.2691629477578797, + "learning_rate": 2.989509502444279e-06, + "loss": 0.4862, + "step": 14203 + }, + { + "epoch": 0.6428603756505997, + "grad_norm": 0.7691217666066809, + "learning_rate": 2.98883846405996e-06, + "loss": 0.34, + "step": 14204 + }, + { + "epoch": 0.6429056347589952, + "grad_norm": 0.28863810537045187, + "learning_rate": 2.988167468889324e-06, + "loss": 0.4938, + "step": 14205 + }, + { + "epoch": 0.6429508938673908, + "grad_norm": 0.662039036583885, + "learning_rate": 2.9874965169467934e-06, + "loss": 0.2781, + "step": 14206 + }, + { + "epoch": 0.6429961529757864, + "grad_norm": 0.6085765306963504, + "learning_rate": 2.986825608246779e-06, + "loss": 0.2576, + "step": 14207 + }, + { + "epoch": 0.643041412084182, + "grad_norm": 0.6022563110175009, + "learning_rate": 2.9861547428037003e-06, + "loss": 0.3167, + "step": 14208 + }, + { + "epoch": 0.6430866711925775, + "grad_norm": 0.5465923287275317, + "learning_rate": 2.9854839206319697e-06, + "loss": 0.2836, + "step": 14209 + }, + { + "epoch": 0.643131930300973, + "grad_norm": 0.28547996312858637, + "learning_rate": 2.984813141746006e-06, + "loss": 0.4954, + "step": 14210 + }, + { + "epoch": 0.6431771894093686, + "grad_norm": 0.2592260567682769, + "learning_rate": 2.9841424061602153e-06, + "loss": 0.4616, + "step": 14211 + }, + { + "epoch": 0.6432224485177642, + "grad_norm": 0.5807378407030216, + "learning_rate": 2.9834717138890145e-06, + "loss": 0.275, + "step": 14212 + }, + { + "epoch": 0.6432677076261598, + "grad_norm": 0.5648631166659934, + "learning_rate": 2.9828010649468144e-06, + "loss": 0.284, + "step": 14213 + }, + { + "epoch": 0.6433129667345553, + "grad_norm": 0.5817751136785265, + "learning_rate": 2.982130459348022e-06, + "loss": 0.3002, + "step": 14214 + }, + { + "epoch": 0.6433582258429509, + "grad_norm": 0.6588689318366263, + "learning_rate": 2.9814598971070487e-06, + "loss": 0.3609, + "step": 14215 + }, + { + "epoch": 0.6434034849513465, + "grad_norm": 0.6150071152737294, + "learning_rate": 2.980789378238305e-06, + "loss": 0.3373, + "step": 14216 + }, + { + "epoch": 0.6434487440597421, + "grad_norm": 0.6178255211284963, + "learning_rate": 2.980118902756194e-06, + "loss": 0.3161, + "step": 14217 + }, + { + "epoch": 0.6434940031681375, + "grad_norm": 0.6240835861392467, + "learning_rate": 2.9794484706751243e-06, + "loss": 0.3345, + "step": 14218 + }, + { + "epoch": 0.6435392622765331, + "grad_norm": 0.6383863142520622, + "learning_rate": 2.9787780820095025e-06, + "loss": 0.2888, + "step": 14219 + }, + { + "epoch": 0.6435845213849287, + "grad_norm": 0.6012313492309478, + "learning_rate": 2.97810773677373e-06, + "loss": 0.2687, + "step": 14220 + }, + { + "epoch": 0.6436297804933243, + "grad_norm": 0.6116343534165997, + "learning_rate": 2.977437434982214e-06, + "loss": 0.2902, + "step": 14221 + }, + { + "epoch": 0.6436750396017199, + "grad_norm": 0.5824607060502492, + "learning_rate": 2.976767176649356e-06, + "loss": 0.2817, + "step": 14222 + }, + { + "epoch": 0.6437202987101154, + "grad_norm": 0.6720034791116571, + "learning_rate": 2.9760969617895567e-06, + "loss": 0.3218, + "step": 14223 + }, + { + "epoch": 0.643765557818511, + "grad_norm": 0.30218453334348905, + "learning_rate": 2.975426790417218e-06, + "loss": 0.4402, + "step": 14224 + }, + { + "epoch": 0.6438108169269066, + "grad_norm": 0.618944995342216, + "learning_rate": 2.974756662546738e-06, + "loss": 0.3273, + "step": 14225 + }, + { + "epoch": 0.6438560760353021, + "grad_norm": 0.27838162273474976, + "learning_rate": 2.97408657819252e-06, + "loss": 0.4503, + "step": 14226 + }, + { + "epoch": 0.6439013351436976, + "grad_norm": 0.6088025550095602, + "learning_rate": 2.9734165373689577e-06, + "loss": 0.2749, + "step": 14227 + }, + { + "epoch": 0.6439465942520932, + "grad_norm": 0.6119993938372056, + "learning_rate": 2.97274654009045e-06, + "loss": 0.3192, + "step": 14228 + }, + { + "epoch": 0.6439918533604888, + "grad_norm": 0.6109517436523327, + "learning_rate": 2.972076586371394e-06, + "loss": 0.3238, + "step": 14229 + }, + { + "epoch": 0.6440371124688844, + "grad_norm": 0.6097190740331956, + "learning_rate": 2.9714066762261825e-06, + "loss": 0.3165, + "step": 14230 + }, + { + "epoch": 0.6440823715772799, + "grad_norm": 0.6508118809135099, + "learning_rate": 2.9707368096692113e-06, + "loss": 0.3748, + "step": 14231 + }, + { + "epoch": 0.6441276306856755, + "grad_norm": 0.5701642957641045, + "learning_rate": 2.9700669867148747e-06, + "loss": 0.2921, + "step": 14232 + }, + { + "epoch": 0.6441728897940711, + "grad_norm": 0.5970835905768958, + "learning_rate": 2.9693972073775633e-06, + "loss": 0.2968, + "step": 14233 + }, + { + "epoch": 0.6442181489024666, + "grad_norm": 0.2789643423704904, + "learning_rate": 2.9687274716716686e-06, + "loss": 0.4623, + "step": 14234 + }, + { + "epoch": 0.6442634080108622, + "grad_norm": 0.6103176597058867, + "learning_rate": 2.968057779611585e-06, + "loss": 0.2662, + "step": 14235 + }, + { + "epoch": 0.6443086671192577, + "grad_norm": 0.5945299475385736, + "learning_rate": 2.967388131211696e-06, + "loss": 0.291, + "step": 14236 + }, + { + "epoch": 0.6443539262276533, + "grad_norm": 0.6105465442782668, + "learning_rate": 2.966718526486394e-06, + "loss": 0.3026, + "step": 14237 + }, + { + "epoch": 0.6443991853360489, + "grad_norm": 0.7439019656806879, + "learning_rate": 2.966048965450065e-06, + "loss": 0.3068, + "step": 14238 + }, + { + "epoch": 0.6444444444444445, + "grad_norm": 0.6234767585672157, + "learning_rate": 2.9653794481171006e-06, + "loss": 0.3215, + "step": 14239 + }, + { + "epoch": 0.64448970355284, + "grad_norm": 0.6511768410489892, + "learning_rate": 2.9647099745018794e-06, + "loss": 0.355, + "step": 14240 + }, + { + "epoch": 0.6445349626612356, + "grad_norm": 0.5577217240585588, + "learning_rate": 2.9640405446187915e-06, + "loss": 0.2834, + "step": 14241 + }, + { + "epoch": 0.6445802217696311, + "grad_norm": 0.6636043203449777, + "learning_rate": 2.96337115848222e-06, + "loss": 0.2845, + "step": 14242 + }, + { + "epoch": 0.6446254808780267, + "grad_norm": 0.5674048522001951, + "learning_rate": 2.9627018161065456e-06, + "loss": 0.2784, + "step": 14243 + }, + { + "epoch": 0.6446707399864222, + "grad_norm": 0.6702110182418993, + "learning_rate": 2.962032517506152e-06, + "loss": 0.3145, + "step": 14244 + }, + { + "epoch": 0.6447159990948178, + "grad_norm": 0.6525622517016991, + "learning_rate": 2.9613632626954226e-06, + "loss": 0.3155, + "step": 14245 + }, + { + "epoch": 0.6447612582032134, + "grad_norm": 0.7183003943697874, + "learning_rate": 2.960694051688734e-06, + "loss": 0.2913, + "step": 14246 + }, + { + "epoch": 0.644806517311609, + "grad_norm": 0.6211610036592395, + "learning_rate": 2.960024884500467e-06, + "loss": 0.2935, + "step": 14247 + }, + { + "epoch": 0.6448517764200046, + "grad_norm": 0.2738736290787737, + "learning_rate": 2.959355761145001e-06, + "loss": 0.4627, + "step": 14248 + }, + { + "epoch": 0.6448970355284, + "grad_norm": 0.640655623750578, + "learning_rate": 2.9586866816367104e-06, + "loss": 0.303, + "step": 14249 + }, + { + "epoch": 0.6449422946367956, + "grad_norm": 0.6000781467813328, + "learning_rate": 2.9580176459899747e-06, + "loss": 0.3239, + "step": 14250 + }, + { + "epoch": 0.6449875537451912, + "grad_norm": 0.26561936790025625, + "learning_rate": 2.9573486542191682e-06, + "loss": 0.4631, + "step": 14251 + }, + { + "epoch": 0.6450328128535868, + "grad_norm": 0.9501142452737259, + "learning_rate": 2.9566797063386665e-06, + "loss": 0.2882, + "step": 14252 + }, + { + "epoch": 0.6450780719619823, + "grad_norm": 0.6272983545080569, + "learning_rate": 2.9560108023628403e-06, + "loss": 0.3373, + "step": 14253 + }, + { + "epoch": 0.6451233310703779, + "grad_norm": 0.6390746721660203, + "learning_rate": 2.955341942306066e-06, + "loss": 0.3377, + "step": 14254 + }, + { + "epoch": 0.6451685901787735, + "grad_norm": 0.6762475747292197, + "learning_rate": 2.9546731261827135e-06, + "loss": 0.2609, + "step": 14255 + }, + { + "epoch": 0.6452138492871691, + "grad_norm": 0.6354579445660217, + "learning_rate": 2.9540043540071535e-06, + "loss": 0.3167, + "step": 14256 + }, + { + "epoch": 0.6452591083955647, + "grad_norm": 0.2658740095150606, + "learning_rate": 2.953335625793755e-06, + "loss": 0.4601, + "step": 14257 + }, + { + "epoch": 0.6453043675039601, + "grad_norm": 0.6089096635417536, + "learning_rate": 2.952666941556891e-06, + "loss": 0.3051, + "step": 14258 + }, + { + "epoch": 0.6453496266123557, + "grad_norm": 0.6345912634128601, + "learning_rate": 2.9519983013109233e-06, + "loss": 0.3246, + "step": 14259 + }, + { + "epoch": 0.6453948857207513, + "grad_norm": 0.6944907784932608, + "learning_rate": 2.9513297050702238e-06, + "loss": 0.3671, + "step": 14260 + }, + { + "epoch": 0.6454401448291469, + "grad_norm": 0.5834899490919657, + "learning_rate": 2.9506611528491574e-06, + "loss": 0.3174, + "step": 14261 + }, + { + "epoch": 0.6454854039375424, + "grad_norm": 0.6705937698241765, + "learning_rate": 2.949992644662088e-06, + "loss": 0.3125, + "step": 14262 + }, + { + "epoch": 0.645530663045938, + "grad_norm": 0.5996420799570138, + "learning_rate": 2.9493241805233795e-06, + "loss": 0.3252, + "step": 14263 + }, + { + "epoch": 0.6455759221543336, + "grad_norm": 0.26899489308008584, + "learning_rate": 2.9486557604473993e-06, + "loss": 0.4378, + "step": 14264 + }, + { + "epoch": 0.6456211812627292, + "grad_norm": 0.7507072498117943, + "learning_rate": 2.947987384448503e-06, + "loss": 0.3032, + "step": 14265 + }, + { + "epoch": 0.6456664403711246, + "grad_norm": 0.6252518968544177, + "learning_rate": 2.9473190525410573e-06, + "loss": 0.3055, + "step": 14266 + }, + { + "epoch": 0.6457116994795202, + "grad_norm": 0.6217862203131317, + "learning_rate": 2.9466507647394193e-06, + "loss": 0.3365, + "step": 14267 + }, + { + "epoch": 0.6457569585879158, + "grad_norm": 0.6136071409273623, + "learning_rate": 2.9459825210579534e-06, + "loss": 0.2922, + "step": 14268 + }, + { + "epoch": 0.6458022176963114, + "grad_norm": 0.6190482145623376, + "learning_rate": 2.9453143215110113e-06, + "loss": 0.2907, + "step": 14269 + }, + { + "epoch": 0.645847476804707, + "grad_norm": 0.6836419932946133, + "learning_rate": 2.9446461661129553e-06, + "loss": 0.2979, + "step": 14270 + }, + { + "epoch": 0.6458927359131025, + "grad_norm": 0.6098661425702997, + "learning_rate": 2.9439780548781414e-06, + "loss": 0.2856, + "step": 14271 + }, + { + "epoch": 0.6459379950214981, + "grad_norm": 0.6647327199400282, + "learning_rate": 2.9433099878209238e-06, + "loss": 0.3018, + "step": 14272 + }, + { + "epoch": 0.6459832541298937, + "grad_norm": 0.40327286634865583, + "learning_rate": 2.9426419649556566e-06, + "loss": 0.4411, + "step": 14273 + }, + { + "epoch": 0.6460285132382892, + "grad_norm": 0.6334619344167035, + "learning_rate": 2.941973986296697e-06, + "loss": 0.3332, + "step": 14274 + }, + { + "epoch": 0.6460737723466847, + "grad_norm": 0.6411940748488537, + "learning_rate": 2.9413060518583948e-06, + "loss": 0.2829, + "step": 14275 + }, + { + "epoch": 0.6461190314550803, + "grad_norm": 0.5965215268418078, + "learning_rate": 2.9406381616551026e-06, + "loss": 0.2762, + "step": 14276 + }, + { + "epoch": 0.6461642905634759, + "grad_norm": 0.6653536857292324, + "learning_rate": 2.939970315701173e-06, + "loss": 0.3011, + "step": 14277 + }, + { + "epoch": 0.6462095496718715, + "grad_norm": 0.2752597094240355, + "learning_rate": 2.939302514010951e-06, + "loss": 0.4659, + "step": 14278 + }, + { + "epoch": 0.646254808780267, + "grad_norm": 0.6272241354713876, + "learning_rate": 2.9386347565987917e-06, + "loss": 0.3371, + "step": 14279 + }, + { + "epoch": 0.6463000678886626, + "grad_norm": 0.64838854296624, + "learning_rate": 2.937967043479039e-06, + "loss": 0.3134, + "step": 14280 + }, + { + "epoch": 0.6463453269970582, + "grad_norm": 0.628894173057687, + "learning_rate": 2.937299374666044e-06, + "loss": 0.3058, + "step": 14281 + }, + { + "epoch": 0.6463905861054537, + "grad_norm": 0.6724655897300117, + "learning_rate": 2.936631750174147e-06, + "loss": 0.3215, + "step": 14282 + }, + { + "epoch": 0.6464358452138493, + "grad_norm": 0.5889457910829546, + "learning_rate": 2.9359641700176977e-06, + "loss": 0.2922, + "step": 14283 + }, + { + "epoch": 0.6464811043222448, + "grad_norm": 0.7019494698064456, + "learning_rate": 2.935296634211041e-06, + "loss": 0.2842, + "step": 14284 + }, + { + "epoch": 0.6465263634306404, + "grad_norm": 0.30655246499787175, + "learning_rate": 2.934629142768517e-06, + "loss": 0.469, + "step": 14285 + }, + { + "epoch": 0.646571622539036, + "grad_norm": 0.6274803319498268, + "learning_rate": 2.9339616957044683e-06, + "loss": 0.285, + "step": 14286 + }, + { + "epoch": 0.6466168816474316, + "grad_norm": 0.5830271213214427, + "learning_rate": 2.9332942930332404e-06, + "loss": 0.3083, + "step": 14287 + }, + { + "epoch": 0.6466621407558271, + "grad_norm": 0.5883275567298492, + "learning_rate": 2.9326269347691675e-06, + "loss": 0.2746, + "step": 14288 + }, + { + "epoch": 0.6467073998642227, + "grad_norm": 0.640035380891334, + "learning_rate": 2.931959620926594e-06, + "loss": 0.3313, + "step": 14289 + }, + { + "epoch": 0.6467526589726182, + "grad_norm": 0.6397116725595818, + "learning_rate": 2.9312923515198577e-06, + "loss": 0.2949, + "step": 14290 + }, + { + "epoch": 0.6467979180810138, + "grad_norm": 0.8665629168087222, + "learning_rate": 2.9306251265632932e-06, + "loss": 0.2905, + "step": 14291 + }, + { + "epoch": 0.6468431771894094, + "grad_norm": 0.6764450931946282, + "learning_rate": 2.929957946071239e-06, + "loss": 0.3104, + "step": 14292 + }, + { + "epoch": 0.6468884362978049, + "grad_norm": 0.5662457813127475, + "learning_rate": 2.929290810058032e-06, + "loss": 0.2939, + "step": 14293 + }, + { + "epoch": 0.6469336954062005, + "grad_norm": 0.6583252374043029, + "learning_rate": 2.928623718538006e-06, + "loss": 0.3027, + "step": 14294 + }, + { + "epoch": 0.6469789545145961, + "grad_norm": 0.6466550381257198, + "learning_rate": 2.9279566715254944e-06, + "loss": 0.3758, + "step": 14295 + }, + { + "epoch": 0.6470242136229917, + "grad_norm": 0.7036033115749474, + "learning_rate": 2.9272896690348283e-06, + "loss": 0.3053, + "step": 14296 + }, + { + "epoch": 0.6470694727313872, + "grad_norm": 0.3131935441913814, + "learning_rate": 2.926622711080345e-06, + "loss": 0.4613, + "step": 14297 + }, + { + "epoch": 0.6471147318397827, + "grad_norm": 0.6368356188862032, + "learning_rate": 2.9259557976763686e-06, + "loss": 0.2803, + "step": 14298 + }, + { + "epoch": 0.6471599909481783, + "grad_norm": 0.629431616289445, + "learning_rate": 2.9252889288372335e-06, + "loss": 0.3066, + "step": 14299 + }, + { + "epoch": 0.6472052500565739, + "grad_norm": 0.3006838383038752, + "learning_rate": 2.9246221045772683e-06, + "loss": 0.444, + "step": 14300 + }, + { + "epoch": 0.6472505091649694, + "grad_norm": 1.038397028608156, + "learning_rate": 2.9239553249107985e-06, + "loss": 0.3333, + "step": 14301 + }, + { + "epoch": 0.647295768273365, + "grad_norm": 0.613277700934174, + "learning_rate": 2.9232885898521516e-06, + "loss": 0.3064, + "step": 14302 + }, + { + "epoch": 0.6473410273817606, + "grad_norm": 0.6310216092316894, + "learning_rate": 2.9226218994156574e-06, + "loss": 0.3362, + "step": 14303 + }, + { + "epoch": 0.6473862864901562, + "grad_norm": 0.6578581401449011, + "learning_rate": 2.921955253615637e-06, + "loss": 0.2963, + "step": 14304 + }, + { + "epoch": 0.6474315455985518, + "grad_norm": 0.6242737955561755, + "learning_rate": 2.9212886524664164e-06, + "loss": 0.3143, + "step": 14305 + }, + { + "epoch": 0.6474768047069472, + "grad_norm": 0.28960963861197336, + "learning_rate": 2.9206220959823183e-06, + "loss": 0.4529, + "step": 14306 + }, + { + "epoch": 0.6475220638153428, + "grad_norm": 0.3077247899182825, + "learning_rate": 2.9199555841776637e-06, + "loss": 0.4931, + "step": 14307 + }, + { + "epoch": 0.6475673229237384, + "grad_norm": 0.2732012975551414, + "learning_rate": 2.919289117066777e-06, + "loss": 0.4699, + "step": 14308 + }, + { + "epoch": 0.647612582032134, + "grad_norm": 0.6653553447247057, + "learning_rate": 2.918622694663975e-06, + "loss": 0.3195, + "step": 14309 + }, + { + "epoch": 0.6476578411405295, + "grad_norm": 0.6963381989202885, + "learning_rate": 2.9179563169835808e-06, + "loss": 0.3181, + "step": 14310 + }, + { + "epoch": 0.6477031002489251, + "grad_norm": 0.642825853602677, + "learning_rate": 2.9172899840399106e-06, + "loss": 0.3637, + "step": 14311 + }, + { + "epoch": 0.6477483593573207, + "grad_norm": 0.2854289444108715, + "learning_rate": 2.9166236958472805e-06, + "loss": 0.4687, + "step": 14312 + }, + { + "epoch": 0.6477936184657163, + "grad_norm": 0.7019237006351507, + "learning_rate": 2.9159574524200105e-06, + "loss": 0.2866, + "step": 14313 + }, + { + "epoch": 0.6478388775741117, + "grad_norm": 0.6214376307211359, + "learning_rate": 2.915291253772412e-06, + "loss": 0.3105, + "step": 14314 + }, + { + "epoch": 0.6478841366825073, + "grad_norm": 0.6254524080494197, + "learning_rate": 2.9146250999188043e-06, + "loss": 0.3209, + "step": 14315 + }, + { + "epoch": 0.6479293957909029, + "grad_norm": 0.57960723110441, + "learning_rate": 2.9139589908734977e-06, + "loss": 0.3289, + "step": 14316 + }, + { + "epoch": 0.6479746548992985, + "grad_norm": 0.8486329736280915, + "learning_rate": 2.9132929266508043e-06, + "loss": 0.2834, + "step": 14317 + }, + { + "epoch": 0.6480199140076941, + "grad_norm": 0.5990172383692677, + "learning_rate": 2.912626907265037e-06, + "loss": 0.3208, + "step": 14318 + }, + { + "epoch": 0.6480651731160896, + "grad_norm": 0.31174741569254, + "learning_rate": 2.91196093273051e-06, + "loss": 0.4633, + "step": 14319 + }, + { + "epoch": 0.6481104322244852, + "grad_norm": 0.5803472112943121, + "learning_rate": 2.911295003061526e-06, + "loss": 0.2711, + "step": 14320 + }, + { + "epoch": 0.6481556913328808, + "grad_norm": 0.2820889514644517, + "learning_rate": 2.910629118272398e-06, + "loss": 0.4638, + "step": 14321 + }, + { + "epoch": 0.6482009504412763, + "grad_norm": 0.6109816105803477, + "learning_rate": 2.9099632783774325e-06, + "loss": 0.3118, + "step": 14322 + }, + { + "epoch": 0.6482462095496718, + "grad_norm": 0.6306317742637505, + "learning_rate": 2.909297483390941e-06, + "loss": 0.2832, + "step": 14323 + }, + { + "epoch": 0.6482914686580674, + "grad_norm": 0.6263910342435784, + "learning_rate": 2.9086317333272218e-06, + "loss": 0.3358, + "step": 14324 + }, + { + "epoch": 0.648336727766463, + "grad_norm": 0.9421587567712512, + "learning_rate": 2.9079660282005833e-06, + "loss": 0.2469, + "step": 14325 + }, + { + "epoch": 0.6483819868748586, + "grad_norm": 0.6924369118078506, + "learning_rate": 2.907300368025332e-06, + "loss": 0.2902, + "step": 14326 + }, + { + "epoch": 0.6484272459832541, + "grad_norm": 0.6439189029583597, + "learning_rate": 2.906634752815768e-06, + "loss": 0.3156, + "step": 14327 + }, + { + "epoch": 0.6484725050916497, + "grad_norm": 0.6731846534259659, + "learning_rate": 2.9059691825861926e-06, + "loss": 0.294, + "step": 14328 + }, + { + "epoch": 0.6485177642000453, + "grad_norm": 0.3488924832775132, + "learning_rate": 2.9053036573509096e-06, + "loss": 0.4662, + "step": 14329 + }, + { + "epoch": 0.6485630233084408, + "grad_norm": 0.3336693302384131, + "learning_rate": 2.904638177124216e-06, + "loss": 0.4322, + "step": 14330 + }, + { + "epoch": 0.6486082824168364, + "grad_norm": 0.6285069632218658, + "learning_rate": 2.9039727419204146e-06, + "loss": 0.2927, + "step": 14331 + }, + { + "epoch": 0.6486535415252319, + "grad_norm": 0.6141502723029155, + "learning_rate": 2.9033073517538008e-06, + "loss": 0.2927, + "step": 14332 + }, + { + "epoch": 0.6486988006336275, + "grad_norm": 0.6560939059690654, + "learning_rate": 2.9026420066386705e-06, + "loss": 0.3329, + "step": 14333 + }, + { + "epoch": 0.6487440597420231, + "grad_norm": 0.6558394371129095, + "learning_rate": 2.9019767065893227e-06, + "loss": 0.2866, + "step": 14334 + }, + { + "epoch": 0.6487893188504187, + "grad_norm": 0.6485885431882231, + "learning_rate": 2.9013114516200537e-06, + "loss": 0.2918, + "step": 14335 + }, + { + "epoch": 0.6488345779588142, + "grad_norm": 0.6142572990614329, + "learning_rate": 2.900646241745156e-06, + "loss": 0.3494, + "step": 14336 + }, + { + "epoch": 0.6488798370672098, + "grad_norm": 0.6310608157762924, + "learning_rate": 2.8999810769789204e-06, + "loss": 0.3, + "step": 14337 + }, + { + "epoch": 0.6489250961756053, + "grad_norm": 0.6451028508599663, + "learning_rate": 2.899315957335642e-06, + "loss": 0.2935, + "step": 14338 + }, + { + "epoch": 0.6489703552840009, + "grad_norm": 0.38797774658659645, + "learning_rate": 2.8986508828296144e-06, + "loss": 0.4867, + "step": 14339 + }, + { + "epoch": 0.6490156143923965, + "grad_norm": 0.7113345408661017, + "learning_rate": 2.897985853475125e-06, + "loss": 0.2877, + "step": 14340 + }, + { + "epoch": 0.649060873500792, + "grad_norm": 0.5809836155986944, + "learning_rate": 2.8973208692864623e-06, + "loss": 0.2924, + "step": 14341 + }, + { + "epoch": 0.6491061326091876, + "grad_norm": 0.8393594686255713, + "learning_rate": 2.896655930277918e-06, + "loss": 0.3202, + "step": 14342 + }, + { + "epoch": 0.6491513917175832, + "grad_norm": 0.6353389822032426, + "learning_rate": 2.8959910364637755e-06, + "loss": 0.2833, + "step": 14343 + }, + { + "epoch": 0.6491966508259788, + "grad_norm": 0.6497489518809756, + "learning_rate": 2.8953261878583263e-06, + "loss": 0.3251, + "step": 14344 + }, + { + "epoch": 0.6492419099343743, + "grad_norm": 0.6217565026702535, + "learning_rate": 2.8946613844758526e-06, + "loss": 0.2892, + "step": 14345 + }, + { + "epoch": 0.6492871690427698, + "grad_norm": 0.682100611004778, + "learning_rate": 2.893996626330638e-06, + "loss": 0.3525, + "step": 14346 + }, + { + "epoch": 0.6493324281511654, + "grad_norm": 0.6481556737222688, + "learning_rate": 2.8933319134369677e-06, + "loss": 0.3273, + "step": 14347 + }, + { + "epoch": 0.649377687259561, + "grad_norm": 0.5924432907301626, + "learning_rate": 2.8926672458091265e-06, + "loss": 0.3111, + "step": 14348 + }, + { + "epoch": 0.6494229463679565, + "grad_norm": 0.35012226221446846, + "learning_rate": 2.892002623461394e-06, + "loss": 0.4781, + "step": 14349 + }, + { + "epoch": 0.6494682054763521, + "grad_norm": 0.7342121078319623, + "learning_rate": 2.8913380464080487e-06, + "loss": 0.311, + "step": 14350 + }, + { + "epoch": 0.6495134645847477, + "grad_norm": 0.6000927905765256, + "learning_rate": 2.890673514663373e-06, + "loss": 0.3, + "step": 14351 + }, + { + "epoch": 0.6495587236931433, + "grad_norm": 1.7539158633690095, + "learning_rate": 2.890009028241647e-06, + "loss": 0.2875, + "step": 14352 + }, + { + "epoch": 0.6496039828015389, + "grad_norm": 0.6715777521885723, + "learning_rate": 2.8893445871571463e-06, + "loss": 0.3334, + "step": 14353 + }, + { + "epoch": 0.6496492419099343, + "grad_norm": 0.6676157791442822, + "learning_rate": 2.8886801914241465e-06, + "loss": 0.2926, + "step": 14354 + }, + { + "epoch": 0.6496945010183299, + "grad_norm": 0.6263183832848004, + "learning_rate": 2.8880158410569264e-06, + "loss": 0.3457, + "step": 14355 + }, + { + "epoch": 0.6497397601267255, + "grad_norm": 0.675025958023004, + "learning_rate": 2.88735153606976e-06, + "loss": 0.2787, + "step": 14356 + }, + { + "epoch": 0.6497850192351211, + "grad_norm": 0.30587303186604464, + "learning_rate": 2.8866872764769183e-06, + "loss": 0.4732, + "step": 14357 + }, + { + "epoch": 0.6498302783435166, + "grad_norm": 0.6729939352178069, + "learning_rate": 2.8860230622926787e-06, + "loss": 0.3298, + "step": 14358 + }, + { + "epoch": 0.6498755374519122, + "grad_norm": 0.596371834667015, + "learning_rate": 2.885358893531308e-06, + "loss": 0.3025, + "step": 14359 + }, + { + "epoch": 0.6499207965603078, + "grad_norm": 0.8082889237237667, + "learning_rate": 2.884694770207083e-06, + "loss": 0.33, + "step": 14360 + }, + { + "epoch": 0.6499660556687034, + "grad_norm": 0.5788032355268777, + "learning_rate": 2.8840306923342707e-06, + "loss": 0.3092, + "step": 14361 + }, + { + "epoch": 0.6500113147770988, + "grad_norm": 0.599923225178742, + "learning_rate": 2.883366659927138e-06, + "loss": 0.3058, + "step": 14362 + }, + { + "epoch": 0.6500565738854944, + "grad_norm": 0.6499818118745128, + "learning_rate": 2.8827026729999553e-06, + "loss": 0.3054, + "step": 14363 + }, + { + "epoch": 0.65010183299389, + "grad_norm": 0.2773497872558239, + "learning_rate": 2.882038731566991e-06, + "loss": 0.4747, + "step": 14364 + }, + { + "epoch": 0.6501470921022856, + "grad_norm": 0.6571747074412797, + "learning_rate": 2.881374835642509e-06, + "loss": 0.2914, + "step": 14365 + }, + { + "epoch": 0.6501923512106812, + "grad_norm": 0.7022076125813193, + "learning_rate": 2.880710985240774e-06, + "loss": 0.3449, + "step": 14366 + }, + { + "epoch": 0.6502376103190767, + "grad_norm": 0.6016931902558902, + "learning_rate": 2.8800471803760504e-06, + "loss": 0.3139, + "step": 14367 + }, + { + "epoch": 0.6502828694274723, + "grad_norm": 0.5979079268440192, + "learning_rate": 2.8793834210626036e-06, + "loss": 0.2877, + "step": 14368 + }, + { + "epoch": 0.6503281285358679, + "grad_norm": 0.5774696254930354, + "learning_rate": 2.878719707314695e-06, + "loss": 0.312, + "step": 14369 + }, + { + "epoch": 0.6503733876442634, + "grad_norm": 0.6664975643935593, + "learning_rate": 2.8780560391465828e-06, + "loss": 0.2982, + "step": 14370 + }, + { + "epoch": 0.6504186467526589, + "grad_norm": 0.6311890521309541, + "learning_rate": 2.877392416572531e-06, + "loss": 0.338, + "step": 14371 + }, + { + "epoch": 0.6504639058610545, + "grad_norm": 0.6102114402960955, + "learning_rate": 2.876728839606795e-06, + "loss": 0.2884, + "step": 14372 + }, + { + "epoch": 0.6505091649694501, + "grad_norm": 0.6672385468388162, + "learning_rate": 2.876065308263637e-06, + "loss": 0.2693, + "step": 14373 + }, + { + "epoch": 0.6505544240778457, + "grad_norm": 0.6171736644178366, + "learning_rate": 2.875401822557312e-06, + "loss": 0.2921, + "step": 14374 + }, + { + "epoch": 0.6505996831862413, + "grad_norm": 0.6134210902409978, + "learning_rate": 2.8747383825020753e-06, + "loss": 0.3198, + "step": 14375 + }, + { + "epoch": 0.6506449422946368, + "grad_norm": 0.6485596815035234, + "learning_rate": 2.874074988112183e-06, + "loss": 0.3354, + "step": 14376 + }, + { + "epoch": 0.6506902014030324, + "grad_norm": 0.6538269871511858, + "learning_rate": 2.873411639401893e-06, + "loss": 0.2816, + "step": 14377 + }, + { + "epoch": 0.6507354605114279, + "grad_norm": 0.6694802347127378, + "learning_rate": 2.8727483363854547e-06, + "loss": 0.3235, + "step": 14378 + }, + { + "epoch": 0.6507807196198235, + "grad_norm": 0.619858970538202, + "learning_rate": 2.872085079077119e-06, + "loss": 0.3212, + "step": 14379 + }, + { + "epoch": 0.650825978728219, + "grad_norm": 0.2800973148325299, + "learning_rate": 2.8714218674911397e-06, + "loss": 0.4462, + "step": 14380 + }, + { + "epoch": 0.6508712378366146, + "grad_norm": 0.61955442849468, + "learning_rate": 2.8707587016417695e-06, + "loss": 0.3097, + "step": 14381 + }, + { + "epoch": 0.6509164969450102, + "grad_norm": 0.5888297171014283, + "learning_rate": 2.870095581543255e-06, + "loss": 0.3206, + "step": 14382 + }, + { + "epoch": 0.6509617560534058, + "grad_norm": 0.5962007118260905, + "learning_rate": 2.8694325072098434e-06, + "loss": 0.3221, + "step": 14383 + }, + { + "epoch": 0.6510070151618013, + "grad_norm": 0.5353932920407563, + "learning_rate": 2.868769478655785e-06, + "loss": 0.2852, + "step": 14384 + }, + { + "epoch": 0.6510522742701969, + "grad_norm": 0.6399218831839524, + "learning_rate": 2.868106495895323e-06, + "loss": 0.2848, + "step": 14385 + }, + { + "epoch": 0.6510975333785924, + "grad_norm": 0.6031502416346952, + "learning_rate": 2.8674435589427075e-06, + "loss": 0.312, + "step": 14386 + }, + { + "epoch": 0.651142792486988, + "grad_norm": 0.6202017345451638, + "learning_rate": 2.86678066781218e-06, + "loss": 0.3196, + "step": 14387 + }, + { + "epoch": 0.6511880515953836, + "grad_norm": 0.6092919726405652, + "learning_rate": 2.866117822517982e-06, + "loss": 0.2961, + "step": 14388 + }, + { + "epoch": 0.6512333107037791, + "grad_norm": 0.7608931850326427, + "learning_rate": 2.8654550230743605e-06, + "loss": 0.2981, + "step": 14389 + }, + { + "epoch": 0.6512785698121747, + "grad_norm": 0.7392695622420434, + "learning_rate": 2.8647922694955544e-06, + "loss": 0.3183, + "step": 14390 + }, + { + "epoch": 0.6513238289205703, + "grad_norm": 0.704571196624388, + "learning_rate": 2.8641295617958033e-06, + "loss": 0.322, + "step": 14391 + }, + { + "epoch": 0.6513690880289659, + "grad_norm": 0.282888891059829, + "learning_rate": 2.8634668999893477e-06, + "loss": 0.4592, + "step": 14392 + }, + { + "epoch": 0.6514143471373613, + "grad_norm": 0.2980961398662729, + "learning_rate": 2.862804284090428e-06, + "loss": 0.4833, + "step": 14393 + }, + { + "epoch": 0.6514596062457569, + "grad_norm": 1.271083687124575, + "learning_rate": 2.8621417141132813e-06, + "loss": 0.2558, + "step": 14394 + }, + { + "epoch": 0.6515048653541525, + "grad_norm": 0.43418125969404187, + "learning_rate": 2.8614791900721407e-06, + "loss": 0.4753, + "step": 14395 + }, + { + "epoch": 0.6515501244625481, + "grad_norm": 0.5849742285716373, + "learning_rate": 2.860816711981245e-06, + "loss": 0.3046, + "step": 14396 + }, + { + "epoch": 0.6515953835709436, + "grad_norm": 0.6102146219858934, + "learning_rate": 2.8601542798548295e-06, + "loss": 0.2849, + "step": 14397 + }, + { + "epoch": 0.6516406426793392, + "grad_norm": 0.2841653912168513, + "learning_rate": 2.8594918937071264e-06, + "loss": 0.4796, + "step": 14398 + }, + { + "epoch": 0.6516859017877348, + "grad_norm": 0.6324213646858495, + "learning_rate": 2.8588295535523667e-06, + "loss": 0.3134, + "step": 14399 + }, + { + "epoch": 0.6517311608961304, + "grad_norm": 0.6129634366352824, + "learning_rate": 2.858167259404786e-06, + "loss": 0.3675, + "step": 14400 + }, + { + "epoch": 0.651776420004526, + "grad_norm": 0.28533615643166216, + "learning_rate": 2.85750501127861e-06, + "loss": 0.4834, + "step": 14401 + }, + { + "epoch": 0.6518216791129214, + "grad_norm": 0.699339220550563, + "learning_rate": 2.856842809188074e-06, + "loss": 0.3116, + "step": 14402 + }, + { + "epoch": 0.651866938221317, + "grad_norm": 0.6195956920362005, + "learning_rate": 2.8561806531474035e-06, + "loss": 0.2572, + "step": 14403 + }, + { + "epoch": 0.6519121973297126, + "grad_norm": 0.5413467945492126, + "learning_rate": 2.855518543170824e-06, + "loss": 0.2432, + "step": 14404 + }, + { + "epoch": 0.6519574564381082, + "grad_norm": 0.600232623190571, + "learning_rate": 2.8548564792725652e-06, + "loss": 0.3013, + "step": 14405 + }, + { + "epoch": 0.6520027155465037, + "grad_norm": 0.2723010923448121, + "learning_rate": 2.8541944614668548e-06, + "loss": 0.4688, + "step": 14406 + }, + { + "epoch": 0.6520479746548993, + "grad_norm": 0.6251866065262676, + "learning_rate": 2.8535324897679153e-06, + "loss": 0.3261, + "step": 14407 + }, + { + "epoch": 0.6520932337632949, + "grad_norm": 0.6008352127614647, + "learning_rate": 2.852870564189967e-06, + "loss": 0.3214, + "step": 14408 + }, + { + "epoch": 0.6521384928716905, + "grad_norm": 0.7247530449061969, + "learning_rate": 2.8522086847472365e-06, + "loss": 0.2951, + "step": 14409 + }, + { + "epoch": 0.652183751980086, + "grad_norm": 0.5944063153735807, + "learning_rate": 2.851546851453947e-06, + "loss": 0.2495, + "step": 14410 + }, + { + "epoch": 0.6522290110884815, + "grad_norm": 0.6242000460070781, + "learning_rate": 2.8508850643243168e-06, + "loss": 0.2665, + "step": 14411 + }, + { + "epoch": 0.6522742701968771, + "grad_norm": 0.6220752224649899, + "learning_rate": 2.8502233233725647e-06, + "loss": 0.3021, + "step": 14412 + }, + { + "epoch": 0.6523195293052727, + "grad_norm": 0.6201982821878654, + "learning_rate": 2.8495616286129125e-06, + "loss": 0.2813, + "step": 14413 + }, + { + "epoch": 0.6523647884136683, + "grad_norm": 0.2602797614505162, + "learning_rate": 2.848899980059574e-06, + "loss": 0.4717, + "step": 14414 + }, + { + "epoch": 0.6524100475220638, + "grad_norm": 0.5564924797920047, + "learning_rate": 2.8482383777267707e-06, + "loss": 0.3093, + "step": 14415 + }, + { + "epoch": 0.6524553066304594, + "grad_norm": 0.5661725215802511, + "learning_rate": 2.847576821628716e-06, + "loss": 0.2676, + "step": 14416 + }, + { + "epoch": 0.652500565738855, + "grad_norm": 0.605395525670404, + "learning_rate": 2.8469153117796226e-06, + "loss": 0.2867, + "step": 14417 + }, + { + "epoch": 0.6525458248472505, + "grad_norm": 0.5962137534020132, + "learning_rate": 2.8462538481937067e-06, + "loss": 0.2992, + "step": 14418 + }, + { + "epoch": 0.652591083955646, + "grad_norm": 0.27038535192728275, + "learning_rate": 2.8455924308851843e-06, + "loss": 0.465, + "step": 14419 + }, + { + "epoch": 0.6526363430640416, + "grad_norm": 0.6366746385318536, + "learning_rate": 2.844931059868261e-06, + "loss": 0.3161, + "step": 14420 + }, + { + "epoch": 0.6526816021724372, + "grad_norm": 0.6479535834948442, + "learning_rate": 2.8442697351571496e-06, + "loss": 0.3016, + "step": 14421 + }, + { + "epoch": 0.6527268612808328, + "grad_norm": 0.7228641491464504, + "learning_rate": 2.8436084567660604e-06, + "loss": 0.3387, + "step": 14422 + }, + { + "epoch": 0.6527721203892284, + "grad_norm": 0.6542739679281547, + "learning_rate": 2.8429472247092077e-06, + "loss": 0.3561, + "step": 14423 + }, + { + "epoch": 0.6528173794976239, + "grad_norm": 0.7079313320065974, + "learning_rate": 2.8422860390007896e-06, + "loss": 0.3671, + "step": 14424 + }, + { + "epoch": 0.6528626386060195, + "grad_norm": 0.6326576281591828, + "learning_rate": 2.8416248996550176e-06, + "loss": 0.2878, + "step": 14425 + }, + { + "epoch": 0.652907897714415, + "grad_norm": 0.2668548599523566, + "learning_rate": 2.8409638066860994e-06, + "loss": 0.459, + "step": 14426 + }, + { + "epoch": 0.6529531568228106, + "grad_norm": 0.6751405890400213, + "learning_rate": 2.8403027601082385e-06, + "loss": 0.3585, + "step": 14427 + }, + { + "epoch": 0.6529984159312061, + "grad_norm": 0.2812480485423244, + "learning_rate": 2.8396417599356363e-06, + "loss": 0.4647, + "step": 14428 + }, + { + "epoch": 0.6530436750396017, + "grad_norm": 0.6065274845901797, + "learning_rate": 2.838980806182499e-06, + "loss": 0.3163, + "step": 14429 + }, + { + "epoch": 0.6530889341479973, + "grad_norm": 0.6540976748783749, + "learning_rate": 2.8383198988630257e-06, + "loss": 0.2846, + "step": 14430 + }, + { + "epoch": 0.6531341932563929, + "grad_norm": 0.5858231834276961, + "learning_rate": 2.83765903799142e-06, + "loss": 0.3243, + "step": 14431 + }, + { + "epoch": 0.6531794523647884, + "grad_norm": 0.6227041477821702, + "learning_rate": 2.8369982235818817e-06, + "loss": 0.3672, + "step": 14432 + }, + { + "epoch": 0.653224711473184, + "grad_norm": 1.1696453604041717, + "learning_rate": 2.836337455648605e-06, + "loss": 0.2545, + "step": 14433 + }, + { + "epoch": 0.6532699705815795, + "grad_norm": 0.6102567853373334, + "learning_rate": 2.835676734205792e-06, + "loss": 0.3471, + "step": 14434 + }, + { + "epoch": 0.6533152296899751, + "grad_norm": 0.617072658570023, + "learning_rate": 2.8350160592676407e-06, + "loss": 0.3261, + "step": 14435 + }, + { + "epoch": 0.6533604887983707, + "grad_norm": 0.640343205295688, + "learning_rate": 2.8343554308483444e-06, + "loss": 0.3148, + "step": 14436 + }, + { + "epoch": 0.6534057479067662, + "grad_norm": 0.5843377197812158, + "learning_rate": 2.8336948489620973e-06, + "loss": 0.3159, + "step": 14437 + }, + { + "epoch": 0.6534510070151618, + "grad_norm": 0.6244745426145994, + "learning_rate": 2.833034313623095e-06, + "loss": 0.2863, + "step": 14438 + }, + { + "epoch": 0.6534962661235574, + "grad_norm": 0.27814110294909966, + "learning_rate": 2.8323738248455313e-06, + "loss": 0.481, + "step": 14439 + }, + { + "epoch": 0.653541525231953, + "grad_norm": 0.6083305305579919, + "learning_rate": 2.8317133826435968e-06, + "loss": 0.2858, + "step": 14440 + }, + { + "epoch": 0.6535867843403484, + "grad_norm": 0.6288104167954419, + "learning_rate": 2.8310529870314805e-06, + "loss": 0.3011, + "step": 14441 + }, + { + "epoch": 0.653632043448744, + "grad_norm": 0.6484817765709348, + "learning_rate": 2.830392638023376e-06, + "loss": 0.2908, + "step": 14442 + }, + { + "epoch": 0.6536773025571396, + "grad_norm": 0.6555705986626023, + "learning_rate": 2.8297323356334683e-06, + "loss": 0.3, + "step": 14443 + }, + { + "epoch": 0.6537225616655352, + "grad_norm": 0.6426833166344065, + "learning_rate": 2.829072079875949e-06, + "loss": 0.3228, + "step": 14444 + }, + { + "epoch": 0.6537678207739308, + "grad_norm": 0.6407112303345188, + "learning_rate": 2.8284118707650033e-06, + "loss": 0.3043, + "step": 14445 + }, + { + "epoch": 0.6538130798823263, + "grad_norm": 0.5983310459118458, + "learning_rate": 2.8277517083148155e-06, + "loss": 0.3129, + "step": 14446 + }, + { + "epoch": 0.6538583389907219, + "grad_norm": 0.6640789563914657, + "learning_rate": 2.8270915925395714e-06, + "loss": 0.3132, + "step": 14447 + }, + { + "epoch": 0.6539035980991175, + "grad_norm": 0.6133590414182064, + "learning_rate": 2.8264315234534594e-06, + "loss": 0.2707, + "step": 14448 + }, + { + "epoch": 0.6539488572075131, + "grad_norm": 0.6340453316708997, + "learning_rate": 2.8257715010706544e-06, + "loss": 0.3189, + "step": 14449 + }, + { + "epoch": 0.6539941163159085, + "grad_norm": 0.27372905646296936, + "learning_rate": 2.8251115254053426e-06, + "loss": 0.4638, + "step": 14450 + }, + { + "epoch": 0.6540393754243041, + "grad_norm": 0.5895567568955655, + "learning_rate": 2.824451596471704e-06, + "loss": 0.3543, + "step": 14451 + }, + { + "epoch": 0.6540846345326997, + "grad_norm": 0.6503694689675438, + "learning_rate": 2.823791714283923e-06, + "loss": 0.3237, + "step": 14452 + }, + { + "epoch": 0.6541298936410953, + "grad_norm": 0.5918394190940748, + "learning_rate": 2.8231318788561702e-06, + "loss": 0.3103, + "step": 14453 + }, + { + "epoch": 0.6541751527494908, + "grad_norm": 0.6474765844945153, + "learning_rate": 2.8224720902026283e-06, + "loss": 0.2933, + "step": 14454 + }, + { + "epoch": 0.6542204118578864, + "grad_norm": 0.5915692141412208, + "learning_rate": 2.821812348337475e-06, + "loss": 0.3107, + "step": 14455 + }, + { + "epoch": 0.654265670966282, + "grad_norm": 0.2727139188700424, + "learning_rate": 2.821152653274884e-06, + "loss": 0.4804, + "step": 14456 + }, + { + "epoch": 0.6543109300746776, + "grad_norm": 0.6330423123605577, + "learning_rate": 2.820493005029029e-06, + "loss": 0.3106, + "step": 14457 + }, + { + "epoch": 0.6543561891830731, + "grad_norm": 0.26587214402432924, + "learning_rate": 2.8198334036140873e-06, + "loss": 0.467, + "step": 14458 + }, + { + "epoch": 0.6544014482914686, + "grad_norm": 0.612997939811741, + "learning_rate": 2.819173849044229e-06, + "loss": 0.3307, + "step": 14459 + }, + { + "epoch": 0.6544467073998642, + "grad_norm": 0.6129959000307879, + "learning_rate": 2.8185143413336272e-06, + "loss": 0.2693, + "step": 14460 + }, + { + "epoch": 0.6544919665082598, + "grad_norm": 0.6254208228940998, + "learning_rate": 2.8178548804964536e-06, + "loss": 0.3163, + "step": 14461 + }, + { + "epoch": 0.6545372256166554, + "grad_norm": 0.9923324801370447, + "learning_rate": 2.817195466546874e-06, + "loss": 0.3117, + "step": 14462 + }, + { + "epoch": 0.6545824847250509, + "grad_norm": 0.3034442698917206, + "learning_rate": 2.8165360994990598e-06, + "loss": 0.4408, + "step": 14463 + }, + { + "epoch": 0.6546277438334465, + "grad_norm": 0.6557756043198308, + "learning_rate": 2.815876779367181e-06, + "loss": 0.3154, + "step": 14464 + }, + { + "epoch": 0.654673002941842, + "grad_norm": 0.6200793549760715, + "learning_rate": 2.8152175061654017e-06, + "loss": 0.308, + "step": 14465 + }, + { + "epoch": 0.6547182620502376, + "grad_norm": 0.541491605820331, + "learning_rate": 2.8145582799078873e-06, + "loss": 0.2597, + "step": 14466 + }, + { + "epoch": 0.6547635211586331, + "grad_norm": 0.6239722423207902, + "learning_rate": 2.8138991006088024e-06, + "loss": 0.3003, + "step": 14467 + }, + { + "epoch": 0.6548087802670287, + "grad_norm": 0.6460007049209576, + "learning_rate": 2.813239968282314e-06, + "loss": 0.3497, + "step": 14468 + }, + { + "epoch": 0.6548540393754243, + "grad_norm": 0.2721666372049071, + "learning_rate": 2.812580882942583e-06, + "loss": 0.4872, + "step": 14469 + }, + { + "epoch": 0.6548992984838199, + "grad_norm": 0.673252288948146, + "learning_rate": 2.811921844603768e-06, + "loss": 0.3248, + "step": 14470 + }, + { + "epoch": 0.6549445575922155, + "grad_norm": 0.2721882793861817, + "learning_rate": 2.8112628532800345e-06, + "loss": 0.4691, + "step": 14471 + }, + { + "epoch": 0.654989816700611, + "grad_norm": 0.2922565844858254, + "learning_rate": 2.8106039089855385e-06, + "loss": 0.4552, + "step": 14472 + }, + { + "epoch": 0.6550350758090066, + "grad_norm": 0.735896164955326, + "learning_rate": 2.809945011734442e-06, + "loss": 0.2839, + "step": 14473 + }, + { + "epoch": 0.6550803349174021, + "grad_norm": 0.6297271221726509, + "learning_rate": 2.8092861615409004e-06, + "loss": 0.3003, + "step": 14474 + }, + { + "epoch": 0.6551255940257977, + "grad_norm": 0.6394901774218247, + "learning_rate": 2.8086273584190704e-06, + "loss": 0.3047, + "step": 14475 + }, + { + "epoch": 0.6551708531341932, + "grad_norm": 0.6439208339140995, + "learning_rate": 2.807968602383107e-06, + "loss": 0.3283, + "step": 14476 + }, + { + "epoch": 0.6552161122425888, + "grad_norm": 0.5642350700947738, + "learning_rate": 2.8073098934471703e-06, + "loss": 0.3227, + "step": 14477 + }, + { + "epoch": 0.6552613713509844, + "grad_norm": 0.6685770730535802, + "learning_rate": 2.806651231625406e-06, + "loss": 0.2996, + "step": 14478 + }, + { + "epoch": 0.65530663045938, + "grad_norm": 0.6824815601127436, + "learning_rate": 2.8059926169319694e-06, + "loss": 0.2653, + "step": 14479 + }, + { + "epoch": 0.6553518895677756, + "grad_norm": 0.30267560974840607, + "learning_rate": 2.8053340493810143e-06, + "loss": 0.4775, + "step": 14480 + }, + { + "epoch": 0.655397148676171, + "grad_norm": 0.671206729091289, + "learning_rate": 2.804675528986693e-06, + "loss": 0.3277, + "step": 14481 + }, + { + "epoch": 0.6554424077845666, + "grad_norm": 0.5600886177378184, + "learning_rate": 2.804017055763149e-06, + "loss": 0.3015, + "step": 14482 + }, + { + "epoch": 0.6554876668929622, + "grad_norm": 0.6033961960418963, + "learning_rate": 2.8033586297245336e-06, + "loss": 0.3511, + "step": 14483 + }, + { + "epoch": 0.6555329260013578, + "grad_norm": 0.5938406866774515, + "learning_rate": 2.8027002508849967e-06, + "loss": 0.3397, + "step": 14484 + }, + { + "epoch": 0.6555781851097533, + "grad_norm": 0.6218836501499261, + "learning_rate": 2.8020419192586836e-06, + "loss": 0.2954, + "step": 14485 + }, + { + "epoch": 0.6556234442181489, + "grad_norm": 0.6338006210940533, + "learning_rate": 2.801383634859737e-06, + "loss": 0.3092, + "step": 14486 + }, + { + "epoch": 0.6556687033265445, + "grad_norm": 0.5836146648909326, + "learning_rate": 2.8007253977023045e-06, + "loss": 0.2932, + "step": 14487 + }, + { + "epoch": 0.6557139624349401, + "grad_norm": 0.735478218662718, + "learning_rate": 2.8000672078005277e-06, + "loss": 0.3117, + "step": 14488 + }, + { + "epoch": 0.6557592215433355, + "grad_norm": 0.5864591443386192, + "learning_rate": 2.799409065168551e-06, + "loss": 0.2882, + "step": 14489 + }, + { + "epoch": 0.6558044806517311, + "grad_norm": 0.6412764443291205, + "learning_rate": 2.7987509698205163e-06, + "loss": 0.3331, + "step": 14490 + }, + { + "epoch": 0.6558497397601267, + "grad_norm": 0.6238632744185769, + "learning_rate": 2.79809292177056e-06, + "loss": 0.3269, + "step": 14491 + }, + { + "epoch": 0.6558949988685223, + "grad_norm": 0.6110432933489035, + "learning_rate": 2.7974349210328234e-06, + "loss": 0.3298, + "step": 14492 + }, + { + "epoch": 0.6559402579769179, + "grad_norm": 0.6878994627966224, + "learning_rate": 2.7967769676214486e-06, + "loss": 0.3514, + "step": 14493 + }, + { + "epoch": 0.6559855170853134, + "grad_norm": 0.6380556906955277, + "learning_rate": 2.7961190615505695e-06, + "loss": 0.3398, + "step": 14494 + }, + { + "epoch": 0.656030776193709, + "grad_norm": 0.6014487945413114, + "learning_rate": 2.7954612028343218e-06, + "loss": 0.2999, + "step": 14495 + }, + { + "epoch": 0.6560760353021046, + "grad_norm": 0.678185293870894, + "learning_rate": 2.7948033914868415e-06, + "loss": 0.3078, + "step": 14496 + }, + { + "epoch": 0.6561212944105002, + "grad_norm": 0.577372925066938, + "learning_rate": 2.7941456275222658e-06, + "loss": 0.2989, + "step": 14497 + }, + { + "epoch": 0.6561665535188956, + "grad_norm": 0.5706492885380627, + "learning_rate": 2.793487910954726e-06, + "loss": 0.3397, + "step": 14498 + }, + { + "epoch": 0.6562118126272912, + "grad_norm": 0.6273409052262731, + "learning_rate": 2.7928302417983524e-06, + "loss": 0.2975, + "step": 14499 + }, + { + "epoch": 0.6562570717356868, + "grad_norm": 0.5952325031578662, + "learning_rate": 2.7921726200672793e-06, + "loss": 0.2611, + "step": 14500 + }, + { + "epoch": 0.6563023308440824, + "grad_norm": 0.6492498565560609, + "learning_rate": 2.791515045775634e-06, + "loss": 0.2996, + "step": 14501 + }, + { + "epoch": 0.6563475899524779, + "grad_norm": 0.2819515413997284, + "learning_rate": 2.79085751893755e-06, + "loss": 0.4532, + "step": 14502 + }, + { + "epoch": 0.6563928490608735, + "grad_norm": 0.6220399803635098, + "learning_rate": 2.7902000395671523e-06, + "loss": 0.3138, + "step": 14503 + }, + { + "epoch": 0.6564381081692691, + "grad_norm": 0.625456312620898, + "learning_rate": 2.7895426076785676e-06, + "loss": 0.3258, + "step": 14504 + }, + { + "epoch": 0.6564833672776647, + "grad_norm": 0.28459561254149424, + "learning_rate": 2.788885223285923e-06, + "loss": 0.4833, + "step": 14505 + }, + { + "epoch": 0.6565286263860602, + "grad_norm": 1.8729040089538573, + "learning_rate": 2.7882278864033465e-06, + "loss": 0.2799, + "step": 14506 + }, + { + "epoch": 0.6565738854944557, + "grad_norm": 0.6141294598649133, + "learning_rate": 2.787570597044959e-06, + "loss": 0.3439, + "step": 14507 + }, + { + "epoch": 0.6566191446028513, + "grad_norm": 0.6304622492167625, + "learning_rate": 2.786913355224883e-06, + "loss": 0.2964, + "step": 14508 + }, + { + "epoch": 0.6566644037112469, + "grad_norm": 0.6245041485695481, + "learning_rate": 2.7862561609572414e-06, + "loss": 0.321, + "step": 14509 + }, + { + "epoch": 0.6567096628196425, + "grad_norm": 0.6741403524459103, + "learning_rate": 2.7855990142561606e-06, + "loss": 0.3239, + "step": 14510 + }, + { + "epoch": 0.656754921928038, + "grad_norm": 0.5674773642493651, + "learning_rate": 2.7849419151357513e-06, + "loss": 0.3053, + "step": 14511 + }, + { + "epoch": 0.6568001810364336, + "grad_norm": 0.28760962113666705, + "learning_rate": 2.784284863610138e-06, + "loss": 0.4902, + "step": 14512 + }, + { + "epoch": 0.6568454401448292, + "grad_norm": 0.6315742915634163, + "learning_rate": 2.7836278596934395e-06, + "loss": 0.3242, + "step": 14513 + }, + { + "epoch": 0.6568906992532247, + "grad_norm": 0.7371476507524212, + "learning_rate": 2.782970903399771e-06, + "loss": 0.2842, + "step": 14514 + }, + { + "epoch": 0.6569359583616203, + "grad_norm": 0.7316752146725508, + "learning_rate": 2.782313994743247e-06, + "loss": 0.2426, + "step": 14515 + }, + { + "epoch": 0.6569812174700158, + "grad_norm": 0.6650642757049506, + "learning_rate": 2.781657133737986e-06, + "loss": 0.299, + "step": 14516 + }, + { + "epoch": 0.6570264765784114, + "grad_norm": 0.5989291909812126, + "learning_rate": 2.7810003203980983e-06, + "loss": 0.2837, + "step": 14517 + }, + { + "epoch": 0.657071735686807, + "grad_norm": 0.6512623588879672, + "learning_rate": 2.7803435547377006e-06, + "loss": 0.3266, + "step": 14518 + }, + { + "epoch": 0.6571169947952026, + "grad_norm": 0.2594526263361003, + "learning_rate": 2.779686836770903e-06, + "loss": 0.4504, + "step": 14519 + }, + { + "epoch": 0.6571622539035981, + "grad_norm": 0.3290550773122507, + "learning_rate": 2.7790301665118137e-06, + "loss": 0.4825, + "step": 14520 + }, + { + "epoch": 0.6572075130119936, + "grad_norm": 0.6135740857375592, + "learning_rate": 2.7783735439745447e-06, + "loss": 0.2996, + "step": 14521 + }, + { + "epoch": 0.6572527721203892, + "grad_norm": 0.6624231097263641, + "learning_rate": 2.7777169691732074e-06, + "loss": 0.295, + "step": 14522 + }, + { + "epoch": 0.6572980312287848, + "grad_norm": 0.6008466558370399, + "learning_rate": 2.777060442121907e-06, + "loss": 0.302, + "step": 14523 + }, + { + "epoch": 0.6573432903371803, + "grad_norm": 0.6499752987880355, + "learning_rate": 2.7764039628347484e-06, + "loss": 0.3174, + "step": 14524 + }, + { + "epoch": 0.6573885494455759, + "grad_norm": 0.5845108030775217, + "learning_rate": 2.7757475313258397e-06, + "loss": 0.3008, + "step": 14525 + }, + { + "epoch": 0.6574338085539715, + "grad_norm": 0.6367031103099358, + "learning_rate": 2.775091147609287e-06, + "loss": 0.3072, + "step": 14526 + }, + { + "epoch": 0.6574790676623671, + "grad_norm": 0.29519705624979603, + "learning_rate": 2.7744348116991925e-06, + "loss": 0.4913, + "step": 14527 + }, + { + "epoch": 0.6575243267707627, + "grad_norm": 0.9076162605324154, + "learning_rate": 2.7737785236096563e-06, + "loss": 0.3021, + "step": 14528 + }, + { + "epoch": 0.6575695858791581, + "grad_norm": 0.7232551838129118, + "learning_rate": 2.7731222833547842e-06, + "loss": 0.3272, + "step": 14529 + }, + { + "epoch": 0.6576148449875537, + "grad_norm": 0.665210978002039, + "learning_rate": 2.7724660909486732e-06, + "loss": 0.3128, + "step": 14530 + }, + { + "epoch": 0.6576601040959493, + "grad_norm": 0.6183998088090225, + "learning_rate": 2.771809946405427e-06, + "loss": 0.2759, + "step": 14531 + }, + { + "epoch": 0.6577053632043449, + "grad_norm": 0.6421908889548431, + "learning_rate": 2.771153849739141e-06, + "loss": 0.2959, + "step": 14532 + }, + { + "epoch": 0.6577506223127404, + "grad_norm": 0.6180099869793233, + "learning_rate": 2.7704978009639117e-06, + "loss": 0.3348, + "step": 14533 + }, + { + "epoch": 0.657795881421136, + "grad_norm": 0.6045601813827167, + "learning_rate": 2.7698418000938374e-06, + "loss": 0.3214, + "step": 14534 + }, + { + "epoch": 0.6578411405295316, + "grad_norm": 0.6655538988058615, + "learning_rate": 2.7691858471430157e-06, + "loss": 0.3022, + "step": 14535 + }, + { + "epoch": 0.6578863996379272, + "grad_norm": 0.6071011266492075, + "learning_rate": 2.7685299421255373e-06, + "loss": 0.2832, + "step": 14536 + }, + { + "epoch": 0.6579316587463226, + "grad_norm": 0.6744160545583696, + "learning_rate": 2.7678740850554965e-06, + "loss": 0.3455, + "step": 14537 + }, + { + "epoch": 0.6579769178547182, + "grad_norm": 0.5592789821618848, + "learning_rate": 2.7672182759469857e-06, + "loss": 0.2837, + "step": 14538 + }, + { + "epoch": 0.6580221769631138, + "grad_norm": 0.5920929266921543, + "learning_rate": 2.7665625148141e-06, + "loss": 0.2734, + "step": 14539 + }, + { + "epoch": 0.6580674360715094, + "grad_norm": 0.5622962505052304, + "learning_rate": 2.7659068016709234e-06, + "loss": 0.2813, + "step": 14540 + }, + { + "epoch": 0.658112695179905, + "grad_norm": 0.7655350616626647, + "learning_rate": 2.7652511365315473e-06, + "loss": 0.3048, + "step": 14541 + }, + { + "epoch": 0.6581579542883005, + "grad_norm": 0.6174261956501719, + "learning_rate": 2.764595519410063e-06, + "loss": 0.2995, + "step": 14542 + }, + { + "epoch": 0.6582032133966961, + "grad_norm": 0.6266561514215079, + "learning_rate": 2.763939950320556e-06, + "loss": 0.3058, + "step": 14543 + }, + { + "epoch": 0.6582484725050917, + "grad_norm": 0.28002556277807183, + "learning_rate": 2.7632844292771094e-06, + "loss": 0.4655, + "step": 14544 + }, + { + "epoch": 0.6582937316134873, + "grad_norm": 0.6485738125198387, + "learning_rate": 2.762628956293813e-06, + "loss": 0.3146, + "step": 14545 + }, + { + "epoch": 0.6583389907218827, + "grad_norm": 0.7066485212378323, + "learning_rate": 2.7619735313847467e-06, + "loss": 0.3156, + "step": 14546 + }, + { + "epoch": 0.6583842498302783, + "grad_norm": 0.2891774709824707, + "learning_rate": 2.761318154563998e-06, + "loss": 0.4787, + "step": 14547 + }, + { + "epoch": 0.6584295089386739, + "grad_norm": 0.6208970598323403, + "learning_rate": 2.7606628258456457e-06, + "loss": 0.283, + "step": 14548 + }, + { + "epoch": 0.6584747680470695, + "grad_norm": 0.5981822572032913, + "learning_rate": 2.760007545243771e-06, + "loss": 0.2955, + "step": 14549 + }, + { + "epoch": 0.658520027155465, + "grad_norm": 0.629617347306717, + "learning_rate": 2.759352312772454e-06, + "loss": 0.3219, + "step": 14550 + }, + { + "epoch": 0.6585652862638606, + "grad_norm": 0.6254752051909233, + "learning_rate": 2.7586971284457753e-06, + "loss": 0.2668, + "step": 14551 + }, + { + "epoch": 0.6586105453722562, + "grad_norm": 0.6137413177807575, + "learning_rate": 2.7580419922778124e-06, + "loss": 0.3469, + "step": 14552 + }, + { + "epoch": 0.6586558044806518, + "grad_norm": 0.714623058271599, + "learning_rate": 2.7573869042826396e-06, + "loss": 0.3049, + "step": 14553 + }, + { + "epoch": 0.6587010635890473, + "grad_norm": 0.5973950768771438, + "learning_rate": 2.7567318644743344e-06, + "loss": 0.305, + "step": 14554 + }, + { + "epoch": 0.6587463226974428, + "grad_norm": 0.5703781422935155, + "learning_rate": 2.756076872866974e-06, + "loss": 0.3131, + "step": 14555 + }, + { + "epoch": 0.6587915818058384, + "grad_norm": 0.6563924959666125, + "learning_rate": 2.755421929474629e-06, + "loss": 0.3368, + "step": 14556 + }, + { + "epoch": 0.658836840914234, + "grad_norm": 0.314269215921467, + "learning_rate": 2.7547670343113718e-06, + "loss": 0.4752, + "step": 14557 + }, + { + "epoch": 0.6588821000226296, + "grad_norm": 0.267282794090128, + "learning_rate": 2.7541121873912774e-06, + "loss": 0.436, + "step": 14558 + }, + { + "epoch": 0.6589273591310251, + "grad_norm": 0.638997590884908, + "learning_rate": 2.7534573887284123e-06, + "loss": 0.3465, + "step": 14559 + }, + { + "epoch": 0.6589726182394207, + "grad_norm": 0.8136242615153971, + "learning_rate": 2.75280263833685e-06, + "loss": 0.3123, + "step": 14560 + }, + { + "epoch": 0.6590178773478162, + "grad_norm": 0.5918204058084633, + "learning_rate": 2.7521479362306574e-06, + "loss": 0.333, + "step": 14561 + }, + { + "epoch": 0.6590631364562118, + "grad_norm": 0.6079493814701858, + "learning_rate": 2.7514932824239e-06, + "loss": 0.3035, + "step": 14562 + }, + { + "epoch": 0.6591083955646074, + "grad_norm": 0.6118825012765209, + "learning_rate": 2.7508386769306462e-06, + "loss": 0.3094, + "step": 14563 + }, + { + "epoch": 0.6591536546730029, + "grad_norm": 0.7444506793187203, + "learning_rate": 2.7501841197649627e-06, + "loss": 0.2915, + "step": 14564 + }, + { + "epoch": 0.6591989137813985, + "grad_norm": 0.6609723903965469, + "learning_rate": 2.7495296109409136e-06, + "loss": 0.3388, + "step": 14565 + }, + { + "epoch": 0.6592441728897941, + "grad_norm": 0.5776648608785567, + "learning_rate": 2.7488751504725587e-06, + "loss": 0.3072, + "step": 14566 + }, + { + "epoch": 0.6592894319981897, + "grad_norm": 0.2930781439292153, + "learning_rate": 2.7482207383739636e-06, + "loss": 0.4886, + "step": 14567 + }, + { + "epoch": 0.6593346911065852, + "grad_norm": 0.6247275615918767, + "learning_rate": 2.7475663746591906e-06, + "loss": 0.3029, + "step": 14568 + }, + { + "epoch": 0.6593799502149807, + "grad_norm": 0.6498235375519393, + "learning_rate": 2.746912059342299e-06, + "loss": 0.3377, + "step": 14569 + }, + { + "epoch": 0.6594252093233763, + "grad_norm": 0.29449251536630294, + "learning_rate": 2.7462577924373448e-06, + "loss": 0.4664, + "step": 14570 + }, + { + "epoch": 0.6594704684317719, + "grad_norm": 0.572784277483015, + "learning_rate": 2.745603573958391e-06, + "loss": 0.2717, + "step": 14571 + }, + { + "epoch": 0.6595157275401674, + "grad_norm": 0.728629363071293, + "learning_rate": 2.74494940391949e-06, + "loss": 0.3143, + "step": 14572 + }, + { + "epoch": 0.659560986648563, + "grad_norm": 0.6563748386276432, + "learning_rate": 2.7442952823347035e-06, + "loss": 0.3188, + "step": 14573 + }, + { + "epoch": 0.6596062457569586, + "grad_norm": 0.28586724962853566, + "learning_rate": 2.743641209218083e-06, + "loss": 0.4824, + "step": 14574 + }, + { + "epoch": 0.6596515048653542, + "grad_norm": 0.6000616689940764, + "learning_rate": 2.742987184583681e-06, + "loss": 0.2562, + "step": 14575 + }, + { + "epoch": 0.6596967639737498, + "grad_norm": 0.6180064457315072, + "learning_rate": 2.7423332084455543e-06, + "loss": 0.3191, + "step": 14576 + }, + { + "epoch": 0.6597420230821452, + "grad_norm": 0.5672784538288332, + "learning_rate": 2.7416792808177516e-06, + "loss": 0.32, + "step": 14577 + }, + { + "epoch": 0.6597872821905408, + "grad_norm": 0.6002952662222252, + "learning_rate": 2.741025401714327e-06, + "loss": 0.3216, + "step": 14578 + }, + { + "epoch": 0.6598325412989364, + "grad_norm": 0.7181766236652201, + "learning_rate": 2.7403715711493264e-06, + "loss": 0.3128, + "step": 14579 + }, + { + "epoch": 0.659877800407332, + "grad_norm": 0.6649504513424246, + "learning_rate": 2.7397177891368033e-06, + "loss": 0.3064, + "step": 14580 + }, + { + "epoch": 0.6599230595157275, + "grad_norm": 0.5940222820479465, + "learning_rate": 2.7390640556908023e-06, + "loss": 0.2912, + "step": 14581 + }, + { + "epoch": 0.6599683186241231, + "grad_norm": 0.28298728873634355, + "learning_rate": 2.7384103708253697e-06, + "loss": 0.4343, + "step": 14582 + }, + { + "epoch": 0.6600135777325187, + "grad_norm": 0.6560700005915628, + "learning_rate": 2.7377567345545514e-06, + "loss": 0.2864, + "step": 14583 + }, + { + "epoch": 0.6600588368409143, + "grad_norm": 0.6090951121414581, + "learning_rate": 2.737103146892395e-06, + "loss": 0.357, + "step": 14584 + }, + { + "epoch": 0.6601040959493097, + "grad_norm": 0.6028122135930079, + "learning_rate": 2.7364496078529425e-06, + "loss": 0.317, + "step": 14585 + }, + { + "epoch": 0.6601493550577053, + "grad_norm": 0.5996556745656106, + "learning_rate": 2.7357961174502335e-06, + "loss": 0.2849, + "step": 14586 + }, + { + "epoch": 0.6601946141661009, + "grad_norm": 0.5778079712939874, + "learning_rate": 2.7351426756983145e-06, + "loss": 0.2917, + "step": 14587 + }, + { + "epoch": 0.6602398732744965, + "grad_norm": 0.6911837816561222, + "learning_rate": 2.734489282611221e-06, + "loss": 0.4023, + "step": 14588 + }, + { + "epoch": 0.6602851323828921, + "grad_norm": 0.2729175501123961, + "learning_rate": 2.733835938202997e-06, + "loss": 0.4774, + "step": 14589 + }, + { + "epoch": 0.6603303914912876, + "grad_norm": 0.5936874447342413, + "learning_rate": 2.7331826424876782e-06, + "loss": 0.2917, + "step": 14590 + }, + { + "epoch": 0.6603756505996832, + "grad_norm": 0.6315346424071278, + "learning_rate": 2.7325293954793013e-06, + "loss": 0.3421, + "step": 14591 + }, + { + "epoch": 0.6604209097080788, + "grad_norm": 0.302728605590915, + "learning_rate": 2.7318761971919034e-06, + "loss": 0.4755, + "step": 14592 + }, + { + "epoch": 0.6604661688164744, + "grad_norm": 0.6244775173123975, + "learning_rate": 2.731223047639522e-06, + "loss": 0.2912, + "step": 14593 + }, + { + "epoch": 0.6605114279248698, + "grad_norm": 0.60301323677301, + "learning_rate": 2.730569946836189e-06, + "loss": 0.329, + "step": 14594 + }, + { + "epoch": 0.6605566870332654, + "grad_norm": 0.7322230548298987, + "learning_rate": 2.7299168947959365e-06, + "loss": 0.3207, + "step": 14595 + }, + { + "epoch": 0.660601946141661, + "grad_norm": 0.6134897267742255, + "learning_rate": 2.7292638915327975e-06, + "loss": 0.3251, + "step": 14596 + }, + { + "epoch": 0.6606472052500566, + "grad_norm": 0.5731490592855168, + "learning_rate": 2.728610937060805e-06, + "loss": 0.2786, + "step": 14597 + }, + { + "epoch": 0.6606924643584522, + "grad_norm": 0.6774614477652757, + "learning_rate": 2.727958031393988e-06, + "loss": 0.3816, + "step": 14598 + }, + { + "epoch": 0.6607377234668477, + "grad_norm": 0.6043544377177085, + "learning_rate": 2.727305174546372e-06, + "loss": 0.2787, + "step": 14599 + }, + { + "epoch": 0.6607829825752433, + "grad_norm": 0.30317006861177326, + "learning_rate": 2.7266523665319904e-06, + "loss": 0.4921, + "step": 14600 + }, + { + "epoch": 0.6608282416836389, + "grad_norm": 0.5828626242746316, + "learning_rate": 2.725999607364865e-06, + "loss": 0.2868, + "step": 14601 + }, + { + "epoch": 0.6608735007920344, + "grad_norm": 0.6287623493610928, + "learning_rate": 2.725346897059027e-06, + "loss": 0.3035, + "step": 14602 + }, + { + "epoch": 0.6609187599004299, + "grad_norm": 0.6667618633210299, + "learning_rate": 2.724694235628498e-06, + "loss": 0.3314, + "step": 14603 + }, + { + "epoch": 0.6609640190088255, + "grad_norm": 0.6039172989770963, + "learning_rate": 2.724041623087299e-06, + "loss": 0.2995, + "step": 14604 + }, + { + "epoch": 0.6610092781172211, + "grad_norm": 0.6335276592368326, + "learning_rate": 2.723389059449455e-06, + "loss": 0.3158, + "step": 14605 + }, + { + "epoch": 0.6610545372256167, + "grad_norm": 0.6461064971619715, + "learning_rate": 2.722736544728991e-06, + "loss": 0.3307, + "step": 14606 + }, + { + "epoch": 0.6610997963340122, + "grad_norm": 0.5979661875521738, + "learning_rate": 2.7220840789399243e-06, + "loss": 0.3228, + "step": 14607 + }, + { + "epoch": 0.6611450554424078, + "grad_norm": 0.650606256072474, + "learning_rate": 2.7214316620962727e-06, + "loss": 0.2936, + "step": 14608 + }, + { + "epoch": 0.6611903145508033, + "grad_norm": 0.6703756569986283, + "learning_rate": 2.720779294212059e-06, + "loss": 0.3279, + "step": 14609 + }, + { + "epoch": 0.6612355736591989, + "grad_norm": 0.7022940751613966, + "learning_rate": 2.720126975301297e-06, + "loss": 0.3444, + "step": 14610 + }, + { + "epoch": 0.6612808327675945, + "grad_norm": 0.27846024025290605, + "learning_rate": 2.7194747053780037e-06, + "loss": 0.4496, + "step": 14611 + }, + { + "epoch": 0.66132609187599, + "grad_norm": 0.6336076704471227, + "learning_rate": 2.718822484456194e-06, + "loss": 0.3225, + "step": 14612 + }, + { + "epoch": 0.6613713509843856, + "grad_norm": 0.6162067561278477, + "learning_rate": 2.718170312549885e-06, + "loss": 0.2723, + "step": 14613 + }, + { + "epoch": 0.6614166100927812, + "grad_norm": 0.5828372659832474, + "learning_rate": 2.717518189673088e-06, + "loss": 0.334, + "step": 14614 + }, + { + "epoch": 0.6614618692011768, + "grad_norm": 0.6171211495647955, + "learning_rate": 2.716866115839813e-06, + "loss": 0.2896, + "step": 14615 + }, + { + "epoch": 0.6615071283095723, + "grad_norm": 0.6540331289151489, + "learning_rate": 2.716214091064075e-06, + "loss": 0.2921, + "step": 14616 + }, + { + "epoch": 0.6615523874179678, + "grad_norm": 0.5574949221745757, + "learning_rate": 2.71556211535988e-06, + "loss": 0.2761, + "step": 14617 + }, + { + "epoch": 0.6615976465263634, + "grad_norm": 0.6485906746506321, + "learning_rate": 2.714910188741241e-06, + "loss": 0.3316, + "step": 14618 + }, + { + "epoch": 0.661642905634759, + "grad_norm": 0.27752831386377874, + "learning_rate": 2.714258311222162e-06, + "loss": 0.4668, + "step": 14619 + }, + { + "epoch": 0.6616881647431545, + "grad_norm": 0.2789879546078319, + "learning_rate": 2.7136064828166543e-06, + "loss": 0.475, + "step": 14620 + }, + { + "epoch": 0.6617334238515501, + "grad_norm": 0.7431375370864788, + "learning_rate": 2.7129547035387187e-06, + "loss": 0.3271, + "step": 14621 + }, + { + "epoch": 0.6617786829599457, + "grad_norm": 0.6110256512004637, + "learning_rate": 2.7123029734023643e-06, + "loss": 0.3225, + "step": 14622 + }, + { + "epoch": 0.6618239420683413, + "grad_norm": 0.5927904023046602, + "learning_rate": 2.711651292421593e-06, + "loss": 0.2673, + "step": 14623 + }, + { + "epoch": 0.6618692011767369, + "grad_norm": 0.6309109945227651, + "learning_rate": 2.7109996606104054e-06, + "loss": 0.3119, + "step": 14624 + }, + { + "epoch": 0.6619144602851323, + "grad_norm": 0.27083279929004345, + "learning_rate": 2.710348077982805e-06, + "loss": 0.4362, + "step": 14625 + }, + { + "epoch": 0.6619597193935279, + "grad_norm": 0.6326640005584911, + "learning_rate": 2.7096965445527947e-06, + "loss": 0.2773, + "step": 14626 + }, + { + "epoch": 0.6620049785019235, + "grad_norm": 0.26875623419781375, + "learning_rate": 2.7090450603343703e-06, + "loss": 0.4829, + "step": 14627 + }, + { + "epoch": 0.6620502376103191, + "grad_norm": 0.5908815928580272, + "learning_rate": 2.70839362534153e-06, + "loss": 0.3308, + "step": 14628 + }, + { + "epoch": 0.6620954967187146, + "grad_norm": 0.591809248196907, + "learning_rate": 2.7077422395882745e-06, + "loss": 0.2937, + "step": 14629 + }, + { + "epoch": 0.6621407558271102, + "grad_norm": 0.25957882080678385, + "learning_rate": 2.7070909030885967e-06, + "loss": 0.4638, + "step": 14630 + }, + { + "epoch": 0.6621860149355058, + "grad_norm": 0.5857590862321028, + "learning_rate": 2.706439615856495e-06, + "loss": 0.3007, + "step": 14631 + }, + { + "epoch": 0.6622312740439014, + "grad_norm": 0.6156546064522539, + "learning_rate": 2.705788377905961e-06, + "loss": 0.2595, + "step": 14632 + }, + { + "epoch": 0.662276533152297, + "grad_norm": 0.27266227794839076, + "learning_rate": 2.705137189250988e-06, + "loss": 0.4496, + "step": 14633 + }, + { + "epoch": 0.6623217922606924, + "grad_norm": 0.7641074447659439, + "learning_rate": 2.7044860499055682e-06, + "loss": 0.3283, + "step": 14634 + }, + { + "epoch": 0.662367051369088, + "grad_norm": 0.26066804349701916, + "learning_rate": 2.7038349598836944e-06, + "loss": 0.4556, + "step": 14635 + }, + { + "epoch": 0.6624123104774836, + "grad_norm": 0.5907273354900149, + "learning_rate": 2.703183919199356e-06, + "loss": 0.2849, + "step": 14636 + }, + { + "epoch": 0.6624575695858792, + "grad_norm": 0.631603567997779, + "learning_rate": 2.702532927866538e-06, + "loss": 0.2869, + "step": 14637 + }, + { + "epoch": 0.6625028286942747, + "grad_norm": 0.61739783806948, + "learning_rate": 2.7018819858992323e-06, + "loss": 0.2978, + "step": 14638 + }, + { + "epoch": 0.6625480878026703, + "grad_norm": 0.2919577399403899, + "learning_rate": 2.7012310933114283e-06, + "loss": 0.5085, + "step": 14639 + }, + { + "epoch": 0.6625933469110659, + "grad_norm": 0.6630811709636194, + "learning_rate": 2.7005802501171037e-06, + "loss": 0.2701, + "step": 14640 + }, + { + "epoch": 0.6626386060194615, + "grad_norm": 0.6682876965040512, + "learning_rate": 2.6999294563302474e-06, + "loss": 0.3429, + "step": 14641 + }, + { + "epoch": 0.6626838651278569, + "grad_norm": 0.6592272286289782, + "learning_rate": 2.6992787119648456e-06, + "loss": 0.3079, + "step": 14642 + }, + { + "epoch": 0.6627291242362525, + "grad_norm": 0.6500740029003796, + "learning_rate": 2.698628017034877e-06, + "loss": 0.2948, + "step": 14643 + }, + { + "epoch": 0.6627743833446481, + "grad_norm": 0.3547010827896515, + "learning_rate": 2.6979773715543234e-06, + "loss": 0.4846, + "step": 14644 + }, + { + "epoch": 0.6628196424530437, + "grad_norm": 0.8248138219132292, + "learning_rate": 2.697326775537167e-06, + "loss": 0.3246, + "step": 14645 + }, + { + "epoch": 0.6628649015614393, + "grad_norm": 0.6486771166763444, + "learning_rate": 2.696676228997385e-06, + "loss": 0.2961, + "step": 14646 + }, + { + "epoch": 0.6629101606698348, + "grad_norm": 0.5916296106868336, + "learning_rate": 2.696025731948958e-06, + "loss": 0.2985, + "step": 14647 + }, + { + "epoch": 0.6629554197782304, + "grad_norm": 0.5843377254274962, + "learning_rate": 2.69537528440586e-06, + "loss": 0.2885, + "step": 14648 + }, + { + "epoch": 0.663000678886626, + "grad_norm": 0.6058458147622786, + "learning_rate": 2.6947248863820712e-06, + "loss": 0.3126, + "step": 14649 + }, + { + "epoch": 0.6630459379950215, + "grad_norm": 0.7042748783059469, + "learning_rate": 2.6940745378915623e-06, + "loss": 0.324, + "step": 14650 + }, + { + "epoch": 0.663091197103417, + "grad_norm": 0.7634106275562195, + "learning_rate": 2.6934242389483118e-06, + "loss": 0.3391, + "step": 14651 + }, + { + "epoch": 0.6631364562118126, + "grad_norm": 0.6186184423505748, + "learning_rate": 2.6927739895662897e-06, + "loss": 0.3108, + "step": 14652 + }, + { + "epoch": 0.6631817153202082, + "grad_norm": 0.28352438648346373, + "learning_rate": 2.692123789759467e-06, + "loss": 0.4476, + "step": 14653 + }, + { + "epoch": 0.6632269744286038, + "grad_norm": 0.6628832546732235, + "learning_rate": 2.6914736395418162e-06, + "loss": 0.3498, + "step": 14654 + }, + { + "epoch": 0.6632722335369993, + "grad_norm": 0.6349727134291555, + "learning_rate": 2.6908235389273086e-06, + "loss": 0.2811, + "step": 14655 + }, + { + "epoch": 0.6633174926453949, + "grad_norm": 0.5571570233509476, + "learning_rate": 2.69017348792991e-06, + "loss": 0.3177, + "step": 14656 + }, + { + "epoch": 0.6633627517537904, + "grad_norm": 0.587265023201979, + "learning_rate": 2.6895234865635883e-06, + "loss": 0.33, + "step": 14657 + }, + { + "epoch": 0.663408010862186, + "grad_norm": 0.6182820774276857, + "learning_rate": 2.688873534842312e-06, + "loss": 0.3366, + "step": 14658 + }, + { + "epoch": 0.6634532699705816, + "grad_norm": 0.7652304431041801, + "learning_rate": 2.688223632780044e-06, + "loss": 0.2685, + "step": 14659 + }, + { + "epoch": 0.6634985290789771, + "grad_norm": 0.6361782325815011, + "learning_rate": 2.687573780390752e-06, + "loss": 0.2825, + "step": 14660 + }, + { + "epoch": 0.6635437881873727, + "grad_norm": 0.5523403925336813, + "learning_rate": 2.686923977688397e-06, + "loss": 0.2845, + "step": 14661 + }, + { + "epoch": 0.6635890472957683, + "grad_norm": 0.5916077285971831, + "learning_rate": 2.68627422468694e-06, + "loss": 0.2967, + "step": 14662 + }, + { + "epoch": 0.6636343064041639, + "grad_norm": 0.6534276462669631, + "learning_rate": 2.685624521400344e-06, + "loss": 0.3592, + "step": 14663 + }, + { + "epoch": 0.6636795655125594, + "grad_norm": 0.5755201570950208, + "learning_rate": 2.68497486784257e-06, + "loss": 0.3006, + "step": 14664 + }, + { + "epoch": 0.663724824620955, + "grad_norm": 0.6065498661857095, + "learning_rate": 2.684325264027577e-06, + "loss": 0.3319, + "step": 14665 + }, + { + "epoch": 0.6637700837293505, + "grad_norm": 0.7403589657336263, + "learning_rate": 2.68367570996932e-06, + "loss": 0.2887, + "step": 14666 + }, + { + "epoch": 0.6638153428377461, + "grad_norm": 0.9069088530531536, + "learning_rate": 2.6830262056817574e-06, + "loss": 0.2969, + "step": 14667 + }, + { + "epoch": 0.6638606019461417, + "grad_norm": 0.37938075410330124, + "learning_rate": 2.68237675117885e-06, + "loss": 0.4687, + "step": 14668 + }, + { + "epoch": 0.6639058610545372, + "grad_norm": 0.36833643317394965, + "learning_rate": 2.6817273464745443e-06, + "loss": 0.4758, + "step": 14669 + }, + { + "epoch": 0.6639511201629328, + "grad_norm": 0.6321562537827897, + "learning_rate": 2.681077991582797e-06, + "loss": 0.3122, + "step": 14670 + }, + { + "epoch": 0.6639963792713284, + "grad_norm": 0.6201058925027416, + "learning_rate": 2.6804286865175645e-06, + "loss": 0.3505, + "step": 14671 + }, + { + "epoch": 0.664041638379724, + "grad_norm": 0.7259966491374608, + "learning_rate": 2.679779431292795e-06, + "loss": 0.3678, + "step": 14672 + }, + { + "epoch": 0.6640868974881194, + "grad_norm": 0.5941258634548173, + "learning_rate": 2.6791302259224385e-06, + "loss": 0.285, + "step": 14673 + }, + { + "epoch": 0.664132156596515, + "grad_norm": 0.6272193636667374, + "learning_rate": 2.678481070420446e-06, + "loss": 0.3586, + "step": 14674 + }, + { + "epoch": 0.6641774157049106, + "grad_norm": 0.5736449544025902, + "learning_rate": 2.6778319648007645e-06, + "loss": 0.3205, + "step": 14675 + }, + { + "epoch": 0.6642226748133062, + "grad_norm": 0.7449192943128308, + "learning_rate": 2.677182909077343e-06, + "loss": 0.3317, + "step": 14676 + }, + { + "epoch": 0.6642679339217017, + "grad_norm": 0.6651381927164225, + "learning_rate": 2.6765339032641256e-06, + "loss": 0.295, + "step": 14677 + }, + { + "epoch": 0.6643131930300973, + "grad_norm": 0.6301906237222805, + "learning_rate": 2.6758849473750605e-06, + "loss": 0.3309, + "step": 14678 + }, + { + "epoch": 0.6643584521384929, + "grad_norm": 0.685974008499579, + "learning_rate": 2.6752360414240874e-06, + "loss": 0.3705, + "step": 14679 + }, + { + "epoch": 0.6644037112468885, + "grad_norm": 0.7277368945564235, + "learning_rate": 2.674587185425155e-06, + "loss": 0.3604, + "step": 14680 + }, + { + "epoch": 0.664448970355284, + "grad_norm": 0.668819004807301, + "learning_rate": 2.6739383793922007e-06, + "loss": 0.336, + "step": 14681 + }, + { + "epoch": 0.6644942294636795, + "grad_norm": 0.7308638274786853, + "learning_rate": 2.673289623339165e-06, + "loss": 0.3389, + "step": 14682 + }, + { + "epoch": 0.6645394885720751, + "grad_norm": 0.5964153150842396, + "learning_rate": 2.67264091727999e-06, + "loss": 0.3391, + "step": 14683 + }, + { + "epoch": 0.6645847476804707, + "grad_norm": 0.6417431207623295, + "learning_rate": 2.6719922612286152e-06, + "loss": 0.3304, + "step": 14684 + }, + { + "epoch": 0.6646300067888663, + "grad_norm": 0.5823163333153432, + "learning_rate": 2.6713436551989767e-06, + "loss": 0.2997, + "step": 14685 + }, + { + "epoch": 0.6646752658972618, + "grad_norm": 0.6141979768525521, + "learning_rate": 2.6706950992050097e-06, + "loss": 0.273, + "step": 14686 + }, + { + "epoch": 0.6647205250056574, + "grad_norm": 0.6255508535122599, + "learning_rate": 2.670046593260652e-06, + "loss": 0.2546, + "step": 14687 + }, + { + "epoch": 0.664765784114053, + "grad_norm": 0.5823902037514233, + "learning_rate": 2.669398137379837e-06, + "loss": 0.2532, + "step": 14688 + }, + { + "epoch": 0.6648110432224485, + "grad_norm": 0.6167108534057749, + "learning_rate": 2.6687497315764987e-06, + "loss": 0.2779, + "step": 14689 + }, + { + "epoch": 0.664856302330844, + "grad_norm": 0.7864013107862138, + "learning_rate": 2.668101375864567e-06, + "loss": 0.2896, + "step": 14690 + }, + { + "epoch": 0.6649015614392396, + "grad_norm": 0.7672884746572013, + "learning_rate": 2.667453070257977e-06, + "loss": 0.3622, + "step": 14691 + }, + { + "epoch": 0.6649468205476352, + "grad_norm": 0.6104806892085198, + "learning_rate": 2.666804814770654e-06, + "loss": 0.3145, + "step": 14692 + }, + { + "epoch": 0.6649920796560308, + "grad_norm": 0.6513842948804288, + "learning_rate": 2.6661566094165327e-06, + "loss": 0.3222, + "step": 14693 + }, + { + "epoch": 0.6650373387644264, + "grad_norm": 0.6239399644369519, + "learning_rate": 2.665508454209538e-06, + "loss": 0.3105, + "step": 14694 + }, + { + "epoch": 0.6650825978728219, + "grad_norm": 0.6439218172036969, + "learning_rate": 2.664860349163594e-06, + "loss": 0.3106, + "step": 14695 + }, + { + "epoch": 0.6651278569812175, + "grad_norm": 0.6535200562376338, + "learning_rate": 2.6642122942926297e-06, + "loss": 0.3386, + "step": 14696 + }, + { + "epoch": 0.665173116089613, + "grad_norm": 0.6673961251279874, + "learning_rate": 2.663564289610573e-06, + "loss": 0.2991, + "step": 14697 + }, + { + "epoch": 0.6652183751980086, + "grad_norm": 0.6856669351238862, + "learning_rate": 2.66291633513134e-06, + "loss": 0.3338, + "step": 14698 + }, + { + "epoch": 0.6652636343064041, + "grad_norm": 0.6393130628186594, + "learning_rate": 2.6622684308688575e-06, + "loss": 0.292, + "step": 14699 + }, + { + "epoch": 0.6653088934147997, + "grad_norm": 0.6061214167728383, + "learning_rate": 2.6616205768370483e-06, + "loss": 0.3133, + "step": 14700 + }, + { + "epoch": 0.6653541525231953, + "grad_norm": 0.6467006444397589, + "learning_rate": 2.660972773049831e-06, + "loss": 0.3005, + "step": 14701 + }, + { + "epoch": 0.6653994116315909, + "grad_norm": 0.6369249651369691, + "learning_rate": 2.6603250195211235e-06, + "loss": 0.3107, + "step": 14702 + }, + { + "epoch": 0.6654446707399865, + "grad_norm": 0.70653484849908, + "learning_rate": 2.659677316264847e-06, + "loss": 0.3086, + "step": 14703 + }, + { + "epoch": 0.665489929848382, + "grad_norm": 0.6565328879057835, + "learning_rate": 2.6590296632949157e-06, + "loss": 0.2752, + "step": 14704 + }, + { + "epoch": 0.6655351889567775, + "grad_norm": 0.35181142116798614, + "learning_rate": 2.658382060625249e-06, + "loss": 0.4851, + "step": 14705 + }, + { + "epoch": 0.6655804480651731, + "grad_norm": 0.630447726301422, + "learning_rate": 2.657734508269758e-06, + "loss": 0.3556, + "step": 14706 + }, + { + "epoch": 0.6656257071735687, + "grad_norm": 0.3048831867135513, + "learning_rate": 2.6570870062423616e-06, + "loss": 0.4654, + "step": 14707 + }, + { + "epoch": 0.6656709662819642, + "grad_norm": 0.7057071837671293, + "learning_rate": 2.6564395545569667e-06, + "loss": 0.3463, + "step": 14708 + }, + { + "epoch": 0.6657162253903598, + "grad_norm": 0.5558048212112113, + "learning_rate": 2.65579215322749e-06, + "loss": 0.3569, + "step": 14709 + }, + { + "epoch": 0.6657614844987554, + "grad_norm": 0.6253333082889162, + "learning_rate": 2.6551448022678406e-06, + "loss": 0.3089, + "step": 14710 + }, + { + "epoch": 0.665806743607151, + "grad_norm": 0.2712330226795423, + "learning_rate": 2.6544975016919263e-06, + "loss": 0.461, + "step": 14711 + }, + { + "epoch": 0.6658520027155465, + "grad_norm": 0.6592651233475737, + "learning_rate": 2.653850251513656e-06, + "loss": 0.3745, + "step": 14712 + }, + { + "epoch": 0.665897261823942, + "grad_norm": 0.28287393808975175, + "learning_rate": 2.6532030517469408e-06, + "loss": 0.4944, + "step": 14713 + }, + { + "epoch": 0.6659425209323376, + "grad_norm": 0.655168003309896, + "learning_rate": 2.652555902405684e-06, + "loss": 0.3081, + "step": 14714 + }, + { + "epoch": 0.6659877800407332, + "grad_norm": 0.6746233358903633, + "learning_rate": 2.651908803503789e-06, + "loss": 0.3165, + "step": 14715 + }, + { + "epoch": 0.6660330391491288, + "grad_norm": 0.6332929557042597, + "learning_rate": 2.651261755055165e-06, + "loss": 0.2922, + "step": 14716 + }, + { + "epoch": 0.6660782982575243, + "grad_norm": 0.6089090989543154, + "learning_rate": 2.6506147570737094e-06, + "loss": 0.3304, + "step": 14717 + }, + { + "epoch": 0.6661235573659199, + "grad_norm": 0.6286355370023886, + "learning_rate": 2.64996780957333e-06, + "loss": 0.3131, + "step": 14718 + }, + { + "epoch": 0.6661688164743155, + "grad_norm": 0.7411944904056007, + "learning_rate": 2.649320912567922e-06, + "loss": 0.3271, + "step": 14719 + }, + { + "epoch": 0.6662140755827111, + "grad_norm": 0.6004250448542339, + "learning_rate": 2.6486740660713904e-06, + "loss": 0.3336, + "step": 14720 + }, + { + "epoch": 0.6662593346911065, + "grad_norm": 0.6536229703714422, + "learning_rate": 2.64802727009763e-06, + "loss": 0.3026, + "step": 14721 + }, + { + "epoch": 0.6663045937995021, + "grad_norm": 0.6600307770153987, + "learning_rate": 2.6473805246605416e-06, + "loss": 0.3047, + "step": 14722 + }, + { + "epoch": 0.6663498529078977, + "grad_norm": 0.6713346700277442, + "learning_rate": 2.64673382977402e-06, + "loss": 0.3154, + "step": 14723 + }, + { + "epoch": 0.6663951120162933, + "grad_norm": 0.6903849972604962, + "learning_rate": 2.6460871854519594e-06, + "loss": 0.2783, + "step": 14724 + }, + { + "epoch": 0.6664403711246888, + "grad_norm": 0.581681796442678, + "learning_rate": 2.6454405917082556e-06, + "loss": 0.3169, + "step": 14725 + }, + { + "epoch": 0.6664856302330844, + "grad_norm": 0.6353521186320087, + "learning_rate": 2.6447940485568057e-06, + "loss": 0.2909, + "step": 14726 + }, + { + "epoch": 0.66653088934148, + "grad_norm": 0.6398498279203021, + "learning_rate": 2.6441475560114938e-06, + "loss": 0.3011, + "step": 14727 + }, + { + "epoch": 0.6665761484498756, + "grad_norm": 0.6425698529167011, + "learning_rate": 2.6435011140862167e-06, + "loss": 0.3179, + "step": 14728 + }, + { + "epoch": 0.6666214075582712, + "grad_norm": 0.3115862301345666, + "learning_rate": 2.642854722794864e-06, + "loss": 0.4648, + "step": 14729 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.5943414758115273, + "learning_rate": 2.6422083821513246e-06, + "loss": 0.2429, + "step": 14730 + }, + { + "epoch": 0.6667119257750622, + "grad_norm": 0.5934254282346557, + "learning_rate": 2.6415620921694836e-06, + "loss": 0.3025, + "step": 14731 + }, + { + "epoch": 0.6667571848834578, + "grad_norm": 0.5653671869192441, + "learning_rate": 2.6409158528632315e-06, + "loss": 0.2915, + "step": 14732 + }, + { + "epoch": 0.6668024439918534, + "grad_norm": 0.7070394724449415, + "learning_rate": 2.640269664246451e-06, + "loss": 0.3329, + "step": 14733 + }, + { + "epoch": 0.6668477031002489, + "grad_norm": 0.6389565087574908, + "learning_rate": 2.6396235263330293e-06, + "loss": 0.3314, + "step": 14734 + }, + { + "epoch": 0.6668929622086445, + "grad_norm": 0.6245585495817344, + "learning_rate": 2.638977439136847e-06, + "loss": 0.3354, + "step": 14735 + }, + { + "epoch": 0.6669382213170401, + "grad_norm": 0.28605449137789823, + "learning_rate": 2.6383314026717903e-06, + "loss": 0.4906, + "step": 14736 + }, + { + "epoch": 0.6669834804254356, + "grad_norm": 0.6119075839120809, + "learning_rate": 2.637685416951736e-06, + "loss": 0.2988, + "step": 14737 + }, + { + "epoch": 0.6670287395338312, + "grad_norm": 0.3082055751433085, + "learning_rate": 2.6370394819905698e-06, + "loss": 0.4716, + "step": 14738 + }, + { + "epoch": 0.6670739986422267, + "grad_norm": 0.6307907284535162, + "learning_rate": 2.636393597802167e-06, + "loss": 0.3388, + "step": 14739 + }, + { + "epoch": 0.6671192577506223, + "grad_norm": 0.6078912974944578, + "learning_rate": 2.635747764400405e-06, + "loss": 0.2721, + "step": 14740 + }, + { + "epoch": 0.6671645168590179, + "grad_norm": 0.5925676623298787, + "learning_rate": 2.635101981799162e-06, + "loss": 0.3125, + "step": 14741 + }, + { + "epoch": 0.6672097759674135, + "grad_norm": 0.5533639263255467, + "learning_rate": 2.634456250012316e-06, + "loss": 0.2931, + "step": 14742 + }, + { + "epoch": 0.667255035075809, + "grad_norm": 0.29181393082657847, + "learning_rate": 2.6338105690537402e-06, + "loss": 0.4449, + "step": 14743 + }, + { + "epoch": 0.6673002941842046, + "grad_norm": 0.3115948826921394, + "learning_rate": 2.633164938937306e-06, + "loss": 0.512, + "step": 14744 + }, + { + "epoch": 0.6673455532926001, + "grad_norm": 0.28064957458736633, + "learning_rate": 2.6325193596768905e-06, + "loss": 0.4772, + "step": 14745 + }, + { + "epoch": 0.6673908124009957, + "grad_norm": 0.7221727407308424, + "learning_rate": 2.63187383128636e-06, + "loss": 0.2879, + "step": 14746 + }, + { + "epoch": 0.6674360715093912, + "grad_norm": 0.6131893416410314, + "learning_rate": 2.6312283537795902e-06, + "loss": 0.2931, + "step": 14747 + }, + { + "epoch": 0.6674813306177868, + "grad_norm": 0.6204251169985142, + "learning_rate": 2.630582927170446e-06, + "loss": 0.3419, + "step": 14748 + }, + { + "epoch": 0.6675265897261824, + "grad_norm": 0.6119163871445997, + "learning_rate": 2.6299375514727998e-06, + "loss": 0.3098, + "step": 14749 + }, + { + "epoch": 0.667571848834578, + "grad_norm": 0.5971973040823447, + "learning_rate": 2.629292226700514e-06, + "loss": 0.3163, + "step": 14750 + }, + { + "epoch": 0.6676171079429736, + "grad_norm": 0.3046693253865641, + "learning_rate": 2.6286469528674598e-06, + "loss": 0.4745, + "step": 14751 + }, + { + "epoch": 0.667662367051369, + "grad_norm": 0.5983885615746021, + "learning_rate": 2.6280017299874984e-06, + "loss": 0.2506, + "step": 14752 + }, + { + "epoch": 0.6677076261597646, + "grad_norm": 0.35135599929684164, + "learning_rate": 2.6273565580744942e-06, + "loss": 0.4827, + "step": 14753 + }, + { + "epoch": 0.6677528852681602, + "grad_norm": 0.6258238245008194, + "learning_rate": 2.6267114371423097e-06, + "loss": 0.2888, + "step": 14754 + }, + { + "epoch": 0.6677981443765558, + "grad_norm": 0.6635996374843116, + "learning_rate": 2.6260663672048094e-06, + "loss": 0.2864, + "step": 14755 + }, + { + "epoch": 0.6678434034849513, + "grad_norm": 0.6056793405065296, + "learning_rate": 2.6254213482758518e-06, + "loss": 0.2592, + "step": 14756 + }, + { + "epoch": 0.6678886625933469, + "grad_norm": 0.6187894917066431, + "learning_rate": 2.624776380369295e-06, + "loss": 0.2924, + "step": 14757 + }, + { + "epoch": 0.6679339217017425, + "grad_norm": 0.6193570671741726, + "learning_rate": 2.6241314634990005e-06, + "loss": 0.3077, + "step": 14758 + }, + { + "epoch": 0.6679791808101381, + "grad_norm": 0.7064378066940625, + "learning_rate": 2.6234865976788236e-06, + "loss": 0.3027, + "step": 14759 + }, + { + "epoch": 0.6680244399185336, + "grad_norm": 0.7637733039531868, + "learning_rate": 2.6228417829226195e-06, + "loss": 0.2954, + "step": 14760 + }, + { + "epoch": 0.6680696990269291, + "grad_norm": 0.6751147642440454, + "learning_rate": 2.622197019244245e-06, + "loss": 0.2501, + "step": 14761 + }, + { + "epoch": 0.6681149581353247, + "grad_norm": 0.622694810711134, + "learning_rate": 2.6215523066575542e-06, + "loss": 0.3243, + "step": 14762 + }, + { + "epoch": 0.6681602172437203, + "grad_norm": 0.7096323697849127, + "learning_rate": 2.6209076451764004e-06, + "loss": 0.313, + "step": 14763 + }, + { + "epoch": 0.6682054763521159, + "grad_norm": 0.575045414444866, + "learning_rate": 2.6202630348146323e-06, + "loss": 0.3191, + "step": 14764 + }, + { + "epoch": 0.6682507354605114, + "grad_norm": 0.3627578595993997, + "learning_rate": 2.6196184755861054e-06, + "loss": 0.4933, + "step": 14765 + }, + { + "epoch": 0.668295994568907, + "grad_norm": 0.6621699630985428, + "learning_rate": 2.618973967504664e-06, + "loss": 0.3007, + "step": 14766 + }, + { + "epoch": 0.6683412536773026, + "grad_norm": 0.6963598038335564, + "learning_rate": 2.618329510584161e-06, + "loss": 0.2904, + "step": 14767 + }, + { + "epoch": 0.6683865127856982, + "grad_norm": 0.6530712366145673, + "learning_rate": 2.617685104838443e-06, + "loss": 0.2846, + "step": 14768 + }, + { + "epoch": 0.6684317718940936, + "grad_norm": 0.551809083567866, + "learning_rate": 2.617040750281352e-06, + "loss": 0.2612, + "step": 14769 + }, + { + "epoch": 0.6684770310024892, + "grad_norm": 0.5585593830608696, + "learning_rate": 2.616396446926738e-06, + "loss": 0.3178, + "step": 14770 + }, + { + "epoch": 0.6685222901108848, + "grad_norm": 0.6878872213137671, + "learning_rate": 2.615752194788445e-06, + "loss": 0.3394, + "step": 14771 + }, + { + "epoch": 0.6685675492192804, + "grad_norm": 0.6736646003467953, + "learning_rate": 2.615107993880315e-06, + "loss": 0.3154, + "step": 14772 + }, + { + "epoch": 0.6686128083276759, + "grad_norm": 0.577778634752951, + "learning_rate": 2.614463844216187e-06, + "loss": 0.2896, + "step": 14773 + }, + { + "epoch": 0.6686580674360715, + "grad_norm": 0.5888540087169923, + "learning_rate": 2.613819745809907e-06, + "loss": 0.2919, + "step": 14774 + }, + { + "epoch": 0.6687033265444671, + "grad_norm": 0.3142271462539952, + "learning_rate": 2.6131756986753097e-06, + "loss": 0.4709, + "step": 14775 + }, + { + "epoch": 0.6687485856528627, + "grad_norm": 0.2816735923390981, + "learning_rate": 2.6125317028262383e-06, + "loss": 0.4692, + "step": 14776 + }, + { + "epoch": 0.6687938447612582, + "grad_norm": 0.5990464043433165, + "learning_rate": 2.6118877582765255e-06, + "loss": 0.3335, + "step": 14777 + }, + { + "epoch": 0.6688391038696537, + "grad_norm": 0.6297307102369214, + "learning_rate": 2.611243865040013e-06, + "loss": 0.3245, + "step": 14778 + }, + { + "epoch": 0.6688843629780493, + "grad_norm": 0.6290230209495843, + "learning_rate": 2.6106000231305306e-06, + "loss": 0.3014, + "step": 14779 + }, + { + "epoch": 0.6689296220864449, + "grad_norm": 0.8288293083591458, + "learning_rate": 2.6099562325619175e-06, + "loss": 0.294, + "step": 14780 + }, + { + "epoch": 0.6689748811948405, + "grad_norm": 0.6020139200786987, + "learning_rate": 2.6093124933480052e-06, + "loss": 0.2819, + "step": 14781 + }, + { + "epoch": 0.669020140303236, + "grad_norm": 0.27612277737532254, + "learning_rate": 2.608668805502622e-06, + "loss": 0.4852, + "step": 14782 + }, + { + "epoch": 0.6690653994116316, + "grad_norm": 0.6359528732903538, + "learning_rate": 2.6080251690396026e-06, + "loss": 0.2924, + "step": 14783 + }, + { + "epoch": 0.6691106585200272, + "grad_norm": 0.2736896880795578, + "learning_rate": 2.607381583972777e-06, + "loss": 0.4634, + "step": 14784 + }, + { + "epoch": 0.6691559176284227, + "grad_norm": 0.9012747453630279, + "learning_rate": 2.6067380503159735e-06, + "loss": 0.3164, + "step": 14785 + }, + { + "epoch": 0.6692011767368183, + "grad_norm": 0.6229379860976303, + "learning_rate": 2.606094568083017e-06, + "loss": 0.3233, + "step": 14786 + }, + { + "epoch": 0.6692464358452138, + "grad_norm": 0.6330120866522541, + "learning_rate": 2.605451137287738e-06, + "loss": 0.3263, + "step": 14787 + }, + { + "epoch": 0.6692916949536094, + "grad_norm": 0.6707506830477721, + "learning_rate": 2.604807757943957e-06, + "loss": 0.3047, + "step": 14788 + }, + { + "epoch": 0.669336954062005, + "grad_norm": 0.301561285535464, + "learning_rate": 2.6041644300655035e-06, + "loss": 0.4905, + "step": 14789 + }, + { + "epoch": 0.6693822131704006, + "grad_norm": 0.5884118503855394, + "learning_rate": 2.6035211536661966e-06, + "loss": 0.3398, + "step": 14790 + }, + { + "epoch": 0.6694274722787961, + "grad_norm": 0.5863125088203237, + "learning_rate": 2.6028779287598606e-06, + "loss": 0.2668, + "step": 14791 + }, + { + "epoch": 0.6694727313871917, + "grad_norm": 0.6488902625627246, + "learning_rate": 2.6022347553603145e-06, + "loss": 0.3191, + "step": 14792 + }, + { + "epoch": 0.6695179904955872, + "grad_norm": 0.27688118481367496, + "learning_rate": 2.6015916334813818e-06, + "loss": 0.4875, + "step": 14793 + }, + { + "epoch": 0.6695632496039828, + "grad_norm": 0.6130194968066307, + "learning_rate": 2.600948563136878e-06, + "loss": 0.2703, + "step": 14794 + }, + { + "epoch": 0.6696085087123783, + "grad_norm": 0.5475132725839402, + "learning_rate": 2.60030554434062e-06, + "loss": 0.2429, + "step": 14795 + }, + { + "epoch": 0.6696537678207739, + "grad_norm": 0.609963564618995, + "learning_rate": 2.599662577106427e-06, + "loss": 0.2978, + "step": 14796 + }, + { + "epoch": 0.6696990269291695, + "grad_norm": 0.5612618651319895, + "learning_rate": 2.5990196614481135e-06, + "loss": 0.2818, + "step": 14797 + }, + { + "epoch": 0.6697442860375651, + "grad_norm": 0.6252181494789513, + "learning_rate": 2.5983767973794915e-06, + "loss": 0.3169, + "step": 14798 + }, + { + "epoch": 0.6697895451459607, + "grad_norm": 0.6255870501678163, + "learning_rate": 2.597733984914377e-06, + "loss": 0.3206, + "step": 14799 + }, + { + "epoch": 0.6698348042543562, + "grad_norm": 0.2880044890391729, + "learning_rate": 2.5970912240665815e-06, + "loss": 0.4794, + "step": 14800 + }, + { + "epoch": 0.6698800633627517, + "grad_norm": 0.7775456388447965, + "learning_rate": 2.5964485148499165e-06, + "loss": 0.2681, + "step": 14801 + }, + { + "epoch": 0.6699253224711473, + "grad_norm": 0.8829594166813511, + "learning_rate": 2.595805857278189e-06, + "loss": 0.2694, + "step": 14802 + }, + { + "epoch": 0.6699705815795429, + "grad_norm": 0.28253780830597774, + "learning_rate": 2.5951632513652113e-06, + "loss": 0.4689, + "step": 14803 + }, + { + "epoch": 0.6700158406879384, + "grad_norm": 0.6075988341196107, + "learning_rate": 2.594520697124788e-06, + "loss": 0.3272, + "step": 14804 + }, + { + "epoch": 0.670061099796334, + "grad_norm": 0.6036323472055383, + "learning_rate": 2.5938781945707293e-06, + "loss": 0.3277, + "step": 14805 + }, + { + "epoch": 0.6701063589047296, + "grad_norm": 0.5779008475528601, + "learning_rate": 2.5932357437168353e-06, + "loss": 0.3211, + "step": 14806 + }, + { + "epoch": 0.6701516180131252, + "grad_norm": 0.6180057462429976, + "learning_rate": 2.592593344576916e-06, + "loss": 0.3489, + "step": 14807 + }, + { + "epoch": 0.6701968771215207, + "grad_norm": 0.2792284287855959, + "learning_rate": 2.59195099716477e-06, + "loss": 0.4964, + "step": 14808 + }, + { + "epoch": 0.6702421362299162, + "grad_norm": 0.2840852019818451, + "learning_rate": 2.591308701494203e-06, + "loss": 0.4862, + "step": 14809 + }, + { + "epoch": 0.6702873953383118, + "grad_norm": 0.6465522456857574, + "learning_rate": 2.590666457579014e-06, + "loss": 0.3202, + "step": 14810 + }, + { + "epoch": 0.6703326544467074, + "grad_norm": 0.2608260318289237, + "learning_rate": 2.590024265433002e-06, + "loss": 0.4471, + "step": 14811 + }, + { + "epoch": 0.670377913555103, + "grad_norm": 0.6305923595751279, + "learning_rate": 2.589382125069967e-06, + "loss": 0.3108, + "step": 14812 + }, + { + "epoch": 0.6704231726634985, + "grad_norm": 1.0727394971790696, + "learning_rate": 2.5887400365037075e-06, + "loss": 0.33, + "step": 14813 + }, + { + "epoch": 0.6704684317718941, + "grad_norm": 0.8948768742495324, + "learning_rate": 2.5880979997480193e-06, + "loss": 0.3479, + "step": 14814 + }, + { + "epoch": 0.6705136908802897, + "grad_norm": 0.6213365279867468, + "learning_rate": 2.5874560148166953e-06, + "loss": 0.3509, + "step": 14815 + }, + { + "epoch": 0.6705589499886853, + "grad_norm": 0.2882151533209925, + "learning_rate": 2.5868140817235344e-06, + "loss": 0.4971, + "step": 14816 + }, + { + "epoch": 0.6706042090970807, + "grad_norm": 0.6089141964028402, + "learning_rate": 2.5861722004823254e-06, + "loss": 0.2446, + "step": 14817 + }, + { + "epoch": 0.6706494682054763, + "grad_norm": 0.6515023495329259, + "learning_rate": 2.585530371106864e-06, + "loss": 0.3552, + "step": 14818 + }, + { + "epoch": 0.6706947273138719, + "grad_norm": 0.26172932451240744, + "learning_rate": 2.5848885936109382e-06, + "loss": 0.4446, + "step": 14819 + }, + { + "epoch": 0.6707399864222675, + "grad_norm": 0.6545152496550372, + "learning_rate": 2.58424686800834e-06, + "loss": 0.3255, + "step": 14820 + }, + { + "epoch": 0.6707852455306631, + "grad_norm": 0.6269818687343297, + "learning_rate": 2.583605194312856e-06, + "loss": 0.3356, + "step": 14821 + }, + { + "epoch": 0.6708305046390586, + "grad_norm": 0.7356448283387264, + "learning_rate": 2.5829635725382764e-06, + "loss": 0.2754, + "step": 14822 + }, + { + "epoch": 0.6708757637474542, + "grad_norm": 0.6411225363370119, + "learning_rate": 2.5823220026983865e-06, + "loss": 0.3226, + "step": 14823 + }, + { + "epoch": 0.6709210228558498, + "grad_norm": 0.6612202553526849, + "learning_rate": 2.5816804848069693e-06, + "loss": 0.3262, + "step": 14824 + }, + { + "epoch": 0.6709662819642453, + "grad_norm": 0.29496916689818664, + "learning_rate": 2.581039018877811e-06, + "loss": 0.4557, + "step": 14825 + }, + { + "epoch": 0.6710115410726408, + "grad_norm": 0.5952504723621468, + "learning_rate": 2.580397604924699e-06, + "loss": 0.3078, + "step": 14826 + }, + { + "epoch": 0.6710568001810364, + "grad_norm": 0.6380343137276523, + "learning_rate": 2.5797562429614075e-06, + "loss": 0.3059, + "step": 14827 + }, + { + "epoch": 0.671102059289432, + "grad_norm": 0.6199335184293526, + "learning_rate": 2.579114933001722e-06, + "loss": 0.2787, + "step": 14828 + }, + { + "epoch": 0.6711473183978276, + "grad_norm": 1.0874228926103768, + "learning_rate": 2.5784736750594218e-06, + "loss": 0.2945, + "step": 14829 + }, + { + "epoch": 0.6711925775062231, + "grad_norm": 0.6125779118419148, + "learning_rate": 2.577832469148286e-06, + "loss": 0.3024, + "step": 14830 + }, + { + "epoch": 0.6712378366146187, + "grad_norm": 0.7788844335689945, + "learning_rate": 2.5771913152820895e-06, + "loss": 0.2795, + "step": 14831 + }, + { + "epoch": 0.6712830957230143, + "grad_norm": 0.6314433647143567, + "learning_rate": 2.57655021347461e-06, + "loss": 0.3204, + "step": 14832 + }, + { + "epoch": 0.6713283548314098, + "grad_norm": 0.6141001508362471, + "learning_rate": 2.5759091637396254e-06, + "loss": 0.287, + "step": 14833 + }, + { + "epoch": 0.6713736139398054, + "grad_norm": 0.6273161713818961, + "learning_rate": 2.575268166090908e-06, + "loss": 0.2765, + "step": 14834 + }, + { + "epoch": 0.6714188730482009, + "grad_norm": 0.6215596863860496, + "learning_rate": 2.5746272205422285e-06, + "loss": 0.3148, + "step": 14835 + }, + { + "epoch": 0.6714641321565965, + "grad_norm": 0.6978944273632913, + "learning_rate": 2.5739863271073634e-06, + "loss": 0.3477, + "step": 14836 + }, + { + "epoch": 0.6715093912649921, + "grad_norm": 0.6040212388799695, + "learning_rate": 2.5733454858000795e-06, + "loss": 0.2893, + "step": 14837 + }, + { + "epoch": 0.6715546503733877, + "grad_norm": 0.6080583118847751, + "learning_rate": 2.5727046966341495e-06, + "loss": 0.2562, + "step": 14838 + }, + { + "epoch": 0.6715999094817832, + "grad_norm": 0.5755508530350106, + "learning_rate": 2.572063959623341e-06, + "loss": 0.2728, + "step": 14839 + }, + { + "epoch": 0.6716451685901788, + "grad_norm": 0.6491815330242983, + "learning_rate": 2.5714232747814192e-06, + "loss": 0.3187, + "step": 14840 + }, + { + "epoch": 0.6716904276985743, + "grad_norm": 0.5995387960306222, + "learning_rate": 2.5707826421221527e-06, + "loss": 0.3104, + "step": 14841 + }, + { + "epoch": 0.6717356868069699, + "grad_norm": 0.9448323782002909, + "learning_rate": 2.5701420616593078e-06, + "loss": 0.3251, + "step": 14842 + }, + { + "epoch": 0.6717809459153654, + "grad_norm": 0.6522763308065314, + "learning_rate": 2.5695015334066475e-06, + "loss": 0.2946, + "step": 14843 + }, + { + "epoch": 0.671826205023761, + "grad_norm": 0.5770132512230949, + "learning_rate": 2.5688610573779327e-06, + "loss": 0.3095, + "step": 14844 + }, + { + "epoch": 0.6718714641321566, + "grad_norm": 0.29970308751832503, + "learning_rate": 2.568220633586929e-06, + "loss": 0.4789, + "step": 14845 + }, + { + "epoch": 0.6719167232405522, + "grad_norm": 0.30084471159795745, + "learning_rate": 2.567580262047393e-06, + "loss": 0.4513, + "step": 14846 + }, + { + "epoch": 0.6719619823489478, + "grad_norm": 0.6722694384592166, + "learning_rate": 2.566939942773089e-06, + "loss": 0.3568, + "step": 14847 + }, + { + "epoch": 0.6720072414573433, + "grad_norm": 0.6594993876155704, + "learning_rate": 2.5662996757777716e-06, + "loss": 0.2789, + "step": 14848 + }, + { + "epoch": 0.6720525005657388, + "grad_norm": 0.9012927279021385, + "learning_rate": 2.5656594610752005e-06, + "loss": 0.2859, + "step": 14849 + }, + { + "epoch": 0.6720977596741344, + "grad_norm": 0.8042398329352978, + "learning_rate": 2.5650192986791293e-06, + "loss": 0.3334, + "step": 14850 + }, + { + "epoch": 0.67214301878253, + "grad_norm": 0.5570366902178083, + "learning_rate": 2.5643791886033177e-06, + "loss": 0.3362, + "step": 14851 + }, + { + "epoch": 0.6721882778909255, + "grad_norm": 0.6377163244803467, + "learning_rate": 2.5637391308615155e-06, + "loss": 0.2737, + "step": 14852 + }, + { + "epoch": 0.6722335369993211, + "grad_norm": 0.3154837118667572, + "learning_rate": 2.5630991254674764e-06, + "loss": 0.4655, + "step": 14853 + }, + { + "epoch": 0.6722787961077167, + "grad_norm": 0.3066986197362134, + "learning_rate": 2.562459172434952e-06, + "loss": 0.5063, + "step": 14854 + }, + { + "epoch": 0.6723240552161123, + "grad_norm": 0.2595637218282012, + "learning_rate": 2.561819271777698e-06, + "loss": 0.4417, + "step": 14855 + }, + { + "epoch": 0.6723693143245079, + "grad_norm": 0.27092358982107523, + "learning_rate": 2.5611794235094545e-06, + "loss": 0.465, + "step": 14856 + }, + { + "epoch": 0.6724145734329033, + "grad_norm": 0.6557612974703131, + "learning_rate": 2.5605396276439764e-06, + "loss": 0.2826, + "step": 14857 + }, + { + "epoch": 0.6724598325412989, + "grad_norm": 0.5703719087363823, + "learning_rate": 2.5598998841950105e-06, + "loss": 0.2981, + "step": 14858 + }, + { + "epoch": 0.6725050916496945, + "grad_norm": 0.28514248737355297, + "learning_rate": 2.5592601931763024e-06, + "loss": 0.466, + "step": 14859 + }, + { + "epoch": 0.6725503507580901, + "grad_norm": 0.2741781805340848, + "learning_rate": 2.558620554601594e-06, + "loss": 0.4683, + "step": 14860 + }, + { + "epoch": 0.6725956098664856, + "grad_norm": 0.6923206061416766, + "learning_rate": 2.5579809684846323e-06, + "loss": 0.2875, + "step": 14861 + }, + { + "epoch": 0.6726408689748812, + "grad_norm": 0.26899865757289887, + "learning_rate": 2.5573414348391613e-06, + "loss": 0.4465, + "step": 14862 + }, + { + "epoch": 0.6726861280832768, + "grad_norm": 0.2803036034475733, + "learning_rate": 2.5567019536789204e-06, + "loss": 0.4642, + "step": 14863 + }, + { + "epoch": 0.6727313871916724, + "grad_norm": 0.2722707798580064, + "learning_rate": 2.5560625250176495e-06, + "loss": 0.478, + "step": 14864 + }, + { + "epoch": 0.6727766463000678, + "grad_norm": 0.5717873551481465, + "learning_rate": 2.5554231488690908e-06, + "loss": 0.2754, + "step": 14865 + }, + { + "epoch": 0.6728219054084634, + "grad_norm": 0.7573567287495568, + "learning_rate": 2.554783825246978e-06, + "loss": 0.2943, + "step": 14866 + }, + { + "epoch": 0.672867164516859, + "grad_norm": 0.310794806996194, + "learning_rate": 2.5541445541650536e-06, + "loss": 0.4947, + "step": 14867 + }, + { + "epoch": 0.6729124236252546, + "grad_norm": 0.5633981967848383, + "learning_rate": 2.55350533563705e-06, + "loss": 0.3025, + "step": 14868 + }, + { + "epoch": 0.6729576827336502, + "grad_norm": 0.6291189797545162, + "learning_rate": 2.552866169676701e-06, + "loss": 0.3401, + "step": 14869 + }, + { + "epoch": 0.6730029418420457, + "grad_norm": 0.5796014872041827, + "learning_rate": 2.5522270562977424e-06, + "loss": 0.3003, + "step": 14870 + }, + { + "epoch": 0.6730482009504413, + "grad_norm": 0.5882744559978718, + "learning_rate": 2.551587995513909e-06, + "loss": 0.3058, + "step": 14871 + }, + { + "epoch": 0.6730934600588369, + "grad_norm": 0.5832979545729652, + "learning_rate": 2.550948987338929e-06, + "loss": 0.302, + "step": 14872 + }, + { + "epoch": 0.6731387191672324, + "grad_norm": 0.6154236592404919, + "learning_rate": 2.5503100317865324e-06, + "loss": 0.3275, + "step": 14873 + }, + { + "epoch": 0.6731839782756279, + "grad_norm": 0.6174955768741429, + "learning_rate": 2.549671128870452e-06, + "loss": 0.3201, + "step": 14874 + }, + { + "epoch": 0.6732292373840235, + "grad_norm": 0.6488605190840145, + "learning_rate": 2.549032278604411e-06, + "loss": 0.269, + "step": 14875 + }, + { + "epoch": 0.6732744964924191, + "grad_norm": 0.649150724749603, + "learning_rate": 2.54839348100214e-06, + "loss": 0.326, + "step": 14876 + }, + { + "epoch": 0.6733197556008147, + "grad_norm": 0.684597473992833, + "learning_rate": 2.5477547360773626e-06, + "loss": 0.2756, + "step": 14877 + }, + { + "epoch": 0.6733650147092102, + "grad_norm": 0.6649204591550062, + "learning_rate": 2.5471160438438058e-06, + "loss": 0.2814, + "step": 14878 + }, + { + "epoch": 0.6734102738176058, + "grad_norm": 0.6315462929374838, + "learning_rate": 2.5464774043151897e-06, + "loss": 0.2824, + "step": 14879 + }, + { + "epoch": 0.6734555329260014, + "grad_norm": 0.6376100767180388, + "learning_rate": 2.5458388175052407e-06, + "loss": 0.3113, + "step": 14880 + }, + { + "epoch": 0.673500792034397, + "grad_norm": 0.3157602884109542, + "learning_rate": 2.5452002834276784e-06, + "loss": 0.4864, + "step": 14881 + }, + { + "epoch": 0.6735460511427925, + "grad_norm": 0.3159061388350569, + "learning_rate": 2.5445618020962203e-06, + "loss": 0.4724, + "step": 14882 + }, + { + "epoch": 0.673591310251188, + "grad_norm": 0.6535128707460278, + "learning_rate": 2.543923373524588e-06, + "loss": 0.2934, + "step": 14883 + }, + { + "epoch": 0.6736365693595836, + "grad_norm": 0.6120965324245942, + "learning_rate": 2.543284997726504e-06, + "loss": 0.3202, + "step": 14884 + }, + { + "epoch": 0.6736818284679792, + "grad_norm": 0.6501927712827466, + "learning_rate": 2.542646674715675e-06, + "loss": 0.3191, + "step": 14885 + }, + { + "epoch": 0.6737270875763748, + "grad_norm": 0.6882156235163461, + "learning_rate": 2.5420084045058226e-06, + "loss": 0.2935, + "step": 14886 + }, + { + "epoch": 0.6737723466847703, + "grad_norm": 0.6085103868276374, + "learning_rate": 2.5413701871106618e-06, + "loss": 0.3092, + "step": 14887 + }, + { + "epoch": 0.6738176057931659, + "grad_norm": 0.32797325748302764, + "learning_rate": 2.540732022543905e-06, + "loss": 0.4571, + "step": 14888 + }, + { + "epoch": 0.6738628649015614, + "grad_norm": 0.5767157669986146, + "learning_rate": 2.5400939108192615e-06, + "loss": 0.259, + "step": 14889 + }, + { + "epoch": 0.673908124009957, + "grad_norm": 0.6497609474966436, + "learning_rate": 2.539455851950445e-06, + "loss": 0.32, + "step": 14890 + }, + { + "epoch": 0.6739533831183526, + "grad_norm": 0.6661441669420962, + "learning_rate": 2.5388178459511676e-06, + "loss": 0.3382, + "step": 14891 + }, + { + "epoch": 0.6739986422267481, + "grad_norm": 0.6545120640034809, + "learning_rate": 2.5381798928351355e-06, + "loss": 0.3246, + "step": 14892 + }, + { + "epoch": 0.6740439013351437, + "grad_norm": 0.7178892402201152, + "learning_rate": 2.537541992616055e-06, + "loss": 0.3176, + "step": 14893 + }, + { + "epoch": 0.6740891604435393, + "grad_norm": 0.5834186467443114, + "learning_rate": 2.5369041453076355e-06, + "loss": 0.3053, + "step": 14894 + }, + { + "epoch": 0.6741344195519349, + "grad_norm": 0.637241992326166, + "learning_rate": 2.5362663509235796e-06, + "loss": 0.316, + "step": 14895 + }, + { + "epoch": 0.6741796786603304, + "grad_norm": 0.6823378248252496, + "learning_rate": 2.5356286094775943e-06, + "loss": 0.2712, + "step": 14896 + }, + { + "epoch": 0.6742249377687259, + "grad_norm": 0.6170334175678358, + "learning_rate": 2.5349909209833823e-06, + "loss": 0.2777, + "step": 14897 + }, + { + "epoch": 0.6742701968771215, + "grad_norm": 0.30346031839056486, + "learning_rate": 2.5343532854546425e-06, + "loss": 0.4658, + "step": 14898 + }, + { + "epoch": 0.6743154559855171, + "grad_norm": 0.5985682336587592, + "learning_rate": 2.533715702905078e-06, + "loss": 0.3181, + "step": 14899 + }, + { + "epoch": 0.6743607150939126, + "grad_norm": 0.6128606460509117, + "learning_rate": 2.53307817334839e-06, + "loss": 0.3288, + "step": 14900 + }, + { + "epoch": 0.6744059742023082, + "grad_norm": 0.2968114827100891, + "learning_rate": 2.5324406967982764e-06, + "loss": 0.4709, + "step": 14901 + }, + { + "epoch": 0.6744512333107038, + "grad_norm": 0.38491508192950186, + "learning_rate": 2.5318032732684306e-06, + "loss": 0.4621, + "step": 14902 + }, + { + "epoch": 0.6744964924190994, + "grad_norm": 0.6463875398224902, + "learning_rate": 2.5311659027725523e-06, + "loss": 0.3006, + "step": 14903 + }, + { + "epoch": 0.674541751527495, + "grad_norm": 0.7427266610588816, + "learning_rate": 2.530528585324339e-06, + "loss": 0.2863, + "step": 14904 + }, + { + "epoch": 0.6745870106358904, + "grad_norm": 0.6320123767632743, + "learning_rate": 2.529891320937481e-06, + "loss": 0.2886, + "step": 14905 + }, + { + "epoch": 0.674632269744286, + "grad_norm": 0.282299480515388, + "learning_rate": 2.5292541096256706e-06, + "loss": 0.4613, + "step": 14906 + }, + { + "epoch": 0.6746775288526816, + "grad_norm": 0.5895777919573687, + "learning_rate": 2.528616951402603e-06, + "loss": 0.3129, + "step": 14907 + }, + { + "epoch": 0.6747227879610772, + "grad_norm": 0.5958128659036863, + "learning_rate": 2.5279798462819647e-06, + "loss": 0.3194, + "step": 14908 + }, + { + "epoch": 0.6747680470694727, + "grad_norm": 0.6247947020635181, + "learning_rate": 2.52734279427745e-06, + "loss": 0.3085, + "step": 14909 + }, + { + "epoch": 0.6748133061778683, + "grad_norm": 0.6390878216793594, + "learning_rate": 2.5267057954027437e-06, + "loss": 0.3221, + "step": 14910 + }, + { + "epoch": 0.6748585652862639, + "grad_norm": 0.6288651134205863, + "learning_rate": 2.5260688496715318e-06, + "loss": 0.2886, + "step": 14911 + }, + { + "epoch": 0.6749038243946595, + "grad_norm": 0.6489868403838829, + "learning_rate": 2.5254319570975026e-06, + "loss": 0.3176, + "step": 14912 + }, + { + "epoch": 0.6749490835030549, + "grad_norm": 0.6122035663289863, + "learning_rate": 2.524795117694344e-06, + "loss": 0.3026, + "step": 14913 + }, + { + "epoch": 0.6749943426114505, + "grad_norm": 0.277427821508405, + "learning_rate": 2.5241583314757327e-06, + "loss": 0.4706, + "step": 14914 + }, + { + "epoch": 0.6750396017198461, + "grad_norm": 0.6597618750541544, + "learning_rate": 2.523521598455355e-06, + "loss": 0.3289, + "step": 14915 + }, + { + "epoch": 0.6750848608282417, + "grad_norm": 0.5947017395599469, + "learning_rate": 2.522884918646894e-06, + "loss": 0.3565, + "step": 14916 + }, + { + "epoch": 0.6751301199366373, + "grad_norm": 0.5862091108400317, + "learning_rate": 2.5222482920640285e-06, + "loss": 0.2782, + "step": 14917 + }, + { + "epoch": 0.6751753790450328, + "grad_norm": 0.2960613611617704, + "learning_rate": 2.5216117187204346e-06, + "loss": 0.4796, + "step": 14918 + }, + { + "epoch": 0.6752206381534284, + "grad_norm": 0.2674426621288423, + "learning_rate": 2.520975198629794e-06, + "loss": 0.4825, + "step": 14919 + }, + { + "epoch": 0.675265897261824, + "grad_norm": 0.6306769854950436, + "learning_rate": 2.520338731805785e-06, + "loss": 0.2978, + "step": 14920 + }, + { + "epoch": 0.6753111563702195, + "grad_norm": 0.26766885441575, + "learning_rate": 2.5197023182620795e-06, + "loss": 0.4577, + "step": 14921 + }, + { + "epoch": 0.675356415478615, + "grad_norm": 0.6303183429655842, + "learning_rate": 2.5190659580123524e-06, + "loss": 0.3197, + "step": 14922 + }, + { + "epoch": 0.6754016745870106, + "grad_norm": 0.624135549266601, + "learning_rate": 2.51842965107028e-06, + "loss": 0.2715, + "step": 14923 + }, + { + "epoch": 0.6754469336954062, + "grad_norm": 0.6602309754641182, + "learning_rate": 2.517793397449531e-06, + "loss": 0.3257, + "step": 14924 + }, + { + "epoch": 0.6754921928038018, + "grad_norm": 0.43063048420359085, + "learning_rate": 2.5171571971637805e-06, + "loss": 0.4771, + "step": 14925 + }, + { + "epoch": 0.6755374519121974, + "grad_norm": 0.6352771016933549, + "learning_rate": 2.5165210502266964e-06, + "loss": 0.2835, + "step": 14926 + }, + { + "epoch": 0.6755827110205929, + "grad_norm": 0.7075697434121344, + "learning_rate": 2.515884956651945e-06, + "loss": 0.3086, + "step": 14927 + }, + { + "epoch": 0.6756279701289885, + "grad_norm": 0.6011383335968612, + "learning_rate": 2.515248916453197e-06, + "loss": 0.2733, + "step": 14928 + }, + { + "epoch": 0.675673229237384, + "grad_norm": 0.5922423977368473, + "learning_rate": 2.51461292964412e-06, + "loss": 0.2945, + "step": 14929 + }, + { + "epoch": 0.6757184883457796, + "grad_norm": 1.2989889900263096, + "learning_rate": 2.5139769962383788e-06, + "loss": 0.2941, + "step": 14930 + }, + { + "epoch": 0.6757637474541751, + "grad_norm": 0.5764395400902315, + "learning_rate": 2.5133411162496335e-06, + "loss": 0.2791, + "step": 14931 + }, + { + "epoch": 0.6758090065625707, + "grad_norm": 0.6428344078064796, + "learning_rate": 2.512705289691551e-06, + "loss": 0.3288, + "step": 14932 + }, + { + "epoch": 0.6758542656709663, + "grad_norm": 0.6114776529509702, + "learning_rate": 2.5120695165777946e-06, + "loss": 0.2842, + "step": 14933 + }, + { + "epoch": 0.6758995247793619, + "grad_norm": 0.28320462128242, + "learning_rate": 2.5114337969220233e-06, + "loss": 0.4543, + "step": 14934 + }, + { + "epoch": 0.6759447838877574, + "grad_norm": 0.6086254527757627, + "learning_rate": 2.510798130737895e-06, + "loss": 0.3156, + "step": 14935 + }, + { + "epoch": 0.675990042996153, + "grad_norm": 0.6228585294371244, + "learning_rate": 2.510162518039071e-06, + "loss": 0.3279, + "step": 14936 + }, + { + "epoch": 0.6760353021045485, + "grad_norm": 0.62743672614483, + "learning_rate": 2.5095269588392055e-06, + "loss": 0.3093, + "step": 14937 + }, + { + "epoch": 0.6760805612129441, + "grad_norm": 0.637625832238023, + "learning_rate": 2.50889145315196e-06, + "loss": 0.3188, + "step": 14938 + }, + { + "epoch": 0.6761258203213397, + "grad_norm": 0.7949598540965647, + "learning_rate": 2.508256000990985e-06, + "loss": 0.2864, + "step": 14939 + }, + { + "epoch": 0.6761710794297352, + "grad_norm": 0.606394208948491, + "learning_rate": 2.5076206023699344e-06, + "loss": 0.2753, + "step": 14940 + }, + { + "epoch": 0.6762163385381308, + "grad_norm": 0.5686953213430728, + "learning_rate": 2.5069852573024624e-06, + "loss": 0.3026, + "step": 14941 + }, + { + "epoch": 0.6762615976465264, + "grad_norm": 0.6984108573696061, + "learning_rate": 2.5063499658022227e-06, + "loss": 0.3118, + "step": 14942 + }, + { + "epoch": 0.676306856754922, + "grad_norm": 0.6034097239048911, + "learning_rate": 2.505714727882863e-06, + "loss": 0.2925, + "step": 14943 + }, + { + "epoch": 0.6763521158633174, + "grad_norm": 0.6563126248198178, + "learning_rate": 2.505079543558031e-06, + "loss": 0.3273, + "step": 14944 + }, + { + "epoch": 0.676397374971713, + "grad_norm": 0.2716205123914153, + "learning_rate": 2.504444412841378e-06, + "loss": 0.4755, + "step": 14945 + }, + { + "epoch": 0.6764426340801086, + "grad_norm": 0.628172470558325, + "learning_rate": 2.503809335746553e-06, + "loss": 0.3471, + "step": 14946 + }, + { + "epoch": 0.6764878931885042, + "grad_norm": 0.6236765157508242, + "learning_rate": 2.5031743122871954e-06, + "loss": 0.2969, + "step": 14947 + }, + { + "epoch": 0.6765331522968997, + "grad_norm": 0.7013063988857922, + "learning_rate": 2.502539342476953e-06, + "loss": 0.3084, + "step": 14948 + }, + { + "epoch": 0.6765784114052953, + "grad_norm": 0.630089781026667, + "learning_rate": 2.5019044263294724e-06, + "loss": 0.2788, + "step": 14949 + }, + { + "epoch": 0.6766236705136909, + "grad_norm": 0.5768425223257683, + "learning_rate": 2.5012695638583933e-06, + "loss": 0.2592, + "step": 14950 + }, + { + "epoch": 0.6766689296220865, + "grad_norm": 0.669027420317218, + "learning_rate": 2.5006347550773547e-06, + "loss": 0.2956, + "step": 14951 + }, + { + "epoch": 0.6767141887304821, + "grad_norm": 0.3014969538495637, + "learning_rate": 2.5000000000000015e-06, + "loss": 0.4678, + "step": 14952 + }, + { + "epoch": 0.6767594478388775, + "grad_norm": 0.28886597729116914, + "learning_rate": 2.4993652986399675e-06, + "loss": 0.4667, + "step": 14953 + }, + { + "epoch": 0.6768047069472731, + "grad_norm": 0.5987131742441169, + "learning_rate": 2.4987306510108956e-06, + "loss": 0.3097, + "step": 14954 + }, + { + "epoch": 0.6768499660556687, + "grad_norm": 0.2877713827912253, + "learning_rate": 2.4980960571264195e-06, + "loss": 0.491, + "step": 14955 + }, + { + "epoch": 0.6768952251640643, + "grad_norm": 0.7496311178592051, + "learning_rate": 2.497461517000173e-06, + "loss": 0.2975, + "step": 14956 + }, + { + "epoch": 0.6769404842724598, + "grad_norm": 0.3042963939527462, + "learning_rate": 2.496827030645793e-06, + "loss": 0.4762, + "step": 14957 + }, + { + "epoch": 0.6769857433808554, + "grad_norm": 0.6794376426236794, + "learning_rate": 2.4961925980769144e-06, + "loss": 0.2719, + "step": 14958 + }, + { + "epoch": 0.677031002489251, + "grad_norm": 0.682983746007749, + "learning_rate": 2.4955582193071664e-06, + "loss": 0.318, + "step": 14959 + }, + { + "epoch": 0.6770762615976466, + "grad_norm": 0.5837459735848904, + "learning_rate": 2.494923894350179e-06, + "loss": 0.3331, + "step": 14960 + }, + { + "epoch": 0.6771215207060421, + "grad_norm": 0.6577176249026865, + "learning_rate": 2.494289623219583e-06, + "loss": 0.3305, + "step": 14961 + }, + { + "epoch": 0.6771667798144376, + "grad_norm": 0.3025775812822773, + "learning_rate": 2.4936554059290095e-06, + "loss": 0.4543, + "step": 14962 + }, + { + "epoch": 0.6772120389228332, + "grad_norm": 0.6238240104432574, + "learning_rate": 2.4930212424920837e-06, + "loss": 0.3092, + "step": 14963 + }, + { + "epoch": 0.6772572980312288, + "grad_norm": 0.5594267201562955, + "learning_rate": 2.49238713292243e-06, + "loss": 0.2957, + "step": 14964 + }, + { + "epoch": 0.6773025571396244, + "grad_norm": 0.6158262707830824, + "learning_rate": 2.491753077233676e-06, + "loss": 0.3335, + "step": 14965 + }, + { + "epoch": 0.6773478162480199, + "grad_norm": 0.6131637951233143, + "learning_rate": 2.4911190754394445e-06, + "loss": 0.3059, + "step": 14966 + }, + { + "epoch": 0.6773930753564155, + "grad_norm": 0.5816517091849465, + "learning_rate": 2.49048512755336e-06, + "loss": 0.2906, + "step": 14967 + }, + { + "epoch": 0.677438334464811, + "grad_norm": 0.6345875687869158, + "learning_rate": 2.4898512335890425e-06, + "loss": 0.3485, + "step": 14968 + }, + { + "epoch": 0.6774835935732066, + "grad_norm": 0.6298797929591657, + "learning_rate": 2.4892173935601112e-06, + "loss": 0.3308, + "step": 14969 + }, + { + "epoch": 0.6775288526816021, + "grad_norm": 0.6800828759636052, + "learning_rate": 2.488583607480186e-06, + "loss": 0.3261, + "step": 14970 + }, + { + "epoch": 0.6775741117899977, + "grad_norm": 0.6050569028834316, + "learning_rate": 2.4879498753628885e-06, + "loss": 0.3023, + "step": 14971 + }, + { + "epoch": 0.6776193708983933, + "grad_norm": 0.6514948521987356, + "learning_rate": 2.487316197221833e-06, + "loss": 0.3389, + "step": 14972 + }, + { + "epoch": 0.6776646300067889, + "grad_norm": 0.6305047865132518, + "learning_rate": 2.486682573070633e-06, + "loss": 0.2832, + "step": 14973 + }, + { + "epoch": 0.6777098891151845, + "grad_norm": 0.642479019505682, + "learning_rate": 2.4860490029229056e-06, + "loss": 0.3041, + "step": 14974 + }, + { + "epoch": 0.67775514822358, + "grad_norm": 0.2966655752974817, + "learning_rate": 2.485415486792266e-06, + "loss": 0.4724, + "step": 14975 + }, + { + "epoch": 0.6778004073319756, + "grad_norm": 0.654410954345375, + "learning_rate": 2.4847820246923244e-06, + "loss": 0.3296, + "step": 14976 + }, + { + "epoch": 0.6778456664403711, + "grad_norm": 0.7127968738118841, + "learning_rate": 2.4841486166366908e-06, + "loss": 0.2785, + "step": 14977 + }, + { + "epoch": 0.6778909255487667, + "grad_norm": 0.6122340196596452, + "learning_rate": 2.483515262638978e-06, + "loss": 0.2813, + "step": 14978 + }, + { + "epoch": 0.6779361846571622, + "grad_norm": 0.6249973534876662, + "learning_rate": 2.482881962712794e-06, + "loss": 0.2824, + "step": 14979 + }, + { + "epoch": 0.6779814437655578, + "grad_norm": 0.6260218678357067, + "learning_rate": 2.4822487168717437e-06, + "loss": 0.2977, + "step": 14980 + }, + { + "epoch": 0.6780267028739534, + "grad_norm": 0.6507315239786348, + "learning_rate": 2.481615525129437e-06, + "loss": 0.2551, + "step": 14981 + }, + { + "epoch": 0.678071961982349, + "grad_norm": 0.5892910468708393, + "learning_rate": 2.480982387499477e-06, + "loss": 0.3069, + "step": 14982 + }, + { + "epoch": 0.6781172210907445, + "grad_norm": 1.8074788988332977, + "learning_rate": 2.480349303995471e-06, + "loss": 0.3318, + "step": 14983 + }, + { + "epoch": 0.67816248019914, + "grad_norm": 0.5737467735703359, + "learning_rate": 2.4797162746310193e-06, + "loss": 0.2766, + "step": 14984 + }, + { + "epoch": 0.6782077393075356, + "grad_norm": 0.3156381897305302, + "learning_rate": 2.479083299419723e-06, + "loss": 0.4643, + "step": 14985 + }, + { + "epoch": 0.6782529984159312, + "grad_norm": 0.7418544758899195, + "learning_rate": 2.4784503783751834e-06, + "loss": 0.3306, + "step": 14986 + }, + { + "epoch": 0.6782982575243268, + "grad_norm": 0.5487600317511814, + "learning_rate": 2.477817511511003e-06, + "loss": 0.2876, + "step": 14987 + }, + { + "epoch": 0.6783435166327223, + "grad_norm": 0.2603885575345069, + "learning_rate": 2.477184698840779e-06, + "loss": 0.4652, + "step": 14988 + }, + { + "epoch": 0.6783887757411179, + "grad_norm": 0.6194122431720369, + "learning_rate": 2.4765519403781048e-06, + "loss": 0.3801, + "step": 14989 + }, + { + "epoch": 0.6784340348495135, + "grad_norm": 0.27762558728478176, + "learning_rate": 2.475919236136579e-06, + "loss": 0.4808, + "step": 14990 + }, + { + "epoch": 0.6784792939579091, + "grad_norm": 0.31156010455713856, + "learning_rate": 2.4752865861297994e-06, + "loss": 0.4834, + "step": 14991 + }, + { + "epoch": 0.6785245530663045, + "grad_norm": 0.6896153422630451, + "learning_rate": 2.474653990371356e-06, + "loss": 0.3135, + "step": 14992 + }, + { + "epoch": 0.6785698121747001, + "grad_norm": 0.2979982913756971, + "learning_rate": 2.474021448874841e-06, + "loss": 0.4519, + "step": 14993 + }, + { + "epoch": 0.6786150712830957, + "grad_norm": 0.6265256108940277, + "learning_rate": 2.4733889616538493e-06, + "loss": 0.307, + "step": 14994 + }, + { + "epoch": 0.6786603303914913, + "grad_norm": 0.292726559882182, + "learning_rate": 2.472756528721966e-06, + "loss": 0.4919, + "step": 14995 + }, + { + "epoch": 0.6787055894998869, + "grad_norm": 0.29246799357364633, + "learning_rate": 2.4721241500927863e-06, + "loss": 0.4561, + "step": 14996 + }, + { + "epoch": 0.6787508486082824, + "grad_norm": 0.6261086562541617, + "learning_rate": 2.4714918257798936e-06, + "loss": 0.2792, + "step": 14997 + }, + { + "epoch": 0.678796107716678, + "grad_norm": 0.6299828393508288, + "learning_rate": 2.470859555796875e-06, + "loss": 0.3236, + "step": 14998 + }, + { + "epoch": 0.6788413668250736, + "grad_norm": 0.6333138654289642, + "learning_rate": 2.470227340157316e-06, + "loss": 0.254, + "step": 14999 + }, + { + "epoch": 0.6788866259334692, + "grad_norm": 0.6989859571225095, + "learning_rate": 2.4695951788748047e-06, + "loss": 0.291, + "step": 15000 + }, + { + "epoch": 0.6789318850418646, + "grad_norm": 0.6215116559144774, + "learning_rate": 2.4689630719629206e-06, + "loss": 0.2595, + "step": 15001 + }, + { + "epoch": 0.6789771441502602, + "grad_norm": 0.5904808625350783, + "learning_rate": 2.468331019435245e-06, + "loss": 0.2968, + "step": 15002 + }, + { + "epoch": 0.6790224032586558, + "grad_norm": 0.575496997947114, + "learning_rate": 2.4676990213053603e-06, + "loss": 0.2764, + "step": 15003 + }, + { + "epoch": 0.6790676623670514, + "grad_norm": 0.31230031547698267, + "learning_rate": 2.467067077586848e-06, + "loss": 0.463, + "step": 15004 + }, + { + "epoch": 0.6791129214754469, + "grad_norm": 0.7635158903039766, + "learning_rate": 2.466435188293286e-06, + "loss": 0.2692, + "step": 15005 + }, + { + "epoch": 0.6791581805838425, + "grad_norm": 1.3168042258773374, + "learning_rate": 2.4658033534382476e-06, + "loss": 0.3226, + "step": 15006 + }, + { + "epoch": 0.6792034396922381, + "grad_norm": 0.5862584713732727, + "learning_rate": 2.465171573035314e-06, + "loss": 0.2941, + "step": 15007 + }, + { + "epoch": 0.6792486988006337, + "grad_norm": 0.5971792872764682, + "learning_rate": 2.4645398470980564e-06, + "loss": 0.2765, + "step": 15008 + }, + { + "epoch": 0.6792939579090292, + "grad_norm": 0.6321924431964658, + "learning_rate": 2.463908175640052e-06, + "loss": 0.2697, + "step": 15009 + }, + { + "epoch": 0.6793392170174247, + "grad_norm": 0.643688802392928, + "learning_rate": 2.463276558674872e-06, + "loss": 0.2953, + "step": 15010 + }, + { + "epoch": 0.6793844761258203, + "grad_norm": 0.9119352682482706, + "learning_rate": 2.462644996216086e-06, + "loss": 0.2685, + "step": 15011 + }, + { + "epoch": 0.6794297352342159, + "grad_norm": 0.925526246168888, + "learning_rate": 2.4620134882772683e-06, + "loss": 0.2431, + "step": 15012 + }, + { + "epoch": 0.6794749943426115, + "grad_norm": 0.32426128200660725, + "learning_rate": 2.461382034871986e-06, + "loss": 0.4957, + "step": 15013 + }, + { + "epoch": 0.679520253451007, + "grad_norm": 0.6005725209893311, + "learning_rate": 2.4607506360138044e-06, + "loss": 0.3386, + "step": 15014 + }, + { + "epoch": 0.6795655125594026, + "grad_norm": 0.5949138426379929, + "learning_rate": 2.460119291716293e-06, + "loss": 0.3316, + "step": 15015 + }, + { + "epoch": 0.6796107716677982, + "grad_norm": 0.6225411278253692, + "learning_rate": 2.4594880019930194e-06, + "loss": 0.3183, + "step": 15016 + }, + { + "epoch": 0.6796560307761937, + "grad_norm": 0.6325554520778243, + "learning_rate": 2.4588567668575463e-06, + "loss": 0.3276, + "step": 15017 + }, + { + "epoch": 0.6797012898845892, + "grad_norm": 0.6760739235968206, + "learning_rate": 2.458225586323435e-06, + "loss": 0.3176, + "step": 15018 + }, + { + "epoch": 0.6797465489929848, + "grad_norm": 0.5645343967429904, + "learning_rate": 2.457594460404249e-06, + "loss": 0.3023, + "step": 15019 + }, + { + "epoch": 0.6797918081013804, + "grad_norm": 0.6337538050357513, + "learning_rate": 2.456963389113552e-06, + "loss": 0.2849, + "step": 15020 + }, + { + "epoch": 0.679837067209776, + "grad_norm": 0.2906828686861893, + "learning_rate": 2.4563323724649006e-06, + "loss": 0.4676, + "step": 15021 + }, + { + "epoch": 0.6798823263181716, + "grad_norm": 0.5591360322721972, + "learning_rate": 2.4557014104718536e-06, + "loss": 0.3091, + "step": 15022 + }, + { + "epoch": 0.6799275854265671, + "grad_norm": 0.6327486766443987, + "learning_rate": 2.4550705031479697e-06, + "loss": 0.3012, + "step": 15023 + }, + { + "epoch": 0.6799728445349627, + "grad_norm": 0.6229533038465822, + "learning_rate": 2.4544396505068037e-06, + "loss": 0.297, + "step": 15024 + }, + { + "epoch": 0.6800181036433582, + "grad_norm": 0.7074763849772877, + "learning_rate": 2.4538088525619124e-06, + "loss": 0.3375, + "step": 15025 + }, + { + "epoch": 0.6800633627517538, + "grad_norm": 0.641807895663935, + "learning_rate": 2.453178109326849e-06, + "loss": 0.3504, + "step": 15026 + }, + { + "epoch": 0.6801086218601493, + "grad_norm": 0.6876442627979192, + "learning_rate": 2.452547420815165e-06, + "loss": 0.3808, + "step": 15027 + }, + { + "epoch": 0.6801538809685449, + "grad_norm": 0.6633112177907754, + "learning_rate": 2.4519167870404126e-06, + "loss": 0.2731, + "step": 15028 + }, + { + "epoch": 0.6801991400769405, + "grad_norm": 0.3050394725195347, + "learning_rate": 2.451286208016144e-06, + "loss": 0.4741, + "step": 15029 + }, + { + "epoch": 0.6802443991853361, + "grad_norm": 0.3037036497796297, + "learning_rate": 2.4506556837559074e-06, + "loss": 0.4441, + "step": 15030 + }, + { + "epoch": 0.6802896582937316, + "grad_norm": 0.2735878686555206, + "learning_rate": 2.450025214273249e-06, + "loss": 0.4828, + "step": 15031 + }, + { + "epoch": 0.6803349174021271, + "grad_norm": 0.6187834499050971, + "learning_rate": 2.4493947995817165e-06, + "loss": 0.3022, + "step": 15032 + }, + { + "epoch": 0.6803801765105227, + "grad_norm": 0.5779258372319492, + "learning_rate": 2.4487644396948584e-06, + "loss": 0.2988, + "step": 15033 + }, + { + "epoch": 0.6804254356189183, + "grad_norm": 0.6103992843675301, + "learning_rate": 2.448134134626217e-06, + "loss": 0.2988, + "step": 15034 + }, + { + "epoch": 0.6804706947273139, + "grad_norm": 0.6238415603043627, + "learning_rate": 2.4475038843893327e-06, + "loss": 0.2891, + "step": 15035 + }, + { + "epoch": 0.6805159538357094, + "grad_norm": 0.6368674523002104, + "learning_rate": 2.4468736889977536e-06, + "loss": 0.3173, + "step": 15036 + }, + { + "epoch": 0.680561212944105, + "grad_norm": 0.29612709938276416, + "learning_rate": 2.4462435484650156e-06, + "loss": 0.468, + "step": 15037 + }, + { + "epoch": 0.6806064720525006, + "grad_norm": 0.6270179992072349, + "learning_rate": 2.4456134628046617e-06, + "loss": 0.3427, + "step": 15038 + }, + { + "epoch": 0.6806517311608962, + "grad_norm": 0.6686654576794504, + "learning_rate": 2.4449834320302297e-06, + "loss": 0.2887, + "step": 15039 + }, + { + "epoch": 0.6806969902692916, + "grad_norm": 0.3245521884159804, + "learning_rate": 2.4443534561552543e-06, + "loss": 0.4802, + "step": 15040 + }, + { + "epoch": 0.6807422493776872, + "grad_norm": 0.6097813124642308, + "learning_rate": 2.4437235351932746e-06, + "loss": 0.3216, + "step": 15041 + }, + { + "epoch": 0.6807875084860828, + "grad_norm": 0.5942174037417574, + "learning_rate": 2.4430936691578287e-06, + "loss": 0.3034, + "step": 15042 + }, + { + "epoch": 0.6808327675944784, + "grad_norm": 0.6237027666111084, + "learning_rate": 2.442463858062444e-06, + "loss": 0.3415, + "step": 15043 + }, + { + "epoch": 0.680878026702874, + "grad_norm": 0.6188913554656105, + "learning_rate": 2.441834101920655e-06, + "loss": 0.3145, + "step": 15044 + }, + { + "epoch": 0.6809232858112695, + "grad_norm": 0.6309016051437135, + "learning_rate": 2.4412044007459945e-06, + "loss": 0.291, + "step": 15045 + }, + { + "epoch": 0.6809685449196651, + "grad_norm": 0.26462806540351497, + "learning_rate": 2.4405747545519966e-06, + "loss": 0.468, + "step": 15046 + }, + { + "epoch": 0.6810138040280607, + "grad_norm": 0.2661694906424091, + "learning_rate": 2.4399451633521825e-06, + "loss": 0.4632, + "step": 15047 + }, + { + "epoch": 0.6810590631364563, + "grad_norm": 0.6641368818274791, + "learning_rate": 2.4393156271600847e-06, + "loss": 0.3349, + "step": 15048 + }, + { + "epoch": 0.6811043222448517, + "grad_norm": 0.5960234629586615, + "learning_rate": 2.4386861459892312e-06, + "loss": 0.2653, + "step": 15049 + }, + { + "epoch": 0.6811495813532473, + "grad_norm": 0.62521763613905, + "learning_rate": 2.4380567198531462e-06, + "loss": 0.3426, + "step": 15050 + }, + { + "epoch": 0.6811948404616429, + "grad_norm": 0.2717915731520093, + "learning_rate": 2.4374273487653517e-06, + "loss": 0.4493, + "step": 15051 + }, + { + "epoch": 0.6812400995700385, + "grad_norm": 0.5922236013507167, + "learning_rate": 2.4367980327393752e-06, + "loss": 0.3155, + "step": 15052 + }, + { + "epoch": 0.681285358678434, + "grad_norm": 0.6491962951976494, + "learning_rate": 2.4361687717887346e-06, + "loss": 0.3052, + "step": 15053 + }, + { + "epoch": 0.6813306177868296, + "grad_norm": 0.6055216662373926, + "learning_rate": 2.435539565926955e-06, + "loss": 0.2897, + "step": 15054 + }, + { + "epoch": 0.6813758768952252, + "grad_norm": 0.62030495638086, + "learning_rate": 2.434910415167554e-06, + "loss": 0.2912, + "step": 15055 + }, + { + "epoch": 0.6814211360036208, + "grad_norm": 0.5848255587798645, + "learning_rate": 2.4342813195240477e-06, + "loss": 0.2882, + "step": 15056 + }, + { + "epoch": 0.6814663951120163, + "grad_norm": 1.0867145517086927, + "learning_rate": 2.4336522790099563e-06, + "loss": 0.2777, + "step": 15057 + }, + { + "epoch": 0.6815116542204118, + "grad_norm": 0.616768628832872, + "learning_rate": 2.4330232936387975e-06, + "loss": 0.292, + "step": 15058 + }, + { + "epoch": 0.6815569133288074, + "grad_norm": 0.6299643341596347, + "learning_rate": 2.4323943634240838e-06, + "loss": 0.2836, + "step": 15059 + }, + { + "epoch": 0.681602172437203, + "grad_norm": 0.6101124308071156, + "learning_rate": 2.431765488379328e-06, + "loss": 0.3279, + "step": 15060 + }, + { + "epoch": 0.6816474315455986, + "grad_norm": 0.29891723094065137, + "learning_rate": 2.4311366685180436e-06, + "loss": 0.4191, + "step": 15061 + }, + { + "epoch": 0.6816926906539941, + "grad_norm": 0.31111970321204335, + "learning_rate": 2.430507903853745e-06, + "loss": 0.4834, + "step": 15062 + }, + { + "epoch": 0.6817379497623897, + "grad_norm": 0.5747182195248783, + "learning_rate": 2.42987919439994e-06, + "loss": 0.2647, + "step": 15063 + }, + { + "epoch": 0.6817832088707853, + "grad_norm": 0.6957102220606519, + "learning_rate": 2.429250540170135e-06, + "loss": 0.2548, + "step": 15064 + }, + { + "epoch": 0.6818284679791808, + "grad_norm": 0.5184383773596717, + "learning_rate": 2.428621941177843e-06, + "loss": 0.2803, + "step": 15065 + }, + { + "epoch": 0.6818737270875763, + "grad_norm": 0.649242266265955, + "learning_rate": 2.4279933974365662e-06, + "loss": 0.3289, + "step": 15066 + }, + { + "epoch": 0.6819189861959719, + "grad_norm": 0.6386315571308373, + "learning_rate": 2.4273649089598133e-06, + "loss": 0.2894, + "step": 15067 + }, + { + "epoch": 0.6819642453043675, + "grad_norm": 0.6135005713400414, + "learning_rate": 2.4267364757610878e-06, + "loss": 0.2738, + "step": 15068 + }, + { + "epoch": 0.6820095044127631, + "grad_norm": 0.6213338889164568, + "learning_rate": 2.4261080978538897e-06, + "loss": 0.3396, + "step": 15069 + }, + { + "epoch": 0.6820547635211587, + "grad_norm": 0.5964228446691353, + "learning_rate": 2.425479775251724e-06, + "loss": 0.2587, + "step": 15070 + }, + { + "epoch": 0.6821000226295542, + "grad_norm": 0.5767022668088903, + "learning_rate": 2.4248515079680945e-06, + "loss": 0.3102, + "step": 15071 + }, + { + "epoch": 0.6821452817379497, + "grad_norm": 0.6267563690965127, + "learning_rate": 2.4242232960164937e-06, + "loss": 0.2785, + "step": 15072 + }, + { + "epoch": 0.6821905408463453, + "grad_norm": 0.6050276519979759, + "learning_rate": 2.423595139410423e-06, + "loss": 0.2771, + "step": 15073 + }, + { + "epoch": 0.6822357999547409, + "grad_norm": 0.5967407347994533, + "learning_rate": 2.4229670381633804e-06, + "loss": 0.2796, + "step": 15074 + }, + { + "epoch": 0.6822810590631364, + "grad_norm": 0.5878917183116571, + "learning_rate": 2.4223389922888646e-06, + "loss": 0.3188, + "step": 15075 + }, + { + "epoch": 0.682326318171532, + "grad_norm": 0.5176462985315065, + "learning_rate": 2.4217110018003636e-06, + "loss": 0.3001, + "step": 15076 + }, + { + "epoch": 0.6823715772799276, + "grad_norm": 0.6936683358693684, + "learning_rate": 2.4210830667113745e-06, + "loss": 0.2778, + "step": 15077 + }, + { + "epoch": 0.6824168363883232, + "grad_norm": 0.6189248083875724, + "learning_rate": 2.4204551870353917e-06, + "loss": 0.3505, + "step": 15078 + }, + { + "epoch": 0.6824620954967188, + "grad_norm": 0.7311188825467586, + "learning_rate": 2.4198273627859043e-06, + "loss": 0.3475, + "step": 15079 + }, + { + "epoch": 0.6825073546051142, + "grad_norm": 0.6536316463000047, + "learning_rate": 2.419199593976401e-06, + "loss": 0.2967, + "step": 15080 + }, + { + "epoch": 0.6825526137135098, + "grad_norm": 0.6168381295450212, + "learning_rate": 2.4185718806203738e-06, + "loss": 0.2991, + "step": 15081 + }, + { + "epoch": 0.6825978728219054, + "grad_norm": 0.5989253129012235, + "learning_rate": 2.4179442227313065e-06, + "loss": 0.2927, + "step": 15082 + }, + { + "epoch": 0.682643131930301, + "grad_norm": 0.6387303771408718, + "learning_rate": 2.41731662032269e-06, + "loss": 0.3425, + "step": 15083 + }, + { + "epoch": 0.6826883910386965, + "grad_norm": 0.3572420858535034, + "learning_rate": 2.4166890734080066e-06, + "loss": 0.4635, + "step": 15084 + }, + { + "epoch": 0.6827336501470921, + "grad_norm": 0.3106526202462484, + "learning_rate": 2.41606158200074e-06, + "loss": 0.4804, + "step": 15085 + }, + { + "epoch": 0.6827789092554877, + "grad_norm": 0.6606184577508364, + "learning_rate": 2.4154341461143734e-06, + "loss": 0.2942, + "step": 15086 + }, + { + "epoch": 0.6828241683638833, + "grad_norm": 0.6548534213992573, + "learning_rate": 2.4148067657623907e-06, + "loss": 0.3086, + "step": 15087 + }, + { + "epoch": 0.6828694274722787, + "grad_norm": 0.6151721435049017, + "learning_rate": 2.4141794409582713e-06, + "loss": 0.2829, + "step": 15088 + }, + { + "epoch": 0.6829146865806743, + "grad_norm": 0.70234226813624, + "learning_rate": 2.413552171715492e-06, + "loss": 0.3002, + "step": 15089 + }, + { + "epoch": 0.6829599456890699, + "grad_norm": 0.5181581808026793, + "learning_rate": 2.412924958047533e-06, + "loss": 0.3023, + "step": 15090 + }, + { + "epoch": 0.6830052047974655, + "grad_norm": 0.655670861689387, + "learning_rate": 2.4122977999678727e-06, + "loss": 0.3185, + "step": 15091 + }, + { + "epoch": 0.6830504639058611, + "grad_norm": 0.5946982122245101, + "learning_rate": 2.4116706974899857e-06, + "loss": 0.3472, + "step": 15092 + }, + { + "epoch": 0.6830957230142566, + "grad_norm": 0.7419455436996496, + "learning_rate": 2.411043650627343e-06, + "loss": 0.3462, + "step": 15093 + }, + { + "epoch": 0.6831409821226522, + "grad_norm": 0.33988968281405163, + "learning_rate": 2.4104166593934237e-06, + "loss": 0.4753, + "step": 15094 + }, + { + "epoch": 0.6831862412310478, + "grad_norm": 0.643007225023249, + "learning_rate": 2.409789723801695e-06, + "loss": 0.3085, + "step": 15095 + }, + { + "epoch": 0.6832315003394434, + "grad_norm": 0.7106285604036804, + "learning_rate": 2.409162843865632e-06, + "loss": 0.2933, + "step": 15096 + }, + { + "epoch": 0.6832767594478388, + "grad_norm": 0.6235195273871946, + "learning_rate": 2.4085360195987017e-06, + "loss": 0.2589, + "step": 15097 + }, + { + "epoch": 0.6833220185562344, + "grad_norm": 0.5841690154702869, + "learning_rate": 2.4079092510143712e-06, + "loss": 0.3071, + "step": 15098 + }, + { + "epoch": 0.68336727766463, + "grad_norm": 0.5636313308066683, + "learning_rate": 2.407282538126111e-06, + "loss": 0.2403, + "step": 15099 + }, + { + "epoch": 0.6834125367730256, + "grad_norm": 0.6407811380404763, + "learning_rate": 2.4066558809473896e-06, + "loss": 0.2854, + "step": 15100 + }, + { + "epoch": 0.6834577958814211, + "grad_norm": 0.6759491624865307, + "learning_rate": 2.406029279491664e-06, + "loss": 0.3146, + "step": 15101 + }, + { + "epoch": 0.6835030549898167, + "grad_norm": 0.6259445406121261, + "learning_rate": 2.405402733772403e-06, + "loss": 0.3136, + "step": 15102 + }, + { + "epoch": 0.6835483140982123, + "grad_norm": 0.6051948786321726, + "learning_rate": 2.404776243803068e-06, + "loss": 0.2622, + "step": 15103 + }, + { + "epoch": 0.6835935732066079, + "grad_norm": 0.5876119935476364, + "learning_rate": 2.4041498095971253e-06, + "loss": 0.2993, + "step": 15104 + }, + { + "epoch": 0.6836388323150034, + "grad_norm": 0.5800757727860213, + "learning_rate": 2.4035234311680267e-06, + "loss": 0.3371, + "step": 15105 + }, + { + "epoch": 0.6836840914233989, + "grad_norm": 0.6108119594459906, + "learning_rate": 2.402897108529235e-06, + "loss": 0.2724, + "step": 15106 + }, + { + "epoch": 0.6837293505317945, + "grad_norm": 0.6520504016494019, + "learning_rate": 2.40227084169421e-06, + "loss": 0.3746, + "step": 15107 + }, + { + "epoch": 0.6837746096401901, + "grad_norm": 0.7391233436868181, + "learning_rate": 2.401644630676406e-06, + "loss": 0.3151, + "step": 15108 + }, + { + "epoch": 0.6838198687485857, + "grad_norm": 0.5998437164037514, + "learning_rate": 2.4010184754892773e-06, + "loss": 0.2969, + "step": 15109 + }, + { + "epoch": 0.6838651278569812, + "grad_norm": 0.612633443932278, + "learning_rate": 2.400392376146281e-06, + "loss": 0.3113, + "step": 15110 + }, + { + "epoch": 0.6839103869653768, + "grad_norm": 0.6737683247307297, + "learning_rate": 2.3997663326608663e-06, + "loss": 0.3705, + "step": 15111 + }, + { + "epoch": 0.6839556460737723, + "grad_norm": 0.5984500289040087, + "learning_rate": 2.3991403450464896e-06, + "loss": 0.2848, + "step": 15112 + }, + { + "epoch": 0.6840009051821679, + "grad_norm": 0.685521554319672, + "learning_rate": 2.398514413316598e-06, + "loss": 0.3334, + "step": 15113 + }, + { + "epoch": 0.6840461642905635, + "grad_norm": 0.6089728266060771, + "learning_rate": 2.397888537484641e-06, + "loss": 0.3064, + "step": 15114 + }, + { + "epoch": 0.684091423398959, + "grad_norm": 0.5548942735671424, + "learning_rate": 2.397262717564067e-06, + "loss": 0.3223, + "step": 15115 + }, + { + "epoch": 0.6841366825073546, + "grad_norm": 0.6383884701744859, + "learning_rate": 2.3966369535683254e-06, + "loss": 0.3467, + "step": 15116 + }, + { + "epoch": 0.6841819416157502, + "grad_norm": 0.5859354192074316, + "learning_rate": 2.3960112455108604e-06, + "loss": 0.2759, + "step": 15117 + }, + { + "epoch": 0.6842272007241458, + "grad_norm": 0.6497286961086137, + "learning_rate": 2.3953855934051135e-06, + "loss": 0.2808, + "step": 15118 + }, + { + "epoch": 0.6842724598325413, + "grad_norm": 0.6832885488840652, + "learning_rate": 2.3947599972645313e-06, + "loss": 0.3897, + "step": 15119 + }, + { + "epoch": 0.6843177189409368, + "grad_norm": 0.5990897995013262, + "learning_rate": 2.3941344571025575e-06, + "loss": 0.2846, + "step": 15120 + }, + { + "epoch": 0.6843629780493324, + "grad_norm": 0.6285928770397525, + "learning_rate": 2.3935089729326307e-06, + "loss": 0.295, + "step": 15121 + }, + { + "epoch": 0.684408237157728, + "grad_norm": 0.637133212579721, + "learning_rate": 2.3928835447681886e-06, + "loss": 0.3387, + "step": 15122 + }, + { + "epoch": 0.6844534962661235, + "grad_norm": 0.5783934039329125, + "learning_rate": 2.392258172622674e-06, + "loss": 0.2953, + "step": 15123 + }, + { + "epoch": 0.6844987553745191, + "grad_norm": 0.5715727546399437, + "learning_rate": 2.391632856509521e-06, + "loss": 0.2819, + "step": 15124 + }, + { + "epoch": 0.6845440144829147, + "grad_norm": 0.6303769605021657, + "learning_rate": 2.3910075964421682e-06, + "loss": 0.3124, + "step": 15125 + }, + { + "epoch": 0.6845892735913103, + "grad_norm": 0.6382169114047389, + "learning_rate": 2.390382392434049e-06, + "loss": 0.3138, + "step": 15126 + }, + { + "epoch": 0.6846345326997059, + "grad_norm": 1.884026468527679, + "learning_rate": 2.389757244498596e-06, + "loss": 0.3024, + "step": 15127 + }, + { + "epoch": 0.6846797918081013, + "grad_norm": 0.6009302379190262, + "learning_rate": 2.389132152649243e-06, + "loss": 0.304, + "step": 15128 + }, + { + "epoch": 0.6847250509164969, + "grad_norm": 0.2907404985498884, + "learning_rate": 2.3885071168994245e-06, + "loss": 0.4527, + "step": 15129 + }, + { + "epoch": 0.6847703100248925, + "grad_norm": 0.6375852995803484, + "learning_rate": 2.3878821372625645e-06, + "loss": 0.3325, + "step": 15130 + }, + { + "epoch": 0.6848155691332881, + "grad_norm": 0.6255821180704887, + "learning_rate": 2.3872572137520942e-06, + "loss": 0.2996, + "step": 15131 + }, + { + "epoch": 0.6848608282416836, + "grad_norm": 0.6084577807492569, + "learning_rate": 2.3866323463814426e-06, + "loss": 0.3524, + "step": 15132 + }, + { + "epoch": 0.6849060873500792, + "grad_norm": 0.6140593026995164, + "learning_rate": 2.386007535164039e-06, + "loss": 0.2679, + "step": 15133 + }, + { + "epoch": 0.6849513464584748, + "grad_norm": 0.37943621133603733, + "learning_rate": 2.3853827801133015e-06, + "loss": 0.4455, + "step": 15134 + }, + { + "epoch": 0.6849966055668704, + "grad_norm": 0.6303980113130074, + "learning_rate": 2.384758081242658e-06, + "loss": 0.32, + "step": 15135 + }, + { + "epoch": 0.6850418646752658, + "grad_norm": 0.6468168375303229, + "learning_rate": 2.384133438565533e-06, + "loss": 0.3279, + "step": 15136 + }, + { + "epoch": 0.6850871237836614, + "grad_norm": 0.6264960194763265, + "learning_rate": 2.383508852095346e-06, + "loss": 0.3224, + "step": 15137 + }, + { + "epoch": 0.685132382892057, + "grad_norm": 0.6333601588370803, + "learning_rate": 2.382884321845516e-06, + "loss": 0.3099, + "step": 15138 + }, + { + "epoch": 0.6851776420004526, + "grad_norm": 0.6282786241901707, + "learning_rate": 2.382259847829467e-06, + "loss": 0.3354, + "step": 15139 + }, + { + "epoch": 0.6852229011088482, + "grad_norm": 0.2730465512823068, + "learning_rate": 2.381635430060611e-06, + "loss": 0.4853, + "step": 15140 + }, + { + "epoch": 0.6852681602172437, + "grad_norm": 0.5941869212174515, + "learning_rate": 2.38101106855237e-06, + "loss": 0.2762, + "step": 15141 + }, + { + "epoch": 0.6853134193256393, + "grad_norm": 0.28845968455259635, + "learning_rate": 2.3803867633181575e-06, + "loss": 0.4624, + "step": 15142 + }, + { + "epoch": 0.6853586784340349, + "grad_norm": 0.6890545300013281, + "learning_rate": 2.3797625143713865e-06, + "loss": 0.2775, + "step": 15143 + }, + { + "epoch": 0.6854039375424305, + "grad_norm": 0.6849086964199705, + "learning_rate": 2.3791383217254717e-06, + "loss": 0.3612, + "step": 15144 + }, + { + "epoch": 0.6854491966508259, + "grad_norm": 0.6329796736475822, + "learning_rate": 2.3785141853938266e-06, + "loss": 0.2985, + "step": 15145 + }, + { + "epoch": 0.6854944557592215, + "grad_norm": 0.2658507084788963, + "learning_rate": 2.37789010538986e-06, + "loss": 0.4552, + "step": 15146 + }, + { + "epoch": 0.6855397148676171, + "grad_norm": 0.6106735785669123, + "learning_rate": 2.3772660817269806e-06, + "loss": 0.2875, + "step": 15147 + }, + { + "epoch": 0.6855849739760127, + "grad_norm": 0.5513527402067496, + "learning_rate": 2.3766421144185977e-06, + "loss": 0.2948, + "step": 15148 + }, + { + "epoch": 0.6856302330844083, + "grad_norm": 0.5290145509732991, + "learning_rate": 2.3760182034781203e-06, + "loss": 0.2645, + "step": 15149 + }, + { + "epoch": 0.6856754921928038, + "grad_norm": 0.6402660303302956, + "learning_rate": 2.3753943489189537e-06, + "loss": 0.3056, + "step": 15150 + }, + { + "epoch": 0.6857207513011994, + "grad_norm": 0.6428218167659006, + "learning_rate": 2.3747705507544986e-06, + "loss": 0.3194, + "step": 15151 + }, + { + "epoch": 0.685766010409595, + "grad_norm": 0.6706866565556091, + "learning_rate": 2.3741468089981646e-06, + "loss": 0.2862, + "step": 15152 + }, + { + "epoch": 0.6858112695179905, + "grad_norm": 0.5990018968682368, + "learning_rate": 2.3735231236633483e-06, + "loss": 0.2883, + "step": 15153 + }, + { + "epoch": 0.685856528626386, + "grad_norm": 0.6217163752330904, + "learning_rate": 2.372899494763456e-06, + "loss": 0.293, + "step": 15154 + }, + { + "epoch": 0.6859017877347816, + "grad_norm": 0.28048717025355663, + "learning_rate": 2.3722759223118846e-06, + "loss": 0.4643, + "step": 15155 + }, + { + "epoch": 0.6859470468431772, + "grad_norm": 0.6270455720998772, + "learning_rate": 2.371652406322031e-06, + "loss": 0.2873, + "step": 15156 + }, + { + "epoch": 0.6859923059515728, + "grad_norm": 0.5866299850272513, + "learning_rate": 2.3710289468072957e-06, + "loss": 0.3119, + "step": 15157 + }, + { + "epoch": 0.6860375650599683, + "grad_norm": 0.27914664793966554, + "learning_rate": 2.3704055437810754e-06, + "loss": 0.4836, + "step": 15158 + }, + { + "epoch": 0.6860828241683639, + "grad_norm": 0.5762223082408546, + "learning_rate": 2.3697821972567635e-06, + "loss": 0.2848, + "step": 15159 + }, + { + "epoch": 0.6861280832767594, + "grad_norm": 0.29689041409226063, + "learning_rate": 2.3691589072477527e-06, + "loss": 0.4544, + "step": 15160 + }, + { + "epoch": 0.686173342385155, + "grad_norm": 0.616291195054509, + "learning_rate": 2.3685356737674364e-06, + "loss": 0.3022, + "step": 15161 + }, + { + "epoch": 0.6862186014935506, + "grad_norm": 0.6195029821454514, + "learning_rate": 2.367912496829211e-06, + "loss": 0.2788, + "step": 15162 + }, + { + "epoch": 0.6862638606019461, + "grad_norm": 0.2606837427560347, + "learning_rate": 2.367289376446458e-06, + "loss": 0.4686, + "step": 15163 + }, + { + "epoch": 0.6863091197103417, + "grad_norm": 0.6483111444061642, + "learning_rate": 2.3666663126325705e-06, + "loss": 0.2994, + "step": 15164 + }, + { + "epoch": 0.6863543788187373, + "grad_norm": 0.5808106655048166, + "learning_rate": 2.3660433054009385e-06, + "loss": 0.2767, + "step": 15165 + }, + { + "epoch": 0.6863996379271329, + "grad_norm": 0.5582665769123862, + "learning_rate": 2.3654203547649463e-06, + "loss": 0.2759, + "step": 15166 + }, + { + "epoch": 0.6864448970355284, + "grad_norm": 0.6517516887106195, + "learning_rate": 2.364797460737977e-06, + "loss": 0.2635, + "step": 15167 + }, + { + "epoch": 0.686490156143924, + "grad_norm": 0.6199077902775992, + "learning_rate": 2.364174623333419e-06, + "loss": 0.2802, + "step": 15168 + }, + { + "epoch": 0.6865354152523195, + "grad_norm": 0.6461909522002824, + "learning_rate": 2.363551842564651e-06, + "loss": 0.3534, + "step": 15169 + }, + { + "epoch": 0.6865806743607151, + "grad_norm": 0.6299888083758769, + "learning_rate": 2.362929118445059e-06, + "loss": 0.3022, + "step": 15170 + }, + { + "epoch": 0.6866259334691106, + "grad_norm": 0.7662971676827985, + "learning_rate": 2.36230645098802e-06, + "loss": 0.3375, + "step": 15171 + }, + { + "epoch": 0.6866711925775062, + "grad_norm": 0.28553350487559287, + "learning_rate": 2.3616838402069132e-06, + "loss": 0.4623, + "step": 15172 + }, + { + "epoch": 0.6867164516859018, + "grad_norm": 0.28039720222164843, + "learning_rate": 2.361061286115118e-06, + "loss": 0.4512, + "step": 15173 + }, + { + "epoch": 0.6867617107942974, + "grad_norm": 0.6293996693057095, + "learning_rate": 2.3604387887260122e-06, + "loss": 0.2728, + "step": 15174 + }, + { + "epoch": 0.686806969902693, + "grad_norm": 0.620450588469678, + "learning_rate": 2.35981634805297e-06, + "loss": 0.2687, + "step": 15175 + }, + { + "epoch": 0.6868522290110884, + "grad_norm": 0.2611191221662981, + "learning_rate": 2.359193964109364e-06, + "loss": 0.4641, + "step": 15176 + }, + { + "epoch": 0.686897488119484, + "grad_norm": 0.2633681409227305, + "learning_rate": 2.3585716369085692e-06, + "loss": 0.4613, + "step": 15177 + }, + { + "epoch": 0.6869427472278796, + "grad_norm": 0.2854475002845172, + "learning_rate": 2.35794936646396e-06, + "loss": 0.4759, + "step": 15178 + }, + { + "epoch": 0.6869880063362752, + "grad_norm": 0.9651744734923887, + "learning_rate": 2.357327152788903e-06, + "loss": 0.3001, + "step": 15179 + }, + { + "epoch": 0.6870332654446707, + "grad_norm": 0.5976494048168159, + "learning_rate": 2.356704995896768e-06, + "loss": 0.277, + "step": 15180 + }, + { + "epoch": 0.6870785245530663, + "grad_norm": 0.637045182518737, + "learning_rate": 2.3560828958009265e-06, + "loss": 0.3234, + "step": 15181 + }, + { + "epoch": 0.6871237836614619, + "grad_norm": 0.6255287785575562, + "learning_rate": 2.355460852514741e-06, + "loss": 0.3296, + "step": 15182 + }, + { + "epoch": 0.6871690427698575, + "grad_norm": 0.6239150838054454, + "learning_rate": 2.354838866051582e-06, + "loss": 0.3245, + "step": 15183 + }, + { + "epoch": 0.687214301878253, + "grad_norm": 0.6051523620230199, + "learning_rate": 2.354216936424812e-06, + "loss": 0.2777, + "step": 15184 + }, + { + "epoch": 0.6872595609866485, + "grad_norm": 0.6090172591445984, + "learning_rate": 2.3535950636477915e-06, + "loss": 0.2943, + "step": 15185 + }, + { + "epoch": 0.6873048200950441, + "grad_norm": 0.627645662677417, + "learning_rate": 2.3529732477338857e-06, + "loss": 0.2999, + "step": 15186 + }, + { + "epoch": 0.6873500792034397, + "grad_norm": 0.2991361603487357, + "learning_rate": 2.352351488696457e-06, + "loss": 0.4835, + "step": 15187 + }, + { + "epoch": 0.6873953383118353, + "grad_norm": 0.636926639668503, + "learning_rate": 2.351729786548863e-06, + "loss": 0.3004, + "step": 15188 + }, + { + "epoch": 0.6874405974202308, + "grad_norm": 0.5933253379819048, + "learning_rate": 2.3511081413044605e-06, + "loss": 0.3057, + "step": 15189 + }, + { + "epoch": 0.6874858565286264, + "grad_norm": 0.6585305490498075, + "learning_rate": 2.3504865529766084e-06, + "loss": 0.3628, + "step": 15190 + }, + { + "epoch": 0.687531115637022, + "grad_norm": 0.791935913806427, + "learning_rate": 2.3498650215786656e-06, + "loss": 0.2863, + "step": 15191 + }, + { + "epoch": 0.6875763747454176, + "grad_norm": 0.3022278662815868, + "learning_rate": 2.349243547123983e-06, + "loss": 0.4629, + "step": 15192 + }, + { + "epoch": 0.687621633853813, + "grad_norm": 0.6842440528203508, + "learning_rate": 2.348622129625914e-06, + "loss": 0.3112, + "step": 15193 + }, + { + "epoch": 0.6876668929622086, + "grad_norm": 0.598615904191372, + "learning_rate": 2.3480007690978153e-06, + "loss": 0.2748, + "step": 15194 + }, + { + "epoch": 0.6877121520706042, + "grad_norm": 0.6166353577834562, + "learning_rate": 2.3473794655530317e-06, + "loss": 0.2794, + "step": 15195 + }, + { + "epoch": 0.6877574111789998, + "grad_norm": 0.6474296155650149, + "learning_rate": 2.3467582190049194e-06, + "loss": 0.3015, + "step": 15196 + }, + { + "epoch": 0.6878026702873954, + "grad_norm": 0.6586067699851095, + "learning_rate": 2.3461370294668234e-06, + "loss": 0.3272, + "step": 15197 + }, + { + "epoch": 0.6878479293957909, + "grad_norm": 0.6412806959305043, + "learning_rate": 2.3455158969520908e-06, + "loss": 0.3416, + "step": 15198 + }, + { + "epoch": 0.6878931885041865, + "grad_norm": 0.29779863556989944, + "learning_rate": 2.3448948214740703e-06, + "loss": 0.4603, + "step": 15199 + }, + { + "epoch": 0.687938447612582, + "grad_norm": 0.7199625021107681, + "learning_rate": 2.3442738030461054e-06, + "loss": 0.3547, + "step": 15200 + }, + { + "epoch": 0.6879837067209776, + "grad_norm": 0.6317413566544562, + "learning_rate": 2.3436528416815384e-06, + "loss": 0.3109, + "step": 15201 + }, + { + "epoch": 0.6880289658293731, + "grad_norm": 0.5831209937004782, + "learning_rate": 2.343031937393714e-06, + "loss": 0.2895, + "step": 15202 + }, + { + "epoch": 0.6880742249377687, + "grad_norm": 0.5753754117741635, + "learning_rate": 2.342411090195974e-06, + "loss": 0.2532, + "step": 15203 + }, + { + "epoch": 0.6881194840461643, + "grad_norm": 0.6494344248438111, + "learning_rate": 2.341790300101658e-06, + "loss": 0.3236, + "step": 15204 + }, + { + "epoch": 0.6881647431545599, + "grad_norm": 0.6113703342258673, + "learning_rate": 2.3411695671241026e-06, + "loss": 0.2958, + "step": 15205 + }, + { + "epoch": 0.6882100022629554, + "grad_norm": 0.6102866066688664, + "learning_rate": 2.3405488912766468e-06, + "loss": 0.324, + "step": 15206 + }, + { + "epoch": 0.688255261371351, + "grad_norm": 0.5710834246103191, + "learning_rate": 2.3399282725726297e-06, + "loss": 0.3133, + "step": 15207 + }, + { + "epoch": 0.6883005204797465, + "grad_norm": 0.5744056006601107, + "learning_rate": 2.3393077110253838e-06, + "loss": 0.2984, + "step": 15208 + }, + { + "epoch": 0.6883457795881421, + "grad_norm": 0.6411009454649048, + "learning_rate": 2.338687206648242e-06, + "loss": 0.3326, + "step": 15209 + }, + { + "epoch": 0.6883910386965377, + "grad_norm": 0.6492584420661419, + "learning_rate": 2.3380667594545402e-06, + "loss": 0.2735, + "step": 15210 + }, + { + "epoch": 0.6884362978049332, + "grad_norm": 0.6666730619678819, + "learning_rate": 2.337446369457607e-06, + "loss": 0.3121, + "step": 15211 + }, + { + "epoch": 0.6884815569133288, + "grad_norm": 0.6411538049352037, + "learning_rate": 2.3368260366707745e-06, + "loss": 0.3143, + "step": 15212 + }, + { + "epoch": 0.6885268160217244, + "grad_norm": 0.5730742986343003, + "learning_rate": 2.3362057611073722e-06, + "loss": 0.3211, + "step": 15213 + }, + { + "epoch": 0.68857207513012, + "grad_norm": 0.6724012403402507, + "learning_rate": 2.3355855427807247e-06, + "loss": 0.4833, + "step": 15214 + }, + { + "epoch": 0.6886173342385155, + "grad_norm": 0.8056592749483618, + "learning_rate": 2.3349653817041607e-06, + "loss": 0.2862, + "step": 15215 + }, + { + "epoch": 0.688662593346911, + "grad_norm": 0.6759884149762039, + "learning_rate": 2.3343452778910076e-06, + "loss": 0.3437, + "step": 15216 + }, + { + "epoch": 0.6887078524553066, + "grad_norm": 0.7560353506301231, + "learning_rate": 2.333725231354588e-06, + "loss": 0.3249, + "step": 15217 + }, + { + "epoch": 0.6887531115637022, + "grad_norm": 0.3039208009113988, + "learning_rate": 2.333105242108222e-06, + "loss": 0.4799, + "step": 15218 + }, + { + "epoch": 0.6887983706720978, + "grad_norm": 0.29755263599505566, + "learning_rate": 2.332485310165233e-06, + "loss": 0.466, + "step": 15219 + }, + { + "epoch": 0.6888436297804933, + "grad_norm": 0.5981605057766334, + "learning_rate": 2.3318654355389448e-06, + "loss": 0.3045, + "step": 15220 + }, + { + "epoch": 0.6888888888888889, + "grad_norm": 0.7221055998904644, + "learning_rate": 2.3312456182426736e-06, + "loss": 0.3102, + "step": 15221 + }, + { + "epoch": 0.6889341479972845, + "grad_norm": 0.5825786882913035, + "learning_rate": 2.330625858289736e-06, + "loss": 0.3337, + "step": 15222 + }, + { + "epoch": 0.6889794071056801, + "grad_norm": 0.6336510446811916, + "learning_rate": 2.330006155693451e-06, + "loss": 0.3228, + "step": 15223 + }, + { + "epoch": 0.6890246662140755, + "grad_norm": 0.32607904113700137, + "learning_rate": 2.3293865104671324e-06, + "loss": 0.4962, + "step": 15224 + }, + { + "epoch": 0.6890699253224711, + "grad_norm": 0.7399849731248741, + "learning_rate": 2.328766922624098e-06, + "loss": 0.3154, + "step": 15225 + }, + { + "epoch": 0.6891151844308667, + "grad_norm": 0.6466307348063, + "learning_rate": 2.3281473921776577e-06, + "loss": 0.2894, + "step": 15226 + }, + { + "epoch": 0.6891604435392623, + "grad_norm": 0.6474918515256418, + "learning_rate": 2.327527919141122e-06, + "loss": 0.3082, + "step": 15227 + }, + { + "epoch": 0.6892057026476578, + "grad_norm": 0.6907658263724318, + "learning_rate": 2.3269085035278037e-06, + "loss": 0.3248, + "step": 15228 + }, + { + "epoch": 0.6892509617560534, + "grad_norm": 0.668855494816506, + "learning_rate": 2.326289145351014e-06, + "loss": 0.302, + "step": 15229 + }, + { + "epoch": 0.689296220864449, + "grad_norm": 0.6269732722978408, + "learning_rate": 2.325669844624058e-06, + "loss": 0.3008, + "step": 15230 + }, + { + "epoch": 0.6893414799728446, + "grad_norm": 0.665790307558888, + "learning_rate": 2.3250506013602425e-06, + "loss": 0.2953, + "step": 15231 + }, + { + "epoch": 0.6893867390812402, + "grad_norm": 0.6336689756404082, + "learning_rate": 2.3244314155728758e-06, + "loss": 0.3252, + "step": 15232 + }, + { + "epoch": 0.6894319981896356, + "grad_norm": 0.6333133047342612, + "learning_rate": 2.3238122872752606e-06, + "loss": 0.2881, + "step": 15233 + }, + { + "epoch": 0.6894772572980312, + "grad_norm": 0.6604219335095796, + "learning_rate": 2.323193216480698e-06, + "loss": 0.2952, + "step": 15234 + }, + { + "epoch": 0.6895225164064268, + "grad_norm": 0.2641142503850825, + "learning_rate": 2.3225742032024923e-06, + "loss": 0.467, + "step": 15235 + }, + { + "epoch": 0.6895677755148224, + "grad_norm": 0.6703900163351237, + "learning_rate": 2.3219552474539452e-06, + "loss": 0.2812, + "step": 15236 + }, + { + "epoch": 0.6896130346232179, + "grad_norm": 0.6644438053059106, + "learning_rate": 2.3213363492483553e-06, + "loss": 0.291, + "step": 15237 + }, + { + "epoch": 0.6896582937316135, + "grad_norm": 0.6069432377435615, + "learning_rate": 2.3207175085990184e-06, + "loss": 0.2678, + "step": 15238 + }, + { + "epoch": 0.6897035528400091, + "grad_norm": 0.5916808380907982, + "learning_rate": 2.3200987255192354e-06, + "loss": 0.3135, + "step": 15239 + }, + { + "epoch": 0.6897488119484046, + "grad_norm": 0.3107890007219854, + "learning_rate": 2.3194800000222984e-06, + "loss": 0.4828, + "step": 15240 + }, + { + "epoch": 0.6897940710568001, + "grad_norm": 0.6601154188421439, + "learning_rate": 2.3188613321215046e-06, + "loss": 0.28, + "step": 15241 + }, + { + "epoch": 0.6898393301651957, + "grad_norm": 0.6937457942077409, + "learning_rate": 2.3182427218301473e-06, + "loss": 0.2971, + "step": 15242 + }, + { + "epoch": 0.6898845892735913, + "grad_norm": 0.5748328417560244, + "learning_rate": 2.317624169161515e-06, + "loss": 0.3301, + "step": 15243 + }, + { + "epoch": 0.6899298483819869, + "grad_norm": 0.5830720790065382, + "learning_rate": 2.3170056741289015e-06, + "loss": 0.307, + "step": 15244 + }, + { + "epoch": 0.6899751074903825, + "grad_norm": 0.6566537590448583, + "learning_rate": 2.3163872367455976e-06, + "loss": 0.2767, + "step": 15245 + }, + { + "epoch": 0.690020366598778, + "grad_norm": 0.6135232962091322, + "learning_rate": 2.31576885702489e-06, + "loss": 0.262, + "step": 15246 + }, + { + "epoch": 0.6900656257071736, + "grad_norm": 0.3635384431340751, + "learning_rate": 2.3151505349800635e-06, + "loss": 0.4986, + "step": 15247 + }, + { + "epoch": 0.6901108848155691, + "grad_norm": 0.3197510057064505, + "learning_rate": 2.314532270624406e-06, + "loss": 0.4831, + "step": 15248 + }, + { + "epoch": 0.6901561439239647, + "grad_norm": 0.3014046711126635, + "learning_rate": 2.3139140639712045e-06, + "loss": 0.4696, + "step": 15249 + }, + { + "epoch": 0.6902014030323602, + "grad_norm": 0.6362004212788522, + "learning_rate": 2.31329591503374e-06, + "loss": 0.3149, + "step": 15250 + }, + { + "epoch": 0.6902466621407558, + "grad_norm": 0.25358928563314215, + "learning_rate": 2.312677823825292e-06, + "loss": 0.4488, + "step": 15251 + }, + { + "epoch": 0.6902919212491514, + "grad_norm": 0.26585388827939566, + "learning_rate": 2.312059790359147e-06, + "loss": 0.4831, + "step": 15252 + }, + { + "epoch": 0.690337180357547, + "grad_norm": 0.6304065372531822, + "learning_rate": 2.3114418146485793e-06, + "loss": 0.3269, + "step": 15253 + }, + { + "epoch": 0.6903824394659425, + "grad_norm": 0.6465249995769476, + "learning_rate": 2.310823896706872e-06, + "loss": 0.282, + "step": 15254 + }, + { + "epoch": 0.6904276985743381, + "grad_norm": 0.6462067809944219, + "learning_rate": 2.3102060365473e-06, + "loss": 0.3379, + "step": 15255 + }, + { + "epoch": 0.6904729576827336, + "grad_norm": 0.27030553396271884, + "learning_rate": 2.309588234183137e-06, + "loss": 0.4506, + "step": 15256 + }, + { + "epoch": 0.6905182167911292, + "grad_norm": 0.6383121590737316, + "learning_rate": 2.3089704896276597e-06, + "loss": 0.3188, + "step": 15257 + }, + { + "epoch": 0.6905634758995248, + "grad_norm": 0.37044181031209134, + "learning_rate": 2.3083528028941444e-06, + "loss": 0.4898, + "step": 15258 + }, + { + "epoch": 0.6906087350079203, + "grad_norm": 0.6686546014233246, + "learning_rate": 2.30773517399586e-06, + "loss": 0.3093, + "step": 15259 + }, + { + "epoch": 0.6906539941163159, + "grad_norm": 0.5594686195477221, + "learning_rate": 2.307117602946076e-06, + "loss": 0.3016, + "step": 15260 + }, + { + "epoch": 0.6906992532247115, + "grad_norm": 0.6476786146664553, + "learning_rate": 2.306500089758065e-06, + "loss": 0.283, + "step": 15261 + }, + { + "epoch": 0.6907445123331071, + "grad_norm": 0.6184613319727662, + "learning_rate": 2.3058826344450973e-06, + "loss": 0.2755, + "step": 15262 + }, + { + "epoch": 0.6907897714415026, + "grad_norm": 0.602580473203544, + "learning_rate": 2.3052652370204344e-06, + "loss": 0.267, + "step": 15263 + }, + { + "epoch": 0.6908350305498981, + "grad_norm": 0.8129775661719012, + "learning_rate": 2.304647897497345e-06, + "loss": 0.308, + "step": 15264 + }, + { + "epoch": 0.6908802896582937, + "grad_norm": 0.5952174745127535, + "learning_rate": 2.3040306158890963e-06, + "loss": 0.3276, + "step": 15265 + }, + { + "epoch": 0.6909255487666893, + "grad_norm": 0.6401445111905103, + "learning_rate": 2.3034133922089496e-06, + "loss": 0.303, + "step": 15266 + }, + { + "epoch": 0.6909708078750849, + "grad_norm": 0.6491990782272895, + "learning_rate": 2.3027962264701654e-06, + "loss": 0.3166, + "step": 15267 + }, + { + "epoch": 0.6910160669834804, + "grad_norm": 0.6406130372218273, + "learning_rate": 2.3021791186860078e-06, + "loss": 0.3303, + "step": 15268 + }, + { + "epoch": 0.691061326091876, + "grad_norm": 0.27208154366542836, + "learning_rate": 2.3015620688697336e-06, + "loss": 0.4585, + "step": 15269 + }, + { + "epoch": 0.6911065852002716, + "grad_norm": 0.6524243568444674, + "learning_rate": 2.300945077034605e-06, + "loss": 0.2978, + "step": 15270 + }, + { + "epoch": 0.6911518443086672, + "grad_norm": 0.6690897724061226, + "learning_rate": 2.300328143193875e-06, + "loss": 0.32, + "step": 15271 + }, + { + "epoch": 0.6911971034170626, + "grad_norm": 0.608535062697799, + "learning_rate": 2.2997112673608035e-06, + "loss": 0.2847, + "step": 15272 + }, + { + "epoch": 0.6912423625254582, + "grad_norm": 0.5510052701718511, + "learning_rate": 2.299094449548642e-06, + "loss": 0.3089, + "step": 15273 + }, + { + "epoch": 0.6912876216338538, + "grad_norm": 0.62296168685443, + "learning_rate": 2.298477689770648e-06, + "loss": 0.3045, + "step": 15274 + }, + { + "epoch": 0.6913328807422494, + "grad_norm": 0.6005738358702823, + "learning_rate": 2.2978609880400706e-06, + "loss": 0.3012, + "step": 15275 + }, + { + "epoch": 0.6913781398506449, + "grad_norm": 0.6278202168048158, + "learning_rate": 2.29724434437016e-06, + "loss": 0.3577, + "step": 15276 + }, + { + "epoch": 0.6914233989590405, + "grad_norm": 0.6911484776265677, + "learning_rate": 2.296627758774167e-06, + "loss": 0.2752, + "step": 15277 + }, + { + "epoch": 0.6914686580674361, + "grad_norm": 0.2865799014938107, + "learning_rate": 2.296011231265343e-06, + "loss": 0.4458, + "step": 15278 + }, + { + "epoch": 0.6915139171758317, + "grad_norm": 0.7099050953246633, + "learning_rate": 2.2953947618569335e-06, + "loss": 0.3224, + "step": 15279 + }, + { + "epoch": 0.6915591762842273, + "grad_norm": 0.5985945647670636, + "learning_rate": 2.2947783505621813e-06, + "loss": 0.286, + "step": 15280 + }, + { + "epoch": 0.6916044353926227, + "grad_norm": 0.6077100325384621, + "learning_rate": 2.2941619973943363e-06, + "loss": 0.3016, + "step": 15281 + }, + { + "epoch": 0.6916496945010183, + "grad_norm": 0.7309766071163295, + "learning_rate": 2.2935457023666375e-06, + "loss": 0.3239, + "step": 15282 + }, + { + "epoch": 0.6916949536094139, + "grad_norm": 0.5335062188242945, + "learning_rate": 2.2929294654923313e-06, + "loss": 0.2771, + "step": 15283 + }, + { + "epoch": 0.6917402127178095, + "grad_norm": 0.6075102502850656, + "learning_rate": 2.2923132867846564e-06, + "loss": 0.298, + "step": 15284 + }, + { + "epoch": 0.691785471826205, + "grad_norm": 0.4341561501175963, + "learning_rate": 2.2916971662568514e-06, + "loss": 0.4757, + "step": 15285 + }, + { + "epoch": 0.6918307309346006, + "grad_norm": 0.6455224544042446, + "learning_rate": 2.2910811039221564e-06, + "loss": 0.3227, + "step": 15286 + }, + { + "epoch": 0.6918759900429962, + "grad_norm": 0.28667936098983365, + "learning_rate": 2.2904650997938105e-06, + "loss": 0.4699, + "step": 15287 + }, + { + "epoch": 0.6919212491513917, + "grad_norm": 0.28509191413429114, + "learning_rate": 2.2898491538850478e-06, + "loss": 0.4993, + "step": 15288 + }, + { + "epoch": 0.6919665082597872, + "grad_norm": 0.6057133211093666, + "learning_rate": 2.2892332662091017e-06, + "loss": 0.2808, + "step": 15289 + }, + { + "epoch": 0.6920117673681828, + "grad_norm": 0.5920501028162446, + "learning_rate": 2.288617436779207e-06, + "loss": 0.2905, + "step": 15290 + }, + { + "epoch": 0.6920570264765784, + "grad_norm": 0.26799070501983296, + "learning_rate": 2.2880016656085995e-06, + "loss": 0.4701, + "step": 15291 + }, + { + "epoch": 0.692102285584974, + "grad_norm": 0.30276461529003007, + "learning_rate": 2.2873859527105037e-06, + "loss": 0.4876, + "step": 15292 + }, + { + "epoch": 0.6921475446933696, + "grad_norm": 0.5982779481081313, + "learning_rate": 2.286770298098153e-06, + "loss": 0.3028, + "step": 15293 + }, + { + "epoch": 0.6921928038017651, + "grad_norm": 0.6289253461990337, + "learning_rate": 2.286154701784776e-06, + "loss": 0.3119, + "step": 15294 + }, + { + "epoch": 0.6922380629101607, + "grad_norm": 0.6455845115964702, + "learning_rate": 2.2855391637836006e-06, + "loss": 0.3064, + "step": 15295 + }, + { + "epoch": 0.6922833220185562, + "grad_norm": 0.6567945353620316, + "learning_rate": 2.2849236841078496e-06, + "loss": 0.2721, + "step": 15296 + }, + { + "epoch": 0.6923285811269518, + "grad_norm": 0.5886357694478286, + "learning_rate": 2.2843082627707517e-06, + "loss": 0.2861, + "step": 15297 + }, + { + "epoch": 0.6923738402353473, + "grad_norm": 0.7481216016016371, + "learning_rate": 2.2836928997855274e-06, + "loss": 0.3164, + "step": 15298 + }, + { + "epoch": 0.6924190993437429, + "grad_norm": 0.6880285776070417, + "learning_rate": 2.2830775951654018e-06, + "loss": 0.3323, + "step": 15299 + }, + { + "epoch": 0.6924643584521385, + "grad_norm": 0.5801076690074016, + "learning_rate": 2.282462348923592e-06, + "loss": 0.3055, + "step": 15300 + }, + { + "epoch": 0.6925096175605341, + "grad_norm": 0.5879289946383263, + "learning_rate": 2.281847161073322e-06, + "loss": 0.283, + "step": 15301 + }, + { + "epoch": 0.6925548766689297, + "grad_norm": 0.6179725409027124, + "learning_rate": 2.2812320316278065e-06, + "loss": 0.3125, + "step": 15302 + }, + { + "epoch": 0.6926001357773252, + "grad_norm": 0.6156506331013871, + "learning_rate": 2.2806169606002663e-06, + "loss": 0.279, + "step": 15303 + }, + { + "epoch": 0.6926453948857207, + "grad_norm": 1.2120338640195765, + "learning_rate": 2.280001948003916e-06, + "loss": 0.2796, + "step": 15304 + }, + { + "epoch": 0.6926906539941163, + "grad_norm": 0.6522398121819206, + "learning_rate": 2.279386993851968e-06, + "loss": 0.3045, + "step": 15305 + }, + { + "epoch": 0.6927359131025119, + "grad_norm": 0.6099418151117931, + "learning_rate": 2.278772098157638e-06, + "loss": 0.3168, + "step": 15306 + }, + { + "epoch": 0.6927811722109074, + "grad_norm": 0.6312106783246817, + "learning_rate": 2.2781572609341397e-06, + "loss": 0.3295, + "step": 15307 + }, + { + "epoch": 0.692826431319303, + "grad_norm": 0.6366029474353617, + "learning_rate": 2.2775424821946824e-06, + "loss": 0.3372, + "step": 15308 + }, + { + "epoch": 0.6928716904276986, + "grad_norm": 0.6116300351638355, + "learning_rate": 2.2769277619524737e-06, + "loss": 0.2665, + "step": 15309 + }, + { + "epoch": 0.6929169495360942, + "grad_norm": 0.5517062110299067, + "learning_rate": 2.276313100220726e-06, + "loss": 0.3119, + "step": 15310 + }, + { + "epoch": 0.6929622086444897, + "grad_norm": 0.6117086962300063, + "learning_rate": 2.275698497012643e-06, + "loss": 0.3067, + "step": 15311 + }, + { + "epoch": 0.6930074677528852, + "grad_norm": 0.6382350178222626, + "learning_rate": 2.275083952341434e-06, + "loss": 0.2745, + "step": 15312 + }, + { + "epoch": 0.6930527268612808, + "grad_norm": 0.6304432415131873, + "learning_rate": 2.2744694662203022e-06, + "loss": 0.3727, + "step": 15313 + }, + { + "epoch": 0.6930979859696764, + "grad_norm": 0.7568616098636195, + "learning_rate": 2.273855038662448e-06, + "loss": 0.2959, + "step": 15314 + }, + { + "epoch": 0.693143245078072, + "grad_norm": 0.5968925730201458, + "learning_rate": 2.2732406696810773e-06, + "loss": 0.3061, + "step": 15315 + }, + { + "epoch": 0.6931885041864675, + "grad_norm": 0.6491887672754614, + "learning_rate": 2.2726263592893914e-06, + "loss": 0.2965, + "step": 15316 + }, + { + "epoch": 0.6932337632948631, + "grad_norm": 0.5861213051717736, + "learning_rate": 2.2720121075005884e-06, + "loss": 0.3298, + "step": 15317 + }, + { + "epoch": 0.6932790224032587, + "grad_norm": 0.639189288756921, + "learning_rate": 2.271397914327865e-06, + "loss": 0.281, + "step": 15318 + }, + { + "epoch": 0.6933242815116543, + "grad_norm": 0.5877308010010714, + "learning_rate": 2.2707837797844208e-06, + "loss": 0.2718, + "step": 15319 + }, + { + "epoch": 0.6933695406200497, + "grad_norm": 0.32706793670548634, + "learning_rate": 2.2701697038834543e-06, + "loss": 0.4743, + "step": 15320 + }, + { + "epoch": 0.6934147997284453, + "grad_norm": 0.5812374384104216, + "learning_rate": 2.269555686638153e-06, + "loss": 0.3242, + "step": 15321 + }, + { + "epoch": 0.6934600588368409, + "grad_norm": 0.8841772008999619, + "learning_rate": 2.268941728061714e-06, + "loss": 0.3378, + "step": 15322 + }, + { + "epoch": 0.6935053179452365, + "grad_norm": 0.6341474894444818, + "learning_rate": 2.2683278281673315e-06, + "loss": 0.2963, + "step": 15323 + }, + { + "epoch": 0.693550577053632, + "grad_norm": 0.5919393409921639, + "learning_rate": 2.2677139869681943e-06, + "loss": 0.3238, + "step": 15324 + }, + { + "epoch": 0.6935958361620276, + "grad_norm": 0.5823503974483093, + "learning_rate": 2.2671002044774896e-06, + "loss": 0.2791, + "step": 15325 + }, + { + "epoch": 0.6936410952704232, + "grad_norm": 0.6825871993082148, + "learning_rate": 2.266486480708411e-06, + "loss": 0.3359, + "step": 15326 + }, + { + "epoch": 0.6936863543788188, + "grad_norm": 0.6033434672171372, + "learning_rate": 2.26587281567414e-06, + "loss": 0.3062, + "step": 15327 + }, + { + "epoch": 0.6937316134872143, + "grad_norm": 0.5871056512885556, + "learning_rate": 2.265259209387867e-06, + "loss": 0.2796, + "step": 15328 + }, + { + "epoch": 0.6937768725956098, + "grad_norm": 0.2792552271003976, + "learning_rate": 2.2646456618627723e-06, + "loss": 0.4542, + "step": 15329 + }, + { + "epoch": 0.6938221317040054, + "grad_norm": 0.6157049851124522, + "learning_rate": 2.2640321731120434e-06, + "loss": 0.3141, + "step": 15330 + }, + { + "epoch": 0.693867390812401, + "grad_norm": 0.6200462231323365, + "learning_rate": 2.2634187431488585e-06, + "loss": 0.2843, + "step": 15331 + }, + { + "epoch": 0.6939126499207966, + "grad_norm": 0.6690563909113196, + "learning_rate": 2.262805371986402e-06, + "loss": 0.3096, + "step": 15332 + }, + { + "epoch": 0.6939579090291921, + "grad_norm": 0.5942516202410125, + "learning_rate": 2.2621920596378503e-06, + "loss": 0.3136, + "step": 15333 + }, + { + "epoch": 0.6940031681375877, + "grad_norm": 0.6065599068867679, + "learning_rate": 2.2615788061163824e-06, + "loss": 0.2676, + "step": 15334 + }, + { + "epoch": 0.6940484272459833, + "grad_norm": 0.6547981697132818, + "learning_rate": 2.2609656114351745e-06, + "loss": 0.2857, + "step": 15335 + }, + { + "epoch": 0.6940936863543788, + "grad_norm": 0.6390839950958643, + "learning_rate": 2.2603524756074057e-06, + "loss": 0.36, + "step": 15336 + }, + { + "epoch": 0.6941389454627744, + "grad_norm": 0.6215447818598099, + "learning_rate": 2.2597393986462477e-06, + "loss": 0.3148, + "step": 15337 + }, + { + "epoch": 0.6941842045711699, + "grad_norm": 0.6120277468101288, + "learning_rate": 2.2591263805648724e-06, + "loss": 0.2996, + "step": 15338 + }, + { + "epoch": 0.6942294636795655, + "grad_norm": 0.2806594600194946, + "learning_rate": 2.258513421376455e-06, + "loss": 0.4759, + "step": 15339 + }, + { + "epoch": 0.6942747227879611, + "grad_norm": 0.6209858582212454, + "learning_rate": 2.2579005210941622e-06, + "loss": 0.3083, + "step": 15340 + }, + { + "epoch": 0.6943199818963567, + "grad_norm": 0.6114612468150938, + "learning_rate": 2.2572876797311676e-06, + "loss": 0.3039, + "step": 15341 + }, + { + "epoch": 0.6943652410047522, + "grad_norm": 0.29673365820543085, + "learning_rate": 2.256674897300635e-06, + "loss": 0.4914, + "step": 15342 + }, + { + "epoch": 0.6944105001131478, + "grad_norm": 0.5557606997037232, + "learning_rate": 2.2560621738157357e-06, + "loss": 0.2851, + "step": 15343 + }, + { + "epoch": 0.6944557592215433, + "grad_norm": 0.5788703839311166, + "learning_rate": 2.2554495092896306e-06, + "loss": 0.3078, + "step": 15344 + }, + { + "epoch": 0.6945010183299389, + "grad_norm": 0.6749206715587992, + "learning_rate": 2.254836903735488e-06, + "loss": 0.3457, + "step": 15345 + }, + { + "epoch": 0.6945462774383344, + "grad_norm": 0.6495157356241859, + "learning_rate": 2.25422435716647e-06, + "loss": 0.3052, + "step": 15346 + }, + { + "epoch": 0.69459153654673, + "grad_norm": 0.6655681762682285, + "learning_rate": 2.2536118695957353e-06, + "loss": 0.3935, + "step": 15347 + }, + { + "epoch": 0.6946367956551256, + "grad_norm": 0.6001421976661787, + "learning_rate": 2.252999441036447e-06, + "loss": 0.263, + "step": 15348 + }, + { + "epoch": 0.6946820547635212, + "grad_norm": 0.616823676348002, + "learning_rate": 2.252387071501767e-06, + "loss": 0.3224, + "step": 15349 + }, + { + "epoch": 0.6947273138719168, + "grad_norm": 0.6300197767755465, + "learning_rate": 2.2517747610048467e-06, + "loss": 0.3126, + "step": 15350 + }, + { + "epoch": 0.6947725729803123, + "grad_norm": 0.6020949362258067, + "learning_rate": 2.2511625095588465e-06, + "loss": 0.2893, + "step": 15351 + }, + { + "epoch": 0.6948178320887078, + "grad_norm": 0.8110079469776496, + "learning_rate": 2.2505503171769233e-06, + "loss": 0.2813, + "step": 15352 + }, + { + "epoch": 0.6948630911971034, + "grad_norm": 0.6305017685210226, + "learning_rate": 2.2499381838722296e-06, + "loss": 0.3671, + "step": 15353 + }, + { + "epoch": 0.694908350305499, + "grad_norm": 0.6055438844247768, + "learning_rate": 2.2493261096579163e-06, + "loss": 0.3377, + "step": 15354 + }, + { + "epoch": 0.6949536094138945, + "grad_norm": 0.8846887760337592, + "learning_rate": 2.2487140945471382e-06, + "loss": 0.3407, + "step": 15355 + }, + { + "epoch": 0.6949988685222901, + "grad_norm": 0.6130218136946555, + "learning_rate": 2.2481021385530427e-06, + "loss": 0.3077, + "step": 15356 + }, + { + "epoch": 0.6950441276306857, + "grad_norm": 0.30618512419935756, + "learning_rate": 2.2474902416887824e-06, + "loss": 0.4541, + "step": 15357 + }, + { + "epoch": 0.6950893867390813, + "grad_norm": 0.6349363257772199, + "learning_rate": 2.246878403967501e-06, + "loss": 0.331, + "step": 15358 + }, + { + "epoch": 0.6951346458474768, + "grad_norm": 0.5705475206325165, + "learning_rate": 2.2462666254023495e-06, + "loss": 0.3305, + "step": 15359 + }, + { + "epoch": 0.6951799049558723, + "grad_norm": 0.2784925673579084, + "learning_rate": 2.2456549060064684e-06, + "loss": 0.4916, + "step": 15360 + }, + { + "epoch": 0.6952251640642679, + "grad_norm": 0.6430028863711853, + "learning_rate": 2.245043245793006e-06, + "loss": 0.3054, + "step": 15361 + }, + { + "epoch": 0.6952704231726635, + "grad_norm": 0.5952655056608498, + "learning_rate": 2.2444316447751034e-06, + "loss": 0.3257, + "step": 15362 + }, + { + "epoch": 0.6953156822810591, + "grad_norm": 0.6920910924343269, + "learning_rate": 2.2438201029658995e-06, + "loss": 0.3128, + "step": 15363 + }, + { + "epoch": 0.6953609413894546, + "grad_norm": 0.5834875442704955, + "learning_rate": 2.243208620378537e-06, + "loss": 0.3147, + "step": 15364 + }, + { + "epoch": 0.6954062004978502, + "grad_norm": 0.6003270635637863, + "learning_rate": 2.2425971970261558e-06, + "loss": 0.2993, + "step": 15365 + }, + { + "epoch": 0.6954514596062458, + "grad_norm": 0.6027439237743176, + "learning_rate": 2.2419858329218926e-06, + "loss": 0.2991, + "step": 15366 + }, + { + "epoch": 0.6954967187146414, + "grad_norm": 0.6248205418356746, + "learning_rate": 2.2413745280788806e-06, + "loss": 0.3246, + "step": 15367 + }, + { + "epoch": 0.6955419778230368, + "grad_norm": 0.6615877596406995, + "learning_rate": 2.2407632825102605e-06, + "loss": 0.2491, + "step": 15368 + }, + { + "epoch": 0.6955872369314324, + "grad_norm": 0.63773715869281, + "learning_rate": 2.24015209622916e-06, + "loss": 0.2661, + "step": 15369 + }, + { + "epoch": 0.695632496039828, + "grad_norm": 0.5830994928670054, + "learning_rate": 2.2395409692487174e-06, + "loss": 0.2991, + "step": 15370 + }, + { + "epoch": 0.6956777551482236, + "grad_norm": 0.2917371253576962, + "learning_rate": 2.2389299015820592e-06, + "loss": 0.4761, + "step": 15371 + }, + { + "epoch": 0.6957230142566192, + "grad_norm": 0.64376924689846, + "learning_rate": 2.2383188932423192e-06, + "loss": 0.2921, + "step": 15372 + }, + { + "epoch": 0.6957682733650147, + "grad_norm": 0.5848437032666506, + "learning_rate": 2.237707944242623e-06, + "loss": 0.3158, + "step": 15373 + }, + { + "epoch": 0.6958135324734103, + "grad_norm": 0.5974157437942595, + "learning_rate": 2.2370970545961005e-06, + "loss": 0.3715, + "step": 15374 + }, + { + "epoch": 0.6958587915818059, + "grad_norm": 0.29087052445388767, + "learning_rate": 2.236486224315877e-06, + "loss": 0.469, + "step": 15375 + }, + { + "epoch": 0.6959040506902014, + "grad_norm": 0.605095854819963, + "learning_rate": 2.2358754534150752e-06, + "loss": 0.2879, + "step": 15376 + }, + { + "epoch": 0.6959493097985969, + "grad_norm": 0.6365435641897976, + "learning_rate": 2.2352647419068207e-06, + "loss": 0.3192, + "step": 15377 + }, + { + "epoch": 0.6959945689069925, + "grad_norm": 0.6548937754424179, + "learning_rate": 2.2346540898042372e-06, + "loss": 0.2828, + "step": 15378 + }, + { + "epoch": 0.6960398280153881, + "grad_norm": 0.5831388735421803, + "learning_rate": 2.2340434971204445e-06, + "loss": 0.3149, + "step": 15379 + }, + { + "epoch": 0.6960850871237837, + "grad_norm": 0.6870834276561678, + "learning_rate": 2.2334329638685598e-06, + "loss": 0.3101, + "step": 15380 + }, + { + "epoch": 0.6961303462321792, + "grad_norm": 0.5934653026487168, + "learning_rate": 2.2328224900617064e-06, + "loss": 0.3087, + "step": 15381 + }, + { + "epoch": 0.6961756053405748, + "grad_norm": 0.6883932566836034, + "learning_rate": 2.2322120757129983e-06, + "loss": 0.2841, + "step": 15382 + }, + { + "epoch": 0.6962208644489704, + "grad_norm": 0.7244763280980958, + "learning_rate": 2.2316017208355504e-06, + "loss": 0.3141, + "step": 15383 + }, + { + "epoch": 0.696266123557366, + "grad_norm": 0.6129160752040061, + "learning_rate": 2.2309914254424807e-06, + "loss": 0.2932, + "step": 15384 + }, + { + "epoch": 0.6963113826657615, + "grad_norm": 0.26808067124315665, + "learning_rate": 2.2303811895468996e-06, + "loss": 0.4697, + "step": 15385 + }, + { + "epoch": 0.696356641774157, + "grad_norm": 0.6290085659815462, + "learning_rate": 2.2297710131619214e-06, + "loss": 0.3376, + "step": 15386 + }, + { + "epoch": 0.6964019008825526, + "grad_norm": 0.6568816515106904, + "learning_rate": 2.229160896300655e-06, + "loss": 0.3676, + "step": 15387 + }, + { + "epoch": 0.6964471599909482, + "grad_norm": 0.610442344706643, + "learning_rate": 2.228550838976213e-06, + "loss": 0.2938, + "step": 15388 + }, + { + "epoch": 0.6964924190993438, + "grad_norm": 0.2898365808540318, + "learning_rate": 2.227940841201699e-06, + "loss": 0.4745, + "step": 15389 + }, + { + "epoch": 0.6965376782077393, + "grad_norm": 0.2822728999278438, + "learning_rate": 2.227330902990225e-06, + "loss": 0.4687, + "step": 15390 + }, + { + "epoch": 0.6965829373161349, + "grad_norm": 0.5926488550372246, + "learning_rate": 2.2267210243548943e-06, + "loss": 0.2607, + "step": 15391 + }, + { + "epoch": 0.6966281964245304, + "grad_norm": 0.6007425928292506, + "learning_rate": 2.226111205308809e-06, + "loss": 0.3012, + "step": 15392 + }, + { + "epoch": 0.696673455532926, + "grad_norm": 0.2771887358013033, + "learning_rate": 2.225501445865075e-06, + "loss": 0.4933, + "step": 15393 + }, + { + "epoch": 0.6967187146413215, + "grad_norm": 0.6469024784582776, + "learning_rate": 2.224891746036795e-06, + "loss": 0.2889, + "step": 15394 + }, + { + "epoch": 0.6967639737497171, + "grad_norm": 0.2972666825183385, + "learning_rate": 2.224282105837069e-06, + "loss": 0.493, + "step": 15395 + }, + { + "epoch": 0.6968092328581127, + "grad_norm": 0.6249787087006309, + "learning_rate": 2.2236725252789933e-06, + "loss": 0.2966, + "step": 15396 + }, + { + "epoch": 0.6968544919665083, + "grad_norm": 0.28522173700529135, + "learning_rate": 2.22306300437567e-06, + "loss": 0.468, + "step": 15397 + }, + { + "epoch": 0.6968997510749039, + "grad_norm": 0.6156039440420061, + "learning_rate": 2.222453543140192e-06, + "loss": 0.3143, + "step": 15398 + }, + { + "epoch": 0.6969450101832994, + "grad_norm": 0.5954022875600979, + "learning_rate": 2.221844141585659e-06, + "loss": 0.3107, + "step": 15399 + }, + { + "epoch": 0.6969902692916949, + "grad_norm": 0.6166093898007311, + "learning_rate": 2.221234799725161e-06, + "loss": 0.2811, + "step": 15400 + }, + { + "epoch": 0.6970355284000905, + "grad_norm": 0.6630235280814255, + "learning_rate": 2.220625517571795e-06, + "loss": 0.3344, + "step": 15401 + }, + { + "epoch": 0.6970807875084861, + "grad_norm": 0.6450303140316446, + "learning_rate": 2.2200162951386477e-06, + "loss": 0.3113, + "step": 15402 + }, + { + "epoch": 0.6971260466168816, + "grad_norm": 0.6059302954765791, + "learning_rate": 2.219407132438815e-06, + "loss": 0.3176, + "step": 15403 + }, + { + "epoch": 0.6971713057252772, + "grad_norm": 0.6265849775857281, + "learning_rate": 2.2187980294853827e-06, + "loss": 0.2914, + "step": 15404 + }, + { + "epoch": 0.6972165648336728, + "grad_norm": 0.6272461141911604, + "learning_rate": 2.2181889862914368e-06, + "loss": 0.2895, + "step": 15405 + }, + { + "epoch": 0.6972618239420684, + "grad_norm": 0.6329130486899887, + "learning_rate": 2.217580002870066e-06, + "loss": 0.309, + "step": 15406 + }, + { + "epoch": 0.697307083050464, + "grad_norm": 0.6510575440133307, + "learning_rate": 2.2169710792343574e-06, + "loss": 0.3516, + "step": 15407 + }, + { + "epoch": 0.6973523421588594, + "grad_norm": 0.6025879494225266, + "learning_rate": 2.216362215397393e-06, + "loss": 0.2705, + "step": 15408 + }, + { + "epoch": 0.697397601267255, + "grad_norm": 0.6229692611246024, + "learning_rate": 2.2157534113722533e-06, + "loss": 0.3211, + "step": 15409 + }, + { + "epoch": 0.6974428603756506, + "grad_norm": 0.6336128765611528, + "learning_rate": 2.215144667172023e-06, + "loss": 0.3557, + "step": 15410 + }, + { + "epoch": 0.6974881194840462, + "grad_norm": 0.6089725873892945, + "learning_rate": 2.21453598280978e-06, + "loss": 0.3174, + "step": 15411 + }, + { + "epoch": 0.6975333785924417, + "grad_norm": 0.6146045841194961, + "learning_rate": 2.213927358298605e-06, + "loss": 0.3422, + "step": 15412 + }, + { + "epoch": 0.6975786377008373, + "grad_norm": 0.6473314379938416, + "learning_rate": 2.213318793651573e-06, + "loss": 0.2705, + "step": 15413 + }, + { + "epoch": 0.6976238968092329, + "grad_norm": 0.5548174179736901, + "learning_rate": 2.2127102888817626e-06, + "loss": 0.2832, + "step": 15414 + }, + { + "epoch": 0.6976691559176285, + "grad_norm": 0.6272400617526707, + "learning_rate": 2.2121018440022458e-06, + "loss": 0.3175, + "step": 15415 + }, + { + "epoch": 0.6977144150260239, + "grad_norm": 0.2872778648761435, + "learning_rate": 2.2114934590261e-06, + "loss": 0.4648, + "step": 15416 + }, + { + "epoch": 0.6977596741344195, + "grad_norm": 0.6501873735094622, + "learning_rate": 2.2108851339663956e-06, + "loss": 0.3486, + "step": 15417 + }, + { + "epoch": 0.6978049332428151, + "grad_norm": 0.5714597352968046, + "learning_rate": 2.210276868836202e-06, + "loss": 0.2585, + "step": 15418 + }, + { + "epoch": 0.6978501923512107, + "grad_norm": 0.6699368490383701, + "learning_rate": 2.209668663648592e-06, + "loss": 0.2976, + "step": 15419 + }, + { + "epoch": 0.6978954514596063, + "grad_norm": 0.5839244450998203, + "learning_rate": 2.2090605184166325e-06, + "loss": 0.3516, + "step": 15420 + }, + { + "epoch": 0.6979407105680018, + "grad_norm": 0.6601736114108441, + "learning_rate": 2.208452433153389e-06, + "loss": 0.2602, + "step": 15421 + }, + { + "epoch": 0.6979859696763974, + "grad_norm": 1.2962557190320656, + "learning_rate": 2.207844407871929e-06, + "loss": 0.2763, + "step": 15422 + }, + { + "epoch": 0.698031228784793, + "grad_norm": 0.3335447019667144, + "learning_rate": 2.2072364425853193e-06, + "loss": 0.475, + "step": 15423 + }, + { + "epoch": 0.6980764878931885, + "grad_norm": 0.6271978005182927, + "learning_rate": 2.206628537306621e-06, + "loss": 0.3245, + "step": 15424 + }, + { + "epoch": 0.698121747001584, + "grad_norm": 0.5811086521395183, + "learning_rate": 2.206020692048895e-06, + "loss": 0.2691, + "step": 15425 + }, + { + "epoch": 0.6981670061099796, + "grad_norm": 0.5722795124330649, + "learning_rate": 2.2054129068252037e-06, + "loss": 0.2459, + "step": 15426 + }, + { + "epoch": 0.6982122652183752, + "grad_norm": 0.6096023565887093, + "learning_rate": 2.2048051816486054e-06, + "loss": 0.2928, + "step": 15427 + }, + { + "epoch": 0.6982575243267708, + "grad_norm": 0.294609066488518, + "learning_rate": 2.2041975165321606e-06, + "loss": 0.4718, + "step": 15428 + }, + { + "epoch": 0.6983027834351663, + "grad_norm": 0.2535641837168702, + "learning_rate": 2.2035899114889226e-06, + "loss": 0.4586, + "step": 15429 + }, + { + "epoch": 0.6983480425435619, + "grad_norm": 0.5771085270607175, + "learning_rate": 2.2029823665319504e-06, + "loss": 0.272, + "step": 15430 + }, + { + "epoch": 0.6983933016519575, + "grad_norm": 0.6769738222692228, + "learning_rate": 2.2023748816742955e-06, + "loss": 0.3222, + "step": 15431 + }, + { + "epoch": 0.698438560760353, + "grad_norm": 0.6033882521564669, + "learning_rate": 2.201767456929014e-06, + "loss": 0.2761, + "step": 15432 + }, + { + "epoch": 0.6984838198687486, + "grad_norm": 0.5714842277367986, + "learning_rate": 2.2011600923091554e-06, + "loss": 0.3192, + "step": 15433 + }, + { + "epoch": 0.6985290789771441, + "grad_norm": 0.6509570167193155, + "learning_rate": 2.200552787827768e-06, + "loss": 0.3089, + "step": 15434 + }, + { + "epoch": 0.6985743380855397, + "grad_norm": 0.6189100093640094, + "learning_rate": 2.1999455434979046e-06, + "loss": 0.2903, + "step": 15435 + }, + { + "epoch": 0.6986195971939353, + "grad_norm": 0.6344851280611853, + "learning_rate": 2.1993383593326127e-06, + "loss": 0.3026, + "step": 15436 + }, + { + "epoch": 0.6986648563023309, + "grad_norm": 0.27780582832440787, + "learning_rate": 2.1987312353449386e-06, + "loss": 0.4913, + "step": 15437 + }, + { + "epoch": 0.6987101154107264, + "grad_norm": 0.28615854234003973, + "learning_rate": 2.1981241715479247e-06, + "loss": 0.4925, + "step": 15438 + }, + { + "epoch": 0.698755374519122, + "grad_norm": 0.5812592354372973, + "learning_rate": 2.1975171679546187e-06, + "loss": 0.2965, + "step": 15439 + }, + { + "epoch": 0.6988006336275175, + "grad_norm": 0.6557422880930702, + "learning_rate": 2.1969102245780592e-06, + "loss": 0.3421, + "step": 15440 + }, + { + "epoch": 0.6988458927359131, + "grad_norm": 0.6649927896003363, + "learning_rate": 2.196303341431293e-06, + "loss": 0.269, + "step": 15441 + }, + { + "epoch": 0.6988911518443087, + "grad_norm": 0.5648141574982591, + "learning_rate": 2.1956965185273545e-06, + "loss": 0.3044, + "step": 15442 + }, + { + "epoch": 0.6989364109527042, + "grad_norm": 0.6455390831857626, + "learning_rate": 2.1950897558792873e-06, + "loss": 0.2672, + "step": 15443 + }, + { + "epoch": 0.6989816700610998, + "grad_norm": 0.65669426469483, + "learning_rate": 2.1944830535001244e-06, + "loss": 0.3157, + "step": 15444 + }, + { + "epoch": 0.6990269291694954, + "grad_norm": 0.6604232285727459, + "learning_rate": 2.193876411402906e-06, + "loss": 0.2951, + "step": 15445 + }, + { + "epoch": 0.699072188277891, + "grad_norm": 0.6278571713730251, + "learning_rate": 2.193269829600665e-06, + "loss": 0.3403, + "step": 15446 + }, + { + "epoch": 0.6991174473862865, + "grad_norm": 0.2877594596193233, + "learning_rate": 2.1926633081064336e-06, + "loss": 0.4824, + "step": 15447 + }, + { + "epoch": 0.699162706494682, + "grad_norm": 0.626805222348979, + "learning_rate": 2.1920568469332458e-06, + "loss": 0.2951, + "step": 15448 + }, + { + "epoch": 0.6992079656030776, + "grad_norm": 0.6295785599758981, + "learning_rate": 2.191450446094136e-06, + "loss": 0.3224, + "step": 15449 + }, + { + "epoch": 0.6992532247114732, + "grad_norm": 0.9393405038196936, + "learning_rate": 2.190844105602127e-06, + "loss": 0.283, + "step": 15450 + }, + { + "epoch": 0.6992984838198687, + "grad_norm": 0.33121434047560755, + "learning_rate": 2.19023782547025e-06, + "loss": 0.4909, + "step": 15451 + }, + { + "epoch": 0.6993437429282643, + "grad_norm": 0.3220401310269192, + "learning_rate": 2.1896316057115343e-06, + "loss": 0.4659, + "step": 15452 + }, + { + "epoch": 0.6993890020366599, + "grad_norm": 0.5645794389594799, + "learning_rate": 2.189025446339004e-06, + "loss": 0.266, + "step": 15453 + }, + { + "epoch": 0.6994342611450555, + "grad_norm": 0.6312858299620019, + "learning_rate": 2.1884193473656824e-06, + "loss": 0.2731, + "step": 15454 + }, + { + "epoch": 0.6994795202534511, + "grad_norm": 0.6166425710591585, + "learning_rate": 2.187813308804595e-06, + "loss": 0.3496, + "step": 15455 + }, + { + "epoch": 0.6995247793618465, + "grad_norm": 0.6094685675555125, + "learning_rate": 2.1872073306687614e-06, + "loss": 0.3373, + "step": 15456 + }, + { + "epoch": 0.6995700384702421, + "grad_norm": 0.6105705990820928, + "learning_rate": 2.186601412971205e-06, + "loss": 0.3242, + "step": 15457 + }, + { + "epoch": 0.6996152975786377, + "grad_norm": 0.2643205914938593, + "learning_rate": 2.185995555724942e-06, + "loss": 0.4515, + "step": 15458 + }, + { + "epoch": 0.6996605566870333, + "grad_norm": 0.29166666670093994, + "learning_rate": 2.1853897589429935e-06, + "loss": 0.4682, + "step": 15459 + }, + { + "epoch": 0.6997058157954288, + "grad_norm": 0.6264830100800826, + "learning_rate": 2.184784022638373e-06, + "loss": 0.3121, + "step": 15460 + }, + { + "epoch": 0.6997510749038244, + "grad_norm": 0.25817546316852996, + "learning_rate": 2.184178346824099e-06, + "loss": 0.4483, + "step": 15461 + }, + { + "epoch": 0.69979633401222, + "grad_norm": 0.2978882566016645, + "learning_rate": 2.1835727315131842e-06, + "loss": 0.4954, + "step": 15462 + }, + { + "epoch": 0.6998415931206156, + "grad_norm": 0.5901124855544712, + "learning_rate": 2.18296717671864e-06, + "loss": 0.3194, + "step": 15463 + }, + { + "epoch": 0.699886852229011, + "grad_norm": 0.28291400965652297, + "learning_rate": 2.1823616824534788e-06, + "loss": 0.4612, + "step": 15464 + }, + { + "epoch": 0.6999321113374066, + "grad_norm": 0.6461112710167152, + "learning_rate": 2.181756248730714e-06, + "loss": 0.2949, + "step": 15465 + }, + { + "epoch": 0.6999773704458022, + "grad_norm": 0.5991463451115281, + "learning_rate": 2.1811508755633508e-06, + "loss": 0.3271, + "step": 15466 + }, + { + "epoch": 0.7000226295541978, + "grad_norm": 0.6766384769010018, + "learning_rate": 2.1805455629643966e-06, + "loss": 0.255, + "step": 15467 + }, + { + "epoch": 0.7000678886625934, + "grad_norm": 0.6061024812032464, + "learning_rate": 2.179940310946861e-06, + "loss": 0.2896, + "step": 15468 + }, + { + "epoch": 0.7001131477709889, + "grad_norm": 0.6709462130279129, + "learning_rate": 2.179335119523745e-06, + "loss": 0.2962, + "step": 15469 + }, + { + "epoch": 0.7001584068793845, + "grad_norm": 0.581660104951167, + "learning_rate": 2.178729988708056e-06, + "loss": 0.2924, + "step": 15470 + }, + { + "epoch": 0.70020366598778, + "grad_norm": 0.6539112408764716, + "learning_rate": 2.178124918512793e-06, + "loss": 0.2999, + "step": 15471 + }, + { + "epoch": 0.7002489250961756, + "grad_norm": 0.6104589647548964, + "learning_rate": 2.17751990895096e-06, + "loss": 0.2721, + "step": 15472 + }, + { + "epoch": 0.7002941842045711, + "grad_norm": 0.6344018666873753, + "learning_rate": 2.1769149600355545e-06, + "loss": 0.2883, + "step": 15473 + }, + { + "epoch": 0.7003394433129667, + "grad_norm": 0.8703735890057297, + "learning_rate": 2.176310071779577e-06, + "loss": 0.2954, + "step": 15474 + }, + { + "epoch": 0.7003847024213623, + "grad_norm": 0.6420105953615923, + "learning_rate": 2.1757052441960248e-06, + "loss": 0.2981, + "step": 15475 + }, + { + "epoch": 0.7004299615297579, + "grad_norm": 0.5941777634412325, + "learning_rate": 2.17510047729789e-06, + "loss": 0.2762, + "step": 15476 + }, + { + "epoch": 0.7004752206381534, + "grad_norm": 0.3041590428798745, + "learning_rate": 2.174495771098171e-06, + "loss": 0.4802, + "step": 15477 + }, + { + "epoch": 0.700520479746549, + "grad_norm": 0.3065766008871142, + "learning_rate": 2.173891125609863e-06, + "loss": 0.4989, + "step": 15478 + }, + { + "epoch": 0.7005657388549446, + "grad_norm": 0.6260164462093338, + "learning_rate": 2.1732865408459508e-06, + "loss": 0.3296, + "step": 15479 + }, + { + "epoch": 0.7006109979633401, + "grad_norm": 0.2776070640653133, + "learning_rate": 2.17268201681943e-06, + "loss": 0.4473, + "step": 15480 + }, + { + "epoch": 0.7006562570717357, + "grad_norm": 0.2922294395288578, + "learning_rate": 2.172077553543291e-06, + "loss": 0.4879, + "step": 15481 + }, + { + "epoch": 0.7007015161801312, + "grad_norm": 0.6019312043691404, + "learning_rate": 2.17147315103052e-06, + "loss": 0.2829, + "step": 15482 + }, + { + "epoch": 0.7007467752885268, + "grad_norm": 0.5946946099216283, + "learning_rate": 2.1708688092941018e-06, + "loss": 0.3021, + "step": 15483 + }, + { + "epoch": 0.7007920343969224, + "grad_norm": 0.6213603603657362, + "learning_rate": 2.1702645283470238e-06, + "loss": 0.3011, + "step": 15484 + }, + { + "epoch": 0.700837293505318, + "grad_norm": 0.6604124753553283, + "learning_rate": 2.169660308202272e-06, + "loss": 0.3158, + "step": 15485 + }, + { + "epoch": 0.7008825526137135, + "grad_norm": 0.6684013995648608, + "learning_rate": 2.169056148872828e-06, + "loss": 0.2895, + "step": 15486 + }, + { + "epoch": 0.700927811722109, + "grad_norm": 0.7528060434075988, + "learning_rate": 2.1684520503716704e-06, + "loss": 0.2915, + "step": 15487 + }, + { + "epoch": 0.7009730708305046, + "grad_norm": 0.31181609176172187, + "learning_rate": 2.167848012711784e-06, + "loss": 0.4931, + "step": 15488 + }, + { + "epoch": 0.7010183299389002, + "grad_norm": 0.5794370482748491, + "learning_rate": 2.1672440359061435e-06, + "loss": 0.3021, + "step": 15489 + }, + { + "epoch": 0.7010635890472958, + "grad_norm": 0.6148117834325464, + "learning_rate": 2.16664011996773e-06, + "loss": 0.3133, + "step": 15490 + }, + { + "epoch": 0.7011088481556913, + "grad_norm": 0.7677040014486417, + "learning_rate": 2.166036264909519e-06, + "loss": 0.3024, + "step": 15491 + }, + { + "epoch": 0.7011541072640869, + "grad_norm": 0.6786446701334466, + "learning_rate": 2.165432470744483e-06, + "loss": 0.2839, + "step": 15492 + }, + { + "epoch": 0.7011993663724825, + "grad_norm": 0.8929527677804295, + "learning_rate": 2.164828737485597e-06, + "loss": 0.3138, + "step": 15493 + }, + { + "epoch": 0.7012446254808781, + "grad_norm": 0.6217136165511883, + "learning_rate": 2.164225065145836e-06, + "loss": 0.3477, + "step": 15494 + }, + { + "epoch": 0.7012898845892735, + "grad_norm": 0.6117178180181198, + "learning_rate": 2.163621453738168e-06, + "loss": 0.3093, + "step": 15495 + }, + { + "epoch": 0.7013351436976691, + "grad_norm": 0.6523515263755088, + "learning_rate": 2.1630179032755632e-06, + "loss": 0.2741, + "step": 15496 + }, + { + "epoch": 0.7013804028060647, + "grad_norm": 0.5729455223348102, + "learning_rate": 2.1624144137709917e-06, + "loss": 0.3055, + "step": 15497 + }, + { + "epoch": 0.7014256619144603, + "grad_norm": 0.6059087224660069, + "learning_rate": 2.161810985237418e-06, + "loss": 0.3363, + "step": 15498 + }, + { + "epoch": 0.7014709210228558, + "grad_norm": 0.7130322049678949, + "learning_rate": 2.1612076176878112e-06, + "loss": 0.3404, + "step": 15499 + }, + { + "epoch": 0.7015161801312514, + "grad_norm": 0.5802648354575368, + "learning_rate": 2.1606043111351316e-06, + "loss": 0.2398, + "step": 15500 + }, + { + "epoch": 0.701561439239647, + "grad_norm": 0.7169971190891021, + "learning_rate": 2.160001065592347e-06, + "loss": 0.2982, + "step": 15501 + }, + { + "epoch": 0.7016066983480426, + "grad_norm": 0.696332156180076, + "learning_rate": 2.1593978810724152e-06, + "loss": 0.3027, + "step": 15502 + }, + { + "epoch": 0.7016519574564382, + "grad_norm": 0.6148654487309713, + "learning_rate": 2.158794757588301e-06, + "loss": 0.2887, + "step": 15503 + }, + { + "epoch": 0.7016972165648336, + "grad_norm": 0.3234783939738736, + "learning_rate": 2.1581916951529606e-06, + "loss": 0.479, + "step": 15504 + }, + { + "epoch": 0.7017424756732292, + "grad_norm": 0.5654991950174835, + "learning_rate": 2.1575886937793515e-06, + "loss": 0.3061, + "step": 15505 + }, + { + "epoch": 0.7017877347816248, + "grad_norm": 0.6185639792057311, + "learning_rate": 2.1569857534804317e-06, + "loss": 0.2862, + "step": 15506 + }, + { + "epoch": 0.7018329938900204, + "grad_norm": 0.5872142467018157, + "learning_rate": 2.1563828742691597e-06, + "loss": 0.2907, + "step": 15507 + }, + { + "epoch": 0.7018782529984159, + "grad_norm": 0.6251172930466563, + "learning_rate": 2.1557800561584822e-06, + "loss": 0.2905, + "step": 15508 + }, + { + "epoch": 0.7019235121068115, + "grad_norm": 0.6491345308673889, + "learning_rate": 2.155177299161357e-06, + "loss": 0.3231, + "step": 15509 + }, + { + "epoch": 0.7019687712152071, + "grad_norm": 0.6886456035695668, + "learning_rate": 2.154574603290735e-06, + "loss": 0.3149, + "step": 15510 + }, + { + "epoch": 0.7020140303236027, + "grad_norm": 0.6153866555546205, + "learning_rate": 2.1539719685595665e-06, + "loss": 0.3017, + "step": 15511 + }, + { + "epoch": 0.7020592894319981, + "grad_norm": 0.6256755519011062, + "learning_rate": 2.153369394980798e-06, + "loss": 0.3578, + "step": 15512 + }, + { + "epoch": 0.7021045485403937, + "grad_norm": 0.6207121681626878, + "learning_rate": 2.1527668825673777e-06, + "loss": 0.3478, + "step": 15513 + }, + { + "epoch": 0.7021498076487893, + "grad_norm": 0.634240642136747, + "learning_rate": 2.1521644313322543e-06, + "loss": 0.3382, + "step": 15514 + }, + { + "epoch": 0.7021950667571849, + "grad_norm": 0.6182497417181321, + "learning_rate": 2.151562041288371e-06, + "loss": 0.3213, + "step": 15515 + }, + { + "epoch": 0.7022403258655805, + "grad_norm": 0.6069266273627872, + "learning_rate": 2.1509597124486693e-06, + "loss": 0.2757, + "step": 15516 + }, + { + "epoch": 0.702285584973976, + "grad_norm": 0.568023784096722, + "learning_rate": 2.150357444826095e-06, + "loss": 0.2847, + "step": 15517 + }, + { + "epoch": 0.7023308440823716, + "grad_norm": 0.617320839569645, + "learning_rate": 2.1497552384335858e-06, + "loss": 0.3185, + "step": 15518 + }, + { + "epoch": 0.7023761031907672, + "grad_norm": 0.6363743882498056, + "learning_rate": 2.1491530932840835e-06, + "loss": 0.3063, + "step": 15519 + }, + { + "epoch": 0.7024213622991627, + "grad_norm": 0.6209437669725376, + "learning_rate": 2.1485510093905264e-06, + "loss": 0.3117, + "step": 15520 + }, + { + "epoch": 0.7024666214075582, + "grad_norm": 0.6466039291351509, + "learning_rate": 2.147948986765849e-06, + "loss": 0.3298, + "step": 15521 + }, + { + "epoch": 0.7025118805159538, + "grad_norm": 0.2965267650217191, + "learning_rate": 2.147347025422988e-06, + "loss": 0.5006, + "step": 15522 + }, + { + "epoch": 0.7025571396243494, + "grad_norm": 0.6130321389581915, + "learning_rate": 2.1467451253748797e-06, + "loss": 0.2877, + "step": 15523 + }, + { + "epoch": 0.702602398732745, + "grad_norm": 0.6827378240286927, + "learning_rate": 2.1461432866344554e-06, + "loss": 0.2878, + "step": 15524 + }, + { + "epoch": 0.7026476578411406, + "grad_norm": 0.5873439345678003, + "learning_rate": 2.145541509214646e-06, + "loss": 0.311, + "step": 15525 + }, + { + "epoch": 0.7026929169495361, + "grad_norm": 0.6193308607911314, + "learning_rate": 2.1449397931283838e-06, + "loss": 0.3204, + "step": 15526 + }, + { + "epoch": 0.7027381760579317, + "grad_norm": 0.6274669568283813, + "learning_rate": 2.1443381383885954e-06, + "loss": 0.3135, + "step": 15527 + }, + { + "epoch": 0.7027834351663272, + "grad_norm": 0.5998583644959756, + "learning_rate": 2.1437365450082114e-06, + "loss": 0.2914, + "step": 15528 + }, + { + "epoch": 0.7028286942747228, + "grad_norm": 0.6134254482027904, + "learning_rate": 2.1431350130001556e-06, + "loss": 0.2954, + "step": 15529 + }, + { + "epoch": 0.7028739533831183, + "grad_norm": 0.28329977300242243, + "learning_rate": 2.142533542377355e-06, + "loss": 0.4972, + "step": 15530 + }, + { + "epoch": 0.7029192124915139, + "grad_norm": 0.6315642879116592, + "learning_rate": 2.1419321331527317e-06, + "loss": 0.3163, + "step": 15531 + }, + { + "epoch": 0.7029644715999095, + "grad_norm": 0.6288753910599849, + "learning_rate": 2.14133078533921e-06, + "loss": 0.3264, + "step": 15532 + }, + { + "epoch": 0.7030097307083051, + "grad_norm": 0.6167214538896445, + "learning_rate": 2.14072949894971e-06, + "loss": 0.3117, + "step": 15533 + }, + { + "epoch": 0.7030549898167006, + "grad_norm": 0.5920196063320219, + "learning_rate": 2.14012827399715e-06, + "loss": 0.2891, + "step": 15534 + }, + { + "epoch": 0.7031002489250961, + "grad_norm": 0.5728835353448037, + "learning_rate": 2.13952711049445e-06, + "loss": 0.2853, + "step": 15535 + }, + { + "epoch": 0.7031455080334917, + "grad_norm": 0.6498149796461337, + "learning_rate": 2.1389260084545305e-06, + "loss": 0.3132, + "step": 15536 + }, + { + "epoch": 0.7031907671418873, + "grad_norm": 0.60638016947381, + "learning_rate": 2.1383249678903006e-06, + "loss": 0.3054, + "step": 15537 + }, + { + "epoch": 0.7032360262502829, + "grad_norm": 0.2768250776447379, + "learning_rate": 2.1377239888146785e-06, + "loss": 0.4889, + "step": 15538 + }, + { + "epoch": 0.7032812853586784, + "grad_norm": 0.647992038999156, + "learning_rate": 2.1371230712405783e-06, + "loss": 0.3623, + "step": 15539 + }, + { + "epoch": 0.703326544467074, + "grad_norm": 0.7498658647713098, + "learning_rate": 2.1365222151809106e-06, + "loss": 0.3067, + "step": 15540 + }, + { + "epoch": 0.7033718035754696, + "grad_norm": 0.5874224222939307, + "learning_rate": 2.1359214206485845e-06, + "loss": 0.2784, + "step": 15541 + }, + { + "epoch": 0.7034170626838652, + "grad_norm": 0.6189769199261504, + "learning_rate": 2.135320687656511e-06, + "loss": 0.3168, + "step": 15542 + }, + { + "epoch": 0.7034623217922606, + "grad_norm": 0.6490191871652556, + "learning_rate": 2.1347200162175984e-06, + "loss": 0.3384, + "step": 15543 + }, + { + "epoch": 0.7035075809006562, + "grad_norm": 0.6092680751785646, + "learning_rate": 2.1341194063447533e-06, + "loss": 0.335, + "step": 15544 + }, + { + "epoch": 0.7035528400090518, + "grad_norm": 0.6465144265028875, + "learning_rate": 2.133518858050879e-06, + "loss": 0.3049, + "step": 15545 + }, + { + "epoch": 0.7035980991174474, + "grad_norm": 0.42971251058278587, + "learning_rate": 2.132918371348882e-06, + "loss": 0.4656, + "step": 15546 + }, + { + "epoch": 0.7036433582258429, + "grad_norm": 0.5763015261511008, + "learning_rate": 2.132317946251662e-06, + "loss": 0.3116, + "step": 15547 + }, + { + "epoch": 0.7036886173342385, + "grad_norm": 0.7014396526280077, + "learning_rate": 2.1317175827721238e-06, + "loss": 0.2937, + "step": 15548 + }, + { + "epoch": 0.7037338764426341, + "grad_norm": 0.6105683067560459, + "learning_rate": 2.131117280923165e-06, + "loss": 0.3095, + "step": 15549 + }, + { + "epoch": 0.7037791355510297, + "grad_norm": 0.5806980703941906, + "learning_rate": 2.1305170407176836e-06, + "loss": 0.3185, + "step": 15550 + }, + { + "epoch": 0.7038243946594253, + "grad_norm": 0.3146629447531045, + "learning_rate": 2.1299168621685775e-06, + "loss": 0.4511, + "step": 15551 + }, + { + "epoch": 0.7038696537678207, + "grad_norm": 0.5856947087391554, + "learning_rate": 2.1293167452887452e-06, + "loss": 0.3012, + "step": 15552 + }, + { + "epoch": 0.7039149128762163, + "grad_norm": 0.69512091931565, + "learning_rate": 2.1287166900910796e-06, + "loss": 0.295, + "step": 15553 + }, + { + "epoch": 0.7039601719846119, + "grad_norm": 0.5998239511118214, + "learning_rate": 2.1281166965884715e-06, + "loss": 0.3547, + "step": 15554 + }, + { + "epoch": 0.7040054310930075, + "grad_norm": 0.7292774262951063, + "learning_rate": 2.1275167647938153e-06, + "loss": 0.3277, + "step": 15555 + }, + { + "epoch": 0.704050690201403, + "grad_norm": 0.610099937979236, + "learning_rate": 2.1269168947200043e-06, + "loss": 0.2779, + "step": 15556 + }, + { + "epoch": 0.7040959493097986, + "grad_norm": 0.7000545008934641, + "learning_rate": 2.126317086379925e-06, + "loss": 0.3535, + "step": 15557 + }, + { + "epoch": 0.7041412084181942, + "grad_norm": 0.26531741359570854, + "learning_rate": 2.1257173397864635e-06, + "loss": 0.4646, + "step": 15558 + }, + { + "epoch": 0.7041864675265898, + "grad_norm": 0.6500995480765627, + "learning_rate": 2.1251176549525102e-06, + "loss": 0.3249, + "step": 15559 + }, + { + "epoch": 0.7042317266349853, + "grad_norm": 0.5911001356915474, + "learning_rate": 2.1245180318909482e-06, + "loss": 0.3052, + "step": 15560 + }, + { + "epoch": 0.7042769857433808, + "grad_norm": 0.6448318552907646, + "learning_rate": 2.123918470614663e-06, + "loss": 0.3311, + "step": 15561 + }, + { + "epoch": 0.7043222448517764, + "grad_norm": 0.5916943962480607, + "learning_rate": 2.1233189711365374e-06, + "loss": 0.2825, + "step": 15562 + }, + { + "epoch": 0.704367503960172, + "grad_norm": 0.5972384349849474, + "learning_rate": 2.12271953346945e-06, + "loss": 0.3075, + "step": 15563 + }, + { + "epoch": 0.7044127630685676, + "grad_norm": 0.7624894797514498, + "learning_rate": 2.1221201576262828e-06, + "loss": 0.3396, + "step": 15564 + }, + { + "epoch": 0.7044580221769631, + "grad_norm": 0.5869299786308838, + "learning_rate": 2.121520843619917e-06, + "loss": 0.294, + "step": 15565 + }, + { + "epoch": 0.7045032812853587, + "grad_norm": 0.5884655200661071, + "learning_rate": 2.1209215914632275e-06, + "loss": 0.2832, + "step": 15566 + }, + { + "epoch": 0.7045485403937543, + "grad_norm": 0.27608380802360893, + "learning_rate": 2.120322401169088e-06, + "loss": 0.4624, + "step": 15567 + }, + { + "epoch": 0.7045937995021498, + "grad_norm": 0.6114286440045849, + "learning_rate": 2.119723272750379e-06, + "loss": 0.2897, + "step": 15568 + }, + { + "epoch": 0.7046390586105453, + "grad_norm": 0.6040466508722457, + "learning_rate": 2.1191242062199695e-06, + "loss": 0.295, + "step": 15569 + }, + { + "epoch": 0.7046843177189409, + "grad_norm": 0.7752228122059384, + "learning_rate": 2.118525201590732e-06, + "loss": 0.2781, + "step": 15570 + }, + { + "epoch": 0.7047295768273365, + "grad_norm": 0.6012127086414305, + "learning_rate": 2.117926258875538e-06, + "loss": 0.3222, + "step": 15571 + }, + { + "epoch": 0.7047748359357321, + "grad_norm": 0.28231026926098374, + "learning_rate": 2.1173273780872584e-06, + "loss": 0.4625, + "step": 15572 + }, + { + "epoch": 0.7048200950441277, + "grad_norm": 0.6151721387865137, + "learning_rate": 2.11672855923876e-06, + "loss": 0.3222, + "step": 15573 + }, + { + "epoch": 0.7048653541525232, + "grad_norm": 1.0305312559650226, + "learning_rate": 2.1161298023429076e-06, + "loss": 0.2739, + "step": 15574 + }, + { + "epoch": 0.7049106132609188, + "grad_norm": 0.615711634922465, + "learning_rate": 2.1155311074125713e-06, + "loss": 0.2865, + "step": 15575 + }, + { + "epoch": 0.7049558723693143, + "grad_norm": 0.6168852774937957, + "learning_rate": 2.1149324744606103e-06, + "loss": 0.2861, + "step": 15576 + }, + { + "epoch": 0.7050011314777099, + "grad_norm": 0.6119238649528917, + "learning_rate": 2.114333903499891e-06, + "loss": 0.3326, + "step": 15577 + }, + { + "epoch": 0.7050463905861054, + "grad_norm": 0.5819637721539274, + "learning_rate": 2.1137353945432743e-06, + "loss": 0.3255, + "step": 15578 + }, + { + "epoch": 0.705091649694501, + "grad_norm": 0.6676049772112951, + "learning_rate": 2.1131369476036173e-06, + "loss": 0.3066, + "step": 15579 + }, + { + "epoch": 0.7051369088028966, + "grad_norm": 0.2675541874957772, + "learning_rate": 2.1125385626937806e-06, + "loss": 0.4758, + "step": 15580 + }, + { + "epoch": 0.7051821679112922, + "grad_norm": 0.6290051475284912, + "learning_rate": 2.111940239826624e-06, + "loss": 0.2666, + "step": 15581 + }, + { + "epoch": 0.7052274270196877, + "grad_norm": 0.5945427288601011, + "learning_rate": 2.1113419790150017e-06, + "loss": 0.3403, + "step": 15582 + }, + { + "epoch": 0.7052726861280832, + "grad_norm": 0.2695891017928327, + "learning_rate": 2.1107437802717667e-06, + "loss": 0.4439, + "step": 15583 + }, + { + "epoch": 0.7053179452364788, + "grad_norm": 0.2815561171450684, + "learning_rate": 2.1101456436097744e-06, + "loss": 0.4736, + "step": 15584 + }, + { + "epoch": 0.7053632043448744, + "grad_norm": 0.6002777685049623, + "learning_rate": 2.109547569041878e-06, + "loss": 0.3111, + "step": 15585 + }, + { + "epoch": 0.70540846345327, + "grad_norm": 0.6074427839267478, + "learning_rate": 2.1089495565809274e-06, + "loss": 0.2802, + "step": 15586 + }, + { + "epoch": 0.7054537225616655, + "grad_norm": 0.2618171024604309, + "learning_rate": 2.10835160623977e-06, + "loss": 0.4811, + "step": 15587 + }, + { + "epoch": 0.7054989816700611, + "grad_norm": 0.6403282477430244, + "learning_rate": 2.1077537180312568e-06, + "loss": 0.2957, + "step": 15588 + }, + { + "epoch": 0.7055442407784567, + "grad_norm": 0.6931161483217249, + "learning_rate": 2.107155891968232e-06, + "loss": 0.2813, + "step": 15589 + }, + { + "epoch": 0.7055894998868523, + "grad_norm": 0.5854855835134954, + "learning_rate": 2.106558128063544e-06, + "loss": 0.2764, + "step": 15590 + }, + { + "epoch": 0.7056347589952477, + "grad_norm": 0.9400059804734506, + "learning_rate": 2.1059604263300354e-06, + "loss": 0.2293, + "step": 15591 + }, + { + "epoch": 0.7056800181036433, + "grad_norm": 0.31758819314786835, + "learning_rate": 2.105362786780547e-06, + "loss": 0.448, + "step": 15592 + }, + { + "epoch": 0.7057252772120389, + "grad_norm": 0.59255163939248, + "learning_rate": 2.104765209427922e-06, + "loss": 0.2823, + "step": 15593 + }, + { + "epoch": 0.7057705363204345, + "grad_norm": 0.6202984057488541, + "learning_rate": 2.1041676942850025e-06, + "loss": 0.2659, + "step": 15594 + }, + { + "epoch": 0.7058157954288301, + "grad_norm": 0.6665940338013063, + "learning_rate": 2.1035702413646257e-06, + "loss": 0.3219, + "step": 15595 + }, + { + "epoch": 0.7058610545372256, + "grad_norm": 0.2709211805243884, + "learning_rate": 2.1029728506796266e-06, + "loss": 0.462, + "step": 15596 + }, + { + "epoch": 0.7059063136456212, + "grad_norm": 0.6551666991759286, + "learning_rate": 2.1023755222428434e-06, + "loss": 0.3088, + "step": 15597 + }, + { + "epoch": 0.7059515727540168, + "grad_norm": 0.6156762722283183, + "learning_rate": 2.1017782560671124e-06, + "loss": 0.3216, + "step": 15598 + }, + { + "epoch": 0.7059968318624124, + "grad_norm": 0.4609435977100646, + "learning_rate": 2.101181052165266e-06, + "loss": 0.4734, + "step": 15599 + }, + { + "epoch": 0.7060420909708078, + "grad_norm": 0.6868399069144914, + "learning_rate": 2.1005839105501336e-06, + "loss": 0.3084, + "step": 15600 + }, + { + "epoch": 0.7060873500792034, + "grad_norm": 0.6540307934496004, + "learning_rate": 2.09998683123455e-06, + "loss": 0.3307, + "step": 15601 + }, + { + "epoch": 0.706132609187599, + "grad_norm": 0.5804650262670074, + "learning_rate": 2.0993898142313428e-06, + "loss": 0.2532, + "step": 15602 + }, + { + "epoch": 0.7061778682959946, + "grad_norm": 0.7045137113564987, + "learning_rate": 2.098792859553338e-06, + "loss": 0.3121, + "step": 15603 + }, + { + "epoch": 0.7062231274043901, + "grad_norm": 0.5985304281397166, + "learning_rate": 2.0981959672133663e-06, + "loss": 0.2671, + "step": 15604 + }, + { + "epoch": 0.7062683865127857, + "grad_norm": 0.6932785443502347, + "learning_rate": 2.0975991372242488e-06, + "loss": 0.3095, + "step": 15605 + }, + { + "epoch": 0.7063136456211813, + "grad_norm": 0.25596252597725394, + "learning_rate": 2.097002369598814e-06, + "loss": 0.4578, + "step": 15606 + }, + { + "epoch": 0.7063589047295769, + "grad_norm": 0.6023667287025952, + "learning_rate": 2.096405664349882e-06, + "loss": 0.3176, + "step": 15607 + }, + { + "epoch": 0.7064041638379724, + "grad_norm": 0.6669780970769399, + "learning_rate": 2.095809021490273e-06, + "loss": 0.3647, + "step": 15608 + }, + { + "epoch": 0.7064494229463679, + "grad_norm": 0.27753170005461786, + "learning_rate": 2.0952124410328085e-06, + "loss": 0.4931, + "step": 15609 + }, + { + "epoch": 0.7064946820547635, + "grad_norm": 0.5896931641117975, + "learning_rate": 2.094615922990309e-06, + "loss": 0.294, + "step": 15610 + }, + { + "epoch": 0.7065399411631591, + "grad_norm": 0.28245895056221954, + "learning_rate": 2.0940194673755903e-06, + "loss": 0.4577, + "step": 15611 + }, + { + "epoch": 0.7065852002715547, + "grad_norm": 0.5821755169282244, + "learning_rate": 2.0934230742014666e-06, + "loss": 0.2881, + "step": 15612 + }, + { + "epoch": 0.7066304593799502, + "grad_norm": 0.6095114697705112, + "learning_rate": 2.0928267434807537e-06, + "loss": 0.3145, + "step": 15613 + }, + { + "epoch": 0.7066757184883458, + "grad_norm": 0.5966172519027911, + "learning_rate": 2.0922304752262672e-06, + "loss": 0.3091, + "step": 15614 + }, + { + "epoch": 0.7067209775967414, + "grad_norm": 0.5854903933345119, + "learning_rate": 2.0916342694508177e-06, + "loss": 0.2981, + "step": 15615 + }, + { + "epoch": 0.7067662367051369, + "grad_norm": 0.6476592475962757, + "learning_rate": 2.0910381261672136e-06, + "loss": 0.303, + "step": 15616 + }, + { + "epoch": 0.7068114958135324, + "grad_norm": 0.6178026593378579, + "learning_rate": 2.0904420453882675e-06, + "loss": 0.3085, + "step": 15617 + }, + { + "epoch": 0.706856754921928, + "grad_norm": 0.6411289753975102, + "learning_rate": 2.089846027126784e-06, + "loss": 0.3193, + "step": 15618 + }, + { + "epoch": 0.7069020140303236, + "grad_norm": 0.5891169438856212, + "learning_rate": 2.089250071395573e-06, + "loss": 0.2949, + "step": 15619 + }, + { + "epoch": 0.7069472731387192, + "grad_norm": 0.2940271428331134, + "learning_rate": 2.088654178207439e-06, + "loss": 0.4793, + "step": 15620 + }, + { + "epoch": 0.7069925322471148, + "grad_norm": 0.3054544662909863, + "learning_rate": 2.088058347575183e-06, + "loss": 0.4955, + "step": 15621 + }, + { + "epoch": 0.7070377913555103, + "grad_norm": 0.6651374294296094, + "learning_rate": 2.08746257951161e-06, + "loss": 0.3594, + "step": 15622 + }, + { + "epoch": 0.7070830504639058, + "grad_norm": 0.7210944144731016, + "learning_rate": 2.0868668740295223e-06, + "loss": 0.3299, + "step": 15623 + }, + { + "epoch": 0.7071283095723014, + "grad_norm": 0.5831154354291413, + "learning_rate": 2.086271231141719e-06, + "loss": 0.3078, + "step": 15624 + }, + { + "epoch": 0.707173568680697, + "grad_norm": 0.623507882297109, + "learning_rate": 2.085675650860996e-06, + "loss": 0.3114, + "step": 15625 + }, + { + "epoch": 0.7072188277890925, + "grad_norm": 0.6069100613245061, + "learning_rate": 2.0850801332001526e-06, + "loss": 0.3274, + "step": 15626 + }, + { + "epoch": 0.7072640868974881, + "grad_norm": 0.2759847739560776, + "learning_rate": 2.0844846781719865e-06, + "loss": 0.463, + "step": 15627 + }, + { + "epoch": 0.7073093460058837, + "grad_norm": 0.6771389749984982, + "learning_rate": 2.0838892857892908e-06, + "loss": 0.2949, + "step": 15628 + }, + { + "epoch": 0.7073546051142793, + "grad_norm": 0.5745668071147314, + "learning_rate": 2.0832939560648557e-06, + "loss": 0.2532, + "step": 15629 + }, + { + "epoch": 0.7073998642226749, + "grad_norm": 0.7284067278697884, + "learning_rate": 2.0826986890114775e-06, + "loss": 0.3186, + "step": 15630 + }, + { + "epoch": 0.7074451233310703, + "grad_norm": 0.6221647222093457, + "learning_rate": 2.082103484641943e-06, + "loss": 0.3153, + "step": 15631 + }, + { + "epoch": 0.7074903824394659, + "grad_norm": 0.6522605474785989, + "learning_rate": 2.0815083429690445e-06, + "loss": 0.3593, + "step": 15632 + }, + { + "epoch": 0.7075356415478615, + "grad_norm": 0.6066986056777576, + "learning_rate": 2.0809132640055685e-06, + "loss": 0.2701, + "step": 15633 + }, + { + "epoch": 0.7075809006562571, + "grad_norm": 0.5604974164989961, + "learning_rate": 2.080318247764299e-06, + "loss": 0.2929, + "step": 15634 + }, + { + "epoch": 0.7076261597646526, + "grad_norm": 0.5917143842030397, + "learning_rate": 2.0797232942580238e-06, + "loss": 0.2509, + "step": 15635 + }, + { + "epoch": 0.7076714188730482, + "grad_norm": 0.6229136734813554, + "learning_rate": 2.0791284034995296e-06, + "loss": 0.3029, + "step": 15636 + }, + { + "epoch": 0.7077166779814438, + "grad_norm": 0.734163918066207, + "learning_rate": 2.0785335755015913e-06, + "loss": 0.3006, + "step": 15637 + }, + { + "epoch": 0.7077619370898394, + "grad_norm": 0.4989460637768117, + "learning_rate": 2.077938810276994e-06, + "loss": 0.4719, + "step": 15638 + }, + { + "epoch": 0.7078071961982348, + "grad_norm": 0.6273674589339869, + "learning_rate": 2.0773441078385194e-06, + "loss": 0.3453, + "step": 15639 + }, + { + "epoch": 0.7078524553066304, + "grad_norm": 0.6310211789960349, + "learning_rate": 2.076749468198943e-06, + "loss": 0.3269, + "step": 15640 + }, + { + "epoch": 0.707897714415026, + "grad_norm": 0.6198212516711802, + "learning_rate": 2.076154891371041e-06, + "loss": 0.2853, + "step": 15641 + }, + { + "epoch": 0.7079429735234216, + "grad_norm": 0.6392808620470593, + "learning_rate": 2.0755603773675905e-06, + "loss": 0.3448, + "step": 15642 + }, + { + "epoch": 0.7079882326318172, + "grad_norm": 0.278762441922395, + "learning_rate": 2.0749659262013676e-06, + "loss": 0.4709, + "step": 15643 + }, + { + "epoch": 0.7080334917402127, + "grad_norm": 0.2769252676380643, + "learning_rate": 2.074371537885143e-06, + "loss": 0.4739, + "step": 15644 + }, + { + "epoch": 0.7080787508486083, + "grad_norm": 0.5528948171061779, + "learning_rate": 2.0737772124316872e-06, + "loss": 0.2756, + "step": 15645 + }, + { + "epoch": 0.7081240099570039, + "grad_norm": 0.6144812745521786, + "learning_rate": 2.0731829498537743e-06, + "loss": 0.2993, + "step": 15646 + }, + { + "epoch": 0.7081692690653995, + "grad_norm": 0.6165657421144701, + "learning_rate": 2.072588750164168e-06, + "loss": 0.3328, + "step": 15647 + }, + { + "epoch": 0.7082145281737949, + "grad_norm": 0.617449276444202, + "learning_rate": 2.071994613375641e-06, + "loss": 0.3008, + "step": 15648 + }, + { + "epoch": 0.7082597872821905, + "grad_norm": 0.6203225191777019, + "learning_rate": 2.0714005395009566e-06, + "loss": 0.3041, + "step": 15649 + }, + { + "epoch": 0.7083050463905861, + "grad_norm": 0.671178645171008, + "learning_rate": 2.0708065285528784e-06, + "loss": 0.3275, + "step": 15650 + }, + { + "epoch": 0.7083503054989817, + "grad_norm": 0.29431578101845934, + "learning_rate": 2.070212580544172e-06, + "loss": 0.4803, + "step": 15651 + }, + { + "epoch": 0.7083955646073772, + "grad_norm": 0.666418692678892, + "learning_rate": 2.0696186954876002e-06, + "loss": 0.2571, + "step": 15652 + }, + { + "epoch": 0.7084408237157728, + "grad_norm": 0.6247469527950302, + "learning_rate": 2.0690248733959235e-06, + "loss": 0.2972, + "step": 15653 + }, + { + "epoch": 0.7084860828241684, + "grad_norm": 0.623897382602735, + "learning_rate": 2.068431114281898e-06, + "loss": 0.3069, + "step": 15654 + }, + { + "epoch": 0.708531341932564, + "grad_norm": 0.5754801717126035, + "learning_rate": 2.0678374181582845e-06, + "loss": 0.2809, + "step": 15655 + }, + { + "epoch": 0.7085766010409595, + "grad_norm": 0.6202131802305537, + "learning_rate": 2.0672437850378414e-06, + "loss": 0.2959, + "step": 15656 + }, + { + "epoch": 0.708621860149355, + "grad_norm": 0.6962650105072428, + "learning_rate": 2.0666502149333215e-06, + "loss": 0.3044, + "step": 15657 + }, + { + "epoch": 0.7086671192577506, + "grad_norm": 0.5738380545478389, + "learning_rate": 2.066056707857478e-06, + "loss": 0.2375, + "step": 15658 + }, + { + "epoch": 0.7087123783661462, + "grad_norm": 0.6153974552831404, + "learning_rate": 2.0654632638230664e-06, + "loss": 0.3182, + "step": 15659 + }, + { + "epoch": 0.7087576374745418, + "grad_norm": 0.583783207426821, + "learning_rate": 2.064869882842835e-06, + "loss": 0.3539, + "step": 15660 + }, + { + "epoch": 0.7088028965829373, + "grad_norm": 0.5867962921817134, + "learning_rate": 2.064276564929537e-06, + "loss": 0.3274, + "step": 15661 + }, + { + "epoch": 0.7088481556913329, + "grad_norm": 0.6622066610058321, + "learning_rate": 2.0636833100959198e-06, + "loss": 0.2961, + "step": 15662 + }, + { + "epoch": 0.7088934147997284, + "grad_norm": 0.68034769054094, + "learning_rate": 2.0630901183547274e-06, + "loss": 0.3433, + "step": 15663 + }, + { + "epoch": 0.708938673908124, + "grad_norm": 0.6299499173817064, + "learning_rate": 2.0624969897187084e-06, + "loss": 0.2824, + "step": 15664 + }, + { + "epoch": 0.7089839330165196, + "grad_norm": 0.6134710115056257, + "learning_rate": 2.0619039242006117e-06, + "loss": 0.2903, + "step": 15665 + }, + { + "epoch": 0.7090291921249151, + "grad_norm": 1.1167269850382753, + "learning_rate": 2.0613109218131717e-06, + "loss": 0.2736, + "step": 15666 + }, + { + "epoch": 0.7090744512333107, + "grad_norm": 0.5868181423205776, + "learning_rate": 2.0607179825691344e-06, + "loss": 0.2655, + "step": 15667 + }, + { + "epoch": 0.7091197103417063, + "grad_norm": 0.6804760253555822, + "learning_rate": 2.0601251064812407e-06, + "loss": 0.2824, + "step": 15668 + }, + { + "epoch": 0.7091649694501019, + "grad_norm": 0.6415825775100403, + "learning_rate": 2.0595322935622326e-06, + "loss": 0.3331, + "step": 15669 + }, + { + "epoch": 0.7092102285584974, + "grad_norm": 0.9332292806580279, + "learning_rate": 2.058939543824841e-06, + "loss": 0.2952, + "step": 15670 + }, + { + "epoch": 0.709255487666893, + "grad_norm": 0.6043048873822964, + "learning_rate": 2.058346857281806e-06, + "loss": 0.3002, + "step": 15671 + }, + { + "epoch": 0.7093007467752885, + "grad_norm": 0.6590343921393856, + "learning_rate": 2.0577542339458647e-06, + "loss": 0.3137, + "step": 15672 + }, + { + "epoch": 0.7093460058836841, + "grad_norm": 0.29385629964947496, + "learning_rate": 2.0571616738297473e-06, + "loss": 0.4802, + "step": 15673 + }, + { + "epoch": 0.7093912649920796, + "grad_norm": 0.6251899858791947, + "learning_rate": 2.0565691769461865e-06, + "loss": 0.3158, + "step": 15674 + }, + { + "epoch": 0.7094365241004752, + "grad_norm": 0.5689479115374646, + "learning_rate": 2.0559767433079154e-06, + "loss": 0.3026, + "step": 15675 + }, + { + "epoch": 0.7094817832088708, + "grad_norm": 0.5731850317292073, + "learning_rate": 2.0553843729276606e-06, + "loss": 0.2944, + "step": 15676 + }, + { + "epoch": 0.7095270423172664, + "grad_norm": 0.6013672663083953, + "learning_rate": 2.0547920658181535e-06, + "loss": 0.309, + "step": 15677 + }, + { + "epoch": 0.709572301425662, + "grad_norm": 0.5966705722753385, + "learning_rate": 2.0541998219921194e-06, + "loss": 0.2835, + "step": 15678 + }, + { + "epoch": 0.7096175605340574, + "grad_norm": 0.6065622374492098, + "learning_rate": 2.0536076414622824e-06, + "loss": 0.2486, + "step": 15679 + }, + { + "epoch": 0.709662819642453, + "grad_norm": 0.6093719632820457, + "learning_rate": 2.0530155242413676e-06, + "loss": 0.3338, + "step": 15680 + }, + { + "epoch": 0.7097080787508486, + "grad_norm": 0.7299538787359219, + "learning_rate": 2.0524234703421003e-06, + "loss": 0.2503, + "step": 15681 + }, + { + "epoch": 0.7097533378592442, + "grad_norm": 0.6195740147006733, + "learning_rate": 2.0518314797771993e-06, + "loss": 0.3176, + "step": 15682 + }, + { + "epoch": 0.7097985969676397, + "grad_norm": 0.27778375446200076, + "learning_rate": 2.0512395525593842e-06, + "loss": 0.4632, + "step": 15683 + }, + { + "epoch": 0.7098438560760353, + "grad_norm": 0.31267117915988685, + "learning_rate": 2.050647688701374e-06, + "loss": 0.4892, + "step": 15684 + }, + { + "epoch": 0.7098891151844309, + "grad_norm": 0.5625030089547218, + "learning_rate": 2.050055888215889e-06, + "loss": 0.3681, + "step": 15685 + }, + { + "epoch": 0.7099343742928265, + "grad_norm": 0.5762043814346973, + "learning_rate": 2.0494641511156426e-06, + "loss": 0.2921, + "step": 15686 + }, + { + "epoch": 0.7099796334012219, + "grad_norm": 0.677688149072792, + "learning_rate": 2.048872477413348e-06, + "loss": 0.3002, + "step": 15687 + }, + { + "epoch": 0.7100248925096175, + "grad_norm": 0.2675888295042279, + "learning_rate": 2.048280867121722e-06, + "loss": 0.4714, + "step": 15688 + }, + { + "epoch": 0.7100701516180131, + "grad_norm": 0.610043033065155, + "learning_rate": 2.0476893202534726e-06, + "loss": 0.2863, + "step": 15689 + }, + { + "epoch": 0.7101154107264087, + "grad_norm": 0.6019687341898033, + "learning_rate": 2.0470978368213145e-06, + "loss": 0.2699, + "step": 15690 + }, + { + "epoch": 0.7101606698348043, + "grad_norm": 0.6557212029775732, + "learning_rate": 2.0465064168379547e-06, + "loss": 0.3047, + "step": 15691 + }, + { + "epoch": 0.7102059289431998, + "grad_norm": 0.5681907011018104, + "learning_rate": 2.0459150603160993e-06, + "loss": 0.3123, + "step": 15692 + }, + { + "epoch": 0.7102511880515954, + "grad_norm": 0.6937559347912634, + "learning_rate": 2.045323767268456e-06, + "loss": 0.3232, + "step": 15693 + }, + { + "epoch": 0.710296447159991, + "grad_norm": 0.7626075879462596, + "learning_rate": 2.0447325377077344e-06, + "loss": 0.3281, + "step": 15694 + }, + { + "epoch": 0.7103417062683866, + "grad_norm": 0.6171676917923269, + "learning_rate": 2.0441413716466308e-06, + "loss": 0.3237, + "step": 15695 + }, + { + "epoch": 0.710386965376782, + "grad_norm": 0.5615951852382349, + "learning_rate": 2.0435502690978502e-06, + "loss": 0.2728, + "step": 15696 + }, + { + "epoch": 0.7104322244851776, + "grad_norm": 0.6073481684244535, + "learning_rate": 2.0429592300740945e-06, + "loss": 0.272, + "step": 15697 + }, + { + "epoch": 0.7104774835935732, + "grad_norm": 0.5990557902507999, + "learning_rate": 2.042368254588067e-06, + "loss": 0.2885, + "step": 15698 + }, + { + "epoch": 0.7105227427019688, + "grad_norm": 0.5806910031766905, + "learning_rate": 2.0417773426524583e-06, + "loss": 0.2918, + "step": 15699 + }, + { + "epoch": 0.7105680018103643, + "grad_norm": 0.6167601568081476, + "learning_rate": 2.0411864942799685e-06, + "loss": 0.3183, + "step": 15700 + }, + { + "epoch": 0.7106132609187599, + "grad_norm": 0.2933210594741521, + "learning_rate": 2.0405957094832962e-06, + "loss": 0.4806, + "step": 15701 + }, + { + "epoch": 0.7106585200271555, + "grad_norm": 0.6009201878774669, + "learning_rate": 2.0400049882751327e-06, + "loss": 0.2852, + "step": 15702 + }, + { + "epoch": 0.710703779135551, + "grad_norm": 0.5691961115059543, + "learning_rate": 2.0394143306681692e-06, + "loss": 0.3211, + "step": 15703 + }, + { + "epoch": 0.7107490382439466, + "grad_norm": 0.6433385542199535, + "learning_rate": 2.0388237366751005e-06, + "loss": 0.313, + "step": 15704 + }, + { + "epoch": 0.7107942973523421, + "grad_norm": 0.6882102875117379, + "learning_rate": 2.038233206308614e-06, + "loss": 0.312, + "step": 15705 + }, + { + "epoch": 0.7108395564607377, + "grad_norm": 0.6308695404554812, + "learning_rate": 2.037642739581401e-06, + "loss": 0.2823, + "step": 15706 + }, + { + "epoch": 0.7108848155691333, + "grad_norm": 0.6075481401338595, + "learning_rate": 2.0370523365061473e-06, + "loss": 0.2824, + "step": 15707 + }, + { + "epoch": 0.7109300746775289, + "grad_norm": 0.6553470517522016, + "learning_rate": 2.0364619970955373e-06, + "loss": 0.3136, + "step": 15708 + }, + { + "epoch": 0.7109753337859244, + "grad_norm": 0.6008238589049834, + "learning_rate": 2.035871721362257e-06, + "loss": 0.3161, + "step": 15709 + }, + { + "epoch": 0.71102059289432, + "grad_norm": 0.6521871065838722, + "learning_rate": 2.0352815093189913e-06, + "loss": 0.2807, + "step": 15710 + }, + { + "epoch": 0.7110658520027155, + "grad_norm": 1.0025184314929678, + "learning_rate": 2.0346913609784215e-06, + "loss": 0.2905, + "step": 15711 + }, + { + "epoch": 0.7111111111111111, + "grad_norm": 0.7522931147926903, + "learning_rate": 2.0341012763532243e-06, + "loss": 0.3228, + "step": 15712 + }, + { + "epoch": 0.7111563702195067, + "grad_norm": 0.6231508704120658, + "learning_rate": 2.033511255456082e-06, + "loss": 0.3222, + "step": 15713 + }, + { + "epoch": 0.7112016293279022, + "grad_norm": 0.6309365311711861, + "learning_rate": 2.032921298299674e-06, + "loss": 0.2979, + "step": 15714 + }, + { + "epoch": 0.7112468884362978, + "grad_norm": 0.624392999154907, + "learning_rate": 2.0323314048966737e-06, + "loss": 0.2789, + "step": 15715 + }, + { + "epoch": 0.7112921475446934, + "grad_norm": 0.579559880412466, + "learning_rate": 2.031741575259756e-06, + "loss": 0.3034, + "step": 15716 + }, + { + "epoch": 0.711337406653089, + "grad_norm": 0.5873497811127301, + "learning_rate": 2.031151809401597e-06, + "loss": 0.3132, + "step": 15717 + }, + { + "epoch": 0.7113826657614845, + "grad_norm": 0.6213809208434222, + "learning_rate": 2.030562107334866e-06, + "loss": 0.3197, + "step": 15718 + }, + { + "epoch": 0.71142792486988, + "grad_norm": 0.2944410583813787, + "learning_rate": 2.0299724690722367e-06, + "loss": 0.4771, + "step": 15719 + }, + { + "epoch": 0.7114731839782756, + "grad_norm": 0.6200291281083014, + "learning_rate": 2.029382894626378e-06, + "loss": 0.2686, + "step": 15720 + }, + { + "epoch": 0.7115184430866712, + "grad_norm": 0.6471888141535328, + "learning_rate": 2.028793384009955e-06, + "loss": 0.2802, + "step": 15721 + }, + { + "epoch": 0.7115637021950667, + "grad_norm": 0.5992679305582995, + "learning_rate": 2.028203937235637e-06, + "loss": 0.3292, + "step": 15722 + }, + { + "epoch": 0.7116089613034623, + "grad_norm": 0.6208222626265416, + "learning_rate": 2.0276145543160923e-06, + "loss": 0.3402, + "step": 15723 + }, + { + "epoch": 0.7116542204118579, + "grad_norm": 0.5213881130170567, + "learning_rate": 2.027025235263979e-06, + "loss": 0.2572, + "step": 15724 + }, + { + "epoch": 0.7116994795202535, + "grad_norm": 0.6372870238486473, + "learning_rate": 2.0264359800919626e-06, + "loss": 0.3361, + "step": 15725 + }, + { + "epoch": 0.7117447386286491, + "grad_norm": 0.5644816426982812, + "learning_rate": 2.0258467888127036e-06, + "loss": 0.289, + "step": 15726 + }, + { + "epoch": 0.7117899977370445, + "grad_norm": 0.6133526414307099, + "learning_rate": 2.0252576614388668e-06, + "loss": 0.3023, + "step": 15727 + }, + { + "epoch": 0.7118352568454401, + "grad_norm": 0.635948868871442, + "learning_rate": 2.024668597983103e-06, + "loss": 0.3226, + "step": 15728 + }, + { + "epoch": 0.7118805159538357, + "grad_norm": 0.6566520390761876, + "learning_rate": 2.0240795984580734e-06, + "loss": 0.315, + "step": 15729 + }, + { + "epoch": 0.7119257750622313, + "grad_norm": 0.6218129037138104, + "learning_rate": 2.023490662876435e-06, + "loss": 0.2813, + "step": 15730 + }, + { + "epoch": 0.7119710341706268, + "grad_norm": 0.6490607702973659, + "learning_rate": 2.0229017912508403e-06, + "loss": 0.3203, + "step": 15731 + }, + { + "epoch": 0.7120162932790224, + "grad_norm": 0.6511603840546198, + "learning_rate": 2.022312983593941e-06, + "loss": 0.3047, + "step": 15732 + }, + { + "epoch": 0.712061552387418, + "grad_norm": 0.6033531094960397, + "learning_rate": 2.021724239918392e-06, + "loss": 0.2785, + "step": 15733 + }, + { + "epoch": 0.7121068114958136, + "grad_norm": 0.60674720041964, + "learning_rate": 2.0211355602368404e-06, + "loss": 0.3141, + "step": 15734 + }, + { + "epoch": 0.712152070604209, + "grad_norm": 0.9254902853327204, + "learning_rate": 2.0205469445619386e-06, + "loss": 0.3585, + "step": 15735 + }, + { + "epoch": 0.7121973297126046, + "grad_norm": 0.5968363646845488, + "learning_rate": 2.019958392906332e-06, + "loss": 0.3517, + "step": 15736 + }, + { + "epoch": 0.7122425888210002, + "grad_norm": 0.6486568418701398, + "learning_rate": 2.0193699052826656e-06, + "loss": 0.3078, + "step": 15737 + }, + { + "epoch": 0.7122878479293958, + "grad_norm": 0.6678475248324102, + "learning_rate": 2.0187814817035855e-06, + "loss": 0.2922, + "step": 15738 + }, + { + "epoch": 0.7123331070377914, + "grad_norm": 0.7596394560633785, + "learning_rate": 2.018193122181737e-06, + "loss": 0.2679, + "step": 15739 + }, + { + "epoch": 0.7123783661461869, + "grad_norm": 0.5864552075821262, + "learning_rate": 2.0176048267297603e-06, + "loss": 0.3269, + "step": 15740 + }, + { + "epoch": 0.7124236252545825, + "grad_norm": 1.2647697192982412, + "learning_rate": 2.0170165953602944e-06, + "loss": 0.4661, + "step": 15741 + }, + { + "epoch": 0.7124688843629781, + "grad_norm": 0.6058089185740559, + "learning_rate": 2.0164284280859803e-06, + "loss": 0.3029, + "step": 15742 + }, + { + "epoch": 0.7125141434713737, + "grad_norm": 0.6800456990515935, + "learning_rate": 2.015840324919458e-06, + "loss": 0.3031, + "step": 15743 + }, + { + "epoch": 0.7125594025797691, + "grad_norm": 0.6160556600016963, + "learning_rate": 2.0152522858733613e-06, + "loss": 0.2906, + "step": 15744 + }, + { + "epoch": 0.7126046616881647, + "grad_norm": 0.6190587645711407, + "learning_rate": 2.0146643109603247e-06, + "loss": 0.315, + "step": 15745 + }, + { + "epoch": 0.7126499207965603, + "grad_norm": 0.630589774663842, + "learning_rate": 2.0140764001929853e-06, + "loss": 0.3072, + "step": 15746 + }, + { + "epoch": 0.7126951799049559, + "grad_norm": 0.27009444682988265, + "learning_rate": 2.0134885535839714e-06, + "loss": 0.4883, + "step": 15747 + }, + { + "epoch": 0.7127404390133515, + "grad_norm": 0.5805411200121087, + "learning_rate": 2.012900771145918e-06, + "loss": 0.2955, + "step": 15748 + }, + { + "epoch": 0.712785698121747, + "grad_norm": 0.6036597094340551, + "learning_rate": 2.012313052891453e-06, + "loss": 0.2941, + "step": 15749 + }, + { + "epoch": 0.7128309572301426, + "grad_norm": 0.29160376342527783, + "learning_rate": 2.0117253988332023e-06, + "loss": 0.4879, + "step": 15750 + }, + { + "epoch": 0.7128762163385381, + "grad_norm": 0.287614861835318, + "learning_rate": 2.0111378089837958e-06, + "loss": 0.4641, + "step": 15751 + }, + { + "epoch": 0.7129214754469337, + "grad_norm": 0.6776791947833849, + "learning_rate": 2.010550283355861e-06, + "loss": 0.3494, + "step": 15752 + }, + { + "epoch": 0.7129667345553292, + "grad_norm": 0.6812298987323966, + "learning_rate": 2.009962821962016e-06, + "loss": 0.2833, + "step": 15753 + }, + { + "epoch": 0.7130119936637248, + "grad_norm": 0.6090574363970508, + "learning_rate": 2.009375424814886e-06, + "loss": 0.2879, + "step": 15754 + }, + { + "epoch": 0.7130572527721204, + "grad_norm": 0.8689524193398631, + "learning_rate": 2.0087880919270943e-06, + "loss": 0.3274, + "step": 15755 + }, + { + "epoch": 0.713102511880516, + "grad_norm": 0.5948898312977335, + "learning_rate": 2.008200823311263e-06, + "loss": 0.2807, + "step": 15756 + }, + { + "epoch": 0.7131477709889115, + "grad_norm": 0.6558636463613668, + "learning_rate": 2.0076136189800033e-06, + "loss": 0.3436, + "step": 15757 + }, + { + "epoch": 0.7131930300973071, + "grad_norm": 0.851091129043898, + "learning_rate": 2.0070264789459365e-06, + "loss": 0.2648, + "step": 15758 + }, + { + "epoch": 0.7132382892057026, + "grad_norm": 0.5796179561464531, + "learning_rate": 2.0064394032216807e-06, + "loss": 0.299, + "step": 15759 + }, + { + "epoch": 0.7132835483140982, + "grad_norm": 0.5994608703240536, + "learning_rate": 2.0058523918198473e-06, + "loss": 0.3189, + "step": 15760 + }, + { + "epoch": 0.7133288074224938, + "grad_norm": 0.5952886279712135, + "learning_rate": 2.0052654447530497e-06, + "loss": 0.3101, + "step": 15761 + }, + { + "epoch": 0.7133740665308893, + "grad_norm": 0.5411759893169642, + "learning_rate": 2.004678562033901e-06, + "loss": 0.2733, + "step": 15762 + }, + { + "epoch": 0.7134193256392849, + "grad_norm": 0.6305517774581135, + "learning_rate": 2.004091743675009e-06, + "loss": 0.2811, + "step": 15763 + }, + { + "epoch": 0.7134645847476805, + "grad_norm": 0.622821270648253, + "learning_rate": 2.0035049896889857e-06, + "loss": 0.2973, + "step": 15764 + }, + { + "epoch": 0.7135098438560761, + "grad_norm": 0.6579762622647377, + "learning_rate": 2.0029183000884372e-06, + "loss": 0.3082, + "step": 15765 + }, + { + "epoch": 0.7135551029644716, + "grad_norm": 0.5941769016688686, + "learning_rate": 2.0023316748859683e-06, + "loss": 0.289, + "step": 15766 + }, + { + "epoch": 0.7136003620728671, + "grad_norm": 0.6074341860587897, + "learning_rate": 2.0017451140941848e-06, + "loss": 0.299, + "step": 15767 + }, + { + "epoch": 0.7136456211812627, + "grad_norm": 0.5944365416986104, + "learning_rate": 2.001158617725692e-06, + "loss": 0.2788, + "step": 15768 + }, + { + "epoch": 0.7136908802896583, + "grad_norm": 0.6734313146626568, + "learning_rate": 2.0005721857930902e-06, + "loss": 0.2867, + "step": 15769 + }, + { + "epoch": 0.7137361393980538, + "grad_norm": 0.6120330385710467, + "learning_rate": 1.999985818308979e-06, + "loss": 0.2697, + "step": 15770 + }, + { + "epoch": 0.7137813985064494, + "grad_norm": 0.6424849521515572, + "learning_rate": 1.9993995152859574e-06, + "loss": 0.2846, + "step": 15771 + }, + { + "epoch": 0.713826657614845, + "grad_norm": 0.592428537896021, + "learning_rate": 1.9988132767366274e-06, + "loss": 0.3511, + "step": 15772 + }, + { + "epoch": 0.7138719167232406, + "grad_norm": 0.6274610076103312, + "learning_rate": 1.9982271026735822e-06, + "loss": 0.293, + "step": 15773 + }, + { + "epoch": 0.7139171758316362, + "grad_norm": 0.6506363639159314, + "learning_rate": 1.997640993109416e-06, + "loss": 0.3029, + "step": 15774 + }, + { + "epoch": 0.7139624349400316, + "grad_norm": 0.6224530901843206, + "learning_rate": 1.9970549480567253e-06, + "loss": 0.3086, + "step": 15775 + }, + { + "epoch": 0.7140076940484272, + "grad_norm": 0.5956593042207174, + "learning_rate": 1.9964689675280993e-06, + "loss": 0.2699, + "step": 15776 + }, + { + "epoch": 0.7140529531568228, + "grad_norm": 0.308828138671443, + "learning_rate": 1.9958830515361323e-06, + "loss": 0.4754, + "step": 15777 + }, + { + "epoch": 0.7140982122652184, + "grad_norm": 0.6760202791887676, + "learning_rate": 1.995297200093412e-06, + "loss": 0.2883, + "step": 15778 + }, + { + "epoch": 0.7141434713736139, + "grad_norm": 0.6687713757079061, + "learning_rate": 1.9947114132125243e-06, + "loss": 0.335, + "step": 15779 + }, + { + "epoch": 0.7141887304820095, + "grad_norm": 0.6688648010934541, + "learning_rate": 1.994125690906059e-06, + "loss": 0.2982, + "step": 15780 + }, + { + "epoch": 0.7142339895904051, + "grad_norm": 0.6302850774865625, + "learning_rate": 1.993540033186602e-06, + "loss": 0.263, + "step": 15781 + }, + { + "epoch": 0.7142792486988007, + "grad_norm": 0.28680344474268504, + "learning_rate": 1.9929544400667366e-06, + "loss": 0.4811, + "step": 15782 + }, + { + "epoch": 0.7143245078071963, + "grad_norm": 0.6167589499171111, + "learning_rate": 1.9923689115590428e-06, + "loss": 0.3014, + "step": 15783 + }, + { + "epoch": 0.7143697669155917, + "grad_norm": 0.5816952636059396, + "learning_rate": 1.9917834476761037e-06, + "loss": 0.3096, + "step": 15784 + }, + { + "epoch": 0.7144150260239873, + "grad_norm": 0.6636120045872163, + "learning_rate": 1.9911980484305017e-06, + "loss": 0.3257, + "step": 15785 + }, + { + "epoch": 0.7144602851323829, + "grad_norm": 0.6286370620860274, + "learning_rate": 1.9906127138348123e-06, + "loss": 0.2956, + "step": 15786 + }, + { + "epoch": 0.7145055442407785, + "grad_norm": 0.6780622703223456, + "learning_rate": 1.9900274439016116e-06, + "loss": 0.2681, + "step": 15787 + }, + { + "epoch": 0.714550803349174, + "grad_norm": 0.7062114812695381, + "learning_rate": 1.989442238643478e-06, + "loss": 0.3265, + "step": 15788 + }, + { + "epoch": 0.7145960624575696, + "grad_norm": 0.5841561044833753, + "learning_rate": 1.9888570980729847e-06, + "loss": 0.2714, + "step": 15789 + }, + { + "epoch": 0.7146413215659652, + "grad_norm": 0.7220036200359434, + "learning_rate": 1.9882720222027026e-06, + "loss": 0.3009, + "step": 15790 + }, + { + "epoch": 0.7146865806743607, + "grad_norm": 0.7006671521470127, + "learning_rate": 1.9876870110452066e-06, + "loss": 0.2785, + "step": 15791 + }, + { + "epoch": 0.7147318397827562, + "grad_norm": 0.5872154904076039, + "learning_rate": 1.9871020646130633e-06, + "loss": 0.2899, + "step": 15792 + }, + { + "epoch": 0.7147770988911518, + "grad_norm": 0.596507452041044, + "learning_rate": 1.9865171829188455e-06, + "loss": 0.2957, + "step": 15793 + }, + { + "epoch": 0.7148223579995474, + "grad_norm": 0.6760547872145583, + "learning_rate": 1.9859323659751178e-06, + "loss": 0.3272, + "step": 15794 + }, + { + "epoch": 0.714867617107943, + "grad_norm": 0.6936074449471855, + "learning_rate": 1.985347613794445e-06, + "loss": 0.3072, + "step": 15795 + }, + { + "epoch": 0.7149128762163386, + "grad_norm": 0.6141029292821792, + "learning_rate": 1.984762926389393e-06, + "loss": 0.3119, + "step": 15796 + }, + { + "epoch": 0.7149581353247341, + "grad_norm": 0.28622782989550266, + "learning_rate": 1.9841783037725264e-06, + "loss": 0.4784, + "step": 15797 + }, + { + "epoch": 0.7150033944331297, + "grad_norm": 0.603727115110764, + "learning_rate": 1.9835937459564065e-06, + "loss": 0.333, + "step": 15798 + }, + { + "epoch": 0.7150486535415252, + "grad_norm": 0.2584585191662679, + "learning_rate": 1.983009252953591e-06, + "loss": 0.4622, + "step": 15799 + }, + { + "epoch": 0.7150939126499208, + "grad_norm": 0.6295600557995713, + "learning_rate": 1.9824248247766404e-06, + "loss": 0.2999, + "step": 15800 + }, + { + "epoch": 0.7151391717583163, + "grad_norm": 0.6054018087673412, + "learning_rate": 1.981840461438114e-06, + "loss": 0.2997, + "step": 15801 + }, + { + "epoch": 0.7151844308667119, + "grad_norm": 0.6474448280391323, + "learning_rate": 1.9812561629505666e-06, + "loss": 0.336, + "step": 15802 + }, + { + "epoch": 0.7152296899751075, + "grad_norm": 0.6423659371952307, + "learning_rate": 1.980671929326551e-06, + "loss": 0.3294, + "step": 15803 + }, + { + "epoch": 0.7152749490835031, + "grad_norm": 0.29430593806193195, + "learning_rate": 1.980087760578625e-06, + "loss": 0.4523, + "step": 15804 + }, + { + "epoch": 0.7153202081918986, + "grad_norm": 0.5765988062290655, + "learning_rate": 1.979503656719336e-06, + "loss": 0.2993, + "step": 15805 + }, + { + "epoch": 0.7153654673002942, + "grad_norm": 0.5732845095529945, + "learning_rate": 1.9789196177612384e-06, + "loss": 0.2718, + "step": 15806 + }, + { + "epoch": 0.7154107264086897, + "grad_norm": 0.6072815854683996, + "learning_rate": 1.97833564371688e-06, + "loss": 0.3432, + "step": 15807 + }, + { + "epoch": 0.7154559855170853, + "grad_norm": 0.599200295872143, + "learning_rate": 1.9777517345988057e-06, + "loss": 0.3245, + "step": 15808 + }, + { + "epoch": 0.7155012446254809, + "grad_norm": 0.6179029784376359, + "learning_rate": 1.977167890419565e-06, + "loss": 0.2977, + "step": 15809 + }, + { + "epoch": 0.7155465037338764, + "grad_norm": 0.619875528901657, + "learning_rate": 1.976584111191704e-06, + "loss": 0.2772, + "step": 15810 + }, + { + "epoch": 0.715591762842272, + "grad_norm": 0.5717346985299682, + "learning_rate": 1.976000396927765e-06, + "loss": 0.2653, + "step": 15811 + }, + { + "epoch": 0.7156370219506676, + "grad_norm": 0.6167885903070414, + "learning_rate": 1.975416747640288e-06, + "loss": 0.3106, + "step": 15812 + }, + { + "epoch": 0.7156822810590632, + "grad_norm": 0.6860304566379826, + "learning_rate": 1.974833163341816e-06, + "loss": 0.3206, + "step": 15813 + }, + { + "epoch": 0.7157275401674587, + "grad_norm": 0.558697239360316, + "learning_rate": 1.9742496440448895e-06, + "loss": 0.2664, + "step": 15814 + }, + { + "epoch": 0.7157727992758542, + "grad_norm": 0.6422467254804713, + "learning_rate": 1.973666189762046e-06, + "loss": 0.3243, + "step": 15815 + }, + { + "epoch": 0.7158180583842498, + "grad_norm": 0.6272481981865834, + "learning_rate": 1.973082800505819e-06, + "loss": 0.2976, + "step": 15816 + }, + { + "epoch": 0.7158633174926454, + "grad_norm": 0.6170692813442996, + "learning_rate": 1.9724994762887484e-06, + "loss": 0.2906, + "step": 15817 + }, + { + "epoch": 0.715908576601041, + "grad_norm": 0.3043379788722677, + "learning_rate": 1.9719162171233636e-06, + "loss": 0.4571, + "step": 15818 + }, + { + "epoch": 0.7159538357094365, + "grad_norm": 0.7171550794067272, + "learning_rate": 1.9713330230222013e-06, + "loss": 0.2886, + "step": 15819 + }, + { + "epoch": 0.7159990948178321, + "grad_norm": 1.193219478002798, + "learning_rate": 1.9707498939977905e-06, + "loss": 0.3184, + "step": 15820 + }, + { + "epoch": 0.7160443539262277, + "grad_norm": 0.5752556708479493, + "learning_rate": 1.970166830062659e-06, + "loss": 0.2735, + "step": 15821 + }, + { + "epoch": 0.7160896130346233, + "grad_norm": 0.6125335200732254, + "learning_rate": 1.969583831229338e-06, + "loss": 0.3245, + "step": 15822 + }, + { + "epoch": 0.7161348721430187, + "grad_norm": 0.5971442106455194, + "learning_rate": 1.969000897510354e-06, + "loss": 0.3523, + "step": 15823 + }, + { + "epoch": 0.7161801312514143, + "grad_norm": 0.26319467873710606, + "learning_rate": 1.9684180289182297e-06, + "loss": 0.4491, + "step": 15824 + }, + { + "epoch": 0.7162253903598099, + "grad_norm": 0.6962210165352554, + "learning_rate": 1.9678352254654914e-06, + "loss": 0.2929, + "step": 15825 + }, + { + "epoch": 0.7162706494682055, + "grad_norm": 0.597435716964487, + "learning_rate": 1.967252487164663e-06, + "loss": 0.3234, + "step": 15826 + }, + { + "epoch": 0.716315908576601, + "grad_norm": 0.5774198683188823, + "learning_rate": 1.9666698140282648e-06, + "loss": 0.3108, + "step": 15827 + }, + { + "epoch": 0.7163611676849966, + "grad_norm": 0.6242275328564052, + "learning_rate": 1.966087206068814e-06, + "loss": 0.3164, + "step": 15828 + }, + { + "epoch": 0.7164064267933922, + "grad_norm": 0.7541080933452219, + "learning_rate": 1.9655046632988313e-06, + "loss": 0.2784, + "step": 15829 + }, + { + "epoch": 0.7164516859017878, + "grad_norm": 0.6206540518782321, + "learning_rate": 1.964922185730835e-06, + "loss": 0.2849, + "step": 15830 + }, + { + "epoch": 0.7164969450101834, + "grad_norm": 0.6295019724223111, + "learning_rate": 1.96433977337734e-06, + "loss": 0.2744, + "step": 15831 + }, + { + "epoch": 0.7165422041185788, + "grad_norm": 0.5879735551319749, + "learning_rate": 1.963757426250858e-06, + "loss": 0.2941, + "step": 15832 + }, + { + "epoch": 0.7165874632269744, + "grad_norm": 0.29055395224746566, + "learning_rate": 1.9631751443639054e-06, + "loss": 0.4766, + "step": 15833 + }, + { + "epoch": 0.71663272233537, + "grad_norm": 0.7155505457515415, + "learning_rate": 1.96259292772899e-06, + "loss": 0.3128, + "step": 15834 + }, + { + "epoch": 0.7166779814437656, + "grad_norm": 0.7041649501282407, + "learning_rate": 1.9620107763586267e-06, + "loss": 0.3233, + "step": 15835 + }, + { + "epoch": 0.7167232405521611, + "grad_norm": 0.2991827612162243, + "learning_rate": 1.96142869026532e-06, + "loss": 0.4652, + "step": 15836 + }, + { + "epoch": 0.7167684996605567, + "grad_norm": 0.5923121808846988, + "learning_rate": 1.960846669461578e-06, + "loss": 0.2683, + "step": 15837 + }, + { + "epoch": 0.7168137587689523, + "grad_norm": 0.630301288602417, + "learning_rate": 1.9602647139599063e-06, + "loss": 0.2829, + "step": 15838 + }, + { + "epoch": 0.7168590178773478, + "grad_norm": 0.6268779453538741, + "learning_rate": 1.959682823772812e-06, + "loss": 0.3598, + "step": 15839 + }, + { + "epoch": 0.7169042769857433, + "grad_norm": 0.7842283739627655, + "learning_rate": 1.9591009989127958e-06, + "loss": 0.3044, + "step": 15840 + }, + { + "epoch": 0.7169495360941389, + "grad_norm": 0.27938510457142823, + "learning_rate": 1.9585192393923583e-06, + "loss": 0.481, + "step": 15841 + }, + { + "epoch": 0.7169947952025345, + "grad_norm": 0.6064302788309044, + "learning_rate": 1.9579375452240013e-06, + "loss": 0.2783, + "step": 15842 + }, + { + "epoch": 0.7170400543109301, + "grad_norm": 0.6281747951378225, + "learning_rate": 1.9573559164202248e-06, + "loss": 0.3212, + "step": 15843 + }, + { + "epoch": 0.7170853134193257, + "grad_norm": 0.6213000653987445, + "learning_rate": 1.956774352993526e-06, + "loss": 0.2711, + "step": 15844 + }, + { + "epoch": 0.7171305725277212, + "grad_norm": 0.6296797112903717, + "learning_rate": 1.956192854956397e-06, + "loss": 0.3124, + "step": 15845 + }, + { + "epoch": 0.7171758316361168, + "grad_norm": 0.5755777054173816, + "learning_rate": 1.955611422321337e-06, + "loss": 0.2927, + "step": 15846 + }, + { + "epoch": 0.7172210907445123, + "grad_norm": 0.6481445542956137, + "learning_rate": 1.9550300551008357e-06, + "loss": 0.3052, + "step": 15847 + }, + { + "epoch": 0.7172663498529079, + "grad_norm": 0.2622176815018156, + "learning_rate": 1.9544487533073887e-06, + "loss": 0.4604, + "step": 15848 + }, + { + "epoch": 0.7173116089613034, + "grad_norm": 0.6096963530780615, + "learning_rate": 1.9538675169534838e-06, + "loss": 0.3055, + "step": 15849 + }, + { + "epoch": 0.717356868069699, + "grad_norm": 0.5801968716904514, + "learning_rate": 1.9532863460516095e-06, + "loss": 0.3121, + "step": 15850 + }, + { + "epoch": 0.7174021271780946, + "grad_norm": 0.6445991323541399, + "learning_rate": 1.9527052406142534e-06, + "loss": 0.3132, + "step": 15851 + }, + { + "epoch": 0.7174473862864902, + "grad_norm": 0.6252343576574004, + "learning_rate": 1.9521242006539065e-06, + "loss": 0.3084, + "step": 15852 + }, + { + "epoch": 0.7174926453948858, + "grad_norm": 0.7262094242239051, + "learning_rate": 1.9515432261830465e-06, + "loss": 0.2998, + "step": 15853 + }, + { + "epoch": 0.7175379045032813, + "grad_norm": 0.6183870478720913, + "learning_rate": 1.9509623172141596e-06, + "loss": 0.2937, + "step": 15854 + }, + { + "epoch": 0.7175831636116768, + "grad_norm": 0.6979481403695195, + "learning_rate": 1.9503814737597297e-06, + "loss": 0.2938, + "step": 15855 + }, + { + "epoch": 0.7176284227200724, + "grad_norm": 0.7083826073473445, + "learning_rate": 1.949800695832236e-06, + "loss": 0.2918, + "step": 15856 + }, + { + "epoch": 0.717673681828468, + "grad_norm": 0.6447679929100643, + "learning_rate": 1.949219983444156e-06, + "loss": 0.3071, + "step": 15857 + }, + { + "epoch": 0.7177189409368635, + "grad_norm": 0.6497871890283972, + "learning_rate": 1.9486393366079687e-06, + "loss": 0.313, + "step": 15858 + }, + { + "epoch": 0.7177642000452591, + "grad_norm": 0.682234150983536, + "learning_rate": 1.948058755336152e-06, + "loss": 0.2814, + "step": 15859 + }, + { + "epoch": 0.7178094591536547, + "grad_norm": 0.6759019114753635, + "learning_rate": 1.947478239641179e-06, + "loss": 0.3152, + "step": 15860 + }, + { + "epoch": 0.7178547182620503, + "grad_norm": 0.6525915599032759, + "learning_rate": 1.9468977895355225e-06, + "loss": 0.3431, + "step": 15861 + }, + { + "epoch": 0.7178999773704458, + "grad_norm": 0.660095016570737, + "learning_rate": 1.946317405031657e-06, + "loss": 0.2991, + "step": 15862 + }, + { + "epoch": 0.7179452364788413, + "grad_norm": 0.6324364409123052, + "learning_rate": 1.94573708614205e-06, + "loss": 0.2844, + "step": 15863 + }, + { + "epoch": 0.7179904955872369, + "grad_norm": 0.6335492193236768, + "learning_rate": 1.945156832879174e-06, + "loss": 0.2878, + "step": 15864 + }, + { + "epoch": 0.7180357546956325, + "grad_norm": 0.6585628540801011, + "learning_rate": 1.944576645255496e-06, + "loss": 0.3329, + "step": 15865 + }, + { + "epoch": 0.7180810138040281, + "grad_norm": 0.3185024687658992, + "learning_rate": 1.94399652328348e-06, + "loss": 0.4802, + "step": 15866 + }, + { + "epoch": 0.7181262729124236, + "grad_norm": 0.5985997255672704, + "learning_rate": 1.9434164669755928e-06, + "loss": 0.315, + "step": 15867 + }, + { + "epoch": 0.7181715320208192, + "grad_norm": 0.6134883826608056, + "learning_rate": 1.9428364763443e-06, + "loss": 0.3393, + "step": 15868 + }, + { + "epoch": 0.7182167911292148, + "grad_norm": 0.6659292946147676, + "learning_rate": 1.942256551402062e-06, + "loss": 0.3076, + "step": 15869 + }, + { + "epoch": 0.7182620502376104, + "grad_norm": 0.7063404873269847, + "learning_rate": 1.9416766921613375e-06, + "loss": 0.3074, + "step": 15870 + }, + { + "epoch": 0.7183073093460058, + "grad_norm": 0.8006880516701447, + "learning_rate": 1.941096898634588e-06, + "loss": 0.3203, + "step": 15871 + }, + { + "epoch": 0.7183525684544014, + "grad_norm": 0.6808668128914034, + "learning_rate": 1.9405171708342734e-06, + "loss": 0.3336, + "step": 15872 + }, + { + "epoch": 0.718397827562797, + "grad_norm": 0.6046078305349775, + "learning_rate": 1.9399375087728485e-06, + "loss": 0.2706, + "step": 15873 + }, + { + "epoch": 0.7184430866711926, + "grad_norm": 0.28548787155681826, + "learning_rate": 1.939357912462766e-06, + "loss": 0.4806, + "step": 15874 + }, + { + "epoch": 0.7184883457795881, + "grad_norm": 0.5994086971826666, + "learning_rate": 1.938778381916484e-06, + "loss": 0.2869, + "step": 15875 + }, + { + "epoch": 0.7185336048879837, + "grad_norm": 0.6159373405288157, + "learning_rate": 1.938198917146451e-06, + "loss": 0.3169, + "step": 15876 + }, + { + "epoch": 0.7185788639963793, + "grad_norm": 0.6221266268026915, + "learning_rate": 1.937619518165121e-06, + "loss": 0.3013, + "step": 15877 + }, + { + "epoch": 0.7186241231047749, + "grad_norm": 0.6939394072074498, + "learning_rate": 1.937040184984943e-06, + "loss": 0.2803, + "step": 15878 + }, + { + "epoch": 0.7186693822131704, + "grad_norm": 0.6745653728912595, + "learning_rate": 1.936460917618362e-06, + "loss": 0.2946, + "step": 15879 + }, + { + "epoch": 0.7187146413215659, + "grad_norm": 0.6351055310352166, + "learning_rate": 1.9358817160778272e-06, + "loss": 0.3579, + "step": 15880 + }, + { + "epoch": 0.7187599004299615, + "grad_norm": 0.2773305345910911, + "learning_rate": 1.935302580375785e-06, + "loss": 0.4696, + "step": 15881 + }, + { + "epoch": 0.7188051595383571, + "grad_norm": 0.28853986338486576, + "learning_rate": 1.9347235105246783e-06, + "loss": 0.4589, + "step": 15882 + }, + { + "epoch": 0.7188504186467527, + "grad_norm": 0.6801749109228717, + "learning_rate": 1.934144506536946e-06, + "loss": 0.3329, + "step": 15883 + }, + { + "epoch": 0.7188956777551482, + "grad_norm": 0.6160835970036759, + "learning_rate": 1.9335655684250335e-06, + "loss": 0.3068, + "step": 15884 + }, + { + "epoch": 0.7189409368635438, + "grad_norm": 0.6493036262924194, + "learning_rate": 1.9329866962013825e-06, + "loss": 0.3009, + "step": 15885 + }, + { + "epoch": 0.7189861959719394, + "grad_norm": 0.6042260802325438, + "learning_rate": 1.9324078898784245e-06, + "loss": 0.2646, + "step": 15886 + }, + { + "epoch": 0.719031455080335, + "grad_norm": 0.6283814205090501, + "learning_rate": 1.9318291494685986e-06, + "loss": 0.317, + "step": 15887 + }, + { + "epoch": 0.7190767141887305, + "grad_norm": 0.27051702087871426, + "learning_rate": 1.9312504749843435e-06, + "loss": 0.4805, + "step": 15888 + }, + { + "epoch": 0.719121973297126, + "grad_norm": 0.6903824750745414, + "learning_rate": 1.9306718664380907e-06, + "loss": 0.3057, + "step": 15889 + }, + { + "epoch": 0.7191672324055216, + "grad_norm": 0.5720509988148191, + "learning_rate": 1.930093323842271e-06, + "loss": 0.2872, + "step": 15890 + }, + { + "epoch": 0.7192124915139172, + "grad_norm": 0.6374653633908585, + "learning_rate": 1.929514847209319e-06, + "loss": 0.3056, + "step": 15891 + }, + { + "epoch": 0.7192577506223128, + "grad_norm": 0.641860689442778, + "learning_rate": 1.928936436551661e-06, + "loss": 0.309, + "step": 15892 + }, + { + "epoch": 0.7193030097307083, + "grad_norm": 0.33271062511421506, + "learning_rate": 1.9283580918817284e-06, + "loss": 0.4849, + "step": 15893 + }, + { + "epoch": 0.7193482688391039, + "grad_norm": 0.5551185896909835, + "learning_rate": 1.927779813211947e-06, + "loss": 0.3101, + "step": 15894 + }, + { + "epoch": 0.7193935279474994, + "grad_norm": 0.6580061730478527, + "learning_rate": 1.92720160055474e-06, + "loss": 0.2824, + "step": 15895 + }, + { + "epoch": 0.719438787055895, + "grad_norm": 0.603955540037126, + "learning_rate": 1.926623453922533e-06, + "loss": 0.3185, + "step": 15896 + }, + { + "epoch": 0.7194840461642905, + "grad_norm": 0.5711212663065297, + "learning_rate": 1.9260453733277505e-06, + "loss": 0.2553, + "step": 15897 + }, + { + "epoch": 0.7195293052726861, + "grad_norm": 0.6889052549277278, + "learning_rate": 1.925467358782812e-06, + "loss": 0.3484, + "step": 15898 + }, + { + "epoch": 0.7195745643810817, + "grad_norm": 0.2783497457159708, + "learning_rate": 1.924889410300136e-06, + "loss": 0.4684, + "step": 15899 + }, + { + "epoch": 0.7196198234894773, + "grad_norm": 0.25821795988046475, + "learning_rate": 1.9243115278921416e-06, + "loss": 0.4618, + "step": 15900 + }, + { + "epoch": 0.7196650825978729, + "grad_norm": 0.611423173817838, + "learning_rate": 1.923733711571248e-06, + "loss": 0.297, + "step": 15901 + }, + { + "epoch": 0.7197103417062684, + "grad_norm": 0.5840177696674929, + "learning_rate": 1.923155961349869e-06, + "loss": 0.2693, + "step": 15902 + }, + { + "epoch": 0.7197556008146639, + "grad_norm": 0.265000607498201, + "learning_rate": 1.9225782772404166e-06, + "loss": 0.4751, + "step": 15903 + }, + { + "epoch": 0.7198008599230595, + "grad_norm": 0.64824568095004, + "learning_rate": 1.9220006592553075e-06, + "loss": 0.3015, + "step": 15904 + }, + { + "epoch": 0.7198461190314551, + "grad_norm": 0.31206766498139427, + "learning_rate": 1.921423107406949e-06, + "loss": 0.4871, + "step": 15905 + }, + { + "epoch": 0.7198913781398506, + "grad_norm": 0.6588422898892048, + "learning_rate": 1.920845621707755e-06, + "loss": 0.271, + "step": 15906 + }, + { + "epoch": 0.7199366372482462, + "grad_norm": 0.5959150073512096, + "learning_rate": 1.920268202170131e-06, + "loss": 0.3178, + "step": 15907 + }, + { + "epoch": 0.7199818963566418, + "grad_norm": 0.5610244219911166, + "learning_rate": 1.9196908488064832e-06, + "loss": 0.2759, + "step": 15908 + }, + { + "epoch": 0.7200271554650374, + "grad_norm": 0.6057227446694428, + "learning_rate": 1.9191135616292184e-06, + "loss": 0.2924, + "step": 15909 + }, + { + "epoch": 0.7200724145734329, + "grad_norm": 0.25465578536998, + "learning_rate": 1.918536340650743e-06, + "loss": 0.4753, + "step": 15910 + }, + { + "epoch": 0.7201176736818284, + "grad_norm": 0.6162224428012438, + "learning_rate": 1.9179591858834572e-06, + "loss": 0.2771, + "step": 15911 + }, + { + "epoch": 0.720162932790224, + "grad_norm": 0.6236076685705199, + "learning_rate": 1.9173820973397617e-06, + "loss": 0.2924, + "step": 15912 + }, + { + "epoch": 0.7202081918986196, + "grad_norm": 0.6610076407431865, + "learning_rate": 1.916805075032057e-06, + "loss": 0.2964, + "step": 15913 + }, + { + "epoch": 0.7202534510070152, + "grad_norm": 0.8042526061411998, + "learning_rate": 1.9162281189727455e-06, + "loss": 0.2768, + "step": 15914 + }, + { + "epoch": 0.7202987101154107, + "grad_norm": 0.6371717003262385, + "learning_rate": 1.915651229174217e-06, + "loss": 0.316, + "step": 15915 + }, + { + "epoch": 0.7203439692238063, + "grad_norm": 0.630859574891062, + "learning_rate": 1.9150744056488708e-06, + "loss": 0.3211, + "step": 15916 + }, + { + "epoch": 0.7203892283322019, + "grad_norm": 0.692485725729248, + "learning_rate": 1.9144976484091025e-06, + "loss": 0.3136, + "step": 15917 + }, + { + "epoch": 0.7204344874405975, + "grad_norm": 0.5700622958020366, + "learning_rate": 1.913920957467304e-06, + "loss": 0.2925, + "step": 15918 + }, + { + "epoch": 0.7204797465489929, + "grad_norm": 0.5472061376592867, + "learning_rate": 1.913344332835864e-06, + "loss": 0.2976, + "step": 15919 + }, + { + "epoch": 0.7205250056573885, + "grad_norm": 0.7371868821920786, + "learning_rate": 1.9127677745271754e-06, + "loss": 0.275, + "step": 15920 + }, + { + "epoch": 0.7205702647657841, + "grad_norm": 0.9859973646941241, + "learning_rate": 1.912191282553624e-06, + "loss": 0.2919, + "step": 15921 + }, + { + "epoch": 0.7206155238741797, + "grad_norm": 0.3129194570815802, + "learning_rate": 1.911614856927601e-06, + "loss": 0.4891, + "step": 15922 + }, + { + "epoch": 0.7206607829825753, + "grad_norm": 0.7252536527570467, + "learning_rate": 1.911038497661487e-06, + "loss": 0.3079, + "step": 15923 + }, + { + "epoch": 0.7207060420909708, + "grad_norm": 0.6356784735211609, + "learning_rate": 1.910462204767671e-06, + "loss": 0.3388, + "step": 15924 + }, + { + "epoch": 0.7207513011993664, + "grad_norm": 0.8678659339354213, + "learning_rate": 1.9098859782585313e-06, + "loss": 0.3187, + "step": 15925 + }, + { + "epoch": 0.720796560307762, + "grad_norm": 0.615283773322787, + "learning_rate": 1.909309818146453e-06, + "loss": 0.2709, + "step": 15926 + }, + { + "epoch": 0.7208418194161575, + "grad_norm": 0.2776919225343176, + "learning_rate": 1.9087337244438147e-06, + "loss": 0.4684, + "step": 15927 + }, + { + "epoch": 0.720887078524553, + "grad_norm": 0.6257009861394178, + "learning_rate": 1.908157697162993e-06, + "loss": 0.3262, + "step": 15928 + }, + { + "epoch": 0.7209323376329486, + "grad_norm": 0.2599198789267447, + "learning_rate": 1.9075817363163655e-06, + "loss": 0.4589, + "step": 15929 + }, + { + "epoch": 0.7209775967413442, + "grad_norm": 0.6840195491615668, + "learning_rate": 1.9070058419163118e-06, + "loss": 0.2479, + "step": 15930 + }, + { + "epoch": 0.7210228558497398, + "grad_norm": 0.6094399970096488, + "learning_rate": 1.9064300139752024e-06, + "loss": 0.3021, + "step": 15931 + }, + { + "epoch": 0.7210681149581353, + "grad_norm": 0.25754235454591273, + "learning_rate": 1.9058542525054096e-06, + "loss": 0.466, + "step": 15932 + }, + { + "epoch": 0.7211133740665309, + "grad_norm": 0.6092321613812091, + "learning_rate": 1.9052785575193072e-06, + "loss": 0.3106, + "step": 15933 + }, + { + "epoch": 0.7211586331749265, + "grad_norm": 0.6228942019247349, + "learning_rate": 1.9047029290292623e-06, + "loss": 0.2786, + "step": 15934 + }, + { + "epoch": 0.721203892283322, + "grad_norm": 0.6296667281726287, + "learning_rate": 1.9041273670476468e-06, + "loss": 0.3071, + "step": 15935 + }, + { + "epoch": 0.7212491513917176, + "grad_norm": 0.6188099089168433, + "learning_rate": 1.9035518715868262e-06, + "loss": 0.2921, + "step": 15936 + }, + { + "epoch": 0.7212944105001131, + "grad_norm": 0.7346168096084281, + "learning_rate": 1.9029764426591641e-06, + "loss": 0.3299, + "step": 15937 + }, + { + "epoch": 0.7213396696085087, + "grad_norm": 0.6566646675703187, + "learning_rate": 1.902401080277026e-06, + "loss": 0.2967, + "step": 15938 + }, + { + "epoch": 0.7213849287169043, + "grad_norm": 0.6763430559004452, + "learning_rate": 1.901825784452777e-06, + "loss": 0.3422, + "step": 15939 + }, + { + "epoch": 0.7214301878252999, + "grad_norm": 0.5829262688664122, + "learning_rate": 1.9012505551987764e-06, + "loss": 0.3176, + "step": 15940 + }, + { + "epoch": 0.7214754469336954, + "grad_norm": 0.6464037491427786, + "learning_rate": 1.900675392527383e-06, + "loss": 0.355, + "step": 15941 + }, + { + "epoch": 0.721520706042091, + "grad_norm": 0.6426142657790407, + "learning_rate": 1.9001002964509564e-06, + "loss": 0.3419, + "step": 15942 + }, + { + "epoch": 0.7215659651504865, + "grad_norm": 0.3018870085983446, + "learning_rate": 1.8995252669818577e-06, + "loss": 0.4996, + "step": 15943 + }, + { + "epoch": 0.7216112242588821, + "grad_norm": 0.6567595177042254, + "learning_rate": 1.8989503041324341e-06, + "loss": 0.2671, + "step": 15944 + }, + { + "epoch": 0.7216564833672776, + "grad_norm": 1.1565325384593736, + "learning_rate": 1.8983754079150452e-06, + "loss": 0.3004, + "step": 15945 + }, + { + "epoch": 0.7217017424756732, + "grad_norm": 0.6058733951545143, + "learning_rate": 1.8978005783420444e-06, + "loss": 0.2855, + "step": 15946 + }, + { + "epoch": 0.7217470015840688, + "grad_norm": 0.6461791664712543, + "learning_rate": 1.8972258154257816e-06, + "loss": 0.3139, + "step": 15947 + }, + { + "epoch": 0.7217922606924644, + "grad_norm": 0.6570032567439259, + "learning_rate": 1.8966511191786047e-06, + "loss": 0.2739, + "step": 15948 + }, + { + "epoch": 0.72183751980086, + "grad_norm": 0.5894999645744051, + "learning_rate": 1.896076489612866e-06, + "loss": 0.2943, + "step": 15949 + }, + { + "epoch": 0.7218827789092555, + "grad_norm": 0.5993238627006856, + "learning_rate": 1.895501926740908e-06, + "loss": 0.2833, + "step": 15950 + }, + { + "epoch": 0.721928038017651, + "grad_norm": 0.6305668222337719, + "learning_rate": 1.8949274305750814e-06, + "loss": 0.2601, + "step": 15951 + }, + { + "epoch": 0.7219732971260466, + "grad_norm": 0.617752552570206, + "learning_rate": 1.8943530011277261e-06, + "loss": 0.2872, + "step": 15952 + }, + { + "epoch": 0.7220185562344422, + "grad_norm": 0.626608894837736, + "learning_rate": 1.893778638411188e-06, + "loss": 0.3434, + "step": 15953 + }, + { + "epoch": 0.7220638153428377, + "grad_norm": 0.733790000471714, + "learning_rate": 1.8932043424378049e-06, + "loss": 0.3371, + "step": 15954 + }, + { + "epoch": 0.7221090744512333, + "grad_norm": 0.294562872158215, + "learning_rate": 1.892630113219921e-06, + "loss": 0.4619, + "step": 15955 + }, + { + "epoch": 0.7221543335596289, + "grad_norm": 0.6323404370467263, + "learning_rate": 1.8920559507698722e-06, + "loss": 0.3145, + "step": 15956 + }, + { + "epoch": 0.7221995926680245, + "grad_norm": 0.6202866373164587, + "learning_rate": 1.891481855099994e-06, + "loss": 0.2851, + "step": 15957 + }, + { + "epoch": 0.72224485177642, + "grad_norm": 0.6657534635544718, + "learning_rate": 1.8909078262226237e-06, + "loss": 0.3197, + "step": 15958 + }, + { + "epoch": 0.7222901108848155, + "grad_norm": 0.5681651570767629, + "learning_rate": 1.8903338641500967e-06, + "loss": 0.3175, + "step": 15959 + }, + { + "epoch": 0.7223353699932111, + "grad_norm": 0.2866490398022034, + "learning_rate": 1.889759968894745e-06, + "loss": 0.4748, + "step": 15960 + }, + { + "epoch": 0.7223806291016067, + "grad_norm": 0.6383201904904624, + "learning_rate": 1.889186140468897e-06, + "loss": 0.3252, + "step": 15961 + }, + { + "epoch": 0.7224258882100023, + "grad_norm": 0.5736735821452259, + "learning_rate": 1.8886123788848864e-06, + "loss": 0.2795, + "step": 15962 + }, + { + "epoch": 0.7224711473183978, + "grad_norm": 0.6218336748590672, + "learning_rate": 1.8880386841550385e-06, + "loss": 0.2914, + "step": 15963 + }, + { + "epoch": 0.7225164064267934, + "grad_norm": 0.6702019724144967, + "learning_rate": 1.887465056291683e-06, + "loss": 0.327, + "step": 15964 + }, + { + "epoch": 0.722561665535189, + "grad_norm": 1.4128354899466162, + "learning_rate": 1.8868914953071444e-06, + "loss": 0.3026, + "step": 15965 + }, + { + "epoch": 0.7226069246435846, + "grad_norm": 0.619592327990379, + "learning_rate": 1.886318001213744e-06, + "loss": 0.2937, + "step": 15966 + }, + { + "epoch": 0.72265218375198, + "grad_norm": 0.6439866145775998, + "learning_rate": 1.8857445740238073e-06, + "loss": 0.3358, + "step": 15967 + }, + { + "epoch": 0.7226974428603756, + "grad_norm": 0.6418633396022916, + "learning_rate": 1.8851712137496564e-06, + "loss": 0.3288, + "step": 15968 + }, + { + "epoch": 0.7227427019687712, + "grad_norm": 0.5714028641956019, + "learning_rate": 1.8845979204036101e-06, + "loss": 0.2891, + "step": 15969 + }, + { + "epoch": 0.7227879610771668, + "grad_norm": 0.5926099561173371, + "learning_rate": 1.8840246939979846e-06, + "loss": 0.3185, + "step": 15970 + }, + { + "epoch": 0.7228332201855624, + "grad_norm": 0.5924911218056633, + "learning_rate": 1.8834515345450977e-06, + "loss": 0.2876, + "step": 15971 + }, + { + "epoch": 0.7228784792939579, + "grad_norm": 0.6234909875094362, + "learning_rate": 1.88287844205727e-06, + "loss": 0.3069, + "step": 15972 + }, + { + "epoch": 0.7229237384023535, + "grad_norm": 0.2745665548547292, + "learning_rate": 1.882305416546807e-06, + "loss": 0.4633, + "step": 15973 + }, + { + "epoch": 0.7229689975107491, + "grad_norm": 0.6551699035958258, + "learning_rate": 1.8817324580260254e-06, + "loss": 0.3584, + "step": 15974 + }, + { + "epoch": 0.7230142566191446, + "grad_norm": 0.27671999389208507, + "learning_rate": 1.881159566507238e-06, + "loss": 0.4772, + "step": 15975 + }, + { + "epoch": 0.7230595157275401, + "grad_norm": 0.9831127283266593, + "learning_rate": 1.8805867420027529e-06, + "loss": 0.2879, + "step": 15976 + }, + { + "epoch": 0.7231047748359357, + "grad_norm": 0.6164403804911328, + "learning_rate": 1.880013984524876e-06, + "loss": 0.3036, + "step": 15977 + }, + { + "epoch": 0.7231500339443313, + "grad_norm": 0.2706023179382082, + "learning_rate": 1.8794412940859186e-06, + "loss": 0.4742, + "step": 15978 + }, + { + "epoch": 0.7231952930527269, + "grad_norm": 0.813015049690279, + "learning_rate": 1.8788686706981813e-06, + "loss": 0.294, + "step": 15979 + }, + { + "epoch": 0.7232405521611224, + "grad_norm": 0.617435460194587, + "learning_rate": 1.8782961143739724e-06, + "loss": 0.3002, + "step": 15980 + }, + { + "epoch": 0.723285811269518, + "grad_norm": 0.6091669825529811, + "learning_rate": 1.877723625125591e-06, + "loss": 0.3329, + "step": 15981 + }, + { + "epoch": 0.7233310703779136, + "grad_norm": 0.5936151512194228, + "learning_rate": 1.877151202965341e-06, + "loss": 0.2673, + "step": 15982 + }, + { + "epoch": 0.7233763294863091, + "grad_norm": 0.26325091324160144, + "learning_rate": 1.876578847905519e-06, + "loss": 0.5012, + "step": 15983 + }, + { + "epoch": 0.7234215885947047, + "grad_norm": 0.6278205249319658, + "learning_rate": 1.8760065599584266e-06, + "loss": 0.3388, + "step": 15984 + }, + { + "epoch": 0.7234668477031002, + "grad_norm": 0.6902459735771315, + "learning_rate": 1.8754343391363584e-06, + "loss": 0.3194, + "step": 15985 + }, + { + "epoch": 0.7235121068114958, + "grad_norm": 0.5960812307850535, + "learning_rate": 1.874862185451608e-06, + "loss": 0.3023, + "step": 15986 + }, + { + "epoch": 0.7235573659198914, + "grad_norm": 0.6296054589179261, + "learning_rate": 1.8742900989164713e-06, + "loss": 0.2792, + "step": 15987 + }, + { + "epoch": 0.723602625028287, + "grad_norm": 0.9689884059413011, + "learning_rate": 1.8737180795432425e-06, + "loss": 0.3203, + "step": 15988 + }, + { + "epoch": 0.7236478841366825, + "grad_norm": 0.6232037537471353, + "learning_rate": 1.8731461273442097e-06, + "loss": 0.3106, + "step": 15989 + }, + { + "epoch": 0.723693143245078, + "grad_norm": 0.6087724636557703, + "learning_rate": 1.8725742423316623e-06, + "loss": 0.2677, + "step": 15990 + }, + { + "epoch": 0.7237384023534736, + "grad_norm": 0.6207701067127781, + "learning_rate": 1.872002424517891e-06, + "loss": 0.3131, + "step": 15991 + }, + { + "epoch": 0.7237836614618692, + "grad_norm": 0.6289892487218232, + "learning_rate": 1.8714306739151782e-06, + "loss": 0.2783, + "step": 15992 + }, + { + "epoch": 0.7238289205702647, + "grad_norm": 0.6020901857016302, + "learning_rate": 1.8708589905358138e-06, + "loss": 0.2943, + "step": 15993 + }, + { + "epoch": 0.7238741796786603, + "grad_norm": 0.6692875016527511, + "learning_rate": 1.8702873743920774e-06, + "loss": 0.3065, + "step": 15994 + }, + { + "epoch": 0.7239194387870559, + "grad_norm": 0.7282381530703047, + "learning_rate": 1.869715825496255e-06, + "loss": 0.3056, + "step": 15995 + }, + { + "epoch": 0.7239646978954515, + "grad_norm": 0.29601027516313794, + "learning_rate": 1.8691443438606239e-06, + "loss": 0.4801, + "step": 15996 + }, + { + "epoch": 0.7240099570038471, + "grad_norm": 0.6945105051331881, + "learning_rate": 1.8685729294974668e-06, + "loss": 0.3043, + "step": 15997 + }, + { + "epoch": 0.7240552161122426, + "grad_norm": 0.618491726853251, + "learning_rate": 1.86800158241906e-06, + "loss": 0.3075, + "step": 15998 + }, + { + "epoch": 0.7241004752206381, + "grad_norm": 0.6025137434539652, + "learning_rate": 1.8674303026376783e-06, + "loss": 0.2814, + "step": 15999 + }, + { + "epoch": 0.7241457343290337, + "grad_norm": 0.59573508665197, + "learning_rate": 1.866859090165598e-06, + "loss": 0.251, + "step": 16000 + }, + { + "epoch": 0.7241909934374293, + "grad_norm": 0.7315932204963735, + "learning_rate": 1.8662879450150956e-06, + "loss": 0.329, + "step": 16001 + }, + { + "epoch": 0.7242362525458248, + "grad_norm": 0.6784928963482864, + "learning_rate": 1.8657168671984404e-06, + "loss": 0.4074, + "step": 16002 + }, + { + "epoch": 0.7242815116542204, + "grad_norm": 0.6790960623785016, + "learning_rate": 1.8651458567279018e-06, + "loss": 0.2876, + "step": 16003 + }, + { + "epoch": 0.724326770762616, + "grad_norm": 0.2862915989405643, + "learning_rate": 1.8645749136157526e-06, + "loss": 0.4789, + "step": 16004 + }, + { + "epoch": 0.7243720298710116, + "grad_norm": 0.6123843036858047, + "learning_rate": 1.8640040378742585e-06, + "loss": 0.285, + "step": 16005 + }, + { + "epoch": 0.7244172889794072, + "grad_norm": 0.6121730728790465, + "learning_rate": 1.8634332295156848e-06, + "loss": 0.3159, + "step": 16006 + }, + { + "epoch": 0.7244625480878026, + "grad_norm": 0.5802479016860167, + "learning_rate": 1.8628624885522994e-06, + "loss": 0.2564, + "step": 16007 + }, + { + "epoch": 0.7245078071961982, + "grad_norm": 0.26500677475920026, + "learning_rate": 1.8622918149963626e-06, + "loss": 0.4929, + "step": 16008 + }, + { + "epoch": 0.7245530663045938, + "grad_norm": 0.6523572564394401, + "learning_rate": 1.8617212088601395e-06, + "loss": 0.3143, + "step": 16009 + }, + { + "epoch": 0.7245983254129894, + "grad_norm": 0.6026168399191, + "learning_rate": 1.8611506701558874e-06, + "loss": 0.2882, + "step": 16010 + }, + { + "epoch": 0.7246435845213849, + "grad_norm": 0.6382442474781357, + "learning_rate": 1.8605801988958688e-06, + "loss": 0.3491, + "step": 16011 + }, + { + "epoch": 0.7246888436297805, + "grad_norm": 0.26119721971741644, + "learning_rate": 1.8600097950923379e-06, + "loss": 0.4603, + "step": 16012 + }, + { + "epoch": 0.7247341027381761, + "grad_norm": 0.6042488032958495, + "learning_rate": 1.8594394587575548e-06, + "loss": 0.3025, + "step": 16013 + }, + { + "epoch": 0.7247793618465717, + "grad_norm": 0.5995169075539502, + "learning_rate": 1.858869189903772e-06, + "loss": 0.2698, + "step": 16014 + }, + { + "epoch": 0.7248246209549671, + "grad_norm": 0.5959838329113539, + "learning_rate": 1.8582989885432412e-06, + "loss": 0.313, + "step": 16015 + }, + { + "epoch": 0.7248698800633627, + "grad_norm": 0.6811871875801533, + "learning_rate": 1.8577288546882167e-06, + "loss": 0.3019, + "step": 16016 + }, + { + "epoch": 0.7249151391717583, + "grad_norm": 0.598166448273423, + "learning_rate": 1.8571587883509495e-06, + "loss": 0.2613, + "step": 16017 + }, + { + "epoch": 0.7249603982801539, + "grad_norm": 0.6119582544966328, + "learning_rate": 1.8565887895436874e-06, + "loss": 0.2858, + "step": 16018 + }, + { + "epoch": 0.7250056573885495, + "grad_norm": 0.6892323236298378, + "learning_rate": 1.856018858278677e-06, + "loss": 0.3108, + "step": 16019 + }, + { + "epoch": 0.725050916496945, + "grad_norm": 0.5374278359807803, + "learning_rate": 1.8554489945681663e-06, + "loss": 0.2651, + "step": 16020 + }, + { + "epoch": 0.7250961756053406, + "grad_norm": 0.6362418049415203, + "learning_rate": 1.8548791984243975e-06, + "loss": 0.2739, + "step": 16021 + }, + { + "epoch": 0.7251414347137362, + "grad_norm": 0.6069602912118967, + "learning_rate": 1.854309469859617e-06, + "loss": 0.2955, + "step": 16022 + }, + { + "epoch": 0.7251866938221317, + "grad_norm": 0.7050148269586889, + "learning_rate": 1.853739808886063e-06, + "loss": 0.3109, + "step": 16023 + }, + { + "epoch": 0.7252319529305272, + "grad_norm": 1.9981279830274465, + "learning_rate": 1.8531702155159792e-06, + "loss": 0.3129, + "step": 16024 + }, + { + "epoch": 0.7252772120389228, + "grad_norm": 0.7003608172407041, + "learning_rate": 1.8526006897616011e-06, + "loss": 0.3675, + "step": 16025 + }, + { + "epoch": 0.7253224711473184, + "grad_norm": 0.6301314495559056, + "learning_rate": 1.8520312316351692e-06, + "loss": 0.272, + "step": 16026 + }, + { + "epoch": 0.725367730255714, + "grad_norm": 0.6483469398299987, + "learning_rate": 1.8514618411489176e-06, + "loss": 0.2866, + "step": 16027 + }, + { + "epoch": 0.7254129893641095, + "grad_norm": 0.6696378094528813, + "learning_rate": 1.85089251831508e-06, + "loss": 0.3393, + "step": 16028 + }, + { + "epoch": 0.7254582484725051, + "grad_norm": 0.5718861514539335, + "learning_rate": 1.85032326314589e-06, + "loss": 0.2847, + "step": 16029 + }, + { + "epoch": 0.7255035075809007, + "grad_norm": 0.6306886641099659, + "learning_rate": 1.8497540756535814e-06, + "loss": 0.3192, + "step": 16030 + }, + { + "epoch": 0.7255487666892962, + "grad_norm": 0.6723825164117041, + "learning_rate": 1.8491849558503827e-06, + "loss": 0.2775, + "step": 16031 + }, + { + "epoch": 0.7255940257976918, + "grad_norm": 0.589946999511413, + "learning_rate": 1.8486159037485202e-06, + "loss": 0.3138, + "step": 16032 + }, + { + "epoch": 0.7256392849060873, + "grad_norm": 0.6564239072718545, + "learning_rate": 1.848046919360225e-06, + "loss": 0.2836, + "step": 16033 + }, + { + "epoch": 0.7256845440144829, + "grad_norm": 0.30486315624182836, + "learning_rate": 1.8474780026977196e-06, + "loss": 0.4662, + "step": 16034 + }, + { + "epoch": 0.7257298031228785, + "grad_norm": 0.28477761396096907, + "learning_rate": 1.8469091537732315e-06, + "loss": 0.4488, + "step": 16035 + }, + { + "epoch": 0.7257750622312741, + "grad_norm": 0.6452670364264114, + "learning_rate": 1.846340372598981e-06, + "loss": 0.2711, + "step": 16036 + }, + { + "epoch": 0.7258203213396696, + "grad_norm": 0.6356012564902356, + "learning_rate": 1.8457716591871887e-06, + "loss": 0.2993, + "step": 16037 + }, + { + "epoch": 0.7258655804480652, + "grad_norm": 0.7041584132129752, + "learning_rate": 1.8452030135500765e-06, + "loss": 0.3009, + "step": 16038 + }, + { + "epoch": 0.7259108395564607, + "grad_norm": 0.606881481776386, + "learning_rate": 1.8446344356998635e-06, + "loss": 0.2898, + "step": 16039 + }, + { + "epoch": 0.7259560986648563, + "grad_norm": 0.6058541708204863, + "learning_rate": 1.8440659256487658e-06, + "loss": 0.2707, + "step": 16040 + }, + { + "epoch": 0.7260013577732519, + "grad_norm": 0.6760264236093753, + "learning_rate": 1.843497483408997e-06, + "loss": 0.2898, + "step": 16041 + }, + { + "epoch": 0.7260466168816474, + "grad_norm": 0.28486960915330306, + "learning_rate": 1.8429291089927742e-06, + "loss": 0.4867, + "step": 16042 + }, + { + "epoch": 0.726091875990043, + "grad_norm": 0.6270785078332332, + "learning_rate": 1.8423608024123086e-06, + "loss": 0.3212, + "step": 16043 + }, + { + "epoch": 0.7261371350984386, + "grad_norm": 0.560317095729978, + "learning_rate": 1.8417925636798101e-06, + "loss": 0.2915, + "step": 16044 + }, + { + "epoch": 0.7261823942068342, + "grad_norm": 0.9136672675320224, + "learning_rate": 1.8412243928074897e-06, + "loss": 0.3181, + "step": 16045 + }, + { + "epoch": 0.7262276533152296, + "grad_norm": 0.6204588826654109, + "learning_rate": 1.840656289807557e-06, + "loss": 0.2882, + "step": 16046 + }, + { + "epoch": 0.7262729124236252, + "grad_norm": 0.620459957704693, + "learning_rate": 1.8400882546922177e-06, + "loss": 0.306, + "step": 16047 + }, + { + "epoch": 0.7263181715320208, + "grad_norm": 0.6024461681337429, + "learning_rate": 1.8395202874736752e-06, + "loss": 0.3312, + "step": 16048 + }, + { + "epoch": 0.7263634306404164, + "grad_norm": 0.5795091380255545, + "learning_rate": 1.8389523881641363e-06, + "loss": 0.299, + "step": 16049 + }, + { + "epoch": 0.7264086897488119, + "grad_norm": 0.3210511536301892, + "learning_rate": 1.8383845567758008e-06, + "loss": 0.5161, + "step": 16050 + }, + { + "epoch": 0.7264539488572075, + "grad_norm": 0.5599891840744048, + "learning_rate": 1.8378167933208729e-06, + "loss": 0.2658, + "step": 16051 + }, + { + "epoch": 0.7264992079656031, + "grad_norm": 0.6129156924252082, + "learning_rate": 1.837249097811548e-06, + "loss": 0.2641, + "step": 16052 + }, + { + "epoch": 0.7265444670739987, + "grad_norm": 0.7396036670714075, + "learning_rate": 1.8366814702600288e-06, + "loss": 0.3307, + "step": 16053 + }, + { + "epoch": 0.7265897261823943, + "grad_norm": 0.30804189245711033, + "learning_rate": 1.836113910678507e-06, + "loss": 0.4751, + "step": 16054 + }, + { + "epoch": 0.7266349852907897, + "grad_norm": 0.6450496615316457, + "learning_rate": 1.835546419079182e-06, + "loss": 0.3435, + "step": 16055 + }, + { + "epoch": 0.7266802443991853, + "grad_norm": 0.2796091456412559, + "learning_rate": 1.8349789954742459e-06, + "loss": 0.4613, + "step": 16056 + }, + { + "epoch": 0.7267255035075809, + "grad_norm": 0.64890080958883, + "learning_rate": 1.8344116398758888e-06, + "loss": 0.3306, + "step": 16057 + }, + { + "epoch": 0.7267707626159765, + "grad_norm": 0.575461620410372, + "learning_rate": 1.8338443522963028e-06, + "loss": 0.3081, + "step": 16058 + }, + { + "epoch": 0.726816021724372, + "grad_norm": 0.2883379797548653, + "learning_rate": 1.8332771327476795e-06, + "loss": 0.4966, + "step": 16059 + }, + { + "epoch": 0.7268612808327676, + "grad_norm": 0.5764107107006161, + "learning_rate": 1.832709981242205e-06, + "loss": 0.3059, + "step": 16060 + }, + { + "epoch": 0.7269065399411632, + "grad_norm": 0.6613462650278219, + "learning_rate": 1.8321428977920635e-06, + "loss": 0.3088, + "step": 16061 + }, + { + "epoch": 0.7269517990495588, + "grad_norm": 0.5971963076408902, + "learning_rate": 1.8315758824094432e-06, + "loss": 0.2816, + "step": 16062 + }, + { + "epoch": 0.7269970581579542, + "grad_norm": 0.6458655637611862, + "learning_rate": 1.8310089351065246e-06, + "loss": 0.3588, + "step": 16063 + }, + { + "epoch": 0.7270423172663498, + "grad_norm": 0.6029614831205884, + "learning_rate": 1.8304420558954933e-06, + "loss": 0.2893, + "step": 16064 + }, + { + "epoch": 0.7270875763747454, + "grad_norm": 0.6523860978116632, + "learning_rate": 1.8298752447885254e-06, + "loss": 0.3295, + "step": 16065 + }, + { + "epoch": 0.727132835483141, + "grad_norm": 0.6085824327664999, + "learning_rate": 1.829308501797804e-06, + "loss": 0.3268, + "step": 16066 + }, + { + "epoch": 0.7271780945915366, + "grad_norm": 0.5839321712913418, + "learning_rate": 1.8287418269355035e-06, + "loss": 0.3163, + "step": 16067 + }, + { + "epoch": 0.7272233536999321, + "grad_norm": 0.5934730562217898, + "learning_rate": 1.8281752202138032e-06, + "loss": 0.2845, + "step": 16068 + }, + { + "epoch": 0.7272686128083277, + "grad_norm": 0.5862163177911442, + "learning_rate": 1.8276086816448751e-06, + "loss": 0.3397, + "step": 16069 + }, + { + "epoch": 0.7273138719167233, + "grad_norm": 0.6138676841077318, + "learning_rate": 1.8270422112408919e-06, + "loss": 0.3131, + "step": 16070 + }, + { + "epoch": 0.7273591310251188, + "grad_norm": 0.6599671578492269, + "learning_rate": 1.8264758090140267e-06, + "loss": 0.2898, + "step": 16071 + }, + { + "epoch": 0.7274043901335143, + "grad_norm": 0.6756821861159864, + "learning_rate": 1.8259094749764532e-06, + "loss": 0.2862, + "step": 16072 + }, + { + "epoch": 0.7274496492419099, + "grad_norm": 0.5798011713799885, + "learning_rate": 1.8253432091403329e-06, + "loss": 0.2819, + "step": 16073 + }, + { + "epoch": 0.7274949083503055, + "grad_norm": 0.5905087514687498, + "learning_rate": 1.824777011517837e-06, + "loss": 0.3195, + "step": 16074 + }, + { + "epoch": 0.7275401674587011, + "grad_norm": 0.6026175779356564, + "learning_rate": 1.8242108821211324e-06, + "loss": 0.259, + "step": 16075 + }, + { + "epoch": 0.7275854265670967, + "grad_norm": 0.2949314560163198, + "learning_rate": 1.8236448209623825e-06, + "loss": 0.4743, + "step": 16076 + }, + { + "epoch": 0.7276306856754922, + "grad_norm": 0.28458791237527803, + "learning_rate": 1.8230788280537487e-06, + "loss": 0.4569, + "step": 16077 + }, + { + "epoch": 0.7276759447838878, + "grad_norm": 0.5643217470901566, + "learning_rate": 1.8225129034073951e-06, + "loss": 0.2971, + "step": 16078 + }, + { + "epoch": 0.7277212038922833, + "grad_norm": 0.9389791664087129, + "learning_rate": 1.8219470470354784e-06, + "loss": 0.3062, + "step": 16079 + }, + { + "epoch": 0.7277664630006789, + "grad_norm": 0.6886383299953388, + "learning_rate": 1.8213812589501611e-06, + "loss": 0.2948, + "step": 16080 + }, + { + "epoch": 0.7278117221090744, + "grad_norm": 0.5850857135648151, + "learning_rate": 1.8208155391635963e-06, + "loss": 0.2632, + "step": 16081 + }, + { + "epoch": 0.72785698121747, + "grad_norm": 0.6042575374580038, + "learning_rate": 1.8202498876879432e-06, + "loss": 0.3238, + "step": 16082 + }, + { + "epoch": 0.7279022403258656, + "grad_norm": 0.6928404697901881, + "learning_rate": 1.8196843045353519e-06, + "loss": 0.2896, + "step": 16083 + }, + { + "epoch": 0.7279474994342612, + "grad_norm": 0.5341234808814107, + "learning_rate": 1.8191187897179796e-06, + "loss": 0.2536, + "step": 16084 + }, + { + "epoch": 0.7279927585426567, + "grad_norm": 0.7459069229430887, + "learning_rate": 1.8185533432479751e-06, + "loss": 0.3156, + "step": 16085 + }, + { + "epoch": 0.7280380176510522, + "grad_norm": 0.6091769615205567, + "learning_rate": 1.8179879651374866e-06, + "loss": 0.3109, + "step": 16086 + }, + { + "epoch": 0.7280832767594478, + "grad_norm": 0.7380141827131348, + "learning_rate": 1.8174226553986635e-06, + "loss": 0.3079, + "step": 16087 + }, + { + "epoch": 0.7281285358678434, + "grad_norm": 0.714612464179002, + "learning_rate": 1.816857414043655e-06, + "loss": 0.3053, + "step": 16088 + }, + { + "epoch": 0.728173794976239, + "grad_norm": 0.2860785773149628, + "learning_rate": 1.8162922410846046e-06, + "loss": 0.4745, + "step": 16089 + }, + { + "epoch": 0.7282190540846345, + "grad_norm": 0.6039689558987191, + "learning_rate": 1.8157271365336536e-06, + "loss": 0.303, + "step": 16090 + }, + { + "epoch": 0.7282643131930301, + "grad_norm": 0.6083549302228266, + "learning_rate": 1.815162100402949e-06, + "loss": 0.2985, + "step": 16091 + }, + { + "epoch": 0.7283095723014257, + "grad_norm": 0.2660101982454626, + "learning_rate": 1.8145971327046274e-06, + "loss": 0.465, + "step": 16092 + }, + { + "epoch": 0.7283548314098213, + "grad_norm": 0.6313514296218542, + "learning_rate": 1.814032233450832e-06, + "loss": 0.2818, + "step": 16093 + }, + { + "epoch": 0.7284000905182167, + "grad_norm": 0.6567990816362825, + "learning_rate": 1.8134674026536968e-06, + "loss": 0.3437, + "step": 16094 + }, + { + "epoch": 0.7284453496266123, + "grad_norm": 0.5475313855383052, + "learning_rate": 1.8129026403253624e-06, + "loss": 0.2579, + "step": 16095 + }, + { + "epoch": 0.7284906087350079, + "grad_norm": 0.5950986141428766, + "learning_rate": 1.8123379464779606e-06, + "loss": 0.3002, + "step": 16096 + }, + { + "epoch": 0.7285358678434035, + "grad_norm": 0.27592219794883777, + "learning_rate": 1.8117733211236277e-06, + "loss": 0.4713, + "step": 16097 + }, + { + "epoch": 0.728581126951799, + "grad_norm": 0.6526013033225897, + "learning_rate": 1.811208764274494e-06, + "loss": 0.3383, + "step": 16098 + }, + { + "epoch": 0.7286263860601946, + "grad_norm": 0.3223904043897807, + "learning_rate": 1.8106442759426884e-06, + "loss": 0.4811, + "step": 16099 + }, + { + "epoch": 0.7286716451685902, + "grad_norm": 0.5550383719292121, + "learning_rate": 1.8100798561403426e-06, + "loss": 0.2741, + "step": 16100 + }, + { + "epoch": 0.7287169042769858, + "grad_norm": 0.6392507722711632, + "learning_rate": 1.8095155048795865e-06, + "loss": 0.2984, + "step": 16101 + }, + { + "epoch": 0.7287621633853814, + "grad_norm": 0.2879523619249691, + "learning_rate": 1.8089512221725402e-06, + "loss": 0.4739, + "step": 16102 + }, + { + "epoch": 0.7288074224937768, + "grad_norm": 0.6498409838546698, + "learning_rate": 1.8083870080313315e-06, + "loss": 0.2789, + "step": 16103 + }, + { + "epoch": 0.7288526816021724, + "grad_norm": 0.6382210882940718, + "learning_rate": 1.8078228624680854e-06, + "loss": 0.3114, + "step": 16104 + }, + { + "epoch": 0.728897940710568, + "grad_norm": 0.6468102133095937, + "learning_rate": 1.807258785494922e-06, + "loss": 0.277, + "step": 16105 + }, + { + "epoch": 0.7289431998189636, + "grad_norm": 0.6125188426779778, + "learning_rate": 1.8066947771239597e-06, + "loss": 0.2999, + "step": 16106 + }, + { + "epoch": 0.7289884589273591, + "grad_norm": 0.6198896839628587, + "learning_rate": 1.8061308373673208e-06, + "loss": 0.2838, + "step": 16107 + }, + { + "epoch": 0.7290337180357547, + "grad_norm": 0.28230401094755525, + "learning_rate": 1.8055669662371194e-06, + "loss": 0.492, + "step": 16108 + }, + { + "epoch": 0.7290789771441503, + "grad_norm": 0.6089025119666203, + "learning_rate": 1.8050031637454746e-06, + "loss": 0.286, + "step": 16109 + }, + { + "epoch": 0.7291242362525459, + "grad_norm": 0.6294693049743567, + "learning_rate": 1.8044394299044976e-06, + "loss": 0.2955, + "step": 16110 + }, + { + "epoch": 0.7291694953609414, + "grad_norm": 0.7226863549811137, + "learning_rate": 1.8038757647263045e-06, + "loss": 0.2899, + "step": 16111 + }, + { + "epoch": 0.7292147544693369, + "grad_norm": 0.31058843361683514, + "learning_rate": 1.803312168223003e-06, + "loss": 0.4737, + "step": 16112 + }, + { + "epoch": 0.7292600135777325, + "grad_norm": 0.6455846866744789, + "learning_rate": 1.8027486404067075e-06, + "loss": 0.3407, + "step": 16113 + }, + { + "epoch": 0.7293052726861281, + "grad_norm": 0.5926280243786574, + "learning_rate": 1.8021851812895235e-06, + "loss": 0.2939, + "step": 16114 + }, + { + "epoch": 0.7293505317945237, + "grad_norm": 0.6670643403778769, + "learning_rate": 1.8016217908835575e-06, + "loss": 0.269, + "step": 16115 + }, + { + "epoch": 0.7293957909029192, + "grad_norm": 0.5872139058201896, + "learning_rate": 1.8010584692009158e-06, + "loss": 0.316, + "step": 16116 + }, + { + "epoch": 0.7294410500113148, + "grad_norm": 0.6360687494862899, + "learning_rate": 1.8004952162537043e-06, + "loss": 0.3087, + "step": 16117 + }, + { + "epoch": 0.7294863091197104, + "grad_norm": 0.27153954274586384, + "learning_rate": 1.7999320320540242e-06, + "loss": 0.4696, + "step": 16118 + }, + { + "epoch": 0.7295315682281059, + "grad_norm": 0.7052951096372572, + "learning_rate": 1.799368916613975e-06, + "loss": 0.3101, + "step": 16119 + }, + { + "epoch": 0.7295768273365014, + "grad_norm": 0.7958306372804045, + "learning_rate": 1.7988058699456596e-06, + "loss": 0.3768, + "step": 16120 + }, + { + "epoch": 0.729622086444897, + "grad_norm": 0.5730055696897941, + "learning_rate": 1.7982428920611722e-06, + "loss": 0.2838, + "step": 16121 + }, + { + "epoch": 0.7296673455532926, + "grad_norm": 0.5679127683681903, + "learning_rate": 1.7976799829726138e-06, + "loss": 0.2892, + "step": 16122 + }, + { + "epoch": 0.7297126046616882, + "grad_norm": 0.6729493635220757, + "learning_rate": 1.7971171426920753e-06, + "loss": 0.2859, + "step": 16123 + }, + { + "epoch": 0.7297578637700838, + "grad_norm": 0.6069928623620314, + "learning_rate": 1.796554371231654e-06, + "loss": 0.3034, + "step": 16124 + }, + { + "epoch": 0.7298031228784793, + "grad_norm": 0.5807258567124326, + "learning_rate": 1.7959916686034395e-06, + "loss": 0.2854, + "step": 16125 + }, + { + "epoch": 0.7298483819868749, + "grad_norm": 0.8648799347898458, + "learning_rate": 1.7954290348195248e-06, + "loss": 0.257, + "step": 16126 + }, + { + "epoch": 0.7298936410952704, + "grad_norm": 0.620623586958228, + "learning_rate": 1.7948664698919987e-06, + "loss": 0.3324, + "step": 16127 + }, + { + "epoch": 0.729938900203666, + "grad_norm": 0.6149741888961061, + "learning_rate": 1.794303973832946e-06, + "loss": 0.2806, + "step": 16128 + }, + { + "epoch": 0.7299841593120615, + "grad_norm": 0.627170473824644, + "learning_rate": 1.7937415466544556e-06, + "loss": 0.3367, + "step": 16129 + }, + { + "epoch": 0.7300294184204571, + "grad_norm": 0.6527986768715455, + "learning_rate": 1.7931791883686155e-06, + "loss": 0.2592, + "step": 16130 + }, + { + "epoch": 0.7300746775288527, + "grad_norm": 0.6122081032264333, + "learning_rate": 1.7926168989875027e-06, + "loss": 0.3038, + "step": 16131 + }, + { + "epoch": 0.7301199366372483, + "grad_norm": 0.8831182658752256, + "learning_rate": 1.7920546785232013e-06, + "loss": 0.3087, + "step": 16132 + }, + { + "epoch": 0.7301651957456438, + "grad_norm": 1.3518827731634866, + "learning_rate": 1.7914925269877947e-06, + "loss": 0.2888, + "step": 16133 + }, + { + "epoch": 0.7302104548540393, + "grad_norm": 0.6225243673416847, + "learning_rate": 1.790930444393359e-06, + "loss": 0.2892, + "step": 16134 + }, + { + "epoch": 0.7302557139624349, + "grad_norm": 0.8839277255860258, + "learning_rate": 1.790368430751971e-06, + "loss": 0.284, + "step": 16135 + }, + { + "epoch": 0.7303009730708305, + "grad_norm": 0.6256720305902387, + "learning_rate": 1.789806486075707e-06, + "loss": 0.2914, + "step": 16136 + }, + { + "epoch": 0.7303462321792261, + "grad_norm": 0.929858129804423, + "learning_rate": 1.7892446103766448e-06, + "loss": 0.2669, + "step": 16137 + }, + { + "epoch": 0.7303914912876216, + "grad_norm": 0.613979034701072, + "learning_rate": 1.7886828036668541e-06, + "loss": 0.2845, + "step": 16138 + }, + { + "epoch": 0.7304367503960172, + "grad_norm": 0.7011755943506092, + "learning_rate": 1.7881210659584059e-06, + "loss": 0.3454, + "step": 16139 + }, + { + "epoch": 0.7304820095044128, + "grad_norm": 0.28666942994097444, + "learning_rate": 1.787559397263373e-06, + "loss": 0.4831, + "step": 16140 + }, + { + "epoch": 0.7305272686128084, + "grad_norm": 0.5808081092214722, + "learning_rate": 1.7869977975938207e-06, + "loss": 0.3135, + "step": 16141 + }, + { + "epoch": 0.7305725277212038, + "grad_norm": 0.6514388854146006, + "learning_rate": 1.7864362669618197e-06, + "loss": 0.2843, + "step": 16142 + }, + { + "epoch": 0.7306177868295994, + "grad_norm": 0.27221879864624515, + "learning_rate": 1.7858748053794334e-06, + "loss": 0.45, + "step": 16143 + }, + { + "epoch": 0.730663045937995, + "grad_norm": 0.6348420393974701, + "learning_rate": 1.7853134128587246e-06, + "loss": 0.2486, + "step": 16144 + }, + { + "epoch": 0.7307083050463906, + "grad_norm": 0.6297590527294329, + "learning_rate": 1.7847520894117571e-06, + "loss": 0.2917, + "step": 16145 + }, + { + "epoch": 0.7307535641547862, + "grad_norm": 0.6166517237990424, + "learning_rate": 1.7841908350505938e-06, + "loss": 0.247, + "step": 16146 + }, + { + "epoch": 0.7307988232631817, + "grad_norm": 0.6476230729976626, + "learning_rate": 1.7836296497872934e-06, + "loss": 0.3184, + "step": 16147 + }, + { + "epoch": 0.7308440823715773, + "grad_norm": 0.6107349964845618, + "learning_rate": 1.7830685336339114e-06, + "loss": 0.3087, + "step": 16148 + }, + { + "epoch": 0.7308893414799729, + "grad_norm": 0.6243457657114342, + "learning_rate": 1.7825074866025089e-06, + "loss": 0.3058, + "step": 16149 + }, + { + "epoch": 0.7309346005883685, + "grad_norm": 0.609189981925863, + "learning_rate": 1.7819465087051363e-06, + "loss": 0.2918, + "step": 16150 + }, + { + "epoch": 0.7309798596967639, + "grad_norm": 0.7824741621821414, + "learning_rate": 1.7813855999538516e-06, + "loss": 0.2945, + "step": 16151 + }, + { + "epoch": 0.7310251188051595, + "grad_norm": 0.6048509705744186, + "learning_rate": 1.7808247603607037e-06, + "loss": 0.3111, + "step": 16152 + }, + { + "epoch": 0.7310703779135551, + "grad_norm": 0.6128880269982051, + "learning_rate": 1.780263989937746e-06, + "loss": 0.3529, + "step": 16153 + }, + { + "epoch": 0.7311156370219507, + "grad_norm": 0.2896886702152505, + "learning_rate": 1.7797032886970255e-06, + "loss": 0.4761, + "step": 16154 + }, + { + "epoch": 0.7311608961303462, + "grad_norm": 0.5650019294174237, + "learning_rate": 1.779142656650592e-06, + "loss": 0.2465, + "step": 16155 + }, + { + "epoch": 0.7312061552387418, + "grad_norm": 0.6835006757504737, + "learning_rate": 1.7785820938104908e-06, + "loss": 0.2917, + "step": 16156 + }, + { + "epoch": 0.7312514143471374, + "grad_norm": 0.6490444548301334, + "learning_rate": 1.778021600188765e-06, + "loss": 0.2823, + "step": 16157 + }, + { + "epoch": 0.731296673455533, + "grad_norm": 0.5865118446140698, + "learning_rate": 1.7774611757974597e-06, + "loss": 0.3177, + "step": 16158 + }, + { + "epoch": 0.7313419325639285, + "grad_norm": 0.5935785923704926, + "learning_rate": 1.7769008206486198e-06, + "loss": 0.3035, + "step": 16159 + }, + { + "epoch": 0.731387191672324, + "grad_norm": 0.6843858349341361, + "learning_rate": 1.7763405347542783e-06, + "loss": 0.2603, + "step": 16160 + }, + { + "epoch": 0.7314324507807196, + "grad_norm": 0.6532083072580005, + "learning_rate": 1.7757803181264787e-06, + "loss": 0.3195, + "step": 16161 + }, + { + "epoch": 0.7314777098891152, + "grad_norm": 0.6233717325091536, + "learning_rate": 1.7752201707772593e-06, + "loss": 0.2925, + "step": 16162 + }, + { + "epoch": 0.7315229689975108, + "grad_norm": 1.1668439049554575, + "learning_rate": 1.7746600927186537e-06, + "loss": 0.3321, + "step": 16163 + }, + { + "epoch": 0.7315682281059063, + "grad_norm": 0.5655046089812515, + "learning_rate": 1.7741000839626954e-06, + "loss": 0.289, + "step": 16164 + }, + { + "epoch": 0.7316134872143019, + "grad_norm": 0.5983977680445043, + "learning_rate": 1.773540144521419e-06, + "loss": 0.2901, + "step": 16165 + }, + { + "epoch": 0.7316587463226975, + "grad_norm": 0.5956927504390058, + "learning_rate": 1.7729802744068568e-06, + "loss": 0.3212, + "step": 16166 + }, + { + "epoch": 0.731704005431093, + "grad_norm": 0.6098619134893986, + "learning_rate": 1.772420473631038e-06, + "loss": 0.2913, + "step": 16167 + }, + { + "epoch": 0.7317492645394885, + "grad_norm": 0.5879445773626746, + "learning_rate": 1.771860742205988e-06, + "loss": 0.3132, + "step": 16168 + }, + { + "epoch": 0.7317945236478841, + "grad_norm": 0.5972963244648661, + "learning_rate": 1.7713010801437385e-06, + "loss": 0.3341, + "step": 16169 + }, + { + "epoch": 0.7318397827562797, + "grad_norm": 0.6732092232271325, + "learning_rate": 1.7707414874563105e-06, + "loss": 0.3067, + "step": 16170 + }, + { + "epoch": 0.7318850418646753, + "grad_norm": 0.5417037764108821, + "learning_rate": 1.7701819641557321e-06, + "loss": 0.2702, + "step": 16171 + }, + { + "epoch": 0.7319303009730709, + "grad_norm": 0.31958280675188766, + "learning_rate": 1.7696225102540238e-06, + "loss": 0.482, + "step": 16172 + }, + { + "epoch": 0.7319755600814664, + "grad_norm": 0.5808850557217945, + "learning_rate": 1.769063125763204e-06, + "loss": 0.2677, + "step": 16173 + }, + { + "epoch": 0.732020819189862, + "grad_norm": 0.32794153591687286, + "learning_rate": 1.7685038106952952e-06, + "loss": 0.4763, + "step": 16174 + }, + { + "epoch": 0.7320660782982575, + "grad_norm": 0.27041347084527767, + "learning_rate": 1.7679445650623162e-06, + "loss": 0.463, + "step": 16175 + }, + { + "epoch": 0.7321113374066531, + "grad_norm": 0.6774948881437475, + "learning_rate": 1.767385388876282e-06, + "loss": 0.2688, + "step": 16176 + }, + { + "epoch": 0.7321565965150486, + "grad_norm": 0.6307995198758111, + "learning_rate": 1.7668262821492061e-06, + "loss": 0.3656, + "step": 16177 + }, + { + "epoch": 0.7322018556234442, + "grad_norm": 0.645933753279425, + "learning_rate": 1.7662672448931045e-06, + "loss": 0.2587, + "step": 16178 + }, + { + "epoch": 0.7322471147318398, + "grad_norm": 0.6571171710246331, + "learning_rate": 1.7657082771199875e-06, + "loss": 0.3034, + "step": 16179 + }, + { + "epoch": 0.7322923738402354, + "grad_norm": 0.27135675583410107, + "learning_rate": 1.7651493788418671e-06, + "loss": 0.4493, + "step": 16180 + }, + { + "epoch": 0.7323376329486309, + "grad_norm": 0.6301878641061803, + "learning_rate": 1.76459055007075e-06, + "loss": 0.2961, + "step": 16181 + }, + { + "epoch": 0.7323828920570264, + "grad_norm": 0.5330125153875653, + "learning_rate": 1.7640317908186466e-06, + "loss": 0.2704, + "step": 16182 + }, + { + "epoch": 0.732428151165422, + "grad_norm": 0.6052744126769056, + "learning_rate": 1.7634731010975603e-06, + "loss": 0.3365, + "step": 16183 + }, + { + "epoch": 0.7324734102738176, + "grad_norm": 0.7142932836569383, + "learning_rate": 1.7629144809194982e-06, + "loss": 0.2946, + "step": 16184 + }, + { + "epoch": 0.7325186693822132, + "grad_norm": 0.637528726658327, + "learning_rate": 1.762355930296462e-06, + "loss": 0.3449, + "step": 16185 + }, + { + "epoch": 0.7325639284906087, + "grad_norm": 0.6385320594102418, + "learning_rate": 1.7617974492404517e-06, + "loss": 0.3179, + "step": 16186 + }, + { + "epoch": 0.7326091875990043, + "grad_norm": 0.3100847434791019, + "learning_rate": 1.7612390377634685e-06, + "loss": 0.487, + "step": 16187 + }, + { + "epoch": 0.7326544467073999, + "grad_norm": 0.6732555146506458, + "learning_rate": 1.7606806958775135e-06, + "loss": 0.3023, + "step": 16188 + }, + { + "epoch": 0.7326997058157955, + "grad_norm": 1.0851260304440182, + "learning_rate": 1.7601224235945814e-06, + "loss": 0.2677, + "step": 16189 + }, + { + "epoch": 0.732744964924191, + "grad_norm": 0.5790100028298947, + "learning_rate": 1.7595642209266656e-06, + "loss": 0.358, + "step": 16190 + }, + { + "epoch": 0.7327902240325865, + "grad_norm": 0.5527251623525239, + "learning_rate": 1.7590060878857646e-06, + "loss": 0.2689, + "step": 16191 + }, + { + "epoch": 0.7328354831409821, + "grad_norm": 0.31178955263418023, + "learning_rate": 1.7584480244838687e-06, + "loss": 0.4709, + "step": 16192 + }, + { + "epoch": 0.7328807422493777, + "grad_norm": 0.645658883416608, + "learning_rate": 1.7578900307329677e-06, + "loss": 0.3121, + "step": 16193 + }, + { + "epoch": 0.7329260013577733, + "grad_norm": 0.6602342215994514, + "learning_rate": 1.7573321066450521e-06, + "loss": 0.32, + "step": 16194 + }, + { + "epoch": 0.7329712604661688, + "grad_norm": 0.28188324345028737, + "learning_rate": 1.7567742522321125e-06, + "loss": 0.4472, + "step": 16195 + }, + { + "epoch": 0.7330165195745644, + "grad_norm": 0.5970984254245914, + "learning_rate": 1.7562164675061332e-06, + "loss": 0.3067, + "step": 16196 + }, + { + "epoch": 0.73306177868296, + "grad_norm": 0.5885991002760158, + "learning_rate": 1.755658752479098e-06, + "loss": 0.3314, + "step": 16197 + }, + { + "epoch": 0.7331070377913556, + "grad_norm": 0.27455054719754596, + "learning_rate": 1.7551011071629937e-06, + "loss": 0.4673, + "step": 16198 + }, + { + "epoch": 0.733152296899751, + "grad_norm": 0.564345589903344, + "learning_rate": 1.7545435315697984e-06, + "loss": 0.2751, + "step": 16199 + }, + { + "epoch": 0.7331975560081466, + "grad_norm": 0.5819176536099305, + "learning_rate": 1.7539860257114972e-06, + "loss": 0.2864, + "step": 16200 + }, + { + "epoch": 0.7332428151165422, + "grad_norm": 0.6644071129619711, + "learning_rate": 1.7534285896000668e-06, + "loss": 0.344, + "step": 16201 + }, + { + "epoch": 0.7332880742249378, + "grad_norm": 0.9543976113779502, + "learning_rate": 1.7528712232474832e-06, + "loss": 0.302, + "step": 16202 + }, + { + "epoch": 0.7333333333333333, + "grad_norm": 0.6011566578844129, + "learning_rate": 1.7523139266657241e-06, + "loss": 0.2844, + "step": 16203 + }, + { + "epoch": 0.7333785924417289, + "grad_norm": 0.6951890360401429, + "learning_rate": 1.7517566998667661e-06, + "loss": 0.2943, + "step": 16204 + }, + { + "epoch": 0.7334238515501245, + "grad_norm": 0.2817462091680761, + "learning_rate": 1.7511995428625805e-06, + "loss": 0.4641, + "step": 16205 + }, + { + "epoch": 0.73346911065852, + "grad_norm": 0.28614243110327414, + "learning_rate": 1.7506424556651368e-06, + "loss": 0.4649, + "step": 16206 + }, + { + "epoch": 0.7335143697669156, + "grad_norm": 0.2573959389343955, + "learning_rate": 1.7500854382864073e-06, + "loss": 0.4686, + "step": 16207 + }, + { + "epoch": 0.7335596288753111, + "grad_norm": 0.6144018276945281, + "learning_rate": 1.749528490738362e-06, + "loss": 0.3207, + "step": 16208 + }, + { + "epoch": 0.7336048879837067, + "grad_norm": 0.5710859253017799, + "learning_rate": 1.7489716130329665e-06, + "loss": 0.2789, + "step": 16209 + }, + { + "epoch": 0.7336501470921023, + "grad_norm": 0.6540083390363107, + "learning_rate": 1.7484148051821842e-06, + "loss": 0.2682, + "step": 16210 + }, + { + "epoch": 0.7336954062004979, + "grad_norm": 0.6419187298562828, + "learning_rate": 1.7478580671979834e-06, + "loss": 0.307, + "step": 16211 + }, + { + "epoch": 0.7337406653088934, + "grad_norm": 0.552366230549885, + "learning_rate": 1.7473013990923226e-06, + "loss": 0.2631, + "step": 16212 + }, + { + "epoch": 0.733785924417289, + "grad_norm": 0.6651943539308403, + "learning_rate": 1.7467448008771664e-06, + "loss": 0.2808, + "step": 16213 + }, + { + "epoch": 0.7338311835256845, + "grad_norm": 0.6485848113037442, + "learning_rate": 1.746188272564473e-06, + "loss": 0.3085, + "step": 16214 + }, + { + "epoch": 0.7338764426340801, + "grad_norm": 0.6131481732266474, + "learning_rate": 1.7456318141661987e-06, + "loss": 0.2773, + "step": 16215 + }, + { + "epoch": 0.7339217017424756, + "grad_norm": 0.5674989271994176, + "learning_rate": 1.7450754256943014e-06, + "loss": 0.318, + "step": 16216 + }, + { + "epoch": 0.7339669608508712, + "grad_norm": 0.2680277654815759, + "learning_rate": 1.7445191071607386e-06, + "loss": 0.4632, + "step": 16217 + }, + { + "epoch": 0.7340122199592668, + "grad_norm": 0.5726163088083188, + "learning_rate": 1.7439628585774614e-06, + "loss": 0.3311, + "step": 16218 + }, + { + "epoch": 0.7340574790676624, + "grad_norm": 0.5971453091407026, + "learning_rate": 1.7434066799564204e-06, + "loss": 0.2926, + "step": 16219 + }, + { + "epoch": 0.734102738176058, + "grad_norm": 0.5955186786844018, + "learning_rate": 1.74285057130957e-06, + "loss": 0.2949, + "step": 16220 + }, + { + "epoch": 0.7341479972844535, + "grad_norm": 0.5848454136822139, + "learning_rate": 1.7422945326488555e-06, + "loss": 0.2825, + "step": 16221 + }, + { + "epoch": 0.734193256392849, + "grad_norm": 0.28955425891968584, + "learning_rate": 1.7417385639862278e-06, + "loss": 0.4347, + "step": 16222 + }, + { + "epoch": 0.7342385155012446, + "grad_norm": 0.5873018904151689, + "learning_rate": 1.7411826653336294e-06, + "loss": 0.2977, + "step": 16223 + }, + { + "epoch": 0.7342837746096402, + "grad_norm": 0.7384682886218776, + "learning_rate": 1.7406268367030094e-06, + "loss": 0.2365, + "step": 16224 + }, + { + "epoch": 0.7343290337180357, + "grad_norm": 0.5949250394015495, + "learning_rate": 1.7400710781063073e-06, + "loss": 0.312, + "step": 16225 + }, + { + "epoch": 0.7343742928264313, + "grad_norm": 0.6304591743920028, + "learning_rate": 1.7395153895554646e-06, + "loss": 0.354, + "step": 16226 + }, + { + "epoch": 0.7344195519348269, + "grad_norm": 0.2623909581641417, + "learning_rate": 1.7389597710624234e-06, + "loss": 0.4543, + "step": 16227 + }, + { + "epoch": 0.7344648110432225, + "grad_norm": 0.5992424361294328, + "learning_rate": 1.73840422263912e-06, + "loss": 0.2858, + "step": 16228 + }, + { + "epoch": 0.7345100701516181, + "grad_norm": 0.6152299382781466, + "learning_rate": 1.7378487442974946e-06, + "loss": 0.3059, + "step": 16229 + }, + { + "epoch": 0.7345553292600135, + "grad_norm": 0.6201435136046975, + "learning_rate": 1.7372933360494803e-06, + "loss": 0.2761, + "step": 16230 + }, + { + "epoch": 0.7346005883684091, + "grad_norm": 0.5975736264648502, + "learning_rate": 1.7367379979070098e-06, + "loss": 0.2514, + "step": 16231 + }, + { + "epoch": 0.7346458474768047, + "grad_norm": 0.6407013057308736, + "learning_rate": 1.7361827298820177e-06, + "loss": 0.2821, + "step": 16232 + }, + { + "epoch": 0.7346911065852003, + "grad_norm": 0.5952618697201847, + "learning_rate": 1.7356275319864363e-06, + "loss": 0.2691, + "step": 16233 + }, + { + "epoch": 0.7347363656935958, + "grad_norm": 0.7453342568149233, + "learning_rate": 1.735072404232193e-06, + "loss": 0.2683, + "step": 16234 + }, + { + "epoch": 0.7347816248019914, + "grad_norm": 0.3013045199708562, + "learning_rate": 1.7345173466312154e-06, + "loss": 0.467, + "step": 16235 + }, + { + "epoch": 0.734826883910387, + "grad_norm": 0.6086860644721339, + "learning_rate": 1.7339623591954302e-06, + "loss": 0.3008, + "step": 16236 + }, + { + "epoch": 0.7348721430187826, + "grad_norm": 0.2812574722109745, + "learning_rate": 1.7334074419367653e-06, + "loss": 0.4836, + "step": 16237 + }, + { + "epoch": 0.734917402127178, + "grad_norm": 0.6199272597355754, + "learning_rate": 1.7328525948671415e-06, + "loss": 0.33, + "step": 16238 + }, + { + "epoch": 0.7349626612355736, + "grad_norm": 0.666636524721789, + "learning_rate": 1.7322978179984794e-06, + "loss": 0.3248, + "step": 16239 + }, + { + "epoch": 0.7350079203439692, + "grad_norm": 0.5992083829293151, + "learning_rate": 1.731743111342703e-06, + "loss": 0.2746, + "step": 16240 + }, + { + "epoch": 0.7350531794523648, + "grad_norm": 0.6130098920143602, + "learning_rate": 1.731188474911728e-06, + "loss": 0.315, + "step": 16241 + }, + { + "epoch": 0.7350984385607604, + "grad_norm": 0.6134321032151977, + "learning_rate": 1.7306339087174746e-06, + "loss": 0.3267, + "step": 16242 + }, + { + "epoch": 0.7351436976691559, + "grad_norm": 0.6233762873878859, + "learning_rate": 1.7300794127718573e-06, + "loss": 0.3262, + "step": 16243 + }, + { + "epoch": 0.7351889567775515, + "grad_norm": 0.6017139780144443, + "learning_rate": 1.7295249870867898e-06, + "loss": 0.2899, + "step": 16244 + }, + { + "epoch": 0.7352342158859471, + "grad_norm": 0.6050681339465913, + "learning_rate": 1.728970631674185e-06, + "loss": 0.2636, + "step": 16245 + }, + { + "epoch": 0.7352794749943427, + "grad_norm": 0.24791910992335017, + "learning_rate": 1.7284163465459568e-06, + "loss": 0.4481, + "step": 16246 + }, + { + "epoch": 0.7353247341027381, + "grad_norm": 0.24852137458762985, + "learning_rate": 1.7278621317140138e-06, + "loss": 0.4439, + "step": 16247 + }, + { + "epoch": 0.7353699932111337, + "grad_norm": 0.6095744267748847, + "learning_rate": 1.727307987190262e-06, + "loss": 0.2865, + "step": 16248 + }, + { + "epoch": 0.7354152523195293, + "grad_norm": 0.6017360995143893, + "learning_rate": 1.7267539129866107e-06, + "loss": 0.2863, + "step": 16249 + }, + { + "epoch": 0.7354605114279249, + "grad_norm": 0.6757623548504654, + "learning_rate": 1.7261999091149662e-06, + "loss": 0.3323, + "step": 16250 + }, + { + "epoch": 0.7355057705363204, + "grad_norm": 0.5997681786802973, + "learning_rate": 1.7256459755872306e-06, + "loss": 0.2743, + "step": 16251 + }, + { + "epoch": 0.735551029644716, + "grad_norm": 0.6532107189741281, + "learning_rate": 1.7250921124153057e-06, + "loss": 0.3206, + "step": 16252 + }, + { + "epoch": 0.7355962887531116, + "grad_norm": 0.3912425490997418, + "learning_rate": 1.7245383196110944e-06, + "loss": 0.4662, + "step": 16253 + }, + { + "epoch": 0.7356415478615072, + "grad_norm": 0.6560478513281396, + "learning_rate": 1.7239845971864932e-06, + "loss": 0.2901, + "step": 16254 + }, + { + "epoch": 0.7356868069699027, + "grad_norm": 0.6498356637789432, + "learning_rate": 1.7234309451534032e-06, + "loss": 0.2927, + "step": 16255 + }, + { + "epoch": 0.7357320660782982, + "grad_norm": 0.27313198293460317, + "learning_rate": 1.7228773635237183e-06, + "loss": 0.4435, + "step": 16256 + }, + { + "epoch": 0.7357773251866938, + "grad_norm": 0.6455232477977734, + "learning_rate": 1.7223238523093334e-06, + "loss": 0.3067, + "step": 16257 + }, + { + "epoch": 0.7358225842950894, + "grad_norm": 0.2594817494985136, + "learning_rate": 1.7217704115221417e-06, + "loss": 0.4774, + "step": 16258 + }, + { + "epoch": 0.735867843403485, + "grad_norm": 0.6079411337663703, + "learning_rate": 1.7212170411740386e-06, + "loss": 0.2811, + "step": 16259 + }, + { + "epoch": 0.7359131025118805, + "grad_norm": 0.6617218118446594, + "learning_rate": 1.7206637412769084e-06, + "loss": 0.2982, + "step": 16260 + }, + { + "epoch": 0.7359583616202761, + "grad_norm": 0.6300539343632511, + "learning_rate": 1.7201105118426425e-06, + "loss": 0.3082, + "step": 16261 + }, + { + "epoch": 0.7360036207286716, + "grad_norm": 0.7164263661192096, + "learning_rate": 1.71955735288313e-06, + "loss": 0.2977, + "step": 16262 + }, + { + "epoch": 0.7360488798370672, + "grad_norm": 0.6183776534457194, + "learning_rate": 1.719004264410255e-06, + "loss": 0.2782, + "step": 16263 + }, + { + "epoch": 0.7360941389454628, + "grad_norm": 0.6105854472134217, + "learning_rate": 1.7184512464358998e-06, + "loss": 0.2975, + "step": 16264 + }, + { + "epoch": 0.7361393980538583, + "grad_norm": 0.27064198134083034, + "learning_rate": 1.717898298971949e-06, + "loss": 0.4445, + "step": 16265 + }, + { + "epoch": 0.7361846571622539, + "grad_norm": 0.6112261479619383, + "learning_rate": 1.717345422030285e-06, + "loss": 0.3312, + "step": 16266 + }, + { + "epoch": 0.7362299162706495, + "grad_norm": 0.5993561678079502, + "learning_rate": 1.7167926156227854e-06, + "loss": 0.2999, + "step": 16267 + }, + { + "epoch": 0.7362751753790451, + "grad_norm": 0.2625199804522994, + "learning_rate": 1.7162398797613284e-06, + "loss": 0.4422, + "step": 16268 + }, + { + "epoch": 0.7363204344874406, + "grad_norm": 0.6146855148656558, + "learning_rate": 1.7156872144577918e-06, + "loss": 0.3353, + "step": 16269 + }, + { + "epoch": 0.7363656935958361, + "grad_norm": 0.5542848996872033, + "learning_rate": 1.7151346197240486e-06, + "loss": 0.2922, + "step": 16270 + }, + { + "epoch": 0.7364109527042317, + "grad_norm": 0.2699452576598286, + "learning_rate": 1.7145820955719755e-06, + "loss": 0.4661, + "step": 16271 + }, + { + "epoch": 0.7364562118126273, + "grad_norm": 0.6130827450264918, + "learning_rate": 1.7140296420134428e-06, + "loss": 0.2892, + "step": 16272 + }, + { + "epoch": 0.7365014709210228, + "grad_norm": 0.2513562381783873, + "learning_rate": 1.7134772590603193e-06, + "loss": 0.459, + "step": 16273 + }, + { + "epoch": 0.7365467300294184, + "grad_norm": 0.5855918035070893, + "learning_rate": 1.7129249467244758e-06, + "loss": 0.281, + "step": 16274 + }, + { + "epoch": 0.736591989137814, + "grad_norm": 0.6899883521801347, + "learning_rate": 1.7123727050177808e-06, + "loss": 0.3348, + "step": 16275 + }, + { + "epoch": 0.7366372482462096, + "grad_norm": 0.6239946463347905, + "learning_rate": 1.7118205339520999e-06, + "loss": 0.3188, + "step": 16276 + }, + { + "epoch": 0.7366825073546052, + "grad_norm": 0.27240169811472975, + "learning_rate": 1.7112684335392948e-06, + "loss": 0.4644, + "step": 16277 + }, + { + "epoch": 0.7367277664630006, + "grad_norm": 0.6059417313635661, + "learning_rate": 1.7107164037912305e-06, + "loss": 0.3083, + "step": 16278 + }, + { + "epoch": 0.7367730255713962, + "grad_norm": 0.6361738820039944, + "learning_rate": 1.7101644447197702e-06, + "loss": 0.3065, + "step": 16279 + }, + { + "epoch": 0.7368182846797918, + "grad_norm": 0.6360595017871099, + "learning_rate": 1.7096125563367722e-06, + "loss": 0.2608, + "step": 16280 + }, + { + "epoch": 0.7368635437881874, + "grad_norm": 0.6382952432671731, + "learning_rate": 1.709060738654093e-06, + "loss": 0.3589, + "step": 16281 + }, + { + "epoch": 0.7369088028965829, + "grad_norm": 0.6057322972207433, + "learning_rate": 1.7085089916835924e-06, + "loss": 0.2809, + "step": 16282 + }, + { + "epoch": 0.7369540620049785, + "grad_norm": 0.2639576838682394, + "learning_rate": 1.7079573154371233e-06, + "loss": 0.4449, + "step": 16283 + }, + { + "epoch": 0.7369993211133741, + "grad_norm": 0.259451887808222, + "learning_rate": 1.7074057099265422e-06, + "loss": 0.4703, + "step": 16284 + }, + { + "epoch": 0.7370445802217697, + "grad_norm": 0.6382920149509974, + "learning_rate": 1.7068541751637001e-06, + "loss": 0.3055, + "step": 16285 + }, + { + "epoch": 0.7370898393301651, + "grad_norm": 0.6298061371883908, + "learning_rate": 1.7063027111604457e-06, + "loss": 0.3105, + "step": 16286 + }, + { + "epoch": 0.7371350984385607, + "grad_norm": 0.5901058915221602, + "learning_rate": 1.7057513179286305e-06, + "loss": 0.2964, + "step": 16287 + }, + { + "epoch": 0.7371803575469563, + "grad_norm": 0.6692352016470466, + "learning_rate": 1.7051999954801058e-06, + "loss": 0.3239, + "step": 16288 + }, + { + "epoch": 0.7372256166553519, + "grad_norm": 0.5637542321209367, + "learning_rate": 1.7046487438267101e-06, + "loss": 0.2878, + "step": 16289 + }, + { + "epoch": 0.7372708757637475, + "grad_norm": 0.6137312238539702, + "learning_rate": 1.704097562980292e-06, + "loss": 0.3092, + "step": 16290 + }, + { + "epoch": 0.737316134872143, + "grad_norm": 0.6104309860060394, + "learning_rate": 1.7035464529526963e-06, + "loss": 0.2703, + "step": 16291 + }, + { + "epoch": 0.7373613939805386, + "grad_norm": 0.2693494755988427, + "learning_rate": 1.702995413755763e-06, + "loss": 0.4814, + "step": 16292 + }, + { + "epoch": 0.7374066530889342, + "grad_norm": 0.6943479505825775, + "learning_rate": 1.7024444454013305e-06, + "loss": 0.2872, + "step": 16293 + }, + { + "epoch": 0.7374519121973298, + "grad_norm": 0.5915976030744456, + "learning_rate": 1.7018935479012394e-06, + "loss": 0.2772, + "step": 16294 + }, + { + "epoch": 0.7374971713057252, + "grad_norm": 0.6959850127902959, + "learning_rate": 1.7013427212673285e-06, + "loss": 0.262, + "step": 16295 + }, + { + "epoch": 0.7375424304141208, + "grad_norm": 0.2816393378161216, + "learning_rate": 1.7007919655114314e-06, + "loss": 0.4919, + "step": 16296 + }, + { + "epoch": 0.7375876895225164, + "grad_norm": 0.28639396589527855, + "learning_rate": 1.7002412806453799e-06, + "loss": 0.4763, + "step": 16297 + }, + { + "epoch": 0.737632948630912, + "grad_norm": 0.610734186823621, + "learning_rate": 1.6996906666810116e-06, + "loss": 0.2929, + "step": 16298 + }, + { + "epoch": 0.7376782077393076, + "grad_norm": 0.650120453481213, + "learning_rate": 1.699140123630152e-06, + "loss": 0.2951, + "step": 16299 + }, + { + "epoch": 0.7377234668477031, + "grad_norm": 0.27258678959918653, + "learning_rate": 1.6985896515046357e-06, + "loss": 0.4629, + "step": 16300 + }, + { + "epoch": 0.7377687259560987, + "grad_norm": 0.6129395204170821, + "learning_rate": 1.698039250316288e-06, + "loss": 0.2871, + "step": 16301 + }, + { + "epoch": 0.7378139850644942, + "grad_norm": 0.5771135278717086, + "learning_rate": 1.697488920076934e-06, + "loss": 0.2952, + "step": 16302 + }, + { + "epoch": 0.7378592441728898, + "grad_norm": 0.6090117336444172, + "learning_rate": 1.6969386607984e-06, + "loss": 0.3341, + "step": 16303 + }, + { + "epoch": 0.7379045032812853, + "grad_norm": 0.6698203949048338, + "learning_rate": 1.6963884724925116e-06, + "loss": 0.3434, + "step": 16304 + }, + { + "epoch": 0.7379497623896809, + "grad_norm": 0.5727007389889912, + "learning_rate": 1.6958383551710888e-06, + "loss": 0.2917, + "step": 16305 + }, + { + "epoch": 0.7379950214980765, + "grad_norm": 0.6287283582218592, + "learning_rate": 1.6952883088459498e-06, + "loss": 0.2911, + "step": 16306 + }, + { + "epoch": 0.7380402806064721, + "grad_norm": 0.6010885159430064, + "learning_rate": 1.6947383335289152e-06, + "loss": 0.3169, + "step": 16307 + }, + { + "epoch": 0.7380855397148676, + "grad_norm": 0.5694010188110677, + "learning_rate": 1.6941884292318044e-06, + "loss": 0.339, + "step": 16308 + }, + { + "epoch": 0.7381307988232632, + "grad_norm": 0.6348745967305999, + "learning_rate": 1.6936385959664315e-06, + "loss": 0.2999, + "step": 16309 + }, + { + "epoch": 0.7381760579316587, + "grad_norm": 0.609076028085305, + "learning_rate": 1.6930888337446082e-06, + "loss": 0.3054, + "step": 16310 + }, + { + "epoch": 0.7382213170400543, + "grad_norm": 0.6158342661208451, + "learning_rate": 1.6925391425781519e-06, + "loss": 0.2698, + "step": 16311 + }, + { + "epoch": 0.7382665761484499, + "grad_norm": 0.6013018213101735, + "learning_rate": 1.691989522478869e-06, + "loss": 0.2937, + "step": 16312 + }, + { + "epoch": 0.7383118352568454, + "grad_norm": 0.5464993799583493, + "learning_rate": 1.6914399734585735e-06, + "loss": 0.298, + "step": 16313 + }, + { + "epoch": 0.738357094365241, + "grad_norm": 0.5666297376449007, + "learning_rate": 1.690890495529071e-06, + "loss": 0.3046, + "step": 16314 + }, + { + "epoch": 0.7384023534736366, + "grad_norm": 0.26592206593075024, + "learning_rate": 1.6903410887021676e-06, + "loss": 0.4429, + "step": 16315 + }, + { + "epoch": 0.7384476125820322, + "grad_norm": 0.6022086141416868, + "learning_rate": 1.6897917529896691e-06, + "loss": 0.2991, + "step": 16316 + }, + { + "epoch": 0.7384928716904277, + "grad_norm": 0.5292050781665859, + "learning_rate": 1.6892424884033825e-06, + "loss": 0.2886, + "step": 16317 + }, + { + "epoch": 0.7385381307988232, + "grad_norm": 0.31019800261565705, + "learning_rate": 1.6886932949551032e-06, + "loss": 0.4588, + "step": 16318 + }, + { + "epoch": 0.7385833899072188, + "grad_norm": 0.5384154654615427, + "learning_rate": 1.6881441726566355e-06, + "loss": 0.2842, + "step": 16319 + }, + { + "epoch": 0.7386286490156144, + "grad_norm": 0.25599225115318863, + "learning_rate": 1.6875951215197779e-06, + "loss": 0.4539, + "step": 16320 + }, + { + "epoch": 0.7386739081240099, + "grad_norm": 0.6232868132447051, + "learning_rate": 1.6870461415563311e-06, + "loss": 0.2836, + "step": 16321 + }, + { + "epoch": 0.7387191672324055, + "grad_norm": 0.5879990085252694, + "learning_rate": 1.6864972327780842e-06, + "loss": 0.273, + "step": 16322 + }, + { + "epoch": 0.7387644263408011, + "grad_norm": 0.6348301993005881, + "learning_rate": 1.6859483951968353e-06, + "loss": 0.2917, + "step": 16323 + }, + { + "epoch": 0.7388096854491967, + "grad_norm": 0.6169572468167954, + "learning_rate": 1.6853996288243785e-06, + "loss": 0.2771, + "step": 16324 + }, + { + "epoch": 0.7388549445575923, + "grad_norm": 0.5949852175498666, + "learning_rate": 1.6848509336725039e-06, + "loss": 0.2983, + "step": 16325 + }, + { + "epoch": 0.7389002036659877, + "grad_norm": 0.5970094415996532, + "learning_rate": 1.6843023097529993e-06, + "loss": 0.2544, + "step": 16326 + }, + { + "epoch": 0.7389454627743833, + "grad_norm": 0.2664461704857231, + "learning_rate": 1.6837537570776563e-06, + "loss": 0.4461, + "step": 16327 + }, + { + "epoch": 0.7389907218827789, + "grad_norm": 0.6167237480226018, + "learning_rate": 1.6832052756582583e-06, + "loss": 0.3353, + "step": 16328 + }, + { + "epoch": 0.7390359809911745, + "grad_norm": 0.6651092051598921, + "learning_rate": 1.682656865506594e-06, + "loss": 0.3018, + "step": 16329 + }, + { + "epoch": 0.73908124009957, + "grad_norm": 0.5786650897321024, + "learning_rate": 1.682108526634445e-06, + "loss": 0.2956, + "step": 16330 + }, + { + "epoch": 0.7391264992079656, + "grad_norm": 0.7224670401316967, + "learning_rate": 1.6815602590535923e-06, + "loss": 0.3015, + "step": 16331 + }, + { + "epoch": 0.7391717583163612, + "grad_norm": 0.590046763536563, + "learning_rate": 1.6810120627758176e-06, + "loss": 0.3501, + "step": 16332 + }, + { + "epoch": 0.7392170174247568, + "grad_norm": 0.6788892856512094, + "learning_rate": 1.6804639378129017e-06, + "loss": 0.2926, + "step": 16333 + }, + { + "epoch": 0.7392622765331524, + "grad_norm": 0.5922790130333325, + "learning_rate": 1.6799158841766206e-06, + "loss": 0.2937, + "step": 16334 + }, + { + "epoch": 0.7393075356415478, + "grad_norm": 0.5851526631177592, + "learning_rate": 1.679367901878749e-06, + "loss": 0.2815, + "step": 16335 + }, + { + "epoch": 0.7393527947499434, + "grad_norm": 0.6265131905703484, + "learning_rate": 1.6788199909310626e-06, + "loss": 0.3148, + "step": 16336 + }, + { + "epoch": 0.739398053858339, + "grad_norm": 0.5723849009706529, + "learning_rate": 1.6782721513453353e-06, + "loss": 0.309, + "step": 16337 + }, + { + "epoch": 0.7394433129667346, + "grad_norm": 0.6314188202897429, + "learning_rate": 1.6777243831333383e-06, + "loss": 0.3172, + "step": 16338 + }, + { + "epoch": 0.7394885720751301, + "grad_norm": 0.5981352108047535, + "learning_rate": 1.6771766863068389e-06, + "loss": 0.2789, + "step": 16339 + }, + { + "epoch": 0.7395338311835257, + "grad_norm": 1.6972236802070124, + "learning_rate": 1.6766290608776093e-06, + "loss": 0.4823, + "step": 16340 + }, + { + "epoch": 0.7395790902919213, + "grad_norm": 0.6993751371813014, + "learning_rate": 1.6760815068574116e-06, + "loss": 0.3213, + "step": 16341 + }, + { + "epoch": 0.7396243494003168, + "grad_norm": 0.31186821878598653, + "learning_rate": 1.6755340242580158e-06, + "loss": 0.4798, + "step": 16342 + }, + { + "epoch": 0.7396696085087123, + "grad_norm": 0.647280432904068, + "learning_rate": 1.674986613091184e-06, + "loss": 0.3498, + "step": 16343 + }, + { + "epoch": 0.7397148676171079, + "grad_norm": 0.9307783766001247, + "learning_rate": 1.6744392733686754e-06, + "loss": 0.2766, + "step": 16344 + }, + { + "epoch": 0.7397601267255035, + "grad_norm": 0.3135811131714723, + "learning_rate": 1.673892005102254e-06, + "loss": 0.4809, + "step": 16345 + }, + { + "epoch": 0.7398053858338991, + "grad_norm": 0.7435748301186856, + "learning_rate": 1.6733448083036806e-06, + "loss": 0.2704, + "step": 16346 + }, + { + "epoch": 0.7398506449422947, + "grad_norm": 0.28471121810758254, + "learning_rate": 1.6727976829847075e-06, + "loss": 0.4669, + "step": 16347 + }, + { + "epoch": 0.7398959040506902, + "grad_norm": 0.590816121161494, + "learning_rate": 1.6722506291570929e-06, + "loss": 0.2602, + "step": 16348 + }, + { + "epoch": 0.7399411631590858, + "grad_norm": 0.5922176862394455, + "learning_rate": 1.671703646832592e-06, + "loss": 0.269, + "step": 16349 + }, + { + "epoch": 0.7399864222674813, + "grad_norm": 0.633108164986941, + "learning_rate": 1.6711567360229613e-06, + "loss": 0.2886, + "step": 16350 + }, + { + "epoch": 0.7400316813758769, + "grad_norm": 0.25987291362248266, + "learning_rate": 1.6706098967399454e-06, + "loss": 0.4559, + "step": 16351 + }, + { + "epoch": 0.7400769404842724, + "grad_norm": 0.672889853871863, + "learning_rate": 1.6700631289952967e-06, + "loss": 0.2582, + "step": 16352 + }, + { + "epoch": 0.740122199592668, + "grad_norm": 0.6149642362793232, + "learning_rate": 1.6695164328007663e-06, + "loss": 0.307, + "step": 16353 + }, + { + "epoch": 0.7401674587010636, + "grad_norm": 0.637888793193678, + "learning_rate": 1.6689698081680988e-06, + "loss": 0.298, + "step": 16354 + }, + { + "epoch": 0.7402127178094592, + "grad_norm": 0.629594169843332, + "learning_rate": 1.6684232551090385e-06, + "loss": 0.3063, + "step": 16355 + }, + { + "epoch": 0.7402579769178547, + "grad_norm": 0.5993736230807224, + "learning_rate": 1.6678767736353313e-06, + "loss": 0.2921, + "step": 16356 + }, + { + "epoch": 0.7403032360262503, + "grad_norm": 0.6533709936406462, + "learning_rate": 1.6673303637587169e-06, + "loss": 0.2944, + "step": 16357 + }, + { + "epoch": 0.7403484951346458, + "grad_norm": 0.6279231132277909, + "learning_rate": 1.6667840254909395e-06, + "loss": 0.2746, + "step": 16358 + }, + { + "epoch": 0.7403937542430414, + "grad_norm": 0.6895537865960341, + "learning_rate": 1.6662377588437356e-06, + "loss": 0.3198, + "step": 16359 + }, + { + "epoch": 0.740439013351437, + "grad_norm": 0.5613014329500297, + "learning_rate": 1.6656915638288423e-06, + "loss": 0.2509, + "step": 16360 + }, + { + "epoch": 0.7404842724598325, + "grad_norm": 0.5244703265777766, + "learning_rate": 1.6651454404579965e-06, + "loss": 0.2772, + "step": 16361 + }, + { + "epoch": 0.7405295315682281, + "grad_norm": 0.6434737155875516, + "learning_rate": 1.6645993887429345e-06, + "loss": 0.2939, + "step": 16362 + }, + { + "epoch": 0.7405747906766237, + "grad_norm": 0.27442691120208346, + "learning_rate": 1.664053408695388e-06, + "loss": 0.4551, + "step": 16363 + }, + { + "epoch": 0.7406200497850193, + "grad_norm": 0.6432461688130403, + "learning_rate": 1.6635075003270861e-06, + "loss": 0.3343, + "step": 16364 + }, + { + "epoch": 0.7406653088934148, + "grad_norm": 0.6585033459543134, + "learning_rate": 1.6629616636497615e-06, + "loss": 0.306, + "step": 16365 + }, + { + "epoch": 0.7407105680018103, + "grad_norm": 0.28741188531833256, + "learning_rate": 1.6624158986751427e-06, + "loss": 0.4308, + "step": 16366 + }, + { + "epoch": 0.7407558271102059, + "grad_norm": 0.6272621824628382, + "learning_rate": 1.661870205414956e-06, + "loss": 0.2924, + "step": 16367 + }, + { + "epoch": 0.7408010862186015, + "grad_norm": 0.6357340295514764, + "learning_rate": 1.6613245838809244e-06, + "loss": 0.3293, + "step": 16368 + }, + { + "epoch": 0.7408463453269971, + "grad_norm": 0.586649855741785, + "learning_rate": 1.6607790340847757e-06, + "loss": 0.2847, + "step": 16369 + }, + { + "epoch": 0.7408916044353926, + "grad_norm": 0.6519309610805597, + "learning_rate": 1.6602335560382276e-06, + "loss": 0.3355, + "step": 16370 + }, + { + "epoch": 0.7409368635437882, + "grad_norm": 0.27314375144628994, + "learning_rate": 1.6596881497530054e-06, + "loss": 0.4759, + "step": 16371 + }, + { + "epoch": 0.7409821226521838, + "grad_norm": 0.6334847302860123, + "learning_rate": 1.6591428152408256e-06, + "loss": 0.3212, + "step": 16372 + }, + { + "epoch": 0.7410273817605794, + "grad_norm": 0.6054026655609173, + "learning_rate": 1.6585975525134041e-06, + "loss": 0.2976, + "step": 16373 + }, + { + "epoch": 0.7410726408689748, + "grad_norm": 0.6606486067332609, + "learning_rate": 1.658052361582459e-06, + "loss": 0.3441, + "step": 16374 + }, + { + "epoch": 0.7411178999773704, + "grad_norm": 0.7073574769713497, + "learning_rate": 1.6575072424597083e-06, + "loss": 0.3342, + "step": 16375 + }, + { + "epoch": 0.741163159085766, + "grad_norm": 0.6335308387328569, + "learning_rate": 1.6569621951568575e-06, + "loss": 0.2961, + "step": 16376 + }, + { + "epoch": 0.7412084181941616, + "grad_norm": 0.5610231362990775, + "learning_rate": 1.6564172196856222e-06, + "loss": 0.2878, + "step": 16377 + }, + { + "epoch": 0.7412536773025571, + "grad_norm": 0.5774696662726435, + "learning_rate": 1.6558723160577118e-06, + "loss": 0.2611, + "step": 16378 + }, + { + "epoch": 0.7412989364109527, + "grad_norm": 0.6121664560415999, + "learning_rate": 1.655327484284837e-06, + "loss": 0.3021, + "step": 16379 + }, + { + "epoch": 0.7413441955193483, + "grad_norm": 0.6178927095424993, + "learning_rate": 1.6547827243787002e-06, + "loss": 0.2902, + "step": 16380 + }, + { + "epoch": 0.7413894546277439, + "grad_norm": 0.28287751021161356, + "learning_rate": 1.654238036351008e-06, + "loss": 0.4395, + "step": 16381 + }, + { + "epoch": 0.7414347137361395, + "grad_norm": 0.598779619002262, + "learning_rate": 1.6536934202134663e-06, + "loss": 0.3038, + "step": 16382 + }, + { + "epoch": 0.7414799728445349, + "grad_norm": 0.5969691615599192, + "learning_rate": 1.6531488759777753e-06, + "loss": 0.2914, + "step": 16383 + }, + { + "epoch": 0.7415252319529305, + "grad_norm": 0.667321425427489, + "learning_rate": 1.6526044036556349e-06, + "loss": 0.2809, + "step": 16384 + }, + { + "epoch": 0.7415704910613261, + "grad_norm": 0.2795974705144027, + "learning_rate": 1.6520600032587464e-06, + "loss": 0.4745, + "step": 16385 + }, + { + "epoch": 0.7416157501697217, + "grad_norm": 0.7550919144344835, + "learning_rate": 1.6515156747988043e-06, + "loss": 0.2814, + "step": 16386 + }, + { + "epoch": 0.7416610092781172, + "grad_norm": 0.6126708197070092, + "learning_rate": 1.650971418287508e-06, + "loss": 0.2722, + "step": 16387 + }, + { + "epoch": 0.7417062683865128, + "grad_norm": 0.6412752477864133, + "learning_rate": 1.6504272337365501e-06, + "loss": 0.3413, + "step": 16388 + }, + { + "epoch": 0.7417515274949084, + "grad_norm": 0.2807606489818299, + "learning_rate": 1.6498831211576222e-06, + "loss": 0.5012, + "step": 16389 + }, + { + "epoch": 0.741796786603304, + "grad_norm": 0.27978365250473713, + "learning_rate": 1.6493390805624165e-06, + "loss": 0.4903, + "step": 16390 + }, + { + "epoch": 0.7418420457116994, + "grad_norm": 0.5874638050564923, + "learning_rate": 1.648795111962625e-06, + "loss": 0.3449, + "step": 16391 + }, + { + "epoch": 0.741887304820095, + "grad_norm": 0.24334727858073832, + "learning_rate": 1.6482512153699344e-06, + "loss": 0.4518, + "step": 16392 + }, + { + "epoch": 0.7419325639284906, + "grad_norm": 0.6706376008021124, + "learning_rate": 1.647707390796029e-06, + "loss": 0.3001, + "step": 16393 + }, + { + "epoch": 0.7419778230368862, + "grad_norm": 0.581696236394703, + "learning_rate": 1.6471636382525963e-06, + "loss": 0.3018, + "step": 16394 + }, + { + "epoch": 0.7420230821452818, + "grad_norm": 0.6585204969144176, + "learning_rate": 1.6466199577513209e-06, + "loss": 0.2851, + "step": 16395 + }, + { + "epoch": 0.7420683412536773, + "grad_norm": 0.2582143407608148, + "learning_rate": 1.646076349303884e-06, + "loss": 0.4776, + "step": 16396 + }, + { + "epoch": 0.7421136003620729, + "grad_norm": 0.5990806798374867, + "learning_rate": 1.6455328129219634e-06, + "loss": 0.2621, + "step": 16397 + }, + { + "epoch": 0.7421588594704684, + "grad_norm": 0.5858539733747705, + "learning_rate": 1.6449893486172418e-06, + "loss": 0.3084, + "step": 16398 + }, + { + "epoch": 0.742204118578864, + "grad_norm": 0.6423462532100619, + "learning_rate": 1.6444459564013938e-06, + "loss": 0.3453, + "step": 16399 + }, + { + "epoch": 0.7422493776872595, + "grad_norm": 0.6999659736527946, + "learning_rate": 1.6439026362860977e-06, + "loss": 0.3212, + "step": 16400 + }, + { + "epoch": 0.7422946367956551, + "grad_norm": 0.6350926523497435, + "learning_rate": 1.6433593882830262e-06, + "loss": 0.2644, + "step": 16401 + }, + { + "epoch": 0.7423398959040507, + "grad_norm": 0.6700380593625176, + "learning_rate": 1.642816212403851e-06, + "loss": 0.2781, + "step": 16402 + }, + { + "epoch": 0.7423851550124463, + "grad_norm": 0.5725905632375857, + "learning_rate": 1.642273108660245e-06, + "loss": 0.3064, + "step": 16403 + }, + { + "epoch": 0.7424304141208418, + "grad_norm": 0.30964321741467477, + "learning_rate": 1.6417300770638784e-06, + "loss": 0.4693, + "step": 16404 + }, + { + "epoch": 0.7424756732292374, + "grad_norm": 0.6382625480112772, + "learning_rate": 1.6411871176264188e-06, + "loss": 0.3223, + "step": 16405 + }, + { + "epoch": 0.7425209323376329, + "grad_norm": 0.7372807946732123, + "learning_rate": 1.6406442303595305e-06, + "loss": 0.2886, + "step": 16406 + }, + { + "epoch": 0.7425661914460285, + "grad_norm": 0.6382685159478572, + "learning_rate": 1.6401014152748801e-06, + "loss": 0.249, + "step": 16407 + }, + { + "epoch": 0.7426114505544241, + "grad_norm": 0.5808086404162707, + "learning_rate": 1.6395586723841328e-06, + "loss": 0.3053, + "step": 16408 + }, + { + "epoch": 0.7426567096628196, + "grad_norm": 0.30327634396753367, + "learning_rate": 1.6390160016989487e-06, + "loss": 0.5011, + "step": 16409 + }, + { + "epoch": 0.7427019687712152, + "grad_norm": 0.5749949314930741, + "learning_rate": 1.6384734032309868e-06, + "loss": 0.2838, + "step": 16410 + }, + { + "epoch": 0.7427472278796108, + "grad_norm": 0.8120256752960093, + "learning_rate": 1.6379308769919084e-06, + "loss": 0.2751, + "step": 16411 + }, + { + "epoch": 0.7427924869880064, + "grad_norm": 0.2966374383743913, + "learning_rate": 1.63738842299337e-06, + "loss": 0.4333, + "step": 16412 + }, + { + "epoch": 0.7428377460964019, + "grad_norm": 0.817417467052321, + "learning_rate": 1.6368460412470255e-06, + "loss": 0.2531, + "step": 16413 + }, + { + "epoch": 0.7428830052047974, + "grad_norm": 0.6579780976381642, + "learning_rate": 1.636303731764532e-06, + "loss": 0.2897, + "step": 16414 + }, + { + "epoch": 0.742928264313193, + "grad_norm": 0.532435367077427, + "learning_rate": 1.635761494557539e-06, + "loss": 0.2566, + "step": 16415 + }, + { + "epoch": 0.7429735234215886, + "grad_norm": 0.573111185664909, + "learning_rate": 1.6352193296377006e-06, + "loss": 0.3194, + "step": 16416 + }, + { + "epoch": 0.7430187825299842, + "grad_norm": 0.6146707134736058, + "learning_rate": 1.6346772370166646e-06, + "loss": 0.2936, + "step": 16417 + }, + { + "epoch": 0.7430640416383797, + "grad_norm": 0.6074305357766798, + "learning_rate": 1.634135216706077e-06, + "loss": 0.266, + "step": 16418 + }, + { + "epoch": 0.7431093007467753, + "grad_norm": 0.5672575122908394, + "learning_rate": 1.6335932687175865e-06, + "loss": 0.3097, + "step": 16419 + }, + { + "epoch": 0.7431545598551709, + "grad_norm": 0.27175533404918323, + "learning_rate": 1.6330513930628389e-06, + "loss": 0.4583, + "step": 16420 + }, + { + "epoch": 0.7431998189635665, + "grad_norm": 0.27517315541514614, + "learning_rate": 1.6325095897534765e-06, + "loss": 0.4705, + "step": 16421 + }, + { + "epoch": 0.7432450780719619, + "grad_norm": 0.6187333387559553, + "learning_rate": 1.6319678588011385e-06, + "loss": 0.3075, + "step": 16422 + }, + { + "epoch": 0.7432903371803575, + "grad_norm": 0.5779202963904356, + "learning_rate": 1.6314262002174674e-06, + "loss": 0.2521, + "step": 16423 + }, + { + "epoch": 0.7433355962887531, + "grad_norm": 0.7523310961274872, + "learning_rate": 1.6308846140141027e-06, + "loss": 0.2957, + "step": 16424 + }, + { + "epoch": 0.7433808553971487, + "grad_norm": 0.5847693172280757, + "learning_rate": 1.630343100202681e-06, + "loss": 0.2476, + "step": 16425 + }, + { + "epoch": 0.7434261145055442, + "grad_norm": 0.6238932100164696, + "learning_rate": 1.6298016587948345e-06, + "loss": 0.3456, + "step": 16426 + }, + { + "epoch": 0.7434713736139398, + "grad_norm": 0.5644616877249133, + "learning_rate": 1.6292602898022015e-06, + "loss": 0.2797, + "step": 16427 + }, + { + "epoch": 0.7435166327223354, + "grad_norm": 0.6019491578971776, + "learning_rate": 1.6287189932364106e-06, + "loss": 0.287, + "step": 16428 + }, + { + "epoch": 0.743561891830731, + "grad_norm": 0.5979609372657156, + "learning_rate": 1.6281777691090966e-06, + "loss": 0.3096, + "step": 16429 + }, + { + "epoch": 0.7436071509391265, + "grad_norm": 0.5681120904040521, + "learning_rate": 1.6276366174318865e-06, + "loss": 0.2774, + "step": 16430 + }, + { + "epoch": 0.743652410047522, + "grad_norm": 0.6291721807245989, + "learning_rate": 1.627095538216406e-06, + "loss": 0.2919, + "step": 16431 + }, + { + "epoch": 0.7436976691559176, + "grad_norm": 0.6375260230038594, + "learning_rate": 1.6265545314742838e-06, + "loss": 0.2693, + "step": 16432 + }, + { + "epoch": 0.7437429282643132, + "grad_norm": 0.6516953368882744, + "learning_rate": 1.6260135972171448e-06, + "loss": 0.3493, + "step": 16433 + }, + { + "epoch": 0.7437881873727088, + "grad_norm": 0.6326681888223729, + "learning_rate": 1.625472735456612e-06, + "loss": 0.2763, + "step": 16434 + }, + { + "epoch": 0.7438334464811043, + "grad_norm": 0.6176613716364594, + "learning_rate": 1.6249319462043039e-06, + "loss": 0.2825, + "step": 16435 + }, + { + "epoch": 0.7438787055894999, + "grad_norm": 0.6120209281474143, + "learning_rate": 1.6243912294718428e-06, + "loss": 0.2937, + "step": 16436 + }, + { + "epoch": 0.7439239646978955, + "grad_norm": 0.5795229600915072, + "learning_rate": 1.6238505852708481e-06, + "loss": 0.28, + "step": 16437 + }, + { + "epoch": 0.743969223806291, + "grad_norm": 0.617044457533017, + "learning_rate": 1.623310013612936e-06, + "loss": 0.3315, + "step": 16438 + }, + { + "epoch": 0.7440144829146865, + "grad_norm": 0.6151415354863083, + "learning_rate": 1.622769514509719e-06, + "loss": 0.2776, + "step": 16439 + }, + { + "epoch": 0.7440597420230821, + "grad_norm": 0.6251360148196247, + "learning_rate": 1.6222290879728142e-06, + "loss": 0.2804, + "step": 16440 + }, + { + "epoch": 0.7441050011314777, + "grad_norm": 0.6917015257469193, + "learning_rate": 1.6216887340138304e-06, + "loss": 0.2605, + "step": 16441 + }, + { + "epoch": 0.7441502602398733, + "grad_norm": 0.616749212638954, + "learning_rate": 1.621148452644382e-06, + "loss": 0.3526, + "step": 16442 + }, + { + "epoch": 0.7441955193482689, + "grad_norm": 0.2708142105961081, + "learning_rate": 1.6206082438760762e-06, + "loss": 0.4795, + "step": 16443 + }, + { + "epoch": 0.7442407784566644, + "grad_norm": 0.5801112566416491, + "learning_rate": 1.6200681077205182e-06, + "loss": 0.2995, + "step": 16444 + }, + { + "epoch": 0.74428603756506, + "grad_norm": 0.5712979072266995, + "learning_rate": 1.619528044189318e-06, + "loss": 0.2847, + "step": 16445 + }, + { + "epoch": 0.7443312966734555, + "grad_norm": 0.29290842989103355, + "learning_rate": 1.6189880532940772e-06, + "loss": 0.4951, + "step": 16446 + }, + { + "epoch": 0.7443765557818511, + "grad_norm": 0.2558401143062855, + "learning_rate": 1.6184481350463976e-06, + "loss": 0.455, + "step": 16447 + }, + { + "epoch": 0.7444218148902466, + "grad_norm": 0.7829357141843611, + "learning_rate": 1.6179082894578824e-06, + "loss": 0.3241, + "step": 16448 + }, + { + "epoch": 0.7444670739986422, + "grad_norm": 0.6261295834024594, + "learning_rate": 1.617368516540132e-06, + "loss": 0.2901, + "step": 16449 + }, + { + "epoch": 0.7445123331070378, + "grad_norm": 0.26736031489859946, + "learning_rate": 1.6168288163047434e-06, + "loss": 0.4512, + "step": 16450 + }, + { + "epoch": 0.7445575922154334, + "grad_norm": 0.6085865074985845, + "learning_rate": 1.6162891887633114e-06, + "loss": 0.2953, + "step": 16451 + }, + { + "epoch": 0.744602851323829, + "grad_norm": 0.5997255472378802, + "learning_rate": 1.615749633927432e-06, + "loss": 0.2951, + "step": 16452 + }, + { + "epoch": 0.7446481104322245, + "grad_norm": 0.6290365174173874, + "learning_rate": 1.615210151808701e-06, + "loss": 0.2781, + "step": 16453 + }, + { + "epoch": 0.74469336954062, + "grad_norm": 0.6093195173585996, + "learning_rate": 1.6146707424187086e-06, + "loss": 0.2618, + "step": 16454 + }, + { + "epoch": 0.7447386286490156, + "grad_norm": 0.6563387860435451, + "learning_rate": 1.6141314057690426e-06, + "loss": 0.3281, + "step": 16455 + }, + { + "epoch": 0.7447838877574112, + "grad_norm": 0.5755936718531313, + "learning_rate": 1.6135921418712959e-06, + "loss": 0.2966, + "step": 16456 + }, + { + "epoch": 0.7448291468658067, + "grad_norm": 0.26943398549730296, + "learning_rate": 1.6130529507370513e-06, + "loss": 0.4748, + "step": 16457 + }, + { + "epoch": 0.7448744059742023, + "grad_norm": 0.6445979510026212, + "learning_rate": 1.6125138323778983e-06, + "loss": 0.3227, + "step": 16458 + }, + { + "epoch": 0.7449196650825979, + "grad_norm": 0.6014424196667216, + "learning_rate": 1.6119747868054193e-06, + "loss": 0.3094, + "step": 16459 + }, + { + "epoch": 0.7449649241909935, + "grad_norm": 0.5965822463164119, + "learning_rate": 1.6114358140311948e-06, + "loss": 0.2948, + "step": 16460 + }, + { + "epoch": 0.745010183299389, + "grad_norm": 0.27711896806631753, + "learning_rate": 1.610896914066808e-06, + "loss": 0.4665, + "step": 16461 + }, + { + "epoch": 0.7450554424077845, + "grad_norm": 0.5903313173991954, + "learning_rate": 1.6103580869238388e-06, + "loss": 0.2882, + "step": 16462 + }, + { + "epoch": 0.7451007015161801, + "grad_norm": 0.5993926220789347, + "learning_rate": 1.609819332613864e-06, + "loss": 0.2436, + "step": 16463 + }, + { + "epoch": 0.7451459606245757, + "grad_norm": 0.6501241995643003, + "learning_rate": 1.6092806511484576e-06, + "loss": 0.2978, + "step": 16464 + }, + { + "epoch": 0.7451912197329713, + "grad_norm": 0.6725764205319074, + "learning_rate": 1.6087420425391964e-06, + "loss": 0.3198, + "step": 16465 + }, + { + "epoch": 0.7452364788413668, + "grad_norm": 0.6499926682600154, + "learning_rate": 1.6082035067976553e-06, + "loss": 0.2942, + "step": 16466 + }, + { + "epoch": 0.7452817379497624, + "grad_norm": 0.7833476590078484, + "learning_rate": 1.6076650439354035e-06, + "loss": 0.3156, + "step": 16467 + }, + { + "epoch": 0.745326997058158, + "grad_norm": 0.6036145375496808, + "learning_rate": 1.6071266539640095e-06, + "loss": 0.3058, + "step": 16468 + }, + { + "epoch": 0.7453722561665536, + "grad_norm": 0.6395409785445747, + "learning_rate": 1.6065883368950447e-06, + "loss": 0.3243, + "step": 16469 + }, + { + "epoch": 0.745417515274949, + "grad_norm": 0.607747457083173, + "learning_rate": 1.606050092740073e-06, + "loss": 0.3002, + "step": 16470 + }, + { + "epoch": 0.7454627743833446, + "grad_norm": 0.6562502783928543, + "learning_rate": 1.6055119215106629e-06, + "loss": 0.2663, + "step": 16471 + }, + { + "epoch": 0.7455080334917402, + "grad_norm": 0.6645214066337757, + "learning_rate": 1.604973823218376e-06, + "loss": 0.3166, + "step": 16472 + }, + { + "epoch": 0.7455532926001358, + "grad_norm": 0.26599225456602565, + "learning_rate": 1.6044357978747733e-06, + "loss": 0.4609, + "step": 16473 + }, + { + "epoch": 0.7455985517085313, + "grad_norm": 0.6025592209553091, + "learning_rate": 1.603897845491416e-06, + "loss": 0.2608, + "step": 16474 + }, + { + "epoch": 0.7456438108169269, + "grad_norm": 0.596383754180155, + "learning_rate": 1.6033599660798676e-06, + "loss": 0.2616, + "step": 16475 + }, + { + "epoch": 0.7456890699253225, + "grad_norm": 0.6375212264875807, + "learning_rate": 1.6028221596516779e-06, + "loss": 0.3297, + "step": 16476 + }, + { + "epoch": 0.7457343290337181, + "grad_norm": 0.5940876054246925, + "learning_rate": 1.6022844262184061e-06, + "loss": 0.2497, + "step": 16477 + }, + { + "epoch": 0.7457795881421136, + "grad_norm": 0.6218890417536425, + "learning_rate": 1.6017467657916075e-06, + "loss": 0.276, + "step": 16478 + }, + { + "epoch": 0.7458248472505091, + "grad_norm": 0.6154177745187074, + "learning_rate": 1.6012091783828365e-06, + "loss": 0.3192, + "step": 16479 + }, + { + "epoch": 0.7458701063589047, + "grad_norm": 0.5586980717106319, + "learning_rate": 1.600671664003639e-06, + "loss": 0.3104, + "step": 16480 + }, + { + "epoch": 0.7459153654673003, + "grad_norm": 0.6361626035580967, + "learning_rate": 1.600134222665567e-06, + "loss": 0.2703, + "step": 16481 + }, + { + "epoch": 0.7459606245756959, + "grad_norm": 0.6284177067454095, + "learning_rate": 1.59959685438017e-06, + "loss": 0.3597, + "step": 16482 + }, + { + "epoch": 0.7460058836840914, + "grad_norm": 0.7516775332368926, + "learning_rate": 1.599059559158993e-06, + "loss": 0.3308, + "step": 16483 + }, + { + "epoch": 0.746051142792487, + "grad_norm": 0.5991664336958192, + "learning_rate": 1.5985223370135795e-06, + "loss": 0.2719, + "step": 16484 + }, + { + "epoch": 0.7460964019008826, + "grad_norm": 0.5970637698076308, + "learning_rate": 1.5979851879554758e-06, + "loss": 0.2858, + "step": 16485 + }, + { + "epoch": 0.7461416610092781, + "grad_norm": 0.5938806151950049, + "learning_rate": 1.5974481119962203e-06, + "loss": 0.3179, + "step": 16486 + }, + { + "epoch": 0.7461869201176737, + "grad_norm": 0.6206591591584846, + "learning_rate": 1.596911109147356e-06, + "loss": 0.3068, + "step": 16487 + }, + { + "epoch": 0.7462321792260692, + "grad_norm": 0.5958474881442908, + "learning_rate": 1.5963741794204207e-06, + "loss": 0.3092, + "step": 16488 + }, + { + "epoch": 0.7462774383344648, + "grad_norm": 0.6318170853705819, + "learning_rate": 1.595837322826949e-06, + "loss": 0.2846, + "step": 16489 + }, + { + "epoch": 0.7463226974428604, + "grad_norm": 0.5792372877921103, + "learning_rate": 1.5953005393784782e-06, + "loss": 0.2862, + "step": 16490 + }, + { + "epoch": 0.746367956551256, + "grad_norm": 0.6654866867136477, + "learning_rate": 1.5947638290865436e-06, + "loss": 0.2985, + "step": 16491 + }, + { + "epoch": 0.7464132156596515, + "grad_norm": 0.6909000728983873, + "learning_rate": 1.5942271919626762e-06, + "loss": 0.2709, + "step": 16492 + }, + { + "epoch": 0.746458474768047, + "grad_norm": 0.62095556360136, + "learning_rate": 1.5936906280184045e-06, + "loss": 0.287, + "step": 16493 + }, + { + "epoch": 0.7465037338764426, + "grad_norm": 0.6299401063232833, + "learning_rate": 1.5931541372652592e-06, + "loss": 0.3418, + "step": 16494 + }, + { + "epoch": 0.7465489929848382, + "grad_norm": 0.3067155177006923, + "learning_rate": 1.5926177197147702e-06, + "loss": 0.4905, + "step": 16495 + }, + { + "epoch": 0.7465942520932337, + "grad_norm": 0.6525451405318027, + "learning_rate": 1.5920813753784614e-06, + "loss": 0.322, + "step": 16496 + }, + { + "epoch": 0.7466395112016293, + "grad_norm": 0.6263061006818194, + "learning_rate": 1.5915451042678558e-06, + "loss": 0.2977, + "step": 16497 + }, + { + "epoch": 0.7466847703100249, + "grad_norm": 0.2618271933779362, + "learning_rate": 1.591008906394479e-06, + "loss": 0.4502, + "step": 16498 + }, + { + "epoch": 0.7467300294184205, + "grad_norm": 0.5847023317077064, + "learning_rate": 1.5904727817698495e-06, + "loss": 0.2815, + "step": 16499 + }, + { + "epoch": 0.7467752885268161, + "grad_norm": 0.7444545205585212, + "learning_rate": 1.5899367304054898e-06, + "loss": 0.3401, + "step": 16500 + }, + { + "epoch": 0.7468205476352116, + "grad_norm": 0.627059815960014, + "learning_rate": 1.5894007523129162e-06, + "loss": 0.2957, + "step": 16501 + }, + { + "epoch": 0.7468658067436071, + "grad_norm": 0.650345947630085, + "learning_rate": 1.5888648475036445e-06, + "loss": 0.3021, + "step": 16502 + }, + { + "epoch": 0.7469110658520027, + "grad_norm": 2.417059302876411, + "learning_rate": 1.5883290159891907e-06, + "loss": 0.3422, + "step": 16503 + }, + { + "epoch": 0.7469563249603983, + "grad_norm": 0.6539258850224872, + "learning_rate": 1.5877932577810712e-06, + "loss": 0.3189, + "step": 16504 + }, + { + "epoch": 0.7470015840687938, + "grad_norm": 0.6039525924981889, + "learning_rate": 1.5872575728907914e-06, + "loss": 0.2911, + "step": 16505 + }, + { + "epoch": 0.7470468431771894, + "grad_norm": 0.5788256798294671, + "learning_rate": 1.586721961329865e-06, + "loss": 0.3224, + "step": 16506 + }, + { + "epoch": 0.747092102285585, + "grad_norm": 0.7105780907921888, + "learning_rate": 1.5861864231098006e-06, + "loss": 0.3178, + "step": 16507 + }, + { + "epoch": 0.7471373613939806, + "grad_norm": 0.7063708263989005, + "learning_rate": 1.5856509582421086e-06, + "loss": 0.3351, + "step": 16508 + }, + { + "epoch": 0.747182620502376, + "grad_norm": 0.6510998279016662, + "learning_rate": 1.585115566738288e-06, + "loss": 0.369, + "step": 16509 + }, + { + "epoch": 0.7472278796107716, + "grad_norm": 0.277973618515596, + "learning_rate": 1.5845802486098461e-06, + "loss": 0.4612, + "step": 16510 + }, + { + "epoch": 0.7472731387191672, + "grad_norm": 0.642679572704552, + "learning_rate": 1.584045003868286e-06, + "loss": 0.3621, + "step": 16511 + }, + { + "epoch": 0.7473183978275628, + "grad_norm": 0.6128790270686905, + "learning_rate": 1.5835098325251075e-06, + "loss": 0.2871, + "step": 16512 + }, + { + "epoch": 0.7473636569359584, + "grad_norm": 0.5868380908965802, + "learning_rate": 1.5829747345918083e-06, + "loss": 0.312, + "step": 16513 + }, + { + "epoch": 0.7474089160443539, + "grad_norm": 0.5961565717006754, + "learning_rate": 1.5824397100798893e-06, + "loss": 0.2839, + "step": 16514 + }, + { + "epoch": 0.7474541751527495, + "grad_norm": 0.5754325439828039, + "learning_rate": 1.5819047590008429e-06, + "loss": 0.3001, + "step": 16515 + }, + { + "epoch": 0.7474994342611451, + "grad_norm": 0.7193337762494555, + "learning_rate": 1.5813698813661672e-06, + "loss": 0.3086, + "step": 16516 + }, + { + "epoch": 0.7475446933695407, + "grad_norm": 0.6679149437502885, + "learning_rate": 1.5808350771873527e-06, + "loss": 0.3474, + "step": 16517 + }, + { + "epoch": 0.7475899524779361, + "grad_norm": 0.5970275979081476, + "learning_rate": 1.58030034647589e-06, + "loss": 0.3043, + "step": 16518 + }, + { + "epoch": 0.7476352115863317, + "grad_norm": 0.5749082872452106, + "learning_rate": 1.57976568924327e-06, + "loss": 0.268, + "step": 16519 + }, + { + "epoch": 0.7476804706947273, + "grad_norm": 0.6548085932399869, + "learning_rate": 1.5792311055009824e-06, + "loss": 0.2956, + "step": 16520 + }, + { + "epoch": 0.7477257298031229, + "grad_norm": 0.623026618245212, + "learning_rate": 1.578696595260512e-06, + "loss": 0.2868, + "step": 16521 + }, + { + "epoch": 0.7477709889115185, + "grad_norm": 0.5916390234323907, + "learning_rate": 1.578162158533343e-06, + "loss": 0.2832, + "step": 16522 + }, + { + "epoch": 0.747816248019914, + "grad_norm": 0.6014859725372366, + "learning_rate": 1.57762779533096e-06, + "loss": 0.2819, + "step": 16523 + }, + { + "epoch": 0.7478615071283096, + "grad_norm": 0.7435152017578733, + "learning_rate": 1.5770935056648456e-06, + "loss": 0.2898, + "step": 16524 + }, + { + "epoch": 0.7479067662367052, + "grad_norm": 0.6084117400456398, + "learning_rate": 1.5765592895464793e-06, + "loss": 0.3335, + "step": 16525 + }, + { + "epoch": 0.7479520253451007, + "grad_norm": 0.6107025095627568, + "learning_rate": 1.5760251469873378e-06, + "loss": 0.3038, + "step": 16526 + }, + { + "epoch": 0.7479972844534962, + "grad_norm": 0.25637382721289265, + "learning_rate": 1.5754910779989018e-06, + "loss": 0.4819, + "step": 16527 + }, + { + "epoch": 0.7480425435618918, + "grad_norm": 0.6661645367906474, + "learning_rate": 1.5749570825926437e-06, + "loss": 0.3256, + "step": 16528 + }, + { + "epoch": 0.7480878026702874, + "grad_norm": 0.5931956952900801, + "learning_rate": 1.5744231607800397e-06, + "loss": 0.3045, + "step": 16529 + }, + { + "epoch": 0.748133061778683, + "grad_norm": 0.6488485046622263, + "learning_rate": 1.5738893125725613e-06, + "loss": 0.2963, + "step": 16530 + }, + { + "epoch": 0.7481783208870785, + "grad_norm": 0.27494362095273467, + "learning_rate": 1.5733555379816773e-06, + "loss": 0.4647, + "step": 16531 + }, + { + "epoch": 0.7482235799954741, + "grad_norm": 0.6009411757917738, + "learning_rate": 1.572821837018859e-06, + "loss": 0.2794, + "step": 16532 + }, + { + "epoch": 0.7482688391038697, + "grad_norm": 0.311812204997518, + "learning_rate": 1.5722882096955748e-06, + "loss": 0.4698, + "step": 16533 + }, + { + "epoch": 0.7483140982122652, + "grad_norm": 0.5832169580997869, + "learning_rate": 1.5717546560232904e-06, + "loss": 0.2724, + "step": 16534 + }, + { + "epoch": 0.7483593573206608, + "grad_norm": 0.5453719989550989, + "learning_rate": 1.5712211760134672e-06, + "loss": 0.2928, + "step": 16535 + }, + { + "epoch": 0.7484046164290563, + "grad_norm": 0.6266500321574185, + "learning_rate": 1.5706877696775703e-06, + "loss": 0.2585, + "step": 16536 + }, + { + "epoch": 0.7484498755374519, + "grad_norm": 0.6208100270151411, + "learning_rate": 1.5701544370270638e-06, + "loss": 0.2831, + "step": 16537 + }, + { + "epoch": 0.7484951346458475, + "grad_norm": 0.2689173965747566, + "learning_rate": 1.5696211780734017e-06, + "loss": 0.4833, + "step": 16538 + }, + { + "epoch": 0.7485403937542431, + "grad_norm": 0.6334801681171852, + "learning_rate": 1.569087992828045e-06, + "loss": 0.2706, + "step": 16539 + }, + { + "epoch": 0.7485856528626386, + "grad_norm": 0.28267318715463907, + "learning_rate": 1.5685548813024516e-06, + "loss": 0.4996, + "step": 16540 + }, + { + "epoch": 0.7486309119710342, + "grad_norm": 0.5544036599356525, + "learning_rate": 1.5680218435080747e-06, + "loss": 0.3117, + "step": 16541 + }, + { + "epoch": 0.7486761710794297, + "grad_norm": 0.5937306440955723, + "learning_rate": 1.5674888794563663e-06, + "loss": 0.2904, + "step": 16542 + }, + { + "epoch": 0.7487214301878253, + "grad_norm": 0.5710840182569125, + "learning_rate": 1.566955989158781e-06, + "loss": 0.2802, + "step": 16543 + }, + { + "epoch": 0.7487666892962208, + "grad_norm": 0.6156900161661162, + "learning_rate": 1.5664231726267664e-06, + "loss": 0.2877, + "step": 16544 + }, + { + "epoch": 0.7488119484046164, + "grad_norm": 1.9108896254674193, + "learning_rate": 1.5658904298717742e-06, + "loss": 0.3083, + "step": 16545 + }, + { + "epoch": 0.748857207513012, + "grad_norm": 0.6082886537469275, + "learning_rate": 1.5653577609052495e-06, + "loss": 0.2686, + "step": 16546 + }, + { + "epoch": 0.7489024666214076, + "grad_norm": 0.5797560507570885, + "learning_rate": 1.5648251657386366e-06, + "loss": 0.2715, + "step": 16547 + }, + { + "epoch": 0.7489477257298032, + "grad_norm": 0.6528100426591587, + "learning_rate": 1.56429264438338e-06, + "loss": 0.3317, + "step": 16548 + }, + { + "epoch": 0.7489929848381986, + "grad_norm": 0.6272924481051566, + "learning_rate": 1.5637601968509242e-06, + "loss": 0.325, + "step": 16549 + }, + { + "epoch": 0.7490382439465942, + "grad_norm": 0.6446741475507854, + "learning_rate": 1.5632278231527081e-06, + "loss": 0.2886, + "step": 16550 + }, + { + "epoch": 0.7490835030549898, + "grad_norm": 0.7003736284823379, + "learning_rate": 1.5626955233001695e-06, + "loss": 0.2752, + "step": 16551 + }, + { + "epoch": 0.7491287621633854, + "grad_norm": 0.2720333118535126, + "learning_rate": 1.5621632973047468e-06, + "loss": 0.4716, + "step": 16552 + }, + { + "epoch": 0.7491740212717809, + "grad_norm": 0.6303202358609897, + "learning_rate": 1.5616311451778782e-06, + "loss": 0.3199, + "step": 16553 + }, + { + "epoch": 0.7492192803801765, + "grad_norm": 0.6152455382833465, + "learning_rate": 1.5610990669309961e-06, + "loss": 0.3115, + "step": 16554 + }, + { + "epoch": 0.7492645394885721, + "grad_norm": 0.27440801803982967, + "learning_rate": 1.560567062575532e-06, + "loss": 0.4601, + "step": 16555 + }, + { + "epoch": 0.7493097985969677, + "grad_norm": 0.6169645054117495, + "learning_rate": 1.5600351321229196e-06, + "loss": 0.2785, + "step": 16556 + }, + { + "epoch": 0.7493550577053633, + "grad_norm": 0.6401479659716349, + "learning_rate": 1.5595032755845857e-06, + "loss": 0.2773, + "step": 16557 + }, + { + "epoch": 0.7494003168137587, + "grad_norm": 0.6396815110074804, + "learning_rate": 1.5589714929719614e-06, + "loss": 0.3114, + "step": 16558 + }, + { + "epoch": 0.7494455759221543, + "grad_norm": 0.5781596757272333, + "learning_rate": 1.558439784296471e-06, + "loss": 0.3343, + "step": 16559 + }, + { + "epoch": 0.7494908350305499, + "grad_norm": 0.5997425540319526, + "learning_rate": 1.5579081495695381e-06, + "loss": 0.2718, + "step": 16560 + }, + { + "epoch": 0.7495360941389455, + "grad_norm": 0.6039137344409807, + "learning_rate": 1.5573765888025877e-06, + "loss": 0.3065, + "step": 16561 + }, + { + "epoch": 0.749581353247341, + "grad_norm": 0.5809519922124259, + "learning_rate": 1.556845102007043e-06, + "loss": 0.2898, + "step": 16562 + }, + { + "epoch": 0.7496266123557366, + "grad_norm": 0.6782488781990339, + "learning_rate": 1.556313689194322e-06, + "loss": 0.3315, + "step": 16563 + }, + { + "epoch": 0.7496718714641322, + "grad_norm": 0.6110668027970159, + "learning_rate": 1.5557823503758418e-06, + "loss": 0.3036, + "step": 16564 + }, + { + "epoch": 0.7497171305725278, + "grad_norm": 0.6452243181347994, + "learning_rate": 1.555251085563021e-06, + "loss": 0.2489, + "step": 16565 + }, + { + "epoch": 0.7497623896809232, + "grad_norm": 0.3017016378995432, + "learning_rate": 1.5547198947672777e-06, + "loss": 0.473, + "step": 16566 + }, + { + "epoch": 0.7498076487893188, + "grad_norm": 0.26271291805718217, + "learning_rate": 1.5541887780000187e-06, + "loss": 0.4459, + "step": 16567 + }, + { + "epoch": 0.7498529078977144, + "grad_norm": 0.5799561207828952, + "learning_rate": 1.5536577352726607e-06, + "loss": 0.3114, + "step": 16568 + }, + { + "epoch": 0.74989816700611, + "grad_norm": 0.2854953151376636, + "learning_rate": 1.5531267665966143e-06, + "loss": 0.4627, + "step": 16569 + }, + { + "epoch": 0.7499434261145056, + "grad_norm": 0.5779329050400861, + "learning_rate": 1.5525958719832879e-06, + "loss": 0.2798, + "step": 16570 + }, + { + "epoch": 0.7499886852229011, + "grad_norm": 0.6120806007444967, + "learning_rate": 1.5520650514440866e-06, + "loss": 0.2901, + "step": 16571 + }, + { + "epoch": 0.7500339443312967, + "grad_norm": 0.6358208723438779, + "learning_rate": 1.5515343049904191e-06, + "loss": 0.3147, + "step": 16572 + }, + { + "epoch": 0.7500792034396923, + "grad_norm": 0.6218208817316578, + "learning_rate": 1.5510036326336868e-06, + "loss": 0.3193, + "step": 16573 + }, + { + "epoch": 0.7501244625480878, + "grad_norm": 0.6407424514409737, + "learning_rate": 1.5504730343852952e-06, + "loss": 0.3067, + "step": 16574 + }, + { + "epoch": 0.7501697216564833, + "grad_norm": 0.27408098422727273, + "learning_rate": 1.5499425102566423e-06, + "loss": 0.4935, + "step": 16575 + }, + { + "epoch": 0.7502149807648789, + "grad_norm": 0.5948893953322856, + "learning_rate": 1.5494120602591305e-06, + "loss": 0.2392, + "step": 16576 + }, + { + "epoch": 0.7502602398732745, + "grad_norm": 0.2785590734503924, + "learning_rate": 1.5488816844041537e-06, + "loss": 0.4424, + "step": 16577 + }, + { + "epoch": 0.7503054989816701, + "grad_norm": 0.2694455060170366, + "learning_rate": 1.5483513827031122e-06, + "loss": 0.4678, + "step": 16578 + }, + { + "epoch": 0.7503507580900656, + "grad_norm": 0.611838194162662, + "learning_rate": 1.547821155167399e-06, + "loss": 0.2833, + "step": 16579 + }, + { + "epoch": 0.7503960171984612, + "grad_norm": 0.6544908570921042, + "learning_rate": 1.5472910018084043e-06, + "loss": 0.3155, + "step": 16580 + }, + { + "epoch": 0.7504412763068568, + "grad_norm": 0.6395240466759102, + "learning_rate": 1.546760922637522e-06, + "loss": 0.312, + "step": 16581 + }, + { + "epoch": 0.7504865354152523, + "grad_norm": 0.600114842632944, + "learning_rate": 1.5462309176661433e-06, + "loss": 0.3063, + "step": 16582 + }, + { + "epoch": 0.7505317945236479, + "grad_norm": 0.25831747528691457, + "learning_rate": 1.5457009869056545e-06, + "loss": 0.4876, + "step": 16583 + }, + { + "epoch": 0.7505770536320434, + "grad_norm": 0.6005233821159032, + "learning_rate": 1.5451711303674411e-06, + "loss": 0.2753, + "step": 16584 + }, + { + "epoch": 0.750622312740439, + "grad_norm": 0.6294182622576255, + "learning_rate": 1.5446413480628908e-06, + "loss": 0.3225, + "step": 16585 + }, + { + "epoch": 0.7506675718488346, + "grad_norm": 0.6474888224795824, + "learning_rate": 1.5441116400033846e-06, + "loss": 0.3342, + "step": 16586 + }, + { + "epoch": 0.7507128309572302, + "grad_norm": 0.738721455533006, + "learning_rate": 1.543582006200306e-06, + "loss": 0.2893, + "step": 16587 + }, + { + "epoch": 0.7507580900656257, + "grad_norm": 0.5786158399123434, + "learning_rate": 1.5430524466650354e-06, + "loss": 0.2991, + "step": 16588 + }, + { + "epoch": 0.7508033491740213, + "grad_norm": 0.596568758570493, + "learning_rate": 1.5425229614089482e-06, + "loss": 0.3466, + "step": 16589 + }, + { + "epoch": 0.7508486082824168, + "grad_norm": 0.2613221091049353, + "learning_rate": 1.5419935504434242e-06, + "loss": 0.4615, + "step": 16590 + }, + { + "epoch": 0.7508938673908124, + "grad_norm": 0.377820860939105, + "learning_rate": 1.5414642137798396e-06, + "loss": 0.4633, + "step": 16591 + }, + { + "epoch": 0.750939126499208, + "grad_norm": 0.5871596830036833, + "learning_rate": 1.5409349514295674e-06, + "loss": 0.2742, + "step": 16592 + }, + { + "epoch": 0.7509843856076035, + "grad_norm": 0.27193806045907737, + "learning_rate": 1.540405763403977e-06, + "loss": 0.4966, + "step": 16593 + }, + { + "epoch": 0.7510296447159991, + "grad_norm": 0.5923123067358512, + "learning_rate": 1.5398766497144424e-06, + "loss": 0.2994, + "step": 16594 + }, + { + "epoch": 0.7510749038243947, + "grad_norm": 0.6217953147336326, + "learning_rate": 1.5393476103723342e-06, + "loss": 0.3565, + "step": 16595 + }, + { + "epoch": 0.7511201629327903, + "grad_norm": 0.6456203570155561, + "learning_rate": 1.5388186453890142e-06, + "loss": 0.27, + "step": 16596 + }, + { + "epoch": 0.7511654220411857, + "grad_norm": 0.5788306938991927, + "learning_rate": 1.5382897547758513e-06, + "loss": 0.283, + "step": 16597 + }, + { + "epoch": 0.7512106811495813, + "grad_norm": 0.2657948767174147, + "learning_rate": 1.5377609385442116e-06, + "loss": 0.4624, + "step": 16598 + }, + { + "epoch": 0.7512559402579769, + "grad_norm": 0.6724230533015774, + "learning_rate": 1.5372321967054554e-06, + "loss": 0.3256, + "step": 16599 + }, + { + "epoch": 0.7513011993663725, + "grad_norm": 0.6915387200679524, + "learning_rate": 1.5367035292709432e-06, + "loss": 0.3744, + "step": 16600 + }, + { + "epoch": 0.751346458474768, + "grad_norm": 0.2619534121431575, + "learning_rate": 1.5361749362520363e-06, + "loss": 0.4691, + "step": 16601 + }, + { + "epoch": 0.7513917175831636, + "grad_norm": 0.6145861080376424, + "learning_rate": 1.5356464176600905e-06, + "loss": 0.2918, + "step": 16602 + }, + { + "epoch": 0.7514369766915592, + "grad_norm": 0.6530194354447144, + "learning_rate": 1.5351179735064647e-06, + "loss": 0.2846, + "step": 16603 + }, + { + "epoch": 0.7514822357999548, + "grad_norm": 0.6849799194550324, + "learning_rate": 1.534589603802511e-06, + "loss": 0.2863, + "step": 16604 + }, + { + "epoch": 0.7515274949083504, + "grad_norm": 0.6193998756496838, + "learning_rate": 1.5340613085595846e-06, + "loss": 0.3173, + "step": 16605 + }, + { + "epoch": 0.7515727540167458, + "grad_norm": 0.616145686650051, + "learning_rate": 1.5335330877890341e-06, + "loss": 0.3018, + "step": 16606 + }, + { + "epoch": 0.7516180131251414, + "grad_norm": 0.6140848804103951, + "learning_rate": 1.533004941502213e-06, + "loss": 0.3318, + "step": 16607 + }, + { + "epoch": 0.751663272233537, + "grad_norm": 0.6719963364193847, + "learning_rate": 1.5324768697104681e-06, + "loss": 0.3211, + "step": 16608 + }, + { + "epoch": 0.7517085313419326, + "grad_norm": 0.6257389026054484, + "learning_rate": 1.5319488724251436e-06, + "loss": 0.3511, + "step": 16609 + }, + { + "epoch": 0.7517537904503281, + "grad_norm": 0.6387686337466676, + "learning_rate": 1.5314209496575861e-06, + "loss": 0.3059, + "step": 16610 + }, + { + "epoch": 0.7517990495587237, + "grad_norm": 0.7750573942287288, + "learning_rate": 1.5308931014191414e-06, + "loss": 0.2943, + "step": 16611 + }, + { + "epoch": 0.7518443086671193, + "grad_norm": 0.6321905375975513, + "learning_rate": 1.5303653277211493e-06, + "loss": 0.2929, + "step": 16612 + }, + { + "epoch": 0.7518895677755149, + "grad_norm": 0.6112993269416406, + "learning_rate": 1.5298376285749489e-06, + "loss": 0.3131, + "step": 16613 + }, + { + "epoch": 0.7519348268839103, + "grad_norm": 0.6284143210745861, + "learning_rate": 1.5293100039918812e-06, + "loss": 0.3148, + "step": 16614 + }, + { + "epoch": 0.7519800859923059, + "grad_norm": 0.5935461720492932, + "learning_rate": 1.5287824539832808e-06, + "loss": 0.3209, + "step": 16615 + }, + { + "epoch": 0.7520253451007015, + "grad_norm": 0.6039730603185649, + "learning_rate": 1.5282549785604861e-06, + "loss": 0.3211, + "step": 16616 + }, + { + "epoch": 0.7520706042090971, + "grad_norm": 0.30584223112382614, + "learning_rate": 1.5277275777348294e-06, + "loss": 0.4479, + "step": 16617 + }, + { + "epoch": 0.7521158633174927, + "grad_norm": 0.5795522532688792, + "learning_rate": 1.5272002515176404e-06, + "loss": 0.2825, + "step": 16618 + }, + { + "epoch": 0.7521611224258882, + "grad_norm": 0.5665869525523641, + "learning_rate": 1.526672999920253e-06, + "loss": 0.2639, + "step": 16619 + }, + { + "epoch": 0.7522063815342838, + "grad_norm": 0.5847304350030971, + "learning_rate": 1.5261458229539966e-06, + "loss": 0.3083, + "step": 16620 + }, + { + "epoch": 0.7522516406426794, + "grad_norm": 0.27271189698999354, + "learning_rate": 1.525618720630197e-06, + "loss": 0.4373, + "step": 16621 + }, + { + "epoch": 0.7522968997510749, + "grad_norm": 0.2711721727141647, + "learning_rate": 1.525091692960179e-06, + "loss": 0.4454, + "step": 16622 + }, + { + "epoch": 0.7523421588594704, + "grad_norm": 0.6319203562525041, + "learning_rate": 1.5245647399552682e-06, + "loss": 0.3096, + "step": 16623 + }, + { + "epoch": 0.752387417967866, + "grad_norm": 0.5365429777613504, + "learning_rate": 1.5240378616267887e-06, + "loss": 0.3046, + "step": 16624 + }, + { + "epoch": 0.7524326770762616, + "grad_norm": 0.5917964105538425, + "learning_rate": 1.5235110579860602e-06, + "loss": 0.3068, + "step": 16625 + }, + { + "epoch": 0.7524779361846572, + "grad_norm": 0.6203010656794513, + "learning_rate": 1.5229843290443996e-06, + "loss": 0.2773, + "step": 16626 + }, + { + "epoch": 0.7525231952930528, + "grad_norm": 0.6404234014576459, + "learning_rate": 1.5224576748131292e-06, + "loss": 0.3062, + "step": 16627 + }, + { + "epoch": 0.7525684544014483, + "grad_norm": 0.5958364086183551, + "learning_rate": 1.521931095303561e-06, + "loss": 0.3381, + "step": 16628 + }, + { + "epoch": 0.7526137135098439, + "grad_norm": 0.6663594972658775, + "learning_rate": 1.521404590527013e-06, + "loss": 0.3265, + "step": 16629 + }, + { + "epoch": 0.7526589726182394, + "grad_norm": 0.6487574124977519, + "learning_rate": 1.520878160494797e-06, + "loss": 0.2932, + "step": 16630 + }, + { + "epoch": 0.752704231726635, + "grad_norm": 0.5654488136672179, + "learning_rate": 1.520351805218222e-06, + "loss": 0.3152, + "step": 16631 + }, + { + "epoch": 0.7527494908350305, + "grad_norm": 0.6411086584970878, + "learning_rate": 1.5198255247086018e-06, + "loss": 0.2959, + "step": 16632 + }, + { + "epoch": 0.7527947499434261, + "grad_norm": 0.6128062221240184, + "learning_rate": 1.5192993189772408e-06, + "loss": 0.3323, + "step": 16633 + }, + { + "epoch": 0.7528400090518217, + "grad_norm": 0.617088053959374, + "learning_rate": 1.5187731880354489e-06, + "loss": 0.3243, + "step": 16634 + }, + { + "epoch": 0.7528852681602173, + "grad_norm": 0.7494997082124705, + "learning_rate": 1.5182471318945275e-06, + "loss": 0.3403, + "step": 16635 + }, + { + "epoch": 0.7529305272686128, + "grad_norm": 0.6297086295505582, + "learning_rate": 1.517721150565784e-06, + "loss": 0.2955, + "step": 16636 + }, + { + "epoch": 0.7529757863770083, + "grad_norm": 0.6379063471545178, + "learning_rate": 1.5171952440605175e-06, + "loss": 0.3205, + "step": 16637 + }, + { + "epoch": 0.7530210454854039, + "grad_norm": 0.5753119543744168, + "learning_rate": 1.5166694123900271e-06, + "loss": 0.2886, + "step": 16638 + }, + { + "epoch": 0.7530663045937995, + "grad_norm": 0.657485752082828, + "learning_rate": 1.5161436555656129e-06, + "loss": 0.3017, + "step": 16639 + }, + { + "epoch": 0.7531115637021951, + "grad_norm": 0.6017532634783226, + "learning_rate": 1.5156179735985732e-06, + "loss": 0.3474, + "step": 16640 + }, + { + "epoch": 0.7531568228105906, + "grad_norm": 0.6199333396182094, + "learning_rate": 1.5150923665002021e-06, + "loss": 0.2847, + "step": 16641 + }, + { + "epoch": 0.7532020819189862, + "grad_norm": 0.6428709789975856, + "learning_rate": 1.514566834281791e-06, + "loss": 0.2791, + "step": 16642 + }, + { + "epoch": 0.7532473410273818, + "grad_norm": 0.6080445266538872, + "learning_rate": 1.5140413769546353e-06, + "loss": 0.2884, + "step": 16643 + }, + { + "epoch": 0.7532926001357774, + "grad_norm": 0.6450629264084641, + "learning_rate": 1.5135159945300232e-06, + "loss": 0.2953, + "step": 16644 + }, + { + "epoch": 0.7533378592441728, + "grad_norm": 0.7004210719059599, + "learning_rate": 1.5129906870192456e-06, + "loss": 0.2754, + "step": 16645 + }, + { + "epoch": 0.7533831183525684, + "grad_norm": 0.6067616219779681, + "learning_rate": 1.512465454433587e-06, + "loss": 0.2697, + "step": 16646 + }, + { + "epoch": 0.753428377460964, + "grad_norm": 0.593391764471863, + "learning_rate": 1.5119402967843361e-06, + "loss": 0.2953, + "step": 16647 + }, + { + "epoch": 0.7534736365693596, + "grad_norm": 0.26635515223260847, + "learning_rate": 1.5114152140827744e-06, + "loss": 0.4766, + "step": 16648 + }, + { + "epoch": 0.7535188956777551, + "grad_norm": 0.5983568795117736, + "learning_rate": 1.5108902063401865e-06, + "loss": 0.2911, + "step": 16649 + }, + { + "epoch": 0.7535641547861507, + "grad_norm": 0.6481268868427346, + "learning_rate": 1.5103652735678525e-06, + "loss": 0.3202, + "step": 16650 + }, + { + "epoch": 0.7536094138945463, + "grad_norm": 0.5963856117452525, + "learning_rate": 1.509840415777049e-06, + "loss": 0.2971, + "step": 16651 + }, + { + "epoch": 0.7536546730029419, + "grad_norm": 0.6869703319652534, + "learning_rate": 1.5093156329790564e-06, + "loss": 0.2684, + "step": 16652 + }, + { + "epoch": 0.7536999321113375, + "grad_norm": 0.5996001305416143, + "learning_rate": 1.5087909251851513e-06, + "loss": 0.2859, + "step": 16653 + }, + { + "epoch": 0.7537451912197329, + "grad_norm": 0.6645363319026046, + "learning_rate": 1.5082662924066067e-06, + "loss": 0.353, + "step": 16654 + }, + { + "epoch": 0.7537904503281285, + "grad_norm": 0.6327652610190093, + "learning_rate": 1.5077417346546942e-06, + "loss": 0.3123, + "step": 16655 + }, + { + "epoch": 0.7538357094365241, + "grad_norm": 0.6546052835923984, + "learning_rate": 1.5072172519406874e-06, + "loss": 0.3011, + "step": 16656 + }, + { + "epoch": 0.7538809685449197, + "grad_norm": 0.26338153529601854, + "learning_rate": 1.5066928442758528e-06, + "loss": 0.4763, + "step": 16657 + }, + { + "epoch": 0.7539262276533152, + "grad_norm": 0.2735645155703924, + "learning_rate": 1.506168511671462e-06, + "loss": 0.4702, + "step": 16658 + }, + { + "epoch": 0.7539714867617108, + "grad_norm": 0.6256308592453239, + "learning_rate": 1.5056442541387794e-06, + "loss": 0.3219, + "step": 16659 + }, + { + "epoch": 0.7540167458701064, + "grad_norm": 0.6069178482632577, + "learning_rate": 1.5051200716890686e-06, + "loss": 0.3154, + "step": 16660 + }, + { + "epoch": 0.754062004978502, + "grad_norm": 0.5788701325657072, + "learning_rate": 1.5045959643335928e-06, + "loss": 0.326, + "step": 16661 + }, + { + "epoch": 0.7541072640868974, + "grad_norm": 0.6112234127041374, + "learning_rate": 1.5040719320836167e-06, + "loss": 0.3219, + "step": 16662 + }, + { + "epoch": 0.754152523195293, + "grad_norm": 0.5851015832556286, + "learning_rate": 1.5035479749503973e-06, + "loss": 0.2864, + "step": 16663 + }, + { + "epoch": 0.7541977823036886, + "grad_norm": 0.8292918564880299, + "learning_rate": 1.5030240929451922e-06, + "loss": 0.3121, + "step": 16664 + }, + { + "epoch": 0.7542430414120842, + "grad_norm": 1.198695686104617, + "learning_rate": 1.5025002860792609e-06, + "loss": 0.2842, + "step": 16665 + }, + { + "epoch": 0.7542883005204798, + "grad_norm": 0.6179582469616123, + "learning_rate": 1.5019765543638564e-06, + "loss": 0.2948, + "step": 16666 + }, + { + "epoch": 0.7543335596288753, + "grad_norm": 0.6735381253035728, + "learning_rate": 1.5014528978102311e-06, + "loss": 0.2801, + "step": 16667 + }, + { + "epoch": 0.7543788187372709, + "grad_norm": 0.6356498941144694, + "learning_rate": 1.500929316429638e-06, + "loss": 0.2803, + "step": 16668 + }, + { + "epoch": 0.7544240778456665, + "grad_norm": 0.6436793787689117, + "learning_rate": 1.5004058102333285e-06, + "loss": 0.3282, + "step": 16669 + }, + { + "epoch": 0.754469336954062, + "grad_norm": 0.763312169832447, + "learning_rate": 1.49988237923255e-06, + "loss": 0.3088, + "step": 16670 + }, + { + "epoch": 0.7545145960624575, + "grad_norm": 0.838672560377792, + "learning_rate": 1.499359023438548e-06, + "loss": 0.3191, + "step": 16671 + }, + { + "epoch": 0.7545598551708531, + "grad_norm": 0.5915128483790769, + "learning_rate": 1.4988357428625711e-06, + "loss": 0.2931, + "step": 16672 + }, + { + "epoch": 0.7546051142792487, + "grad_norm": 0.7560524050943002, + "learning_rate": 1.4983125375158591e-06, + "loss": 0.321, + "step": 16673 + }, + { + "epoch": 0.7546503733876443, + "grad_norm": 0.6354801252230485, + "learning_rate": 1.4977894074096576e-06, + "loss": 0.2892, + "step": 16674 + }, + { + "epoch": 0.7546956324960399, + "grad_norm": 0.5789119692914134, + "learning_rate": 1.497266352555204e-06, + "loss": 0.2678, + "step": 16675 + }, + { + "epoch": 0.7547408916044354, + "grad_norm": 0.298435257825619, + "learning_rate": 1.4967433729637403e-06, + "loss": 0.4841, + "step": 16676 + }, + { + "epoch": 0.754786150712831, + "grad_norm": 0.5903516195540437, + "learning_rate": 1.4962204686465003e-06, + "loss": 0.286, + "step": 16677 + }, + { + "epoch": 0.7548314098212265, + "grad_norm": 0.6343168652793674, + "learning_rate": 1.4956976396147233e-06, + "loss": 0.3419, + "step": 16678 + }, + { + "epoch": 0.7548766689296221, + "grad_norm": 0.2934288256714517, + "learning_rate": 1.4951748858796411e-06, + "loss": 0.4855, + "step": 16679 + }, + { + "epoch": 0.7549219280380176, + "grad_norm": 0.635717850760982, + "learning_rate": 1.494652207452485e-06, + "loss": 0.2848, + "step": 16680 + }, + { + "epoch": 0.7549671871464132, + "grad_norm": 0.5702053407110187, + "learning_rate": 1.4941296043444869e-06, + "loss": 0.3075, + "step": 16681 + }, + { + "epoch": 0.7550124462548088, + "grad_norm": 0.6190446810455607, + "learning_rate": 1.493607076566878e-06, + "loss": 0.2881, + "step": 16682 + }, + { + "epoch": 0.7550577053632044, + "grad_norm": 0.26498951155860173, + "learning_rate": 1.4930846241308838e-06, + "loss": 0.4557, + "step": 16683 + }, + { + "epoch": 0.7551029644715999, + "grad_norm": 0.592932363094059, + "learning_rate": 1.4925622470477291e-06, + "loss": 0.2828, + "step": 16684 + }, + { + "epoch": 0.7551482235799954, + "grad_norm": 0.6565257146532069, + "learning_rate": 1.4920399453286405e-06, + "loss": 0.2936, + "step": 16685 + }, + { + "epoch": 0.755193482688391, + "grad_norm": 0.6645887871402962, + "learning_rate": 1.4915177189848384e-06, + "loss": 0.2476, + "step": 16686 + }, + { + "epoch": 0.7552387417967866, + "grad_norm": 0.6285093421542468, + "learning_rate": 1.4909955680275462e-06, + "loss": 0.2285, + "step": 16687 + }, + { + "epoch": 0.7552840009051822, + "grad_norm": 0.2759830785118571, + "learning_rate": 1.4904734924679825e-06, + "loss": 0.4875, + "step": 16688 + }, + { + "epoch": 0.7553292600135777, + "grad_norm": 0.6342964851974112, + "learning_rate": 1.489951492317363e-06, + "loss": 0.3002, + "step": 16689 + }, + { + "epoch": 0.7553745191219733, + "grad_norm": 0.6198259499774071, + "learning_rate": 1.4894295675869058e-06, + "loss": 0.3391, + "step": 16690 + }, + { + "epoch": 0.7554197782303689, + "grad_norm": 0.57067587269311, + "learning_rate": 1.488907718287827e-06, + "loss": 0.2754, + "step": 16691 + }, + { + "epoch": 0.7554650373387645, + "grad_norm": 0.600458579427856, + "learning_rate": 1.4883859444313376e-06, + "loss": 0.3354, + "step": 16692 + }, + { + "epoch": 0.75551029644716, + "grad_norm": 0.6040714705322814, + "learning_rate": 1.4878642460286474e-06, + "loss": 0.3409, + "step": 16693 + }, + { + "epoch": 0.7555555555555555, + "grad_norm": 0.6057206746979718, + "learning_rate": 1.4873426230909682e-06, + "loss": 0.3009, + "step": 16694 + }, + { + "epoch": 0.7556008146639511, + "grad_norm": 0.645707146707286, + "learning_rate": 1.4868210756295109e-06, + "loss": 0.338, + "step": 16695 + }, + { + "epoch": 0.7556460737723467, + "grad_norm": 0.6667021532346543, + "learning_rate": 1.4862996036554756e-06, + "loss": 0.3085, + "step": 16696 + }, + { + "epoch": 0.7556913328807422, + "grad_norm": 0.2810025522918448, + "learning_rate": 1.4857782071800697e-06, + "loss": 0.4542, + "step": 16697 + }, + { + "epoch": 0.7557365919891378, + "grad_norm": 0.6814649766957752, + "learning_rate": 1.4852568862144995e-06, + "loss": 0.3069, + "step": 16698 + }, + { + "epoch": 0.7557818510975334, + "grad_norm": 0.5990733271348277, + "learning_rate": 1.4847356407699632e-06, + "loss": 0.3161, + "step": 16699 + }, + { + "epoch": 0.755827110205929, + "grad_norm": 0.6273544717931, + "learning_rate": 1.4842144708576606e-06, + "loss": 0.3418, + "step": 16700 + }, + { + "epoch": 0.7558723693143246, + "grad_norm": 0.6326039496529865, + "learning_rate": 1.4836933764887928e-06, + "loss": 0.2986, + "step": 16701 + }, + { + "epoch": 0.75591762842272, + "grad_norm": 0.26567037266128657, + "learning_rate": 1.4831723576745531e-06, + "loss": 0.4501, + "step": 16702 + }, + { + "epoch": 0.7559628875311156, + "grad_norm": 0.6678320036059954, + "learning_rate": 1.48265141442614e-06, + "loss": 0.2983, + "step": 16703 + }, + { + "epoch": 0.7560081466395112, + "grad_norm": 0.6033148157639469, + "learning_rate": 1.4821305467547436e-06, + "loss": 0.2782, + "step": 16704 + }, + { + "epoch": 0.7560534057479068, + "grad_norm": 0.6145787945428695, + "learning_rate": 1.481609754671559e-06, + "loss": 0.3172, + "step": 16705 + }, + { + "epoch": 0.7560986648563023, + "grad_norm": 0.6623739706543618, + "learning_rate": 1.4810890381877736e-06, + "loss": 0.275, + "step": 16706 + }, + { + "epoch": 0.7561439239646979, + "grad_norm": 0.594661372064247, + "learning_rate": 1.4805683973145784e-06, + "loss": 0.33, + "step": 16707 + }, + { + "epoch": 0.7561891830730935, + "grad_norm": 0.2872788418078114, + "learning_rate": 1.4800478320631595e-06, + "loss": 0.4667, + "step": 16708 + }, + { + "epoch": 0.756234442181489, + "grad_norm": 0.664942448101899, + "learning_rate": 1.4795273424446998e-06, + "loss": 0.3092, + "step": 16709 + }, + { + "epoch": 0.7562797012898846, + "grad_norm": 0.5697394913447789, + "learning_rate": 1.4790069284703863e-06, + "loss": 0.2566, + "step": 16710 + }, + { + "epoch": 0.7563249603982801, + "grad_norm": 0.6307175705577794, + "learning_rate": 1.4784865901514005e-06, + "loss": 0.3119, + "step": 16711 + }, + { + "epoch": 0.7563702195066757, + "grad_norm": 0.6019116928534433, + "learning_rate": 1.4779663274989232e-06, + "loss": 0.3139, + "step": 16712 + }, + { + "epoch": 0.7564154786150713, + "grad_norm": 0.6226910834635931, + "learning_rate": 1.4774461405241303e-06, + "loss": 0.3392, + "step": 16713 + }, + { + "epoch": 0.7564607377234669, + "grad_norm": 0.6134479971109972, + "learning_rate": 1.4769260292382031e-06, + "loss": 0.2916, + "step": 16714 + }, + { + "epoch": 0.7565059968318624, + "grad_norm": 0.6155452094594851, + "learning_rate": 1.4764059936523134e-06, + "loss": 0.2951, + "step": 16715 + }, + { + "epoch": 0.756551255940258, + "grad_norm": 0.2636065545551082, + "learning_rate": 1.4758860337776387e-06, + "loss": 0.4449, + "step": 16716 + }, + { + "epoch": 0.7565965150486536, + "grad_norm": 0.2850426046437727, + "learning_rate": 1.475366149625348e-06, + "loss": 0.4538, + "step": 16717 + }, + { + "epoch": 0.7566417741570491, + "grad_norm": 0.6652238532497509, + "learning_rate": 1.474846341206615e-06, + "loss": 0.3183, + "step": 16718 + }, + { + "epoch": 0.7566870332654446, + "grad_norm": 0.7590976861643215, + "learning_rate": 1.4743266085326062e-06, + "loss": 0.3575, + "step": 16719 + }, + { + "epoch": 0.7567322923738402, + "grad_norm": 0.5542928044334143, + "learning_rate": 1.473806951614492e-06, + "loss": 0.3177, + "step": 16720 + }, + { + "epoch": 0.7567775514822358, + "grad_norm": 0.646674522922809, + "learning_rate": 1.4732873704634366e-06, + "loss": 0.291, + "step": 16721 + }, + { + "epoch": 0.7568228105906314, + "grad_norm": 0.6264009080590869, + "learning_rate": 1.472767865090602e-06, + "loss": 0.3181, + "step": 16722 + }, + { + "epoch": 0.756868069699027, + "grad_norm": 0.279109290941747, + "learning_rate": 1.472248435507153e-06, + "loss": 0.4683, + "step": 16723 + }, + { + "epoch": 0.7569133288074225, + "grad_norm": 0.6090954445924818, + "learning_rate": 1.4717290817242542e-06, + "loss": 0.3097, + "step": 16724 + }, + { + "epoch": 0.756958587915818, + "grad_norm": 0.73037171924201, + "learning_rate": 1.4712098037530575e-06, + "loss": 0.3042, + "step": 16725 + }, + { + "epoch": 0.7570038470242136, + "grad_norm": 0.650877793870771, + "learning_rate": 1.4706906016047246e-06, + "loss": 0.3073, + "step": 16726 + }, + { + "epoch": 0.7570491061326092, + "grad_norm": 0.7641152232774284, + "learning_rate": 1.4701714752904123e-06, + "loss": 0.3051, + "step": 16727 + }, + { + "epoch": 0.7570943652410047, + "grad_norm": 0.6338668362810962, + "learning_rate": 1.4696524248212746e-06, + "loss": 0.2979, + "step": 16728 + }, + { + "epoch": 0.7571396243494003, + "grad_norm": 0.26435423960215104, + "learning_rate": 1.4691334502084614e-06, + "loss": 0.4609, + "step": 16729 + }, + { + "epoch": 0.7571848834577959, + "grad_norm": 0.27102785628931125, + "learning_rate": 1.4686145514631284e-06, + "loss": 0.4625, + "step": 16730 + }, + { + "epoch": 0.7572301425661915, + "grad_norm": 0.6085883837552643, + "learning_rate": 1.4680957285964208e-06, + "loss": 0.2541, + "step": 16731 + }, + { + "epoch": 0.757275401674587, + "grad_norm": 0.5566789634371905, + "learning_rate": 1.4675769816194902e-06, + "loss": 0.2708, + "step": 16732 + }, + { + "epoch": 0.7573206607829825, + "grad_norm": 0.6355008962027197, + "learning_rate": 1.46705831054348e-06, + "loss": 0.2918, + "step": 16733 + }, + { + "epoch": 0.7573659198913781, + "grad_norm": 0.25688814557401, + "learning_rate": 1.4665397153795375e-06, + "loss": 0.4723, + "step": 16734 + }, + { + "epoch": 0.7574111789997737, + "grad_norm": 0.5969059123585657, + "learning_rate": 1.4660211961388027e-06, + "loss": 0.3298, + "step": 16735 + }, + { + "epoch": 0.7574564381081693, + "grad_norm": 0.6764990779117397, + "learning_rate": 1.46550275283242e-06, + "loss": 0.2872, + "step": 16736 + }, + { + "epoch": 0.7575016972165648, + "grad_norm": 0.5972428709774328, + "learning_rate": 1.464984385471528e-06, + "loss": 0.307, + "step": 16737 + }, + { + "epoch": 0.7575469563249604, + "grad_norm": 0.6033021842878759, + "learning_rate": 1.4644660940672628e-06, + "loss": 0.3265, + "step": 16738 + }, + { + "epoch": 0.757592215433356, + "grad_norm": 0.6668498063128243, + "learning_rate": 1.4639478786307627e-06, + "loss": 0.3038, + "step": 16739 + }, + { + "epoch": 0.7576374745417516, + "grad_norm": 1.843639898452091, + "learning_rate": 1.4634297391731645e-06, + "loss": 0.3057, + "step": 16740 + }, + { + "epoch": 0.757682733650147, + "grad_norm": 0.6062164374997798, + "learning_rate": 1.4629116757055989e-06, + "loss": 0.2588, + "step": 16741 + }, + { + "epoch": 0.7577279927585426, + "grad_norm": 0.6141414599099798, + "learning_rate": 1.462393688239197e-06, + "loss": 0.2775, + "step": 16742 + }, + { + "epoch": 0.7577732518669382, + "grad_norm": 0.6209748362392395, + "learning_rate": 1.461875776785091e-06, + "loss": 0.3779, + "step": 16743 + }, + { + "epoch": 0.7578185109753338, + "grad_norm": 0.5652933419403928, + "learning_rate": 1.4613579413544065e-06, + "loss": 0.2686, + "step": 16744 + }, + { + "epoch": 0.7578637700837294, + "grad_norm": 0.6753961791194718, + "learning_rate": 1.4608401819582734e-06, + "loss": 0.3364, + "step": 16745 + }, + { + "epoch": 0.7579090291921249, + "grad_norm": 0.594061372333826, + "learning_rate": 1.460322498607814e-06, + "loss": 0.3186, + "step": 16746 + }, + { + "epoch": 0.7579542883005205, + "grad_norm": 0.2855580345783155, + "learning_rate": 1.4598048913141538e-06, + "loss": 0.4631, + "step": 16747 + }, + { + "epoch": 0.7579995474089161, + "grad_norm": 0.6084118565420273, + "learning_rate": 1.4592873600884123e-06, + "loss": 0.3163, + "step": 16748 + }, + { + "epoch": 0.7580448065173117, + "grad_norm": 0.624547778812951, + "learning_rate": 1.458769904941712e-06, + "loss": 0.322, + "step": 16749 + }, + { + "epoch": 0.7580900656257071, + "grad_norm": 0.5736452180550441, + "learning_rate": 1.458252525885171e-06, + "loss": 0.3053, + "step": 16750 + }, + { + "epoch": 0.7581353247341027, + "grad_norm": 0.6451197460634303, + "learning_rate": 1.4577352229299036e-06, + "loss": 0.3067, + "step": 16751 + }, + { + "epoch": 0.7581805838424983, + "grad_norm": 0.5835805495229613, + "learning_rate": 1.4572179960870276e-06, + "loss": 0.2996, + "step": 16752 + }, + { + "epoch": 0.7582258429508939, + "grad_norm": 0.644994455896996, + "learning_rate": 1.4567008453676584e-06, + "loss": 0.304, + "step": 16753 + }, + { + "epoch": 0.7582711020592894, + "grad_norm": 0.6242410509122905, + "learning_rate": 1.456183770782903e-06, + "loss": 0.2737, + "step": 16754 + }, + { + "epoch": 0.758316361167685, + "grad_norm": 0.5967340225522353, + "learning_rate": 1.4556667723438745e-06, + "loss": 0.2896, + "step": 16755 + }, + { + "epoch": 0.7583616202760806, + "grad_norm": 0.27541432600904664, + "learning_rate": 1.4551498500616823e-06, + "loss": 0.4955, + "step": 16756 + }, + { + "epoch": 0.7584068793844762, + "grad_norm": 0.7689975087348239, + "learning_rate": 1.4546330039474332e-06, + "loss": 0.2772, + "step": 16757 + }, + { + "epoch": 0.7584521384928717, + "grad_norm": 0.6404359902753533, + "learning_rate": 1.4541162340122305e-06, + "loss": 0.2655, + "step": 16758 + }, + { + "epoch": 0.7584973976012672, + "grad_norm": 0.6518976716703694, + "learning_rate": 1.453599540267181e-06, + "loss": 0.3307, + "step": 16759 + }, + { + "epoch": 0.7585426567096628, + "grad_norm": 0.6298797588350113, + "learning_rate": 1.453082922723384e-06, + "loss": 0.268, + "step": 16760 + }, + { + "epoch": 0.7585879158180584, + "grad_norm": 0.6043313496398718, + "learning_rate": 1.4525663813919433e-06, + "loss": 0.3191, + "step": 16761 + }, + { + "epoch": 0.758633174926454, + "grad_norm": 0.7437923777491514, + "learning_rate": 1.452049916283954e-06, + "loss": 0.3195, + "step": 16762 + }, + { + "epoch": 0.7586784340348495, + "grad_norm": 0.6746630879217013, + "learning_rate": 1.4515335274105168e-06, + "loss": 0.3604, + "step": 16763 + }, + { + "epoch": 0.7587236931432451, + "grad_norm": 0.5701015759486506, + "learning_rate": 1.4510172147827244e-06, + "loss": 0.2821, + "step": 16764 + }, + { + "epoch": 0.7587689522516406, + "grad_norm": 0.6900711667667638, + "learning_rate": 1.4505009784116735e-06, + "loss": 0.2907, + "step": 16765 + }, + { + "epoch": 0.7588142113600362, + "grad_norm": 0.5760290714206665, + "learning_rate": 1.4499848183084558e-06, + "loss": 0.2687, + "step": 16766 + }, + { + "epoch": 0.7588594704684317, + "grad_norm": 0.5925292721443706, + "learning_rate": 1.449468734484159e-06, + "loss": 0.3341, + "step": 16767 + }, + { + "epoch": 0.7589047295768273, + "grad_norm": 0.6251576508171806, + "learning_rate": 1.4489527269498749e-06, + "loss": 0.2754, + "step": 16768 + }, + { + "epoch": 0.7589499886852229, + "grad_norm": 0.5910949194326818, + "learning_rate": 1.4484367957166923e-06, + "loss": 0.3144, + "step": 16769 + }, + { + "epoch": 0.7589952477936185, + "grad_norm": 0.6602577060771069, + "learning_rate": 1.4479209407956946e-06, + "loss": 0.2814, + "step": 16770 + }, + { + "epoch": 0.7590405069020141, + "grad_norm": 0.6725839382128941, + "learning_rate": 1.4474051621979651e-06, + "loss": 0.306, + "step": 16771 + }, + { + "epoch": 0.7590857660104096, + "grad_norm": 0.6187387367824468, + "learning_rate": 1.4468894599345895e-06, + "loss": 0.2708, + "step": 16772 + }, + { + "epoch": 0.7591310251188051, + "grad_norm": 0.2720563841107742, + "learning_rate": 1.446373834016645e-06, + "loss": 0.4706, + "step": 16773 + }, + { + "epoch": 0.7591762842272007, + "grad_norm": 0.6388025149213199, + "learning_rate": 1.4458582844552144e-06, + "loss": 0.3396, + "step": 16774 + }, + { + "epoch": 0.7592215433355963, + "grad_norm": 0.6594795049049732, + "learning_rate": 1.4453428112613716e-06, + "loss": 0.2842, + "step": 16775 + }, + { + "epoch": 0.7592668024439918, + "grad_norm": 0.6183436482999873, + "learning_rate": 1.4448274144461965e-06, + "loss": 0.2865, + "step": 16776 + }, + { + "epoch": 0.7593120615523874, + "grad_norm": 0.7109544276087211, + "learning_rate": 1.44431209402076e-06, + "loss": 0.263, + "step": 16777 + }, + { + "epoch": 0.759357320660783, + "grad_norm": 0.6297736322139782, + "learning_rate": 1.4437968499961374e-06, + "loss": 0.2731, + "step": 16778 + }, + { + "epoch": 0.7594025797691786, + "grad_norm": 0.6384738341119297, + "learning_rate": 1.4432816823833983e-06, + "loss": 0.3162, + "step": 16779 + }, + { + "epoch": 0.7594478388775742, + "grad_norm": 0.582218562638432, + "learning_rate": 1.4427665911936106e-06, + "loss": 0.2916, + "step": 16780 + }, + { + "epoch": 0.7594930979859696, + "grad_norm": 0.6127931682849966, + "learning_rate": 1.4422515764378443e-06, + "loss": 0.294, + "step": 16781 + }, + { + "epoch": 0.7595383570943652, + "grad_norm": 0.6399210423841506, + "learning_rate": 1.4417366381271674e-06, + "loss": 0.286, + "step": 16782 + }, + { + "epoch": 0.7595836162027608, + "grad_norm": 0.620176767003677, + "learning_rate": 1.4412217762726388e-06, + "loss": 0.2846, + "step": 16783 + }, + { + "epoch": 0.7596288753111564, + "grad_norm": 0.6019342956194589, + "learning_rate": 1.4407069908853243e-06, + "loss": 0.2601, + "step": 16784 + }, + { + "epoch": 0.7596741344195519, + "grad_norm": 0.2856625396067131, + "learning_rate": 1.4401922819762864e-06, + "loss": 0.4776, + "step": 16785 + }, + { + "epoch": 0.7597193935279475, + "grad_norm": 0.27648760043520637, + "learning_rate": 1.4396776495565833e-06, + "loss": 0.4881, + "step": 16786 + }, + { + "epoch": 0.7597646526363431, + "grad_norm": 0.625835672946518, + "learning_rate": 1.4391630936372714e-06, + "loss": 0.3046, + "step": 16787 + }, + { + "epoch": 0.7598099117447387, + "grad_norm": 0.6146365451550019, + "learning_rate": 1.4386486142294081e-06, + "loss": 0.2524, + "step": 16788 + }, + { + "epoch": 0.7598551708531341, + "grad_norm": 0.6884840712238033, + "learning_rate": 1.43813421134405e-06, + "loss": 0.2684, + "step": 16789 + }, + { + "epoch": 0.7599004299615297, + "grad_norm": 0.7172089130658871, + "learning_rate": 1.4376198849922484e-06, + "loss": 0.2825, + "step": 16790 + }, + { + "epoch": 0.7599456890699253, + "grad_norm": 0.6264937069460348, + "learning_rate": 1.4371056351850525e-06, + "loss": 0.3419, + "step": 16791 + }, + { + "epoch": 0.7599909481783209, + "grad_norm": 0.6564587162794594, + "learning_rate": 1.4365914619335158e-06, + "loss": 0.2876, + "step": 16792 + }, + { + "epoch": 0.7600362072867165, + "grad_norm": 0.658685267212285, + "learning_rate": 1.4360773652486826e-06, + "loss": 0.3208, + "step": 16793 + }, + { + "epoch": 0.760081466395112, + "grad_norm": 0.598486391096725, + "learning_rate": 1.435563345141603e-06, + "loss": 0.3154, + "step": 16794 + }, + { + "epoch": 0.7601267255035076, + "grad_norm": 0.6100147880854968, + "learning_rate": 1.4350494016233197e-06, + "loss": 0.2963, + "step": 16795 + }, + { + "epoch": 0.7601719846119032, + "grad_norm": 0.6434704416028074, + "learning_rate": 1.4345355347048739e-06, + "loss": 0.2693, + "step": 16796 + }, + { + "epoch": 0.7602172437202988, + "grad_norm": 0.5765541262298857, + "learning_rate": 1.4340217443973093e-06, + "loss": 0.282, + "step": 16797 + }, + { + "epoch": 0.7602625028286942, + "grad_norm": 0.6419387726754483, + "learning_rate": 1.4335080307116667e-06, + "loss": 0.3015, + "step": 16798 + }, + { + "epoch": 0.7603077619370898, + "grad_norm": 0.5943876804016321, + "learning_rate": 1.432994393658983e-06, + "loss": 0.2685, + "step": 16799 + }, + { + "epoch": 0.7603530210454854, + "grad_norm": 0.6019177857531949, + "learning_rate": 1.4324808332502932e-06, + "loss": 0.2883, + "step": 16800 + }, + { + "epoch": 0.760398280153881, + "grad_norm": 0.27134340303500054, + "learning_rate": 1.4319673494966345e-06, + "loss": 0.4577, + "step": 16801 + }, + { + "epoch": 0.7604435392622765, + "grad_norm": 0.7610053429037491, + "learning_rate": 1.431453942409038e-06, + "loss": 0.2485, + "step": 16802 + }, + { + "epoch": 0.7604887983706721, + "grad_norm": 0.6087080165987451, + "learning_rate": 1.430940611998538e-06, + "loss": 0.28, + "step": 16803 + }, + { + "epoch": 0.7605340574790677, + "grad_norm": 0.5778916428969291, + "learning_rate": 1.4304273582761607e-06, + "loss": 0.2827, + "step": 16804 + }, + { + "epoch": 0.7605793165874632, + "grad_norm": 0.6189611531085527, + "learning_rate": 1.4299141812529382e-06, + "loss": 0.3085, + "step": 16805 + }, + { + "epoch": 0.7606245756958588, + "grad_norm": 0.6054645325134527, + "learning_rate": 1.429401080939894e-06, + "loss": 0.2818, + "step": 16806 + }, + { + "epoch": 0.7606698348042543, + "grad_norm": 0.7938087806773069, + "learning_rate": 1.4288880573480551e-06, + "loss": 0.3107, + "step": 16807 + }, + { + "epoch": 0.7607150939126499, + "grad_norm": 0.7210383208925405, + "learning_rate": 1.4283751104884446e-06, + "loss": 0.309, + "step": 16808 + }, + { + "epoch": 0.7607603530210455, + "grad_norm": 0.28603461792807805, + "learning_rate": 1.4278622403720816e-06, + "loss": 0.4726, + "step": 16809 + }, + { + "epoch": 0.7608056121294411, + "grad_norm": 0.6318045524562781, + "learning_rate": 1.4273494470099886e-06, + "loss": 0.3327, + "step": 16810 + }, + { + "epoch": 0.7608508712378366, + "grad_norm": 0.7677521171288333, + "learning_rate": 1.4268367304131847e-06, + "loss": 0.2926, + "step": 16811 + }, + { + "epoch": 0.7608961303462322, + "grad_norm": 0.5626308426564788, + "learning_rate": 1.426324090592685e-06, + "loss": 0.2787, + "step": 16812 + }, + { + "epoch": 0.7609413894546277, + "grad_norm": 0.2539087849679761, + "learning_rate": 1.4258115275595036e-06, + "loss": 0.456, + "step": 16813 + }, + { + "epoch": 0.7609866485630233, + "grad_norm": 0.6223413384815674, + "learning_rate": 1.425299041324657e-06, + "loss": 0.3336, + "step": 16814 + }, + { + "epoch": 0.7610319076714189, + "grad_norm": 0.6122816121017155, + "learning_rate": 1.424786631899155e-06, + "loss": 0.3442, + "step": 16815 + }, + { + "epoch": 0.7610771667798144, + "grad_norm": 0.6532982871938595, + "learning_rate": 1.424274299294006e-06, + "loss": 0.3565, + "step": 16816 + }, + { + "epoch": 0.76112242588821, + "grad_norm": 0.563622553510085, + "learning_rate": 1.423762043520221e-06, + "loss": 0.2918, + "step": 16817 + }, + { + "epoch": 0.7611676849966056, + "grad_norm": 1.1159220788524196, + "learning_rate": 1.4232498645888071e-06, + "loss": 0.316, + "step": 16818 + }, + { + "epoch": 0.7612129441050012, + "grad_norm": 0.5783143028320339, + "learning_rate": 1.4227377625107686e-06, + "loss": 0.3085, + "step": 16819 + }, + { + "epoch": 0.7612582032133967, + "grad_norm": 0.24991075881259164, + "learning_rate": 1.4222257372971072e-06, + "loss": 0.4805, + "step": 16820 + }, + { + "epoch": 0.7613034623217922, + "grad_norm": 0.6570138851141044, + "learning_rate": 1.4217137889588279e-06, + "loss": 0.3054, + "step": 16821 + }, + { + "epoch": 0.7613487214301878, + "grad_norm": 0.6034558054078771, + "learning_rate": 1.421201917506928e-06, + "loss": 0.2591, + "step": 16822 + }, + { + "epoch": 0.7613939805385834, + "grad_norm": 0.5642207860718541, + "learning_rate": 1.4206901229524089e-06, + "loss": 0.2863, + "step": 16823 + }, + { + "epoch": 0.7614392396469789, + "grad_norm": 0.6288858995612324, + "learning_rate": 1.4201784053062662e-06, + "loss": 0.2803, + "step": 16824 + }, + { + "epoch": 0.7614844987553745, + "grad_norm": 0.6032890415679383, + "learning_rate": 1.4196667645794932e-06, + "loss": 0.2858, + "step": 16825 + }, + { + "epoch": 0.7615297578637701, + "grad_norm": 0.577659202381288, + "learning_rate": 1.4191552007830856e-06, + "loss": 0.2414, + "step": 16826 + }, + { + "epoch": 0.7615750169721657, + "grad_norm": 0.6491047599613663, + "learning_rate": 1.4186437139280363e-06, + "loss": 0.3162, + "step": 16827 + }, + { + "epoch": 0.7616202760805613, + "grad_norm": 0.624204633497584, + "learning_rate": 1.4181323040253346e-06, + "loss": 0.2882, + "step": 16828 + }, + { + "epoch": 0.7616655351889567, + "grad_norm": 0.6492984485836424, + "learning_rate": 1.4176209710859672e-06, + "loss": 0.2684, + "step": 16829 + }, + { + "epoch": 0.7617107942973523, + "grad_norm": 0.9304262523062551, + "learning_rate": 1.417109715120924e-06, + "loss": 0.265, + "step": 16830 + }, + { + "epoch": 0.7617560534057479, + "grad_norm": 0.6239550151772859, + "learning_rate": 1.4165985361411878e-06, + "loss": 0.3371, + "step": 16831 + }, + { + "epoch": 0.7618013125141435, + "grad_norm": 0.6818527097603084, + "learning_rate": 1.4160874341577447e-06, + "loss": 0.3229, + "step": 16832 + }, + { + "epoch": 0.761846571622539, + "grad_norm": 0.6515337635420715, + "learning_rate": 1.4155764091815737e-06, + "loss": 0.2793, + "step": 16833 + }, + { + "epoch": 0.7618918307309346, + "grad_norm": 0.6006328439560533, + "learning_rate": 1.4150654612236592e-06, + "loss": 0.3002, + "step": 16834 + }, + { + "epoch": 0.7619370898393302, + "grad_norm": 0.2696520101700211, + "learning_rate": 1.4145545902949758e-06, + "loss": 0.4464, + "step": 16835 + }, + { + "epoch": 0.7619823489477258, + "grad_norm": 0.5866097715554073, + "learning_rate": 1.4140437964065034e-06, + "loss": 0.2875, + "step": 16836 + }, + { + "epoch": 0.7620276080561212, + "grad_norm": 0.6464923450865269, + "learning_rate": 1.413533079569217e-06, + "loss": 0.3314, + "step": 16837 + }, + { + "epoch": 0.7620728671645168, + "grad_norm": 0.6175031665946599, + "learning_rate": 1.4130224397940883e-06, + "loss": 0.2522, + "step": 16838 + }, + { + "epoch": 0.7621181262729124, + "grad_norm": 0.6334144130604863, + "learning_rate": 1.4125118770920903e-06, + "loss": 0.2754, + "step": 16839 + }, + { + "epoch": 0.762163385381308, + "grad_norm": 0.2774705809975444, + "learning_rate": 1.412001391474196e-06, + "loss": 0.4639, + "step": 16840 + }, + { + "epoch": 0.7622086444897036, + "grad_norm": 0.7367744811054106, + "learning_rate": 1.4114909829513718e-06, + "loss": 0.2797, + "step": 16841 + }, + { + "epoch": 0.7622539035980991, + "grad_norm": 0.6474503842151763, + "learning_rate": 1.4109806515345836e-06, + "loss": 0.2997, + "step": 16842 + }, + { + "epoch": 0.7622991627064947, + "grad_norm": 0.60544937199488, + "learning_rate": 1.4104703972348e-06, + "loss": 0.2746, + "step": 16843 + }, + { + "epoch": 0.7623444218148903, + "grad_norm": 0.5857315723739743, + "learning_rate": 1.4099602200629813e-06, + "loss": 0.2633, + "step": 16844 + }, + { + "epoch": 0.7623896809232859, + "grad_norm": 0.6437930937488767, + "learning_rate": 1.4094501200300937e-06, + "loss": 0.334, + "step": 16845 + }, + { + "epoch": 0.7624349400316813, + "grad_norm": 0.6409464336461744, + "learning_rate": 1.4089400971470935e-06, + "loss": 0.2714, + "step": 16846 + }, + { + "epoch": 0.7624801991400769, + "grad_norm": 0.6142536853103293, + "learning_rate": 1.4084301514249432e-06, + "loss": 0.247, + "step": 16847 + }, + { + "epoch": 0.7625254582484725, + "grad_norm": 0.7107969655346827, + "learning_rate": 1.407920282874598e-06, + "loss": 0.3, + "step": 16848 + }, + { + "epoch": 0.7625707173568681, + "grad_norm": 0.7072927653548671, + "learning_rate": 1.4074104915070124e-06, + "loss": 0.3144, + "step": 16849 + }, + { + "epoch": 0.7626159764652637, + "grad_norm": 0.7667570326124314, + "learning_rate": 1.4069007773331433e-06, + "loss": 0.3365, + "step": 16850 + }, + { + "epoch": 0.7626612355736592, + "grad_norm": 0.6173278493372766, + "learning_rate": 1.4063911403639392e-06, + "loss": 0.3386, + "step": 16851 + }, + { + "epoch": 0.7627064946820548, + "grad_norm": 0.28676641994733726, + "learning_rate": 1.4058815806103542e-06, + "loss": 0.4656, + "step": 16852 + }, + { + "epoch": 0.7627517537904503, + "grad_norm": 0.5823508981388617, + "learning_rate": 1.4053720980833357e-06, + "loss": 0.2728, + "step": 16853 + }, + { + "epoch": 0.7627970128988459, + "grad_norm": 0.6438356399181908, + "learning_rate": 1.4048626927938292e-06, + "loss": 0.3565, + "step": 16854 + }, + { + "epoch": 0.7628422720072414, + "grad_norm": 0.5933897311378838, + "learning_rate": 1.4043533647527813e-06, + "loss": 0.2896, + "step": 16855 + }, + { + "epoch": 0.762887531115637, + "grad_norm": 0.5942110961471518, + "learning_rate": 1.4038441139711384e-06, + "loss": 0.3016, + "step": 16856 + }, + { + "epoch": 0.7629327902240326, + "grad_norm": 0.5985865760232018, + "learning_rate": 1.4033349404598407e-06, + "loss": 0.2777, + "step": 16857 + }, + { + "epoch": 0.7629780493324282, + "grad_norm": 0.6502313785103132, + "learning_rate": 1.402825844229827e-06, + "loss": 0.326, + "step": 16858 + }, + { + "epoch": 0.7630233084408237, + "grad_norm": 0.6101212744528433, + "learning_rate": 1.4023168252920384e-06, + "loss": 0.267, + "step": 16859 + }, + { + "epoch": 0.7630685675492193, + "grad_norm": 0.6054417882461244, + "learning_rate": 1.4018078836574134e-06, + "loss": 0.2968, + "step": 16860 + }, + { + "epoch": 0.7631138266576148, + "grad_norm": 0.2915904665319847, + "learning_rate": 1.401299019336886e-06, + "loss": 0.4793, + "step": 16861 + }, + { + "epoch": 0.7631590857660104, + "grad_norm": 0.5737394256906462, + "learning_rate": 1.400790232341388e-06, + "loss": 0.2823, + "step": 16862 + }, + { + "epoch": 0.763204344874406, + "grad_norm": 0.6102924639830412, + "learning_rate": 1.4002815226818557e-06, + "loss": 0.2596, + "step": 16863 + }, + { + "epoch": 0.7632496039828015, + "grad_norm": 0.6441089802380752, + "learning_rate": 1.3997728903692164e-06, + "loss": 0.2871, + "step": 16864 + }, + { + "epoch": 0.7632948630911971, + "grad_norm": 0.653370143391403, + "learning_rate": 1.3992643354144013e-06, + "loss": 0.2683, + "step": 16865 + }, + { + "epoch": 0.7633401221995927, + "grad_norm": 0.8143105617909899, + "learning_rate": 1.3987558578283378e-06, + "loss": 0.2918, + "step": 16866 + }, + { + "epoch": 0.7633853813079883, + "grad_norm": 0.6276857224416678, + "learning_rate": 1.3982474576219485e-06, + "loss": 0.2695, + "step": 16867 + }, + { + "epoch": 0.7634306404163838, + "grad_norm": 0.6342212940275045, + "learning_rate": 1.3977391348061592e-06, + "loss": 0.2982, + "step": 16868 + }, + { + "epoch": 0.7634758995247793, + "grad_norm": 0.6223834130747216, + "learning_rate": 1.397230889391894e-06, + "loss": 0.2849, + "step": 16869 + }, + { + "epoch": 0.7635211586331749, + "grad_norm": 0.5577136042915022, + "learning_rate": 1.3967227213900725e-06, + "loss": 0.2476, + "step": 16870 + }, + { + "epoch": 0.7635664177415705, + "grad_norm": 0.7179559302744081, + "learning_rate": 1.3962146308116109e-06, + "loss": 0.2798, + "step": 16871 + }, + { + "epoch": 0.763611676849966, + "grad_norm": 0.30340564592253993, + "learning_rate": 1.3957066176674306e-06, + "loss": 0.4514, + "step": 16872 + }, + { + "epoch": 0.7636569359583616, + "grad_norm": 0.6339792291596237, + "learning_rate": 1.3951986819684432e-06, + "loss": 0.3246, + "step": 16873 + }, + { + "epoch": 0.7637021950667572, + "grad_norm": 0.5892116521049253, + "learning_rate": 1.3946908237255668e-06, + "loss": 0.3045, + "step": 16874 + }, + { + "epoch": 0.7637474541751528, + "grad_norm": 0.6261217936668318, + "learning_rate": 1.3941830429497105e-06, + "loss": 0.2898, + "step": 16875 + }, + { + "epoch": 0.7637927132835484, + "grad_norm": 1.5401022245595641, + "learning_rate": 1.3936753396517877e-06, + "loss": 0.2698, + "step": 16876 + }, + { + "epoch": 0.7638379723919438, + "grad_norm": 0.8747644893541968, + "learning_rate": 1.3931677138427035e-06, + "loss": 0.317, + "step": 16877 + }, + { + "epoch": 0.7638832315003394, + "grad_norm": 0.6173016787995003, + "learning_rate": 1.39266016553337e-06, + "loss": 0.3333, + "step": 16878 + }, + { + "epoch": 0.763928490608735, + "grad_norm": 0.6037204276293361, + "learning_rate": 1.3921526947346902e-06, + "loss": 0.2761, + "step": 16879 + }, + { + "epoch": 0.7639737497171306, + "grad_norm": 0.6201939894233246, + "learning_rate": 1.3916453014575664e-06, + "loss": 0.2864, + "step": 16880 + }, + { + "epoch": 0.7640190088255261, + "grad_norm": 0.6081279482261308, + "learning_rate": 1.3911379857129037e-06, + "loss": 0.2896, + "step": 16881 + }, + { + "epoch": 0.7640642679339217, + "grad_norm": 0.5885304306737921, + "learning_rate": 1.3906307475116044e-06, + "loss": 0.3191, + "step": 16882 + }, + { + "epoch": 0.7641095270423173, + "grad_norm": 0.5702277237570906, + "learning_rate": 1.390123586864562e-06, + "loss": 0.3063, + "step": 16883 + }, + { + "epoch": 0.7641547861507129, + "grad_norm": 0.6062661374592121, + "learning_rate": 1.389616503782677e-06, + "loss": 0.3084, + "step": 16884 + }, + { + "epoch": 0.7642000452591083, + "grad_norm": 0.5808660659853312, + "learning_rate": 1.389109498276846e-06, + "loss": 0.2715, + "step": 16885 + }, + { + "epoch": 0.7642453043675039, + "grad_norm": 0.6005673175985065, + "learning_rate": 1.388602570357962e-06, + "loss": 0.3076, + "step": 16886 + }, + { + "epoch": 0.7642905634758995, + "grad_norm": 0.6122167898907246, + "learning_rate": 1.388095720036916e-06, + "loss": 0.2916, + "step": 16887 + }, + { + "epoch": 0.7643358225842951, + "grad_norm": 0.2734031758798873, + "learning_rate": 1.3875889473245996e-06, + "loss": 0.4631, + "step": 16888 + }, + { + "epoch": 0.7643810816926907, + "grad_norm": 0.6061707662293109, + "learning_rate": 1.3870822522319039e-06, + "loss": 0.3079, + "step": 16889 + }, + { + "epoch": 0.7644263408010862, + "grad_norm": 0.6162602129563666, + "learning_rate": 1.386575634769714e-06, + "loss": 0.2763, + "step": 16890 + }, + { + "epoch": 0.7644715999094818, + "grad_norm": 0.3073469359186612, + "learning_rate": 1.3860690949489141e-06, + "loss": 0.4764, + "step": 16891 + }, + { + "epoch": 0.7645168590178774, + "grad_norm": 0.5780747748601645, + "learning_rate": 1.3855626327803923e-06, + "loss": 0.2817, + "step": 16892 + }, + { + "epoch": 0.764562118126273, + "grad_norm": 0.26548664320126003, + "learning_rate": 1.385056248275027e-06, + "loss": 0.4591, + "step": 16893 + }, + { + "epoch": 0.7646073772346684, + "grad_norm": 0.6425476158887657, + "learning_rate": 1.3845499414437013e-06, + "loss": 0.2995, + "step": 16894 + }, + { + "epoch": 0.764652636343064, + "grad_norm": 0.2543732806748501, + "learning_rate": 1.384043712297294e-06, + "loss": 0.4742, + "step": 16895 + }, + { + "epoch": 0.7646978954514596, + "grad_norm": 0.27888961787907973, + "learning_rate": 1.38353756084668e-06, + "loss": 0.4554, + "step": 16896 + }, + { + "epoch": 0.7647431545598552, + "grad_norm": 0.6126119132895413, + "learning_rate": 1.3830314871027367e-06, + "loss": 0.322, + "step": 16897 + }, + { + "epoch": 0.7647884136682508, + "grad_norm": 0.6800039236791499, + "learning_rate": 1.3825254910763396e-06, + "loss": 0.2992, + "step": 16898 + }, + { + "epoch": 0.7648336727766463, + "grad_norm": 0.6701975701935918, + "learning_rate": 1.3820195727783597e-06, + "loss": 0.3103, + "step": 16899 + }, + { + "epoch": 0.7648789318850419, + "grad_norm": 0.2552845699044317, + "learning_rate": 1.3815137322196654e-06, + "loss": 0.4648, + "step": 16900 + }, + { + "epoch": 0.7649241909934374, + "grad_norm": 0.665911728128072, + "learning_rate": 1.3810079694111295e-06, + "loss": 0.2839, + "step": 16901 + }, + { + "epoch": 0.764969450101833, + "grad_norm": 0.7525189007952864, + "learning_rate": 1.3805022843636162e-06, + "loss": 0.3057, + "step": 16902 + }, + { + "epoch": 0.7650147092102285, + "grad_norm": 0.2652382252083221, + "learning_rate": 1.3799966770879936e-06, + "loss": 0.455, + "step": 16903 + }, + { + "epoch": 0.7650599683186241, + "grad_norm": 0.6639683051494911, + "learning_rate": 1.3794911475951229e-06, + "loss": 0.3246, + "step": 16904 + }, + { + "epoch": 0.7651052274270197, + "grad_norm": 0.6142888855102578, + "learning_rate": 1.3789856958958692e-06, + "loss": 0.2624, + "step": 16905 + }, + { + "epoch": 0.7651504865354153, + "grad_norm": 0.6536807988841115, + "learning_rate": 1.3784803220010906e-06, + "loss": 0.299, + "step": 16906 + }, + { + "epoch": 0.7651957456438108, + "grad_norm": 0.6210388750058032, + "learning_rate": 1.3779750259216484e-06, + "loss": 0.2996, + "step": 16907 + }, + { + "epoch": 0.7652410047522064, + "grad_norm": 0.24744556272978027, + "learning_rate": 1.377469807668399e-06, + "loss": 0.4469, + "step": 16908 + }, + { + "epoch": 0.765286263860602, + "grad_norm": 0.5780801130034201, + "learning_rate": 1.3769646672521964e-06, + "loss": 0.2728, + "step": 16909 + }, + { + "epoch": 0.7653315229689975, + "grad_norm": 0.2845392317576298, + "learning_rate": 1.3764596046838951e-06, + "loss": 0.5005, + "step": 16910 + }, + { + "epoch": 0.7653767820773931, + "grad_norm": 0.6439653211486774, + "learning_rate": 1.3759546199743518e-06, + "loss": 0.2606, + "step": 16911 + }, + { + "epoch": 0.7654220411857886, + "grad_norm": 0.6052871614791017, + "learning_rate": 1.3754497131344097e-06, + "loss": 0.2423, + "step": 16912 + }, + { + "epoch": 0.7654673002941842, + "grad_norm": 0.6858402042648036, + "learning_rate": 1.3749448841749213e-06, + "loss": 0.289, + "step": 16913 + }, + { + "epoch": 0.7655125594025798, + "grad_norm": 0.6380469222281904, + "learning_rate": 1.3744401331067358e-06, + "loss": 0.321, + "step": 16914 + }, + { + "epoch": 0.7655578185109754, + "grad_norm": 0.6318465111490466, + "learning_rate": 1.3739354599406969e-06, + "loss": 0.2968, + "step": 16915 + }, + { + "epoch": 0.7656030776193709, + "grad_norm": 0.27806466123117685, + "learning_rate": 1.373430864687646e-06, + "loss": 0.4759, + "step": 16916 + }, + { + "epoch": 0.7656483367277664, + "grad_norm": 0.559100606385692, + "learning_rate": 1.3729263473584281e-06, + "loss": 0.2649, + "step": 16917 + }, + { + "epoch": 0.765693595836162, + "grad_norm": 0.5835671908916155, + "learning_rate": 1.372421907963885e-06, + "loss": 0.2895, + "step": 16918 + }, + { + "epoch": 0.7657388549445576, + "grad_norm": 0.5678136911278053, + "learning_rate": 1.3719175465148538e-06, + "loss": 0.3081, + "step": 16919 + }, + { + "epoch": 0.7657841140529531, + "grad_norm": 0.6582775454018125, + "learning_rate": 1.3714132630221699e-06, + "loss": 0.3243, + "step": 16920 + }, + { + "epoch": 0.7658293731613487, + "grad_norm": 0.6572142174436094, + "learning_rate": 1.3709090574966726e-06, + "loss": 0.288, + "step": 16921 + }, + { + "epoch": 0.7658746322697443, + "grad_norm": 0.5759000279508841, + "learning_rate": 1.3704049299491923e-06, + "loss": 0.2615, + "step": 16922 + }, + { + "epoch": 0.7659198913781399, + "grad_norm": 0.9477769156689875, + "learning_rate": 1.3699008803905633e-06, + "loss": 0.2983, + "step": 16923 + }, + { + "epoch": 0.7659651504865355, + "grad_norm": 0.28476611007507796, + "learning_rate": 1.369396908831616e-06, + "loss": 0.4754, + "step": 16924 + }, + { + "epoch": 0.7660104095949309, + "grad_norm": 0.5858814659736541, + "learning_rate": 1.368893015283177e-06, + "loss": 0.3237, + "step": 16925 + }, + { + "epoch": 0.7660556687033265, + "grad_norm": 0.2752287362306891, + "learning_rate": 1.368389199756075e-06, + "loss": 0.4561, + "step": 16926 + }, + { + "epoch": 0.7661009278117221, + "grad_norm": 0.8509358285265931, + "learning_rate": 1.3678854622611371e-06, + "loss": 0.2647, + "step": 16927 + }, + { + "epoch": 0.7661461869201177, + "grad_norm": 0.5731027176137438, + "learning_rate": 1.367381802809185e-06, + "loss": 0.2603, + "step": 16928 + }, + { + "epoch": 0.7661914460285132, + "grad_norm": 0.6125266417761187, + "learning_rate": 1.3668782214110404e-06, + "loss": 0.3007, + "step": 16929 + }, + { + "epoch": 0.7662367051369088, + "grad_norm": 0.6363985154525227, + "learning_rate": 1.3663747180775238e-06, + "loss": 0.3023, + "step": 16930 + }, + { + "epoch": 0.7662819642453044, + "grad_norm": 0.5990897495045766, + "learning_rate": 1.3658712928194567e-06, + "loss": 0.2762, + "step": 16931 + }, + { + "epoch": 0.7663272233537, + "grad_norm": 0.5934237035423845, + "learning_rate": 1.3653679456476536e-06, + "loss": 0.3044, + "step": 16932 + }, + { + "epoch": 0.7663724824620956, + "grad_norm": 0.7283000284782619, + "learning_rate": 1.3648646765729295e-06, + "loss": 0.334, + "step": 16933 + }, + { + "epoch": 0.766417741570491, + "grad_norm": 0.609175602263638, + "learning_rate": 1.3643614856061005e-06, + "loss": 0.2992, + "step": 16934 + }, + { + "epoch": 0.7664630006788866, + "grad_norm": 0.2539648191964789, + "learning_rate": 1.3638583727579752e-06, + "loss": 0.4318, + "step": 16935 + }, + { + "epoch": 0.7665082597872822, + "grad_norm": 0.6069518858266917, + "learning_rate": 1.3633553380393677e-06, + "loss": 0.3037, + "step": 16936 + }, + { + "epoch": 0.7665535188956778, + "grad_norm": 0.5627562186944901, + "learning_rate": 1.362852381461085e-06, + "loss": 0.3155, + "step": 16937 + }, + { + "epoch": 0.7665987780040733, + "grad_norm": 0.26406261763895233, + "learning_rate": 1.3623495030339323e-06, + "loss": 0.4702, + "step": 16938 + }, + { + "epoch": 0.7666440371124689, + "grad_norm": 0.7349242877482646, + "learning_rate": 1.3618467027687165e-06, + "loss": 0.315, + "step": 16939 + }, + { + "epoch": 0.7666892962208645, + "grad_norm": 0.6108496294096064, + "learning_rate": 1.3613439806762447e-06, + "loss": 0.2913, + "step": 16940 + }, + { + "epoch": 0.76673455532926, + "grad_norm": 0.2741832860992178, + "learning_rate": 1.3608413367673123e-06, + "loss": 0.4662, + "step": 16941 + }, + { + "epoch": 0.7667798144376555, + "grad_norm": 0.6248896759210308, + "learning_rate": 1.3603387710527228e-06, + "loss": 0.2901, + "step": 16942 + }, + { + "epoch": 0.7668250735460511, + "grad_norm": 0.2945483709669181, + "learning_rate": 1.359836283543276e-06, + "loss": 0.4719, + "step": 16943 + }, + { + "epoch": 0.7668703326544467, + "grad_norm": 0.6619790012059299, + "learning_rate": 1.3593338742497675e-06, + "loss": 0.2947, + "step": 16944 + }, + { + "epoch": 0.7669155917628423, + "grad_norm": 0.6236601589678317, + "learning_rate": 1.3588315431829913e-06, + "loss": 0.2854, + "step": 16945 + }, + { + "epoch": 0.7669608508712379, + "grad_norm": 0.7724796575015, + "learning_rate": 1.3583292903537427e-06, + "loss": 0.2866, + "step": 16946 + }, + { + "epoch": 0.7670061099796334, + "grad_norm": 1.0154723266293544, + "learning_rate": 1.357827115772814e-06, + "loss": 0.2853, + "step": 16947 + }, + { + "epoch": 0.767051369088029, + "grad_norm": 0.7038822773492349, + "learning_rate": 1.3573250194509946e-06, + "loss": 0.3171, + "step": 16948 + }, + { + "epoch": 0.7670966281964245, + "grad_norm": 0.5762294358474576, + "learning_rate": 1.3568230013990713e-06, + "loss": 0.265, + "step": 16949 + }, + { + "epoch": 0.7671418873048201, + "grad_norm": 0.5917370820067537, + "learning_rate": 1.3563210616278345e-06, + "loss": 0.2564, + "step": 16950 + }, + { + "epoch": 0.7671871464132156, + "grad_norm": 0.277251790379558, + "learning_rate": 1.3558192001480652e-06, + "loss": 0.4499, + "step": 16951 + }, + { + "epoch": 0.7672324055216112, + "grad_norm": 0.26052497713959355, + "learning_rate": 1.3553174169705507e-06, + "loss": 0.4531, + "step": 16952 + }, + { + "epoch": 0.7672776646300068, + "grad_norm": 0.6220368259883828, + "learning_rate": 1.3548157121060718e-06, + "loss": 0.2812, + "step": 16953 + }, + { + "epoch": 0.7673229237384024, + "grad_norm": 0.5294918508547679, + "learning_rate": 1.3543140855654058e-06, + "loss": 0.257, + "step": 16954 + }, + { + "epoch": 0.7673681828467979, + "grad_norm": 0.2807578134342027, + "learning_rate": 1.3538125373593335e-06, + "loss": 0.4716, + "step": 16955 + }, + { + "epoch": 0.7674134419551935, + "grad_norm": 0.5845163546613862, + "learning_rate": 1.3533110674986327e-06, + "loss": 0.2835, + "step": 16956 + }, + { + "epoch": 0.767458701063589, + "grad_norm": 0.5872242276041237, + "learning_rate": 1.3528096759940768e-06, + "loss": 0.3021, + "step": 16957 + }, + { + "epoch": 0.7675039601719846, + "grad_norm": 0.6504178575741542, + "learning_rate": 1.3523083628564388e-06, + "loss": 0.291, + "step": 16958 + }, + { + "epoch": 0.7675492192803802, + "grad_norm": 0.6391989446126154, + "learning_rate": 1.3518071280964901e-06, + "loss": 0.2885, + "step": 16959 + }, + { + "epoch": 0.7675944783887757, + "grad_norm": 0.2884959672495194, + "learning_rate": 1.3513059717250037e-06, + "loss": 0.4722, + "step": 16960 + }, + { + "epoch": 0.7676397374971713, + "grad_norm": 0.569592458656659, + "learning_rate": 1.3508048937527458e-06, + "loss": 0.3074, + "step": 16961 + }, + { + "epoch": 0.7676849966055669, + "grad_norm": 0.6032703298178237, + "learning_rate": 1.3503038941904818e-06, + "loss": 0.3544, + "step": 16962 + }, + { + "epoch": 0.7677302557139625, + "grad_norm": 0.5832410428674883, + "learning_rate": 1.3498029730489793e-06, + "loss": 0.3116, + "step": 16963 + }, + { + "epoch": 0.767775514822358, + "grad_norm": 0.6220872943704416, + "learning_rate": 1.3493021303389985e-06, + "loss": 0.2951, + "step": 16964 + }, + { + "epoch": 0.7678207739307535, + "grad_norm": 0.89794621665398, + "learning_rate": 1.348801366071304e-06, + "loss": 0.3117, + "step": 16965 + }, + { + "epoch": 0.7678660330391491, + "grad_norm": 0.6600777353123349, + "learning_rate": 1.3483006802566546e-06, + "loss": 0.311, + "step": 16966 + }, + { + "epoch": 0.7679112921475447, + "grad_norm": 0.6373080824566424, + "learning_rate": 1.3478000729058065e-06, + "loss": 0.3076, + "step": 16967 + }, + { + "epoch": 0.7679565512559403, + "grad_norm": 0.2797095518192231, + "learning_rate": 1.3472995440295183e-06, + "loss": 0.47, + "step": 16968 + }, + { + "epoch": 0.7680018103643358, + "grad_norm": 0.6633517189792275, + "learning_rate": 1.3467990936385478e-06, + "loss": 0.2976, + "step": 16969 + }, + { + "epoch": 0.7680470694727314, + "grad_norm": 0.6534031456071727, + "learning_rate": 1.3462987217436412e-06, + "loss": 0.3041, + "step": 16970 + }, + { + "epoch": 0.768092328581127, + "grad_norm": 0.6053512025354313, + "learning_rate": 1.3457984283555536e-06, + "loss": 0.3258, + "step": 16971 + }, + { + "epoch": 0.7681375876895226, + "grad_norm": 0.5653073402346999, + "learning_rate": 1.345298213485035e-06, + "loss": 0.3381, + "step": 16972 + }, + { + "epoch": 0.768182846797918, + "grad_norm": 0.5950416465028642, + "learning_rate": 1.344798077142836e-06, + "loss": 0.2806, + "step": 16973 + }, + { + "epoch": 0.7682281059063136, + "grad_norm": 0.6699224940747642, + "learning_rate": 1.3442980193396976e-06, + "loss": 0.2696, + "step": 16974 + }, + { + "epoch": 0.7682733650147092, + "grad_norm": 0.5930106756206643, + "learning_rate": 1.3437980400863671e-06, + "loss": 0.3147, + "step": 16975 + }, + { + "epoch": 0.7683186241231048, + "grad_norm": 0.6205814856898128, + "learning_rate": 1.3432981393935885e-06, + "loss": 0.3014, + "step": 16976 + }, + { + "epoch": 0.7683638832315003, + "grad_norm": 0.6479243058495224, + "learning_rate": 1.3427983172721026e-06, + "loss": 0.3099, + "step": 16977 + }, + { + "epoch": 0.7684091423398959, + "grad_norm": 0.6434276617349629, + "learning_rate": 1.3422985737326471e-06, + "loss": 0.2738, + "step": 16978 + }, + { + "epoch": 0.7684544014482915, + "grad_norm": 0.6253202570490182, + "learning_rate": 1.3417989087859628e-06, + "loss": 0.2903, + "step": 16979 + }, + { + "epoch": 0.7684996605566871, + "grad_norm": 0.6205806187921971, + "learning_rate": 1.3412993224427834e-06, + "loss": 0.2788, + "step": 16980 + }, + { + "epoch": 0.7685449196650826, + "grad_norm": 0.28114508109508995, + "learning_rate": 1.3407998147138462e-06, + "loss": 0.4619, + "step": 16981 + }, + { + "epoch": 0.7685901787734781, + "grad_norm": 0.6156844052315181, + "learning_rate": 1.3403003856098823e-06, + "loss": 0.2637, + "step": 16982 + }, + { + "epoch": 0.7686354378818737, + "grad_norm": 0.5597348557261431, + "learning_rate": 1.339801035141622e-06, + "loss": 0.3111, + "step": 16983 + }, + { + "epoch": 0.7686806969902693, + "grad_norm": 0.2478413630444913, + "learning_rate": 1.3393017633197958e-06, + "loss": 0.4477, + "step": 16984 + }, + { + "epoch": 0.7687259560986649, + "grad_norm": 0.6558781901080365, + "learning_rate": 1.3388025701551339e-06, + "loss": 0.2989, + "step": 16985 + }, + { + "epoch": 0.7687712152070604, + "grad_norm": 0.6440251359091363, + "learning_rate": 1.3383034556583596e-06, + "loss": 0.2987, + "step": 16986 + }, + { + "epoch": 0.768816474315456, + "grad_norm": 0.646678875565266, + "learning_rate": 1.3378044198401963e-06, + "loss": 0.3121, + "step": 16987 + }, + { + "epoch": 0.7688617334238516, + "grad_norm": 0.596718918220146, + "learning_rate": 1.337305462711369e-06, + "loss": 0.2762, + "step": 16988 + }, + { + "epoch": 0.7689069925322471, + "grad_norm": 0.6003635530165137, + "learning_rate": 1.3368065842825994e-06, + "loss": 0.2831, + "step": 16989 + }, + { + "epoch": 0.7689522516406426, + "grad_norm": 0.25469878483264125, + "learning_rate": 1.3363077845646056e-06, + "loss": 0.4325, + "step": 16990 + }, + { + "epoch": 0.7689975107490382, + "grad_norm": 0.5622020283876651, + "learning_rate": 1.3358090635681043e-06, + "loss": 0.244, + "step": 16991 + }, + { + "epoch": 0.7690427698574338, + "grad_norm": 0.6387750462435204, + "learning_rate": 1.335310421303813e-06, + "loss": 0.2878, + "step": 16992 + }, + { + "epoch": 0.7690880289658294, + "grad_norm": 0.7494803556084741, + "learning_rate": 1.3348118577824448e-06, + "loss": 0.2549, + "step": 16993 + }, + { + "epoch": 0.769133288074225, + "grad_norm": 0.2880711269974684, + "learning_rate": 1.3343133730147144e-06, + "loss": 0.4734, + "step": 16994 + }, + { + "epoch": 0.7691785471826205, + "grad_norm": 0.7187617476015365, + "learning_rate": 1.3338149670113314e-06, + "loss": 0.2833, + "step": 16995 + }, + { + "epoch": 0.769223806291016, + "grad_norm": 0.6144067884129695, + "learning_rate": 1.3333166397830033e-06, + "loss": 0.2975, + "step": 16996 + }, + { + "epoch": 0.7692690653994116, + "grad_norm": 0.6488198429125233, + "learning_rate": 1.3328183913404396e-06, + "loss": 0.2951, + "step": 16997 + }, + { + "epoch": 0.7693143245078072, + "grad_norm": 0.6572369985639023, + "learning_rate": 1.3323202216943488e-06, + "loss": 0.2879, + "step": 16998 + }, + { + "epoch": 0.7693595836162027, + "grad_norm": 0.5623155598704372, + "learning_rate": 1.3318221308554287e-06, + "loss": 0.297, + "step": 16999 + }, + { + "epoch": 0.7694048427245983, + "grad_norm": 0.590359266967176, + "learning_rate": 1.3313241188343845e-06, + "loss": 0.282, + "step": 17000 + }, + { + "epoch": 0.7694501018329939, + "grad_norm": 0.6139718057304095, + "learning_rate": 1.330826185641918e-06, + "loss": 0.2862, + "step": 17001 + }, + { + "epoch": 0.7694953609413895, + "grad_norm": 0.6207758725197622, + "learning_rate": 1.330328331288731e-06, + "loss": 0.2734, + "step": 17002 + }, + { + "epoch": 0.7695406200497851, + "grad_norm": 0.5562841534231531, + "learning_rate": 1.3298305557855146e-06, + "loss": 0.2743, + "step": 17003 + }, + { + "epoch": 0.7695858791581806, + "grad_norm": 0.6279572767987115, + "learning_rate": 1.329332859142967e-06, + "loss": 0.3076, + "step": 17004 + }, + { + "epoch": 0.7696311382665761, + "grad_norm": 0.5915825930214385, + "learning_rate": 1.3288352413717847e-06, + "loss": 0.3034, + "step": 17005 + }, + { + "epoch": 0.7696763973749717, + "grad_norm": 0.5942031851275636, + "learning_rate": 1.3283377024826576e-06, + "loss": 0.3188, + "step": 17006 + }, + { + "epoch": 0.7697216564833673, + "grad_norm": 0.276941718944032, + "learning_rate": 1.3278402424862758e-06, + "loss": 0.4766, + "step": 17007 + }, + { + "epoch": 0.7697669155917628, + "grad_norm": 0.6329108071302986, + "learning_rate": 1.3273428613933298e-06, + "loss": 0.2926, + "step": 17008 + }, + { + "epoch": 0.7698121747001584, + "grad_norm": 0.5793166555741998, + "learning_rate": 1.3268455592145047e-06, + "loss": 0.3074, + "step": 17009 + }, + { + "epoch": 0.769857433808554, + "grad_norm": 0.25748776118581745, + "learning_rate": 1.3263483359604884e-06, + "loss": 0.4525, + "step": 17010 + }, + { + "epoch": 0.7699026929169496, + "grad_norm": 0.5655961201333015, + "learning_rate": 1.3258511916419641e-06, + "loss": 0.3125, + "step": 17011 + }, + { + "epoch": 0.769947952025345, + "grad_norm": 0.5823148605766695, + "learning_rate": 1.3253541262696117e-06, + "loss": 0.2969, + "step": 17012 + }, + { + "epoch": 0.7699932111337406, + "grad_norm": 0.6444920412097761, + "learning_rate": 1.3248571398541138e-06, + "loss": 0.2965, + "step": 17013 + }, + { + "epoch": 0.7700384702421362, + "grad_norm": 0.2839524981208011, + "learning_rate": 1.3243602324061495e-06, + "loss": 0.4673, + "step": 17014 + }, + { + "epoch": 0.7700837293505318, + "grad_norm": 0.6402337676488344, + "learning_rate": 1.3238634039363952e-06, + "loss": 0.3205, + "step": 17015 + }, + { + "epoch": 0.7701289884589274, + "grad_norm": 0.6372879682518151, + "learning_rate": 1.3233666544555246e-06, + "loss": 0.3168, + "step": 17016 + }, + { + "epoch": 0.7701742475673229, + "grad_norm": 0.6019808709883003, + "learning_rate": 1.3228699839742125e-06, + "loss": 0.3091, + "step": 17017 + }, + { + "epoch": 0.7702195066757185, + "grad_norm": 0.7257516660275176, + "learning_rate": 1.3223733925031324e-06, + "loss": 0.3478, + "step": 17018 + }, + { + "epoch": 0.7702647657841141, + "grad_norm": 0.6080211785313477, + "learning_rate": 1.321876880052953e-06, + "loss": 0.2837, + "step": 17019 + }, + { + "epoch": 0.7703100248925097, + "grad_norm": 0.2720508001144198, + "learning_rate": 1.321380446634342e-06, + "loss": 0.4872, + "step": 17020 + }, + { + "epoch": 0.7703552840009051, + "grad_norm": 0.5971448911188646, + "learning_rate": 1.3208840922579686e-06, + "loss": 0.3264, + "step": 17021 + }, + { + "epoch": 0.7704005431093007, + "grad_norm": 0.6620853507211729, + "learning_rate": 1.3203878169344948e-06, + "loss": 0.3739, + "step": 17022 + }, + { + "epoch": 0.7704458022176963, + "grad_norm": 0.6750399270932413, + "learning_rate": 1.3198916206745871e-06, + "loss": 0.2813, + "step": 17023 + }, + { + "epoch": 0.7704910613260919, + "grad_norm": 0.6677213392981335, + "learning_rate": 1.3193955034889056e-06, + "loss": 0.304, + "step": 17024 + }, + { + "epoch": 0.7705363204344874, + "grad_norm": 0.26104906765845687, + "learning_rate": 1.31889946538811e-06, + "loss": 0.4523, + "step": 17025 + }, + { + "epoch": 0.770581579542883, + "grad_norm": 0.6332788248079336, + "learning_rate": 1.3184035063828586e-06, + "loss": 0.3088, + "step": 17026 + }, + { + "epoch": 0.7706268386512786, + "grad_norm": 0.5982813398175602, + "learning_rate": 1.3179076264838102e-06, + "loss": 0.2801, + "step": 17027 + }, + { + "epoch": 0.7706720977596742, + "grad_norm": 0.6142886930313358, + "learning_rate": 1.3174118257016182e-06, + "loss": 0.3024, + "step": 17028 + }, + { + "epoch": 0.7707173568680697, + "grad_norm": 0.5813833619991972, + "learning_rate": 1.3169161040469347e-06, + "loss": 0.2989, + "step": 17029 + }, + { + "epoch": 0.7707626159764652, + "grad_norm": 0.5807463456591303, + "learning_rate": 1.316420461530412e-06, + "loss": 0.2915, + "step": 17030 + }, + { + "epoch": 0.7708078750848608, + "grad_norm": 0.6038158844407124, + "learning_rate": 1.3159248981627026e-06, + "loss": 0.3041, + "step": 17031 + }, + { + "epoch": 0.7708531341932564, + "grad_norm": 0.6905869638067237, + "learning_rate": 1.3154294139544516e-06, + "loss": 0.321, + "step": 17032 + }, + { + "epoch": 0.770898393301652, + "grad_norm": 0.587729403920332, + "learning_rate": 1.3149340089163048e-06, + "loss": 0.3035, + "step": 17033 + }, + { + "epoch": 0.7709436524100475, + "grad_norm": 0.5984624721440018, + "learning_rate": 1.3144386830589102e-06, + "loss": 0.2999, + "step": 17034 + }, + { + "epoch": 0.7709889115184431, + "grad_norm": 0.5938178505981546, + "learning_rate": 1.3139434363929088e-06, + "loss": 0.3004, + "step": 17035 + }, + { + "epoch": 0.7710341706268387, + "grad_norm": 0.6211449043197764, + "learning_rate": 1.3134482689289408e-06, + "loss": 0.2901, + "step": 17036 + }, + { + "epoch": 0.7710794297352342, + "grad_norm": 0.6013448927897931, + "learning_rate": 1.312953180677648e-06, + "loss": 0.3086, + "step": 17037 + }, + { + "epoch": 0.7711246888436298, + "grad_norm": 0.5856091578020052, + "learning_rate": 1.3124581716496666e-06, + "loss": 0.2874, + "step": 17038 + }, + { + "epoch": 0.7711699479520253, + "grad_norm": 0.606121887689741, + "learning_rate": 1.3119632418556344e-06, + "loss": 0.3335, + "step": 17039 + }, + { + "epoch": 0.7712152070604209, + "grad_norm": 0.610090598311767, + "learning_rate": 1.311468391306186e-06, + "loss": 0.2584, + "step": 17040 + }, + { + "epoch": 0.7712604661688165, + "grad_norm": 0.6136849850789233, + "learning_rate": 1.3109736200119517e-06, + "loss": 0.3297, + "step": 17041 + }, + { + "epoch": 0.7713057252772121, + "grad_norm": 0.6267240457586536, + "learning_rate": 1.310478927983564e-06, + "loss": 0.2766, + "step": 17042 + }, + { + "epoch": 0.7713509843856076, + "grad_norm": 0.6977661479268513, + "learning_rate": 1.3099843152316543e-06, + "loss": 0.293, + "step": 17043 + }, + { + "epoch": 0.7713962434940032, + "grad_norm": 0.2869785079670655, + "learning_rate": 1.309489781766849e-06, + "loss": 0.4703, + "step": 17044 + }, + { + "epoch": 0.7714415026023987, + "grad_norm": 0.6219557466552592, + "learning_rate": 1.308995327599772e-06, + "loss": 0.2609, + "step": 17045 + }, + { + "epoch": 0.7714867617107943, + "grad_norm": 0.2754491484448643, + "learning_rate": 1.3085009527410491e-06, + "loss": 0.486, + "step": 17046 + }, + { + "epoch": 0.7715320208191898, + "grad_norm": 0.6285882729758558, + "learning_rate": 1.3080066572013045e-06, + "loss": 0.3062, + "step": 17047 + }, + { + "epoch": 0.7715772799275854, + "grad_norm": 0.6209444308060075, + "learning_rate": 1.3075124409911584e-06, + "loss": 0.3307, + "step": 17048 + }, + { + "epoch": 0.771622539035981, + "grad_norm": 0.608458141857825, + "learning_rate": 1.3070183041212276e-06, + "loss": 0.3477, + "step": 17049 + }, + { + "epoch": 0.7716677981443766, + "grad_norm": 0.5640585393302556, + "learning_rate": 1.3065242466021328e-06, + "loss": 0.2742, + "step": 17050 + }, + { + "epoch": 0.7717130572527722, + "grad_norm": 0.6017392265311926, + "learning_rate": 1.3060302684444864e-06, + "loss": 0.2725, + "step": 17051 + }, + { + "epoch": 0.7717583163611677, + "grad_norm": 0.6761743285746282, + "learning_rate": 1.3055363696589062e-06, + "loss": 0.3202, + "step": 17052 + }, + { + "epoch": 0.7718035754695632, + "grad_norm": 0.3254544022283287, + "learning_rate": 1.3050425502560028e-06, + "loss": 0.4524, + "step": 17053 + }, + { + "epoch": 0.7718488345779588, + "grad_norm": 0.6596226760850494, + "learning_rate": 1.3045488102463856e-06, + "loss": 0.3541, + "step": 17054 + }, + { + "epoch": 0.7718940936863544, + "grad_norm": 0.6610524550640676, + "learning_rate": 1.304055149640664e-06, + "loss": 0.3441, + "step": 17055 + }, + { + "epoch": 0.7719393527947499, + "grad_norm": 0.613461336777729, + "learning_rate": 1.303561568449448e-06, + "loss": 0.3042, + "step": 17056 + }, + { + "epoch": 0.7719846119031455, + "grad_norm": 0.6389856450153001, + "learning_rate": 1.3030680666833411e-06, + "loss": 0.3254, + "step": 17057 + }, + { + "epoch": 0.7720298710115411, + "grad_norm": 0.5882841282729346, + "learning_rate": 1.3025746443529459e-06, + "loss": 0.2951, + "step": 17058 + }, + { + "epoch": 0.7720751301199367, + "grad_norm": 0.5547272609744122, + "learning_rate": 1.302081301468865e-06, + "loss": 0.2637, + "step": 17059 + }, + { + "epoch": 0.7721203892283321, + "grad_norm": 0.5719637599550884, + "learning_rate": 1.3015880380417017e-06, + "loss": 0.2698, + "step": 17060 + }, + { + "epoch": 0.7721656483367277, + "grad_norm": 0.6128984698011726, + "learning_rate": 1.3010948540820528e-06, + "loss": 0.3142, + "step": 17061 + }, + { + "epoch": 0.7722109074451233, + "grad_norm": 0.7081009445258358, + "learning_rate": 1.3006017496005135e-06, + "loss": 0.3351, + "step": 17062 + }, + { + "epoch": 0.7722561665535189, + "grad_norm": 0.280580376477097, + "learning_rate": 1.3001087246076821e-06, + "loss": 0.4538, + "step": 17063 + }, + { + "epoch": 0.7723014256619145, + "grad_norm": 0.7330898678125246, + "learning_rate": 1.29961577911415e-06, + "loss": 0.3027, + "step": 17064 + }, + { + "epoch": 0.77234668477031, + "grad_norm": 0.5807060894420104, + "learning_rate": 1.2991229131305106e-06, + "loss": 0.2733, + "step": 17065 + }, + { + "epoch": 0.7723919438787056, + "grad_norm": 0.27381713925430146, + "learning_rate": 1.298630126667354e-06, + "loss": 0.4627, + "step": 17066 + }, + { + "epoch": 0.7724372029871012, + "grad_norm": 0.70382379688019, + "learning_rate": 1.2981374197352663e-06, + "loss": 0.2926, + "step": 17067 + }, + { + "epoch": 0.7724824620954968, + "grad_norm": 0.5869086110499759, + "learning_rate": 1.2976447923448376e-06, + "loss": 0.278, + "step": 17068 + }, + { + "epoch": 0.7725277212038922, + "grad_norm": 0.26054148489340934, + "learning_rate": 1.2971522445066515e-06, + "loss": 0.4378, + "step": 17069 + }, + { + "epoch": 0.7725729803122878, + "grad_norm": 0.7147306754379935, + "learning_rate": 1.29665977623129e-06, + "loss": 0.3465, + "step": 17070 + }, + { + "epoch": 0.7726182394206834, + "grad_norm": 0.6196791744507667, + "learning_rate": 1.2961673875293352e-06, + "loss": 0.287, + "step": 17071 + }, + { + "epoch": 0.772663498529079, + "grad_norm": 0.7222106900360292, + "learning_rate": 1.2956750784113698e-06, + "loss": 0.3465, + "step": 17072 + }, + { + "epoch": 0.7727087576374746, + "grad_norm": 0.6263213043582183, + "learning_rate": 1.2951828488879702e-06, + "loss": 0.3168, + "step": 17073 + }, + { + "epoch": 0.7727540167458701, + "grad_norm": 0.601989846349521, + "learning_rate": 1.2946906989697106e-06, + "loss": 0.3167, + "step": 17074 + }, + { + "epoch": 0.7727992758542657, + "grad_norm": 0.6061256602703506, + "learning_rate": 1.2941986286671682e-06, + "loss": 0.3222, + "step": 17075 + }, + { + "epoch": 0.7728445349626613, + "grad_norm": 0.6098812345161176, + "learning_rate": 1.2937066379909174e-06, + "loss": 0.3075, + "step": 17076 + }, + { + "epoch": 0.7728897940710568, + "grad_norm": 0.577058215520092, + "learning_rate": 1.2932147269515278e-06, + "loss": 0.2862, + "step": 17077 + }, + { + "epoch": 0.7729350531794523, + "grad_norm": 0.7320423453045078, + "learning_rate": 1.2927228955595678e-06, + "loss": 0.2623, + "step": 17078 + }, + { + "epoch": 0.7729803122878479, + "grad_norm": 0.634616556189686, + "learning_rate": 1.292231143825608e-06, + "loss": 0.2689, + "step": 17079 + }, + { + "epoch": 0.7730255713962435, + "grad_norm": 0.6614808934275564, + "learning_rate": 1.2917394717602123e-06, + "loss": 0.2991, + "step": 17080 + }, + { + "epoch": 0.7730708305046391, + "grad_norm": 0.6504693596242206, + "learning_rate": 1.2912478793739474e-06, + "loss": 0.2944, + "step": 17081 + }, + { + "epoch": 0.7731160896130346, + "grad_norm": 0.6111587590059084, + "learning_rate": 1.2907563666773753e-06, + "loss": 0.2904, + "step": 17082 + }, + { + "epoch": 0.7731613487214302, + "grad_norm": 0.6120474107293098, + "learning_rate": 1.2902649336810553e-06, + "loss": 0.3246, + "step": 17083 + }, + { + "epoch": 0.7732066078298258, + "grad_norm": 0.6841406326328182, + "learning_rate": 1.289773580395548e-06, + "loss": 0.2907, + "step": 17084 + }, + { + "epoch": 0.7732518669382213, + "grad_norm": 0.6543032317655777, + "learning_rate": 1.289282306831413e-06, + "loss": 0.3093, + "step": 17085 + }, + { + "epoch": 0.7732971260466169, + "grad_norm": 0.6332293830460849, + "learning_rate": 1.2887911129992047e-06, + "loss": 0.2928, + "step": 17086 + }, + { + "epoch": 0.7733423851550124, + "grad_norm": 0.6293863668690138, + "learning_rate": 1.2882999989094758e-06, + "loss": 0.2799, + "step": 17087 + }, + { + "epoch": 0.773387644263408, + "grad_norm": 0.6591978501859447, + "learning_rate": 1.2878089645727803e-06, + "loss": 0.3173, + "step": 17088 + }, + { + "epoch": 0.7734329033718036, + "grad_norm": 0.6410972269223547, + "learning_rate": 1.2873180099996701e-06, + "loss": 0.3301, + "step": 17089 + }, + { + "epoch": 0.7734781624801992, + "grad_norm": 0.9874183972194387, + "learning_rate": 1.2868271352006938e-06, + "loss": 0.2957, + "step": 17090 + }, + { + "epoch": 0.7735234215885947, + "grad_norm": 0.6636071316751236, + "learning_rate": 1.2863363401863966e-06, + "loss": 0.2871, + "step": 17091 + }, + { + "epoch": 0.7735686806969903, + "grad_norm": 0.5968374672452968, + "learning_rate": 1.2858456249673268e-06, + "loss": 0.2966, + "step": 17092 + }, + { + "epoch": 0.7736139398053858, + "grad_norm": 0.6203246444497553, + "learning_rate": 1.2853549895540268e-06, + "loss": 0.3091, + "step": 17093 + }, + { + "epoch": 0.7736591989137814, + "grad_norm": 0.5971734328529894, + "learning_rate": 1.2848644339570403e-06, + "loss": 0.3399, + "step": 17094 + }, + { + "epoch": 0.7737044580221769, + "grad_norm": 0.7651834237145628, + "learning_rate": 1.2843739581869068e-06, + "loss": 0.3061, + "step": 17095 + }, + { + "epoch": 0.7737497171305725, + "grad_norm": 0.63633869631367, + "learning_rate": 1.283883562254164e-06, + "loss": 0.2688, + "step": 17096 + }, + { + "epoch": 0.7737949762389681, + "grad_norm": 0.6051754070221738, + "learning_rate": 1.2833932461693504e-06, + "loss": 0.2692, + "step": 17097 + }, + { + "epoch": 0.7738402353473637, + "grad_norm": 0.592598034098679, + "learning_rate": 1.282903009943004e-06, + "loss": 0.3354, + "step": 17098 + }, + { + "epoch": 0.7738854944557593, + "grad_norm": 0.624401314189695, + "learning_rate": 1.282412853585653e-06, + "loss": 0.3162, + "step": 17099 + }, + { + "epoch": 0.7739307535641547, + "grad_norm": 0.7322262936851486, + "learning_rate": 1.2819227771078318e-06, + "loss": 0.2703, + "step": 17100 + }, + { + "epoch": 0.7739760126725503, + "grad_norm": 0.6266047926837723, + "learning_rate": 1.281432780520071e-06, + "loss": 0.2636, + "step": 17101 + }, + { + "epoch": 0.7740212717809459, + "grad_norm": 0.6831300671044548, + "learning_rate": 1.280942863832902e-06, + "loss": 0.2694, + "step": 17102 + }, + { + "epoch": 0.7740665308893415, + "grad_norm": 0.799933249191529, + "learning_rate": 1.280453027056846e-06, + "loss": 0.2572, + "step": 17103 + }, + { + "epoch": 0.774111789997737, + "grad_norm": 0.28749663474539183, + "learning_rate": 1.2799632702024307e-06, + "loss": 0.4234, + "step": 17104 + }, + { + "epoch": 0.7741570491061326, + "grad_norm": 0.6069052169608677, + "learning_rate": 1.2794735932801805e-06, + "loss": 0.2805, + "step": 17105 + }, + { + "epoch": 0.7742023082145282, + "grad_norm": 0.28689733822411023, + "learning_rate": 1.2789839963006161e-06, + "loss": 0.4686, + "step": 17106 + }, + { + "epoch": 0.7742475673229238, + "grad_norm": 0.5960346646546882, + "learning_rate": 1.278494479274256e-06, + "loss": 0.3029, + "step": 17107 + }, + { + "epoch": 0.7742928264313192, + "grad_norm": 0.6323977456256337, + "learning_rate": 1.2780050422116214e-06, + "loss": 0.3479, + "step": 17108 + }, + { + "epoch": 0.7743380855397148, + "grad_norm": 0.6001880736032134, + "learning_rate": 1.2775156851232262e-06, + "loss": 0.2833, + "step": 17109 + }, + { + "epoch": 0.7743833446481104, + "grad_norm": 0.6834787514837493, + "learning_rate": 1.277026408019587e-06, + "loss": 0.2696, + "step": 17110 + }, + { + "epoch": 0.774428603756506, + "grad_norm": 0.5895092584163092, + "learning_rate": 1.276537210911216e-06, + "loss": 0.2604, + "step": 17111 + }, + { + "epoch": 0.7744738628649016, + "grad_norm": 0.5966485169919734, + "learning_rate": 1.2760480938086234e-06, + "loss": 0.3136, + "step": 17112 + }, + { + "epoch": 0.7745191219732971, + "grad_norm": 0.6310195151175784, + "learning_rate": 1.2755590567223203e-06, + "loss": 0.2904, + "step": 17113 + }, + { + "epoch": 0.7745643810816927, + "grad_norm": 0.6517472902360024, + "learning_rate": 1.275070099662815e-06, + "loss": 0.3208, + "step": 17114 + }, + { + "epoch": 0.7746096401900883, + "grad_norm": 0.6227530110658003, + "learning_rate": 1.274581222640614e-06, + "loss": 0.3065, + "step": 17115 + }, + { + "epoch": 0.7746548992984839, + "grad_norm": 0.6298855001744631, + "learning_rate": 1.2740924256662185e-06, + "loss": 0.2588, + "step": 17116 + }, + { + "epoch": 0.7747001584068793, + "grad_norm": 0.610395970451911, + "learning_rate": 1.2736037087501342e-06, + "loss": 0.2841, + "step": 17117 + }, + { + "epoch": 0.7747454175152749, + "grad_norm": 0.6716630549579216, + "learning_rate": 1.2731150719028622e-06, + "loss": 0.2988, + "step": 17118 + }, + { + "epoch": 0.7747906766236705, + "grad_norm": 0.6214868094767717, + "learning_rate": 1.2726265151349015e-06, + "loss": 0.3546, + "step": 17119 + }, + { + "epoch": 0.7748359357320661, + "grad_norm": 0.5840971481190741, + "learning_rate": 1.2721380384567477e-06, + "loss": 0.3065, + "step": 17120 + }, + { + "epoch": 0.7748811948404617, + "grad_norm": 0.5838822957571945, + "learning_rate": 1.2716496418788998e-06, + "loss": 0.2748, + "step": 17121 + }, + { + "epoch": 0.7749264539488572, + "grad_norm": 0.6504767470495529, + "learning_rate": 1.2711613254118482e-06, + "loss": 0.3055, + "step": 17122 + }, + { + "epoch": 0.7749717130572528, + "grad_norm": 0.5934252736975144, + "learning_rate": 1.2706730890660896e-06, + "loss": 0.291, + "step": 17123 + }, + { + "epoch": 0.7750169721656484, + "grad_norm": 0.6088252559614474, + "learning_rate": 1.2701849328521127e-06, + "loss": 0.3175, + "step": 17124 + }, + { + "epoch": 0.775062231274044, + "grad_norm": 0.6474140755106753, + "learning_rate": 1.2696968567804042e-06, + "loss": 0.3136, + "step": 17125 + }, + { + "epoch": 0.7751074903824394, + "grad_norm": 0.5741135972312951, + "learning_rate": 1.269208860861454e-06, + "loss": 0.2631, + "step": 17126 + }, + { + "epoch": 0.775152749490835, + "grad_norm": 0.7014835529371155, + "learning_rate": 1.2687209451057498e-06, + "loss": 0.3057, + "step": 17127 + }, + { + "epoch": 0.7751980085992306, + "grad_norm": 0.6135637429616104, + "learning_rate": 1.26823310952377e-06, + "loss": 0.3084, + "step": 17128 + }, + { + "epoch": 0.7752432677076262, + "grad_norm": 0.7016143908046977, + "learning_rate": 1.2677453541259993e-06, + "loss": 0.3007, + "step": 17129 + }, + { + "epoch": 0.7752885268160217, + "grad_norm": 0.29424487408614314, + "learning_rate": 1.2672576789229186e-06, + "loss": 0.4599, + "step": 17130 + }, + { + "epoch": 0.7753337859244173, + "grad_norm": 0.6311842885262616, + "learning_rate": 1.2667700839250086e-06, + "loss": 0.2971, + "step": 17131 + }, + { + "epoch": 0.7753790450328129, + "grad_norm": 0.5907732570227133, + "learning_rate": 1.266282569142741e-06, + "loss": 0.2783, + "step": 17132 + }, + { + "epoch": 0.7754243041412084, + "grad_norm": 0.6576672305095088, + "learning_rate": 1.2657951345865938e-06, + "loss": 0.2876, + "step": 17133 + }, + { + "epoch": 0.775469563249604, + "grad_norm": 0.8345840354859797, + "learning_rate": 1.2653077802670416e-06, + "loss": 0.3081, + "step": 17134 + }, + { + "epoch": 0.7755148223579995, + "grad_norm": 0.6008978189685654, + "learning_rate": 1.264820506194555e-06, + "loss": 0.3252, + "step": 17135 + }, + { + "epoch": 0.7755600814663951, + "grad_norm": 0.2687867028456543, + "learning_rate": 1.2643333123796025e-06, + "loss": 0.4631, + "step": 17136 + }, + { + "epoch": 0.7756053405747907, + "grad_norm": 0.6555826987070684, + "learning_rate": 1.2638461988326556e-06, + "loss": 0.2904, + "step": 17137 + }, + { + "epoch": 0.7756505996831863, + "grad_norm": 0.609080407444969, + "learning_rate": 1.263359165564178e-06, + "loss": 0.2812, + "step": 17138 + }, + { + "epoch": 0.7756958587915818, + "grad_norm": 1.2277354180594564, + "learning_rate": 1.2628722125846365e-06, + "loss": 0.3462, + "step": 17139 + }, + { + "epoch": 0.7757411178999774, + "grad_norm": 0.5927932744795882, + "learning_rate": 1.2623853399044938e-06, + "loss": 0.2616, + "step": 17140 + }, + { + "epoch": 0.7757863770083729, + "grad_norm": 0.5984460545486344, + "learning_rate": 1.2618985475342093e-06, + "loss": 0.2858, + "step": 17141 + }, + { + "epoch": 0.7758316361167685, + "grad_norm": 0.59031256302671, + "learning_rate": 1.2614118354842447e-06, + "loss": 0.2662, + "step": 17142 + }, + { + "epoch": 0.775876895225164, + "grad_norm": 0.7025175929736768, + "learning_rate": 1.2609252037650587e-06, + "loss": 0.3013, + "step": 17143 + }, + { + "epoch": 0.7759221543335596, + "grad_norm": 0.643701224946021, + "learning_rate": 1.2604386523871064e-06, + "loss": 0.2853, + "step": 17144 + }, + { + "epoch": 0.7759674134419552, + "grad_norm": 0.5989556928627308, + "learning_rate": 1.2599521813608412e-06, + "loss": 0.2928, + "step": 17145 + }, + { + "epoch": 0.7760126725503508, + "grad_norm": 0.2845253593714727, + "learning_rate": 1.2594657906967161e-06, + "loss": 0.4847, + "step": 17146 + }, + { + "epoch": 0.7760579316587464, + "grad_norm": 0.6871974893336779, + "learning_rate": 1.2589794804051852e-06, + "loss": 0.3145, + "step": 17147 + }, + { + "epoch": 0.7761031907671418, + "grad_norm": 0.5784810860714322, + "learning_rate": 1.2584932504966952e-06, + "loss": 0.3174, + "step": 17148 + }, + { + "epoch": 0.7761484498755374, + "grad_norm": 0.6567586672835883, + "learning_rate": 1.258007100981693e-06, + "loss": 0.3097, + "step": 17149 + }, + { + "epoch": 0.776193708983933, + "grad_norm": 0.6514163489561462, + "learning_rate": 1.2575210318706266e-06, + "loss": 0.2711, + "step": 17150 + }, + { + "epoch": 0.7762389680923286, + "grad_norm": 0.6198600650648628, + "learning_rate": 1.2570350431739382e-06, + "loss": 0.3026, + "step": 17151 + }, + { + "epoch": 0.7762842272007241, + "grad_norm": 0.6841175367625959, + "learning_rate": 1.256549134902072e-06, + "loss": 0.2564, + "step": 17152 + }, + { + "epoch": 0.7763294863091197, + "grad_norm": 0.6208186416223886, + "learning_rate": 1.2560633070654677e-06, + "loss": 0.3106, + "step": 17153 + }, + { + "epoch": 0.7763747454175153, + "grad_norm": 0.592246840762376, + "learning_rate": 1.2555775596745628e-06, + "loss": 0.2898, + "step": 17154 + }, + { + "epoch": 0.7764200045259109, + "grad_norm": 0.6202578708073626, + "learning_rate": 1.2550918927397965e-06, + "loss": 0.2798, + "step": 17155 + }, + { + "epoch": 0.7764652636343065, + "grad_norm": 0.2673820938547034, + "learning_rate": 1.2546063062716069e-06, + "loss": 0.4716, + "step": 17156 + }, + { + "epoch": 0.7765105227427019, + "grad_norm": 0.2625151895796911, + "learning_rate": 1.2541208002804211e-06, + "loss": 0.4611, + "step": 17157 + }, + { + "epoch": 0.7765557818510975, + "grad_norm": 0.3058334961048853, + "learning_rate": 1.253635374776675e-06, + "loss": 0.4789, + "step": 17158 + }, + { + "epoch": 0.7766010409594931, + "grad_norm": 0.6107794870209484, + "learning_rate": 1.2531500297707987e-06, + "loss": 0.3129, + "step": 17159 + }, + { + "epoch": 0.7766463000678887, + "grad_norm": 0.5760173758350476, + "learning_rate": 1.2526647652732233e-06, + "loss": 0.2955, + "step": 17160 + }, + { + "epoch": 0.7766915591762842, + "grad_norm": 0.6996473534148662, + "learning_rate": 1.2521795812943704e-06, + "loss": 0.3089, + "step": 17161 + }, + { + "epoch": 0.7767368182846798, + "grad_norm": 0.6170363776126002, + "learning_rate": 1.2516944778446676e-06, + "loss": 0.3142, + "step": 17162 + }, + { + "epoch": 0.7767820773930754, + "grad_norm": 0.2843270946939196, + "learning_rate": 1.2512094549345399e-06, + "loss": 0.4601, + "step": 17163 + }, + { + "epoch": 0.776827336501471, + "grad_norm": 0.6367604001768401, + "learning_rate": 1.2507245125744077e-06, + "loss": 0.3181, + "step": 17164 + }, + { + "epoch": 0.7768725956098664, + "grad_norm": 0.5898394959517429, + "learning_rate": 1.2502396507746889e-06, + "loss": 0.2979, + "step": 17165 + }, + { + "epoch": 0.776917854718262, + "grad_norm": 0.6324469738057149, + "learning_rate": 1.2497548695458051e-06, + "loss": 0.3047, + "step": 17166 + }, + { + "epoch": 0.7769631138266576, + "grad_norm": 0.5924126772432989, + "learning_rate": 1.24927016889817e-06, + "loss": 0.29, + "step": 17167 + }, + { + "epoch": 0.7770083729350532, + "grad_norm": 0.274682558773357, + "learning_rate": 1.2487855488422007e-06, + "loss": 0.4564, + "step": 17168 + }, + { + "epoch": 0.7770536320434488, + "grad_norm": 0.5996590732612687, + "learning_rate": 1.2483010093883086e-06, + "loss": 0.2866, + "step": 17169 + }, + { + "epoch": 0.7770988911518443, + "grad_norm": 0.6573037277905804, + "learning_rate": 1.2478165505469042e-06, + "loss": 0.2807, + "step": 17170 + }, + { + "epoch": 0.7771441502602399, + "grad_norm": 0.6040269001549651, + "learning_rate": 1.2473321723283982e-06, + "loss": 0.2917, + "step": 17171 + }, + { + "epoch": 0.7771894093686355, + "grad_norm": 0.263686879267683, + "learning_rate": 1.2468478747432e-06, + "loss": 0.4632, + "step": 17172 + }, + { + "epoch": 0.777234668477031, + "grad_norm": 0.6994408235588686, + "learning_rate": 1.2463636578017142e-06, + "loss": 0.3008, + "step": 17173 + }, + { + "epoch": 0.7772799275854265, + "grad_norm": 0.6136295462490068, + "learning_rate": 1.2458795215143431e-06, + "loss": 0.2719, + "step": 17174 + }, + { + "epoch": 0.7773251866938221, + "grad_norm": 0.6700928782014203, + "learning_rate": 1.2453954658914913e-06, + "loss": 0.33, + "step": 17175 + }, + { + "epoch": 0.7773704458022177, + "grad_norm": 0.6453116410368338, + "learning_rate": 1.2449114909435611e-06, + "loss": 0.2892, + "step": 17176 + }, + { + "epoch": 0.7774157049106133, + "grad_norm": 0.28101865286112065, + "learning_rate": 1.24442759668095e-06, + "loss": 0.4958, + "step": 17177 + }, + { + "epoch": 0.7774609640190088, + "grad_norm": 0.25113335487181443, + "learning_rate": 1.2439437831140538e-06, + "loss": 0.4634, + "step": 17178 + }, + { + "epoch": 0.7775062231274044, + "grad_norm": 0.662903364481204, + "learning_rate": 1.2434600502532717e-06, + "loss": 0.2786, + "step": 17179 + }, + { + "epoch": 0.7775514822358, + "grad_norm": 0.7470827691874483, + "learning_rate": 1.2429763981089938e-06, + "loss": 0.2952, + "step": 17180 + }, + { + "epoch": 0.7775967413441955, + "grad_norm": 0.3091443192580121, + "learning_rate": 1.2424928266916164e-06, + "loss": 0.4501, + "step": 17181 + }, + { + "epoch": 0.7776420004525911, + "grad_norm": 0.5766299726257766, + "learning_rate": 1.2420093360115276e-06, + "loss": 0.2978, + "step": 17182 + }, + { + "epoch": 0.7776872595609866, + "grad_norm": 0.2973232597197437, + "learning_rate": 1.2415259260791147e-06, + "loss": 0.4589, + "step": 17183 + }, + { + "epoch": 0.7777325186693822, + "grad_norm": 0.6270181736822528, + "learning_rate": 1.2410425969047667e-06, + "loss": 0.2664, + "step": 17184 + }, + { + "epoch": 0.7777777777777778, + "grad_norm": 0.26161742101697577, + "learning_rate": 1.2405593484988697e-06, + "loss": 0.4854, + "step": 17185 + }, + { + "epoch": 0.7778230368861734, + "grad_norm": 0.2728915388621859, + "learning_rate": 1.2400761808718065e-06, + "loss": 0.4808, + "step": 17186 + }, + { + "epoch": 0.7778682959945689, + "grad_norm": 0.6215441103367519, + "learning_rate": 1.2395930940339562e-06, + "loss": 0.3026, + "step": 17187 + }, + { + "epoch": 0.7779135551029644, + "grad_norm": 0.5801009208199909, + "learning_rate": 1.2391100879957018e-06, + "loss": 0.2923, + "step": 17188 + }, + { + "epoch": 0.77795881421136, + "grad_norm": 0.6763289072826297, + "learning_rate": 1.2386271627674234e-06, + "loss": 0.3494, + "step": 17189 + }, + { + "epoch": 0.7780040733197556, + "grad_norm": 0.28140635961391025, + "learning_rate": 1.2381443183594927e-06, + "loss": 0.4727, + "step": 17190 + }, + { + "epoch": 0.7780493324281512, + "grad_norm": 0.670661369162288, + "learning_rate": 1.2376615547822867e-06, + "loss": 0.2701, + "step": 17191 + }, + { + "epoch": 0.7780945915365467, + "grad_norm": 0.8439057856638054, + "learning_rate": 1.2371788720461802e-06, + "loss": 0.3252, + "step": 17192 + }, + { + "epoch": 0.7781398506449423, + "grad_norm": 1.1001434994464, + "learning_rate": 1.2366962701615431e-06, + "loss": 0.229, + "step": 17193 + }, + { + "epoch": 0.7781851097533379, + "grad_norm": 0.5700448027566635, + "learning_rate": 1.2362137491387433e-06, + "loss": 0.2615, + "step": 17194 + }, + { + "epoch": 0.7782303688617335, + "grad_norm": 0.6436858853915678, + "learning_rate": 1.2357313089881524e-06, + "loss": 0.2852, + "step": 17195 + }, + { + "epoch": 0.778275627970129, + "grad_norm": 0.6936044601347898, + "learning_rate": 1.235248949720133e-06, + "loss": 0.3068, + "step": 17196 + }, + { + "epoch": 0.7783208870785245, + "grad_norm": 0.6631834829042466, + "learning_rate": 1.2347666713450524e-06, + "loss": 0.3152, + "step": 17197 + }, + { + "epoch": 0.7783661461869201, + "grad_norm": 0.6094239192280482, + "learning_rate": 1.2342844738732724e-06, + "loss": 0.3193, + "step": 17198 + }, + { + "epoch": 0.7784114052953157, + "grad_norm": 0.6329941486039516, + "learning_rate": 1.2338023573151514e-06, + "loss": 0.3004, + "step": 17199 + }, + { + "epoch": 0.7784566644037112, + "grad_norm": 0.6459641698773567, + "learning_rate": 1.2333203216810514e-06, + "loss": 0.2855, + "step": 17200 + }, + { + "epoch": 0.7785019235121068, + "grad_norm": 0.615623943776746, + "learning_rate": 1.2328383669813304e-06, + "loss": 0.3281, + "step": 17201 + }, + { + "epoch": 0.7785471826205024, + "grad_norm": 1.4465718273959651, + "learning_rate": 1.2323564932263428e-06, + "loss": 0.2979, + "step": 17202 + }, + { + "epoch": 0.778592441728898, + "grad_norm": 0.6687764650944367, + "learning_rate": 1.2318747004264414e-06, + "loss": 0.2778, + "step": 17203 + }, + { + "epoch": 0.7786377008372936, + "grad_norm": 0.6070489391281674, + "learning_rate": 1.2313929885919796e-06, + "loss": 0.2385, + "step": 17204 + }, + { + "epoch": 0.778682959945689, + "grad_norm": 0.2771610215799396, + "learning_rate": 1.2309113577333098e-06, + "loss": 0.4846, + "step": 17205 + }, + { + "epoch": 0.7787282190540846, + "grad_norm": 0.6130620834328264, + "learning_rate": 1.230429807860779e-06, + "loss": 0.2521, + "step": 17206 + }, + { + "epoch": 0.7787734781624802, + "grad_norm": 0.6405205580826229, + "learning_rate": 1.2299483389847328e-06, + "loss": 0.3027, + "step": 17207 + }, + { + "epoch": 0.7788187372708758, + "grad_norm": 0.5343316133104158, + "learning_rate": 1.2294669511155193e-06, + "loss": 0.2639, + "step": 17208 + }, + { + "epoch": 0.7788639963792713, + "grad_norm": 0.6271906841197386, + "learning_rate": 1.2289856442634796e-06, + "loss": 0.2932, + "step": 17209 + }, + { + "epoch": 0.7789092554876669, + "grad_norm": 0.6349351163411207, + "learning_rate": 1.2285044184389578e-06, + "loss": 0.291, + "step": 17210 + }, + { + "epoch": 0.7789545145960625, + "grad_norm": 0.6329715545668408, + "learning_rate": 1.2280232736522928e-06, + "loss": 0.3204, + "step": 17211 + }, + { + "epoch": 0.778999773704458, + "grad_norm": 0.2753888188223901, + "learning_rate": 1.2275422099138213e-06, + "loss": 0.4897, + "step": 17212 + }, + { + "epoch": 0.7790450328128535, + "grad_norm": 0.6631362683205497, + "learning_rate": 1.2270612272338816e-06, + "loss": 0.345, + "step": 17213 + }, + { + "epoch": 0.7790902919212491, + "grad_norm": 0.6205185522767497, + "learning_rate": 1.2265803256228103e-06, + "loss": 0.3039, + "step": 17214 + }, + { + "epoch": 0.7791355510296447, + "grad_norm": 0.607641422679698, + "learning_rate": 1.226099505090938e-06, + "loss": 0.2761, + "step": 17215 + }, + { + "epoch": 0.7791808101380403, + "grad_norm": 0.2766637285628353, + "learning_rate": 1.2256187656485957e-06, + "loss": 0.4918, + "step": 17216 + }, + { + "epoch": 0.7792260692464359, + "grad_norm": 0.6734382404378367, + "learning_rate": 1.2251381073061137e-06, + "loss": 0.2883, + "step": 17217 + }, + { + "epoch": 0.7792713283548314, + "grad_norm": 0.5843294729100404, + "learning_rate": 1.2246575300738234e-06, + "loss": 0.293, + "step": 17218 + }, + { + "epoch": 0.779316587463227, + "grad_norm": 0.29133487415654197, + "learning_rate": 1.2241770339620446e-06, + "loss": 0.4569, + "step": 17219 + }, + { + "epoch": 0.7793618465716226, + "grad_norm": 0.6515322371384752, + "learning_rate": 1.2236966189811045e-06, + "loss": 0.3347, + "step": 17220 + }, + { + "epoch": 0.7794071056800181, + "grad_norm": 0.6285603746441605, + "learning_rate": 1.2232162851413282e-06, + "loss": 0.3264, + "step": 17221 + }, + { + "epoch": 0.7794523647884136, + "grad_norm": 0.614769755240032, + "learning_rate": 1.2227360324530335e-06, + "loss": 0.3157, + "step": 17222 + }, + { + "epoch": 0.7794976238968092, + "grad_norm": 0.5887134813600734, + "learning_rate": 1.2222558609265394e-06, + "loss": 0.3023, + "step": 17223 + }, + { + "epoch": 0.7795428830052048, + "grad_norm": 0.6053064454054246, + "learning_rate": 1.2217757705721662e-06, + "loss": 0.2957, + "step": 17224 + }, + { + "epoch": 0.7795881421136004, + "grad_norm": 0.6885769936709684, + "learning_rate": 1.2212957614002263e-06, + "loss": 0.2738, + "step": 17225 + }, + { + "epoch": 0.779633401221996, + "grad_norm": 0.5805273307300288, + "learning_rate": 1.2208158334210363e-06, + "loss": 0.2744, + "step": 17226 + }, + { + "epoch": 0.7796786603303915, + "grad_norm": 0.6466902830249935, + "learning_rate": 1.2203359866449073e-06, + "loss": 0.2786, + "step": 17227 + }, + { + "epoch": 0.779723919438787, + "grad_norm": 0.645014514513907, + "learning_rate": 1.2198562210821474e-06, + "loss": 0.3269, + "step": 17228 + }, + { + "epoch": 0.7797691785471826, + "grad_norm": 0.6807809877605372, + "learning_rate": 1.2193765367430683e-06, + "loss": 0.2917, + "step": 17229 + }, + { + "epoch": 0.7798144376555782, + "grad_norm": 0.2872760545323733, + "learning_rate": 1.2188969336379775e-06, + "loss": 0.4527, + "step": 17230 + }, + { + "epoch": 0.7798596967639737, + "grad_norm": 0.5980345291753337, + "learning_rate": 1.2184174117771786e-06, + "loss": 0.322, + "step": 17231 + }, + { + "epoch": 0.7799049558723693, + "grad_norm": 0.6540469404760676, + "learning_rate": 1.2179379711709738e-06, + "loss": 0.2945, + "step": 17232 + }, + { + "epoch": 0.7799502149807649, + "grad_norm": 0.6760927496791808, + "learning_rate": 1.2174586118296665e-06, + "loss": 0.3381, + "step": 17233 + }, + { + "epoch": 0.7799954740891605, + "grad_norm": 0.9163450526486921, + "learning_rate": 1.2169793337635577e-06, + "loss": 0.2636, + "step": 17234 + }, + { + "epoch": 0.780040733197556, + "grad_norm": 0.607433315490523, + "learning_rate": 1.2165001369829442e-06, + "loss": 0.2601, + "step": 17235 + }, + { + "epoch": 0.7800859923059515, + "grad_norm": 0.6189073530057309, + "learning_rate": 1.2160210214981217e-06, + "loss": 0.295, + "step": 17236 + }, + { + "epoch": 0.7801312514143471, + "grad_norm": 0.6835413411402755, + "learning_rate": 1.215541987319387e-06, + "loss": 0.329, + "step": 17237 + }, + { + "epoch": 0.7801765105227427, + "grad_norm": 0.566895279509252, + "learning_rate": 1.2150630344570301e-06, + "loss": 0.2517, + "step": 17238 + }, + { + "epoch": 0.7802217696311383, + "grad_norm": 0.6177703234752498, + "learning_rate": 1.2145841629213462e-06, + "loss": 0.3313, + "step": 17239 + }, + { + "epoch": 0.7802670287395338, + "grad_norm": 0.6494082399900752, + "learning_rate": 1.2141053727226222e-06, + "loss": 0.2414, + "step": 17240 + }, + { + "epoch": 0.7803122878479294, + "grad_norm": 0.6395650775518928, + "learning_rate": 1.2136266638711452e-06, + "loss": 0.2737, + "step": 17241 + }, + { + "epoch": 0.780357546956325, + "grad_norm": 0.6178599277507637, + "learning_rate": 1.2131480363772018e-06, + "loss": 0.3063, + "step": 17242 + }, + { + "epoch": 0.7804028060647206, + "grad_norm": 0.6273313869636293, + "learning_rate": 1.2126694902510783e-06, + "loss": 0.3056, + "step": 17243 + }, + { + "epoch": 0.780448065173116, + "grad_norm": 1.1095679829939256, + "learning_rate": 1.2121910255030556e-06, + "loss": 0.2808, + "step": 17244 + }, + { + "epoch": 0.7804933242815116, + "grad_norm": 0.5816461982748863, + "learning_rate": 1.2117126421434127e-06, + "loss": 0.2703, + "step": 17245 + }, + { + "epoch": 0.7805385833899072, + "grad_norm": 0.6286741517081604, + "learning_rate": 1.2112343401824306e-06, + "loss": 0.3267, + "step": 17246 + }, + { + "epoch": 0.7805838424983028, + "grad_norm": 0.6109071921166503, + "learning_rate": 1.2107561196303874e-06, + "loss": 0.3199, + "step": 17247 + }, + { + "epoch": 0.7806291016066983, + "grad_norm": 0.680967226262785, + "learning_rate": 1.2102779804975574e-06, + "loss": 0.321, + "step": 17248 + }, + { + "epoch": 0.7806743607150939, + "grad_norm": 0.6266455172496049, + "learning_rate": 1.209799922794213e-06, + "loss": 0.3042, + "step": 17249 + }, + { + "epoch": 0.7807196198234895, + "grad_norm": 0.2809296741128752, + "learning_rate": 1.2093219465306289e-06, + "loss": 0.4413, + "step": 17250 + }, + { + "epoch": 0.7807648789318851, + "grad_norm": 0.6475967871995113, + "learning_rate": 1.2088440517170729e-06, + "loss": 0.3074, + "step": 17251 + }, + { + "epoch": 0.7808101380402807, + "grad_norm": 0.5713740728066894, + "learning_rate": 1.2083662383638156e-06, + "loss": 0.2676, + "step": 17252 + }, + { + "epoch": 0.7808553971486761, + "grad_norm": 0.5782149452659897, + "learning_rate": 1.207888506481123e-06, + "loss": 0.28, + "step": 17253 + }, + { + "epoch": 0.7809006562570717, + "grad_norm": 0.2789851147253102, + "learning_rate": 1.2074108560792586e-06, + "loss": 0.4498, + "step": 17254 + }, + { + "epoch": 0.7809459153654673, + "grad_norm": 0.6078179929177168, + "learning_rate": 1.2069332871684875e-06, + "loss": 0.2967, + "step": 17255 + }, + { + "epoch": 0.7809911744738629, + "grad_norm": 0.5688470930722148, + "learning_rate": 1.2064557997590697e-06, + "loss": 0.2738, + "step": 17256 + }, + { + "epoch": 0.7810364335822584, + "grad_norm": 0.5808970913291032, + "learning_rate": 1.2059783938612674e-06, + "loss": 0.2825, + "step": 17257 + }, + { + "epoch": 0.781081692690654, + "grad_norm": 0.60264982018552, + "learning_rate": 1.2055010694853347e-06, + "loss": 0.2928, + "step": 17258 + }, + { + "epoch": 0.7811269517990496, + "grad_norm": 0.630800348885263, + "learning_rate": 1.2050238266415325e-06, + "loss": 0.3223, + "step": 17259 + }, + { + "epoch": 0.7811722109074452, + "grad_norm": 0.6324553607490628, + "learning_rate": 1.2045466653401122e-06, + "loss": 0.2849, + "step": 17260 + }, + { + "epoch": 0.7812174700158407, + "grad_norm": 0.6422699549315195, + "learning_rate": 1.204069585591326e-06, + "loss": 0.2833, + "step": 17261 + }, + { + "epoch": 0.7812627291242362, + "grad_norm": 0.25655613677725086, + "learning_rate": 1.203592587405426e-06, + "loss": 0.437, + "step": 17262 + }, + { + "epoch": 0.7813079882326318, + "grad_norm": 0.5916781393995728, + "learning_rate": 1.2031156707926632e-06, + "loss": 0.3006, + "step": 17263 + }, + { + "epoch": 0.7813532473410274, + "grad_norm": 0.5927202272252555, + "learning_rate": 1.2026388357632835e-06, + "loss": 0.2649, + "step": 17264 + }, + { + "epoch": 0.781398506449423, + "grad_norm": 0.6134259430364656, + "learning_rate": 1.202162082327531e-06, + "loss": 0.2859, + "step": 17265 + }, + { + "epoch": 0.7814437655578185, + "grad_norm": 0.598847689996696, + "learning_rate": 1.2016854104956522e-06, + "loss": 0.289, + "step": 17266 + }, + { + "epoch": 0.7814890246662141, + "grad_norm": 0.6069934691634431, + "learning_rate": 1.201208820277887e-06, + "loss": 0.2622, + "step": 17267 + }, + { + "epoch": 0.7815342837746097, + "grad_norm": 0.6079465311713511, + "learning_rate": 1.2007323116844789e-06, + "loss": 0.2764, + "step": 17268 + }, + { + "epoch": 0.7815795428830052, + "grad_norm": 0.5959024824792365, + "learning_rate": 1.2002558847256652e-06, + "loss": 0.2586, + "step": 17269 + }, + { + "epoch": 0.7816248019914007, + "grad_norm": 0.6086991031251796, + "learning_rate": 1.1997795394116802e-06, + "loss": 0.3159, + "step": 17270 + }, + { + "epoch": 0.7816700610997963, + "grad_norm": 0.6123399712260394, + "learning_rate": 1.1993032757527618e-06, + "loss": 0.3057, + "step": 17271 + }, + { + "epoch": 0.7817153202081919, + "grad_norm": 0.29205414473219377, + "learning_rate": 1.1988270937591446e-06, + "loss": 0.4743, + "step": 17272 + }, + { + "epoch": 0.7817605793165875, + "grad_norm": 0.5817875667486778, + "learning_rate": 1.1983509934410586e-06, + "loss": 0.2693, + "step": 17273 + }, + { + "epoch": 0.7818058384249831, + "grad_norm": 0.6029411095590721, + "learning_rate": 1.1978749748087325e-06, + "loss": 0.2755, + "step": 17274 + }, + { + "epoch": 0.7818510975333786, + "grad_norm": 0.25139198220891606, + "learning_rate": 1.1973990378723954e-06, + "loss": 0.4454, + "step": 17275 + }, + { + "epoch": 0.7818963566417741, + "grad_norm": 1.0187806068919762, + "learning_rate": 1.1969231826422762e-06, + "loss": 0.2676, + "step": 17276 + }, + { + "epoch": 0.7819416157501697, + "grad_norm": 0.5490177076170151, + "learning_rate": 1.1964474091285976e-06, + "loss": 0.2568, + "step": 17277 + }, + { + "epoch": 0.7819868748585653, + "grad_norm": 0.2790758147332501, + "learning_rate": 1.1959717173415807e-06, + "loss": 0.4767, + "step": 17278 + }, + { + "epoch": 0.7820321339669608, + "grad_norm": 0.27360207391679175, + "learning_rate": 1.19549610729145e-06, + "loss": 0.4783, + "step": 17279 + }, + { + "epoch": 0.7820773930753564, + "grad_norm": 0.6240491325841333, + "learning_rate": 1.1950205789884217e-06, + "loss": 0.2921, + "step": 17280 + }, + { + "epoch": 0.782122652183752, + "grad_norm": 0.5984853106488534, + "learning_rate": 1.1945451324427166e-06, + "loss": 0.2819, + "step": 17281 + }, + { + "epoch": 0.7821679112921476, + "grad_norm": 0.5494188922806639, + "learning_rate": 1.194069767664549e-06, + "loss": 0.2744, + "step": 17282 + }, + { + "epoch": 0.7822131704005431, + "grad_norm": 0.5946047717941394, + "learning_rate": 1.1935944846641318e-06, + "loss": 0.2958, + "step": 17283 + }, + { + "epoch": 0.7822584295089386, + "grad_norm": 0.6009843995686562, + "learning_rate": 1.1931192834516787e-06, + "loss": 0.298, + "step": 17284 + }, + { + "epoch": 0.7823036886173342, + "grad_norm": 0.5980249673659658, + "learning_rate": 1.1926441640374015e-06, + "loss": 0.2896, + "step": 17285 + }, + { + "epoch": 0.7823489477257298, + "grad_norm": 1.0807125893259095, + "learning_rate": 1.1921691264315078e-06, + "loss": 0.2637, + "step": 17286 + }, + { + "epoch": 0.7823942068341254, + "grad_norm": 0.6771671739260511, + "learning_rate": 1.191694170644203e-06, + "loss": 0.304, + "step": 17287 + }, + { + "epoch": 0.7824394659425209, + "grad_norm": 0.6605418925307608, + "learning_rate": 1.191219296685696e-06, + "loss": 0.3074, + "step": 17288 + }, + { + "epoch": 0.7824847250509165, + "grad_norm": 0.2795546897923235, + "learning_rate": 1.1907445045661885e-06, + "loss": 0.5121, + "step": 17289 + }, + { + "epoch": 0.7825299841593121, + "grad_norm": 0.2621604055121253, + "learning_rate": 1.1902697942958806e-06, + "loss": 0.4615, + "step": 17290 + }, + { + "epoch": 0.7825752432677077, + "grad_norm": 0.5772421413586936, + "learning_rate": 1.189795165884975e-06, + "loss": 0.2757, + "step": 17291 + }, + { + "epoch": 0.7826205023761031, + "grad_norm": 0.7082060921982903, + "learning_rate": 1.1893206193436696e-06, + "loss": 0.299, + "step": 17292 + }, + { + "epoch": 0.7826657614844987, + "grad_norm": 0.2718666477866807, + "learning_rate": 1.188846154682161e-06, + "loss": 0.467, + "step": 17293 + }, + { + "epoch": 0.7827110205928943, + "grad_norm": 0.5893728826172965, + "learning_rate": 1.1883717719106419e-06, + "loss": 0.3106, + "step": 17294 + }, + { + "epoch": 0.7827562797012899, + "grad_norm": 0.773709814014488, + "learning_rate": 1.1878974710393082e-06, + "loss": 0.2885, + "step": 17295 + }, + { + "epoch": 0.7828015388096855, + "grad_norm": 0.6151307002494818, + "learning_rate": 1.1874232520783486e-06, + "loss": 0.2649, + "step": 17296 + }, + { + "epoch": 0.782846797918081, + "grad_norm": 0.6510029108292334, + "learning_rate": 1.1869491150379553e-06, + "loss": 0.2729, + "step": 17297 + }, + { + "epoch": 0.7828920570264766, + "grad_norm": 0.6532810746624844, + "learning_rate": 1.1864750599283132e-06, + "loss": 0.2947, + "step": 17298 + }, + { + "epoch": 0.7829373161348722, + "grad_norm": 0.6156139091235419, + "learning_rate": 1.1860010867596112e-06, + "loss": 0.3081, + "step": 17299 + }, + { + "epoch": 0.7829825752432678, + "grad_norm": 0.636874452229096, + "learning_rate": 1.1855271955420306e-06, + "loss": 0.3007, + "step": 17300 + }, + { + "epoch": 0.7830278343516632, + "grad_norm": 0.6506900226776479, + "learning_rate": 1.1850533862857567e-06, + "loss": 0.2929, + "step": 17301 + }, + { + "epoch": 0.7830730934600588, + "grad_norm": 0.645473079213239, + "learning_rate": 1.1845796590009684e-06, + "loss": 0.3279, + "step": 17302 + }, + { + "epoch": 0.7831183525684544, + "grad_norm": 0.6372074891495952, + "learning_rate": 1.1841060136978443e-06, + "loss": 0.2731, + "step": 17303 + }, + { + "epoch": 0.78316361167685, + "grad_norm": 0.560182761230861, + "learning_rate": 1.183632450386562e-06, + "loss": 0.2679, + "step": 17304 + }, + { + "epoch": 0.7832088707852455, + "grad_norm": 0.6097224018444345, + "learning_rate": 1.1831589690772988e-06, + "loss": 0.2748, + "step": 17305 + }, + { + "epoch": 0.7832541298936411, + "grad_norm": 0.2578450905562074, + "learning_rate": 1.1826855697802264e-06, + "loss": 0.4553, + "step": 17306 + }, + { + "epoch": 0.7832993890020367, + "grad_norm": 0.6094006990832728, + "learning_rate": 1.1822122525055163e-06, + "loss": 0.3061, + "step": 17307 + }, + { + "epoch": 0.7833446481104323, + "grad_norm": 0.6032761051944457, + "learning_rate": 1.1817390172633402e-06, + "loss": 0.2967, + "step": 17308 + }, + { + "epoch": 0.7833899072188278, + "grad_norm": 0.6550332039805137, + "learning_rate": 1.1812658640638653e-06, + "loss": 0.3423, + "step": 17309 + }, + { + "epoch": 0.7834351663272233, + "grad_norm": 0.6853703330648068, + "learning_rate": 1.180792792917259e-06, + "loss": 0.2907, + "step": 17310 + }, + { + "epoch": 0.7834804254356189, + "grad_norm": 0.6279731879623388, + "learning_rate": 1.1803198038336866e-06, + "loss": 0.2822, + "step": 17311 + }, + { + "epoch": 0.7835256845440145, + "grad_norm": 0.601357385134243, + "learning_rate": 1.1798468968233084e-06, + "loss": 0.3088, + "step": 17312 + }, + { + "epoch": 0.7835709436524101, + "grad_norm": 0.27019008623071317, + "learning_rate": 1.179374071896288e-06, + "loss": 0.4758, + "step": 17313 + }, + { + "epoch": 0.7836162027608056, + "grad_norm": 0.5989830670091586, + "learning_rate": 1.178901329062786e-06, + "loss": 0.265, + "step": 17314 + }, + { + "epoch": 0.7836614618692012, + "grad_norm": 0.6033427541616095, + "learning_rate": 1.1784286683329587e-06, + "loss": 0.2784, + "step": 17315 + }, + { + "epoch": 0.7837067209775967, + "grad_norm": 0.261811394535706, + "learning_rate": 1.1779560897169611e-06, + "loss": 0.4445, + "step": 17316 + }, + { + "epoch": 0.7837519800859923, + "grad_norm": 0.5609501858635176, + "learning_rate": 1.1774835932249485e-06, + "loss": 0.2336, + "step": 17317 + }, + { + "epoch": 0.7837972391943878, + "grad_norm": 0.6105280956973805, + "learning_rate": 1.1770111788670763e-06, + "loss": 0.2644, + "step": 17318 + }, + { + "epoch": 0.7838424983027834, + "grad_norm": 0.6030722422612644, + "learning_rate": 1.1765388466534895e-06, + "loss": 0.2569, + "step": 17319 + }, + { + "epoch": 0.783887757411179, + "grad_norm": 0.6215687069796155, + "learning_rate": 1.1760665965943402e-06, + "loss": 0.3065, + "step": 17320 + }, + { + "epoch": 0.7839330165195746, + "grad_norm": 0.612128101161067, + "learning_rate": 1.1755944286997766e-06, + "loss": 0.2529, + "step": 17321 + }, + { + "epoch": 0.7839782756279702, + "grad_norm": 0.5852209019344554, + "learning_rate": 1.175122342979943e-06, + "loss": 0.2851, + "step": 17322 + }, + { + "epoch": 0.7840235347363657, + "grad_norm": 0.2576637423810731, + "learning_rate": 1.174650339444982e-06, + "loss": 0.4541, + "step": 17323 + }, + { + "epoch": 0.7840687938447612, + "grad_norm": 0.5993989103121361, + "learning_rate": 1.1741784181050376e-06, + "loss": 0.2818, + "step": 17324 + }, + { + "epoch": 0.7841140529531568, + "grad_norm": 0.6645192315616409, + "learning_rate": 1.1737065789702473e-06, + "loss": 0.2974, + "step": 17325 + }, + { + "epoch": 0.7841593120615524, + "grad_norm": 0.6193089282975309, + "learning_rate": 1.1732348220507529e-06, + "loss": 0.2586, + "step": 17326 + }, + { + "epoch": 0.7842045711699479, + "grad_norm": 0.2807318580083904, + "learning_rate": 1.1727631473566875e-06, + "loss": 0.4753, + "step": 17327 + }, + { + "epoch": 0.7842498302783435, + "grad_norm": 0.28674929824535034, + "learning_rate": 1.1722915548981896e-06, + "loss": 0.4746, + "step": 17328 + }, + { + "epoch": 0.7842950893867391, + "grad_norm": 0.23822296288756592, + "learning_rate": 1.1718200446853877e-06, + "loss": 0.4742, + "step": 17329 + }, + { + "epoch": 0.7843403484951347, + "grad_norm": 0.6216948625679023, + "learning_rate": 1.1713486167284183e-06, + "loss": 0.2915, + "step": 17330 + }, + { + "epoch": 0.7843856076035302, + "grad_norm": 0.2684113634588021, + "learning_rate": 1.1708772710374078e-06, + "loss": 0.4857, + "step": 17331 + }, + { + "epoch": 0.7844308667119257, + "grad_norm": 0.2603748463730575, + "learning_rate": 1.1704060076224827e-06, + "loss": 0.4521, + "step": 17332 + }, + { + "epoch": 0.7844761258203213, + "grad_norm": 0.6779421800191525, + "learning_rate": 1.169934826493771e-06, + "loss": 0.2767, + "step": 17333 + }, + { + "epoch": 0.7845213849287169, + "grad_norm": 0.7134792482230873, + "learning_rate": 1.1694637276613985e-06, + "loss": 0.3272, + "step": 17334 + }, + { + "epoch": 0.7845666440371125, + "grad_norm": 0.6114288019983596, + "learning_rate": 1.168992711135486e-06, + "loss": 0.2796, + "step": 17335 + }, + { + "epoch": 0.784611903145508, + "grad_norm": 0.6899832877009546, + "learning_rate": 1.1685217769261519e-06, + "loss": 0.2972, + "step": 17336 + }, + { + "epoch": 0.7846571622539036, + "grad_norm": 0.5679586106893408, + "learning_rate": 1.1680509250435195e-06, + "loss": 0.2816, + "step": 17337 + }, + { + "epoch": 0.7847024213622992, + "grad_norm": 0.6080534926128125, + "learning_rate": 1.1675801554977017e-06, + "loss": 0.3232, + "step": 17338 + }, + { + "epoch": 0.7847476804706948, + "grad_norm": 0.5998955037981734, + "learning_rate": 1.1671094682988182e-06, + "loss": 0.2831, + "step": 17339 + }, + { + "epoch": 0.7847929395790902, + "grad_norm": 0.5728524362999736, + "learning_rate": 1.1666388634569798e-06, + "loss": 0.2993, + "step": 17340 + }, + { + "epoch": 0.7848381986874858, + "grad_norm": 0.672290396497668, + "learning_rate": 1.1661683409822976e-06, + "loss": 0.3322, + "step": 17341 + }, + { + "epoch": 0.7848834577958814, + "grad_norm": 0.650360992582781, + "learning_rate": 1.1656979008848834e-06, + "loss": 0.2962, + "step": 17342 + }, + { + "epoch": 0.784928716904277, + "grad_norm": 0.5724326349339708, + "learning_rate": 1.1652275431748462e-06, + "loss": 0.2957, + "step": 17343 + }, + { + "epoch": 0.7849739760126726, + "grad_norm": 1.5135642866782946, + "learning_rate": 1.164757267862292e-06, + "loss": 0.2927, + "step": 17344 + }, + { + "epoch": 0.7850192351210681, + "grad_norm": 0.6320185461540719, + "learning_rate": 1.1642870749573231e-06, + "loss": 0.26, + "step": 17345 + }, + { + "epoch": 0.7850644942294637, + "grad_norm": 0.6058476301879185, + "learning_rate": 1.1638169644700447e-06, + "loss": 0.3077, + "step": 17346 + }, + { + "epoch": 0.7851097533378593, + "grad_norm": 0.3120894027493385, + "learning_rate": 1.1633469364105604e-06, + "loss": 0.4534, + "step": 17347 + }, + { + "epoch": 0.7851550124462549, + "grad_norm": 0.5984657291673785, + "learning_rate": 1.1628769907889643e-06, + "loss": 0.3339, + "step": 17348 + }, + { + "epoch": 0.7852002715546503, + "grad_norm": 0.6255476437590982, + "learning_rate": 1.162407127615357e-06, + "loss": 0.2648, + "step": 17349 + }, + { + "epoch": 0.7852455306630459, + "grad_norm": 0.6902499963136173, + "learning_rate": 1.1619373468998357e-06, + "loss": 0.3146, + "step": 17350 + }, + { + "epoch": 0.7852907897714415, + "grad_norm": 0.2817907502818888, + "learning_rate": 1.1614676486524927e-06, + "loss": 0.4615, + "step": 17351 + }, + { + "epoch": 0.7853360488798371, + "grad_norm": 0.6043644113354228, + "learning_rate": 1.1609980328834196e-06, + "loss": 0.2971, + "step": 17352 + }, + { + "epoch": 0.7853813079882326, + "grad_norm": 0.5668065899841754, + "learning_rate": 1.16052849960271e-06, + "loss": 0.2657, + "step": 17353 + }, + { + "epoch": 0.7854265670966282, + "grad_norm": 0.26723005660984916, + "learning_rate": 1.1600590488204495e-06, + "loss": 0.4568, + "step": 17354 + }, + { + "epoch": 0.7854718262050238, + "grad_norm": 0.5662361819226162, + "learning_rate": 1.159589680546727e-06, + "loss": 0.2744, + "step": 17355 + }, + { + "epoch": 0.7855170853134193, + "grad_norm": 0.26301807410907907, + "learning_rate": 1.159120394791627e-06, + "loss": 0.4581, + "step": 17356 + }, + { + "epoch": 0.7855623444218149, + "grad_norm": 0.26376801379454134, + "learning_rate": 1.1586511915652343e-06, + "loss": 0.4793, + "step": 17357 + }, + { + "epoch": 0.7856076035302104, + "grad_norm": 0.6552962676442267, + "learning_rate": 1.1581820708776282e-06, + "loss": 0.3042, + "step": 17358 + }, + { + "epoch": 0.785652862638606, + "grad_norm": 0.749149067682797, + "learning_rate": 1.1577130327388918e-06, + "loss": 0.2622, + "step": 17359 + }, + { + "epoch": 0.7856981217470016, + "grad_norm": 0.5841610216480839, + "learning_rate": 1.1572440771591014e-06, + "loss": 0.3008, + "step": 17360 + }, + { + "epoch": 0.7857433808553972, + "grad_norm": 0.6299373407052793, + "learning_rate": 1.1567752041483328e-06, + "loss": 0.3048, + "step": 17361 + }, + { + "epoch": 0.7857886399637927, + "grad_norm": 0.7158363775054724, + "learning_rate": 1.1563064137166607e-06, + "loss": 0.2657, + "step": 17362 + }, + { + "epoch": 0.7858338990721883, + "grad_norm": 0.27321909472327843, + "learning_rate": 1.1558377058741605e-06, + "loss": 0.4914, + "step": 17363 + }, + { + "epoch": 0.7858791581805838, + "grad_norm": 0.27974658066057784, + "learning_rate": 1.1553690806309015e-06, + "loss": 0.4868, + "step": 17364 + }, + { + "epoch": 0.7859244172889794, + "grad_norm": 0.26357198631483486, + "learning_rate": 1.154900537996952e-06, + "loss": 0.4575, + "step": 17365 + }, + { + "epoch": 0.7859696763973749, + "grad_norm": 0.8363162554904776, + "learning_rate": 1.154432077982382e-06, + "loss": 0.2967, + "step": 17366 + }, + { + "epoch": 0.7860149355057705, + "grad_norm": 0.5646840841086878, + "learning_rate": 1.1539637005972543e-06, + "loss": 0.2601, + "step": 17367 + }, + { + "epoch": 0.7860601946141661, + "grad_norm": 0.5884686071005956, + "learning_rate": 1.1534954058516357e-06, + "loss": 0.2893, + "step": 17368 + }, + { + "epoch": 0.7861054537225617, + "grad_norm": 0.6341286607495344, + "learning_rate": 1.1530271937555859e-06, + "loss": 0.3212, + "step": 17369 + }, + { + "epoch": 0.7861507128309573, + "grad_norm": 0.653173806007168, + "learning_rate": 1.152559064319168e-06, + "loss": 0.2998, + "step": 17370 + }, + { + "epoch": 0.7861959719393528, + "grad_norm": 0.28527858241156007, + "learning_rate": 1.152091017552438e-06, + "loss": 0.4712, + "step": 17371 + }, + { + "epoch": 0.7862412310477483, + "grad_norm": 0.7963908930873177, + "learning_rate": 1.1516230534654554e-06, + "loss": 0.3352, + "step": 17372 + }, + { + "epoch": 0.7862864901561439, + "grad_norm": 0.6102186650838729, + "learning_rate": 1.151155172068274e-06, + "loss": 0.2846, + "step": 17373 + }, + { + "epoch": 0.7863317492645395, + "grad_norm": 0.27518292768602326, + "learning_rate": 1.1506873733709457e-06, + "loss": 0.4701, + "step": 17374 + }, + { + "epoch": 0.786377008372935, + "grad_norm": 0.6106521447355924, + "learning_rate": 1.1502196573835239e-06, + "loss": 0.2709, + "step": 17375 + }, + { + "epoch": 0.7864222674813306, + "grad_norm": 0.6243633846315203, + "learning_rate": 1.1497520241160603e-06, + "loss": 0.2618, + "step": 17376 + }, + { + "epoch": 0.7864675265897262, + "grad_norm": 0.9340048826982438, + "learning_rate": 1.1492844735785979e-06, + "loss": 0.3185, + "step": 17377 + }, + { + "epoch": 0.7865127856981218, + "grad_norm": 0.6010856148363889, + "learning_rate": 1.1488170057811853e-06, + "loss": 0.3282, + "step": 17378 + }, + { + "epoch": 0.7865580448065174, + "grad_norm": 0.650174914655158, + "learning_rate": 1.148349620733869e-06, + "loss": 0.2786, + "step": 17379 + }, + { + "epoch": 0.7866033039149128, + "grad_norm": 0.6267531884417445, + "learning_rate": 1.1478823184466897e-06, + "loss": 0.2876, + "step": 17380 + }, + { + "epoch": 0.7866485630233084, + "grad_norm": 0.651212363619707, + "learning_rate": 1.1474150989296872e-06, + "loss": 0.2996, + "step": 17381 + }, + { + "epoch": 0.786693822131704, + "grad_norm": 0.3385550548730938, + "learning_rate": 1.1469479621929036e-06, + "loss": 0.4627, + "step": 17382 + }, + { + "epoch": 0.7867390812400996, + "grad_norm": 1.1355741833386386, + "learning_rate": 1.146480908246373e-06, + "loss": 0.293, + "step": 17383 + }, + { + "epoch": 0.7867843403484951, + "grad_norm": 0.5953511777939547, + "learning_rate": 1.1460139371001339e-06, + "loss": 0.3296, + "step": 17384 + }, + { + "epoch": 0.7868295994568907, + "grad_norm": 0.28265491438938334, + "learning_rate": 1.1455470487642167e-06, + "loss": 0.4861, + "step": 17385 + }, + { + "epoch": 0.7868748585652863, + "grad_norm": 0.6026682293599088, + "learning_rate": 1.1450802432486574e-06, + "loss": 0.2921, + "step": 17386 + }, + { + "epoch": 0.7869201176736819, + "grad_norm": 0.6458133985405689, + "learning_rate": 1.1446135205634829e-06, + "loss": 0.2722, + "step": 17387 + }, + { + "epoch": 0.7869653767820773, + "grad_norm": 0.6222737944884161, + "learning_rate": 1.144146880718724e-06, + "loss": 0.2813, + "step": 17388 + }, + { + "epoch": 0.7870106358904729, + "grad_norm": 0.6055298101392338, + "learning_rate": 1.1436803237244065e-06, + "loss": 0.3041, + "step": 17389 + }, + { + "epoch": 0.7870558949988685, + "grad_norm": 0.6225200984515917, + "learning_rate": 1.1432138495905531e-06, + "loss": 0.3363, + "step": 17390 + }, + { + "epoch": 0.7871011541072641, + "grad_norm": 0.630228041297407, + "learning_rate": 1.1427474583271896e-06, + "loss": 0.2508, + "step": 17391 + }, + { + "epoch": 0.7871464132156597, + "grad_norm": 0.8153673652545924, + "learning_rate": 1.1422811499443375e-06, + "loss": 0.32, + "step": 17392 + }, + { + "epoch": 0.7871916723240552, + "grad_norm": 0.6295439380761974, + "learning_rate": 1.1418149244520155e-06, + "loss": 0.3029, + "step": 17393 + }, + { + "epoch": 0.7872369314324508, + "grad_norm": 0.6664844860170986, + "learning_rate": 1.1413487818602397e-06, + "loss": 0.3676, + "step": 17394 + }, + { + "epoch": 0.7872821905408464, + "grad_norm": 0.6075032187161125, + "learning_rate": 1.1408827221790297e-06, + "loss": 0.2806, + "step": 17395 + }, + { + "epoch": 0.787327449649242, + "grad_norm": 0.6965414154938917, + "learning_rate": 1.1404167454183957e-06, + "loss": 0.3159, + "step": 17396 + }, + { + "epoch": 0.7873727087576374, + "grad_norm": 0.6003957067092349, + "learning_rate": 1.1399508515883533e-06, + "loss": 0.272, + "step": 17397 + }, + { + "epoch": 0.787417967866033, + "grad_norm": 0.28195165676181216, + "learning_rate": 1.1394850406989106e-06, + "loss": 0.4759, + "step": 17398 + }, + { + "epoch": 0.7874632269744286, + "grad_norm": 1.5650898904360315, + "learning_rate": 1.139019312760079e-06, + "loss": 0.2927, + "step": 17399 + }, + { + "epoch": 0.7875084860828242, + "grad_norm": 0.34002607274237956, + "learning_rate": 1.1385536677818632e-06, + "loss": 0.429, + "step": 17400 + }, + { + "epoch": 0.7875537451912197, + "grad_norm": 0.6491468227013686, + "learning_rate": 1.138088105774271e-06, + "loss": 0.2949, + "step": 17401 + }, + { + "epoch": 0.7875990042996153, + "grad_norm": 0.6195093709848404, + "learning_rate": 1.137622626747304e-06, + "loss": 0.2673, + "step": 17402 + }, + { + "epoch": 0.7876442634080109, + "grad_norm": 0.5960717594699569, + "learning_rate": 1.1371572307109634e-06, + "loss": 0.3477, + "step": 17403 + }, + { + "epoch": 0.7876895225164064, + "grad_norm": 0.5759243199990242, + "learning_rate": 1.13669191767525e-06, + "loss": 0.2689, + "step": 17404 + }, + { + "epoch": 0.787734781624802, + "grad_norm": 0.6149926025008491, + "learning_rate": 1.1362266876501649e-06, + "loss": 0.2394, + "step": 17405 + }, + { + "epoch": 0.7877800407331975, + "grad_norm": 0.262255617987434, + "learning_rate": 1.1357615406456985e-06, + "loss": 0.4622, + "step": 17406 + }, + { + "epoch": 0.7878252998415931, + "grad_norm": 0.6810866592082769, + "learning_rate": 1.1352964766718488e-06, + "loss": 0.3056, + "step": 17407 + }, + { + "epoch": 0.7878705589499887, + "grad_norm": 0.8782494286183904, + "learning_rate": 1.1348314957386093e-06, + "loss": 0.2777, + "step": 17408 + }, + { + "epoch": 0.7879158180583843, + "grad_norm": 0.5896235487727614, + "learning_rate": 1.1343665978559704e-06, + "loss": 0.2746, + "step": 17409 + }, + { + "epoch": 0.7879610771667798, + "grad_norm": 0.6212552351887514, + "learning_rate": 1.1339017830339195e-06, + "loss": 0.2693, + "step": 17410 + }, + { + "epoch": 0.7880063362751754, + "grad_norm": 0.6423392372429871, + "learning_rate": 1.1334370512824466e-06, + "loss": 0.2959, + "step": 17411 + }, + { + "epoch": 0.788051595383571, + "grad_norm": 0.3022873126019532, + "learning_rate": 1.1329724026115345e-06, + "loss": 0.4557, + "step": 17412 + }, + { + "epoch": 0.7880968544919665, + "grad_norm": 0.27724362715051737, + "learning_rate": 1.132507837031171e-06, + "loss": 0.4582, + "step": 17413 + }, + { + "epoch": 0.7881421136003621, + "grad_norm": 0.5863026360362641, + "learning_rate": 1.1320433545513342e-06, + "loss": 0.3247, + "step": 17414 + }, + { + "epoch": 0.7881873727087576, + "grad_norm": 0.6152221198174394, + "learning_rate": 1.1315789551820078e-06, + "loss": 0.2866, + "step": 17415 + }, + { + "epoch": 0.7882326318171532, + "grad_norm": 0.6148723884806709, + "learning_rate": 1.1311146389331667e-06, + "loss": 0.3318, + "step": 17416 + }, + { + "epoch": 0.7882778909255488, + "grad_norm": 0.5900675655471992, + "learning_rate": 1.1306504058147915e-06, + "loss": 0.3178, + "step": 17417 + }, + { + "epoch": 0.7883231500339444, + "grad_norm": 0.6071007835312443, + "learning_rate": 1.1301862558368554e-06, + "loss": 0.3106, + "step": 17418 + }, + { + "epoch": 0.7883684091423399, + "grad_norm": 0.24889869987412316, + "learning_rate": 1.1297221890093302e-06, + "loss": 0.463, + "step": 17419 + }, + { + "epoch": 0.7884136682507354, + "grad_norm": 0.6513774161982812, + "learning_rate": 1.129258205342188e-06, + "loss": 0.319, + "step": 17420 + }, + { + "epoch": 0.788458927359131, + "grad_norm": 0.6863154751764883, + "learning_rate": 1.1287943048454003e-06, + "loss": 0.314, + "step": 17421 + }, + { + "epoch": 0.7885041864675266, + "grad_norm": 0.6930197690759812, + "learning_rate": 1.1283304875289335e-06, + "loss": 0.2905, + "step": 17422 + }, + { + "epoch": 0.7885494455759221, + "grad_norm": 0.27159987742847796, + "learning_rate": 1.1278667534027525e-06, + "loss": 0.4794, + "step": 17423 + }, + { + "epoch": 0.7885947046843177, + "grad_norm": 0.544502579307501, + "learning_rate": 1.1274031024768239e-06, + "loss": 0.3424, + "step": 17424 + }, + { + "epoch": 0.7886399637927133, + "grad_norm": 0.6749145704048627, + "learning_rate": 1.1269395347611074e-06, + "loss": 0.3051, + "step": 17425 + }, + { + "epoch": 0.7886852229011089, + "grad_norm": 0.6598064622660003, + "learning_rate": 1.126476050265567e-06, + "loss": 0.3006, + "step": 17426 + }, + { + "epoch": 0.7887304820095045, + "grad_norm": 0.6273926410342063, + "learning_rate": 1.1260126490001577e-06, + "loss": 0.2898, + "step": 17427 + }, + { + "epoch": 0.7887757411178999, + "grad_norm": 0.26148859512293166, + "learning_rate": 1.12554933097484e-06, + "loss": 0.4581, + "step": 17428 + }, + { + "epoch": 0.7888210002262955, + "grad_norm": 0.6653921917108375, + "learning_rate": 1.1250860961995663e-06, + "loss": 0.2734, + "step": 17429 + }, + { + "epoch": 0.7888662593346911, + "grad_norm": 0.5786418872287515, + "learning_rate": 1.1246229446842927e-06, + "loss": 0.2815, + "step": 17430 + }, + { + "epoch": 0.7889115184430867, + "grad_norm": 0.282184982960806, + "learning_rate": 1.1241598764389699e-06, + "loss": 0.4784, + "step": 17431 + }, + { + "epoch": 0.7889567775514822, + "grad_norm": 0.27743184880991817, + "learning_rate": 1.1236968914735462e-06, + "loss": 0.4798, + "step": 17432 + }, + { + "epoch": 0.7890020366598778, + "grad_norm": 0.6517786801029701, + "learning_rate": 1.1232339897979716e-06, + "loss": 0.2717, + "step": 17433 + }, + { + "epoch": 0.7890472957682734, + "grad_norm": 0.5885837605798061, + "learning_rate": 1.1227711714221928e-06, + "loss": 0.2987, + "step": 17434 + }, + { + "epoch": 0.789092554876669, + "grad_norm": 0.2615744958143664, + "learning_rate": 1.1223084363561538e-06, + "loss": 0.4636, + "step": 17435 + }, + { + "epoch": 0.7891378139850644, + "grad_norm": 0.2574679366280398, + "learning_rate": 1.1218457846097958e-06, + "loss": 0.4644, + "step": 17436 + }, + { + "epoch": 0.78918307309346, + "grad_norm": 0.2889475935321559, + "learning_rate": 1.1213832161930622e-06, + "loss": 0.4656, + "step": 17437 + }, + { + "epoch": 0.7892283322018556, + "grad_norm": 0.6560240655089297, + "learning_rate": 1.120920731115891e-06, + "loss": 0.3093, + "step": 17438 + }, + { + "epoch": 0.7892735913102512, + "grad_norm": 0.6313307722903918, + "learning_rate": 1.1204583293882181e-06, + "loss": 0.3272, + "step": 17439 + }, + { + "epoch": 0.7893188504186468, + "grad_norm": 0.6278866803927241, + "learning_rate": 1.119996011019981e-06, + "loss": 0.3384, + "step": 17440 + }, + { + "epoch": 0.7893641095270423, + "grad_norm": 0.6354112550807469, + "learning_rate": 1.119533776021114e-06, + "loss": 0.3399, + "step": 17441 + }, + { + "epoch": 0.7894093686354379, + "grad_norm": 0.6007230130425893, + "learning_rate": 1.1190716244015487e-06, + "loss": 0.2715, + "step": 17442 + }, + { + "epoch": 0.7894546277438335, + "grad_norm": 0.2898353552609453, + "learning_rate": 1.118609556171213e-06, + "loss": 0.4671, + "step": 17443 + }, + { + "epoch": 0.789499886852229, + "grad_norm": 0.6571880434843874, + "learning_rate": 1.118147571340039e-06, + "loss": 0.3023, + "step": 17444 + }, + { + "epoch": 0.7895451459606245, + "grad_norm": 0.37409319570832594, + "learning_rate": 1.11768566991795e-06, + "loss": 0.4866, + "step": 17445 + }, + { + "epoch": 0.7895904050690201, + "grad_norm": 0.621756727887467, + "learning_rate": 1.1172238519148732e-06, + "loss": 0.3239, + "step": 17446 + }, + { + "epoch": 0.7896356641774157, + "grad_norm": 0.273835122139672, + "learning_rate": 1.1167621173407312e-06, + "loss": 0.4652, + "step": 17447 + }, + { + "epoch": 0.7896809232858113, + "grad_norm": 0.5843208882446077, + "learning_rate": 1.1163004662054434e-06, + "loss": 0.3353, + "step": 17448 + }, + { + "epoch": 0.7897261823942069, + "grad_norm": 0.5808348307996056, + "learning_rate": 1.1158388985189312e-06, + "loss": 0.2871, + "step": 17449 + }, + { + "epoch": 0.7897714415026024, + "grad_norm": 0.5875845384179548, + "learning_rate": 1.1153774142911123e-06, + "loss": 0.3018, + "step": 17450 + }, + { + "epoch": 0.789816700610998, + "grad_norm": 0.594981041595839, + "learning_rate": 1.1149160135319027e-06, + "loss": 0.2962, + "step": 17451 + }, + { + "epoch": 0.7898619597193935, + "grad_norm": 0.6065672910891571, + "learning_rate": 1.1144546962512144e-06, + "loss": 0.2997, + "step": 17452 + }, + { + "epoch": 0.7899072188277891, + "grad_norm": 0.6603432041018994, + "learning_rate": 1.113993462458962e-06, + "loss": 0.2902, + "step": 17453 + }, + { + "epoch": 0.7899524779361846, + "grad_norm": 0.30835422814081065, + "learning_rate": 1.1135323121650542e-06, + "loss": 0.4582, + "step": 17454 + }, + { + "epoch": 0.7899977370445802, + "grad_norm": 0.2869527199386691, + "learning_rate": 1.113071245379402e-06, + "loss": 0.4902, + "step": 17455 + }, + { + "epoch": 0.7900429961529758, + "grad_norm": 0.6115552347964449, + "learning_rate": 1.1126102621119095e-06, + "loss": 0.2943, + "step": 17456 + }, + { + "epoch": 0.7900882552613714, + "grad_norm": 0.6205860996932847, + "learning_rate": 1.1121493623724845e-06, + "loss": 0.287, + "step": 17457 + }, + { + "epoch": 0.7901335143697669, + "grad_norm": 0.6428203422179108, + "learning_rate": 1.111688546171028e-06, + "loss": 0.3194, + "step": 17458 + }, + { + "epoch": 0.7901787734781625, + "grad_norm": 0.6574805982745042, + "learning_rate": 1.1112278135174438e-06, + "loss": 0.278, + "step": 17459 + }, + { + "epoch": 0.790224032586558, + "grad_norm": 0.6510734470303903, + "learning_rate": 1.1107671644216305e-06, + "loss": 0.2562, + "step": 17460 + }, + { + "epoch": 0.7902692916949536, + "grad_norm": 0.2731843402504672, + "learning_rate": 1.1103065988934842e-06, + "loss": 0.4615, + "step": 17461 + }, + { + "epoch": 0.7903145508033492, + "grad_norm": 0.60979356487696, + "learning_rate": 1.109846116942903e-06, + "loss": 0.3162, + "step": 17462 + }, + { + "epoch": 0.7903598099117447, + "grad_norm": 0.26468973035547944, + "learning_rate": 1.109385718579783e-06, + "loss": 0.4731, + "step": 17463 + }, + { + "epoch": 0.7904050690201403, + "grad_norm": 0.7844896219515496, + "learning_rate": 1.1089254038140141e-06, + "loss": 0.3443, + "step": 17464 + }, + { + "epoch": 0.7904503281285359, + "grad_norm": 0.6988599850950089, + "learning_rate": 1.1084651726554868e-06, + "loss": 0.3096, + "step": 17465 + }, + { + "epoch": 0.7904955872369315, + "grad_norm": 0.25101069040293145, + "learning_rate": 1.1080050251140923e-06, + "loss": 0.4939, + "step": 17466 + }, + { + "epoch": 0.790540846345327, + "grad_norm": 0.6044654536372189, + "learning_rate": 1.1075449611997153e-06, + "loss": 0.3558, + "step": 17467 + }, + { + "epoch": 0.7905861054537225, + "grad_norm": 0.6321237819686049, + "learning_rate": 1.1070849809222428e-06, + "loss": 0.3099, + "step": 17468 + }, + { + "epoch": 0.7906313645621181, + "grad_norm": 0.6163219336638721, + "learning_rate": 1.106625084291557e-06, + "loss": 0.2565, + "step": 17469 + }, + { + "epoch": 0.7906766236705137, + "grad_norm": 0.6160762324173575, + "learning_rate": 1.1061652713175425e-06, + "loss": 0.2767, + "step": 17470 + }, + { + "epoch": 0.7907218827789092, + "grad_norm": 0.572659054651402, + "learning_rate": 1.1057055420100755e-06, + "loss": 0.2694, + "step": 17471 + }, + { + "epoch": 0.7907671418873048, + "grad_norm": 0.6128638406139482, + "learning_rate": 1.1052458963790374e-06, + "loss": 0.3049, + "step": 17472 + }, + { + "epoch": 0.7908124009957004, + "grad_norm": 0.6937107123304866, + "learning_rate": 1.104786334434303e-06, + "loss": 0.3289, + "step": 17473 + }, + { + "epoch": 0.790857660104096, + "grad_norm": 0.5975880810758326, + "learning_rate": 1.1043268561857456e-06, + "loss": 0.3183, + "step": 17474 + }, + { + "epoch": 0.7909029192124916, + "grad_norm": 0.5915920011910971, + "learning_rate": 1.103867461643241e-06, + "loss": 0.2738, + "step": 17475 + }, + { + "epoch": 0.790948178320887, + "grad_norm": 0.6566936994613086, + "learning_rate": 1.1034081508166588e-06, + "loss": 0.2447, + "step": 17476 + }, + { + "epoch": 0.7909934374292826, + "grad_norm": 0.6188551895462027, + "learning_rate": 1.1029489237158663e-06, + "loss": 0.3233, + "step": 17477 + }, + { + "epoch": 0.7910386965376782, + "grad_norm": 0.6344862698039115, + "learning_rate": 1.1024897803507322e-06, + "loss": 0.2977, + "step": 17478 + }, + { + "epoch": 0.7910839556460738, + "grad_norm": 0.6057029705000123, + "learning_rate": 1.1020307207311244e-06, + "loss": 0.3067, + "step": 17479 + }, + { + "epoch": 0.7911292147544693, + "grad_norm": 0.6705464585220849, + "learning_rate": 1.1015717448669045e-06, + "loss": 0.2907, + "step": 17480 + }, + { + "epoch": 0.7911744738628649, + "grad_norm": 0.26897717434855983, + "learning_rate": 1.1011128527679332e-06, + "loss": 0.478, + "step": 17481 + }, + { + "epoch": 0.7912197329712605, + "grad_norm": 0.5829298651200642, + "learning_rate": 1.1006540444440738e-06, + "loss": 0.2897, + "step": 17482 + }, + { + "epoch": 0.7912649920796561, + "grad_norm": 0.27077008808618885, + "learning_rate": 1.100195319905182e-06, + "loss": 0.4589, + "step": 17483 + }, + { + "epoch": 0.7913102511880516, + "grad_norm": 0.6846489277337233, + "learning_rate": 1.0997366791611165e-06, + "loss": 0.3433, + "step": 17484 + }, + { + "epoch": 0.7913555102964471, + "grad_norm": 0.6362951825644277, + "learning_rate": 1.0992781222217291e-06, + "loss": 0.3462, + "step": 17485 + }, + { + "epoch": 0.7914007694048427, + "grad_norm": 0.5888260862616873, + "learning_rate": 1.0988196490968766e-06, + "loss": 0.2928, + "step": 17486 + }, + { + "epoch": 0.7914460285132383, + "grad_norm": 0.6074798482109431, + "learning_rate": 1.0983612597964065e-06, + "loss": 0.2874, + "step": 17487 + }, + { + "epoch": 0.7914912876216339, + "grad_norm": 0.6282389175937101, + "learning_rate": 1.0979029543301718e-06, + "loss": 0.2783, + "step": 17488 + }, + { + "epoch": 0.7915365467300294, + "grad_norm": 0.5872783503668951, + "learning_rate": 1.0974447327080185e-06, + "loss": 0.319, + "step": 17489 + }, + { + "epoch": 0.791581805838425, + "grad_norm": 0.27033500104217156, + "learning_rate": 1.0969865949397902e-06, + "loss": 0.4732, + "step": 17490 + }, + { + "epoch": 0.7916270649468206, + "grad_norm": 0.5968647069455049, + "learning_rate": 1.0965285410353326e-06, + "loss": 0.2963, + "step": 17491 + }, + { + "epoch": 0.7916723240552161, + "grad_norm": 0.5431438516873746, + "learning_rate": 1.09607057100449e-06, + "loss": 0.2741, + "step": 17492 + }, + { + "epoch": 0.7917175831636116, + "grad_norm": 0.5890407122521515, + "learning_rate": 1.0956126848571004e-06, + "loss": 0.3069, + "step": 17493 + }, + { + "epoch": 0.7917628422720072, + "grad_norm": 0.6542280291407335, + "learning_rate": 1.0951548826030018e-06, + "loss": 0.311, + "step": 17494 + }, + { + "epoch": 0.7918081013804028, + "grad_norm": 0.5326352594264989, + "learning_rate": 1.0946971642520327e-06, + "loss": 0.2977, + "step": 17495 + }, + { + "epoch": 0.7918533604887984, + "grad_norm": 0.6390072525495386, + "learning_rate": 1.0942395298140262e-06, + "loss": 0.367, + "step": 17496 + }, + { + "epoch": 0.791898619597194, + "grad_norm": 0.25659664659858095, + "learning_rate": 1.0937819792988186e-06, + "loss": 0.4745, + "step": 17497 + }, + { + "epoch": 0.7919438787055895, + "grad_norm": 0.6051771049858189, + "learning_rate": 1.0933245127162373e-06, + "loss": 0.2549, + "step": 17498 + }, + { + "epoch": 0.7919891378139851, + "grad_norm": 0.6346089264271125, + "learning_rate": 1.0928671300761152e-06, + "loss": 0.2758, + "step": 17499 + }, + { + "epoch": 0.7920343969223806, + "grad_norm": 0.7071202798055578, + "learning_rate": 1.092409831388277e-06, + "loss": 0.3607, + "step": 17500 + }, + { + "epoch": 0.7920796560307762, + "grad_norm": 0.6110020712418186, + "learning_rate": 1.091952616662552e-06, + "loss": 0.2941, + "step": 17501 + }, + { + "epoch": 0.7921249151391717, + "grad_norm": 0.8158655254030276, + "learning_rate": 1.0914954859087629e-06, + "loss": 0.2926, + "step": 17502 + }, + { + "epoch": 0.7921701742475673, + "grad_norm": 0.6683921375881807, + "learning_rate": 1.0910384391367296e-06, + "loss": 0.2774, + "step": 17503 + }, + { + "epoch": 0.7922154333559629, + "grad_norm": 0.5978056609012776, + "learning_rate": 1.0905814763562755e-06, + "loss": 0.3091, + "step": 17504 + }, + { + "epoch": 0.7922606924643585, + "grad_norm": 0.6267850613795837, + "learning_rate": 1.0901245975772207e-06, + "loss": 0.2897, + "step": 17505 + }, + { + "epoch": 0.792305951572754, + "grad_norm": 0.25613541359831193, + "learning_rate": 1.0896678028093777e-06, + "loss": 0.4773, + "step": 17506 + }, + { + "epoch": 0.7923512106811496, + "grad_norm": 0.28159218026235666, + "learning_rate": 1.0892110920625643e-06, + "loss": 0.4721, + "step": 17507 + }, + { + "epoch": 0.7923964697895451, + "grad_norm": 0.681314188020368, + "learning_rate": 1.0887544653465942e-06, + "loss": 0.3366, + "step": 17508 + }, + { + "epoch": 0.7924417288979407, + "grad_norm": 0.7454404200334943, + "learning_rate": 1.0882979226712782e-06, + "loss": 0.2951, + "step": 17509 + }, + { + "epoch": 0.7924869880063363, + "grad_norm": 0.2699858245041074, + "learning_rate": 1.0878414640464247e-06, + "loss": 0.4393, + "step": 17510 + }, + { + "epoch": 0.7925322471147318, + "grad_norm": 0.27894602885659225, + "learning_rate": 1.0873850894818433e-06, + "loss": 0.455, + "step": 17511 + }, + { + "epoch": 0.7925775062231274, + "grad_norm": 0.29535618246704853, + "learning_rate": 1.0869287989873406e-06, + "loss": 0.4873, + "step": 17512 + }, + { + "epoch": 0.792622765331523, + "grad_norm": 0.27670213394183, + "learning_rate": 1.0864725925727198e-06, + "loss": 0.4604, + "step": 17513 + }, + { + "epoch": 0.7926680244399186, + "grad_norm": 1.046633322989338, + "learning_rate": 1.0860164702477826e-06, + "loss": 0.2957, + "step": 17514 + }, + { + "epoch": 0.792713283548314, + "grad_norm": 0.6662828314797252, + "learning_rate": 1.0855604320223317e-06, + "loss": 0.3571, + "step": 17515 + }, + { + "epoch": 0.7927585426567096, + "grad_norm": 0.6028625071373115, + "learning_rate": 1.085104477906163e-06, + "loss": 0.2822, + "step": 17516 + }, + { + "epoch": 0.7928038017651052, + "grad_norm": 0.7617858158323627, + "learning_rate": 1.0846486079090773e-06, + "loss": 0.3176, + "step": 17517 + }, + { + "epoch": 0.7928490608735008, + "grad_norm": 0.6198032078988815, + "learning_rate": 1.0841928220408682e-06, + "loss": 0.2881, + "step": 17518 + }, + { + "epoch": 0.7928943199818964, + "grad_norm": 0.595029698665207, + "learning_rate": 1.0837371203113266e-06, + "loss": 0.2502, + "step": 17519 + }, + { + "epoch": 0.7929395790902919, + "grad_norm": 0.6069144135300447, + "learning_rate": 1.0832815027302473e-06, + "loss": 0.2761, + "step": 17520 + }, + { + "epoch": 0.7929848381986875, + "grad_norm": 0.626882704408546, + "learning_rate": 1.08282596930742e-06, + "loss": 0.3101, + "step": 17521 + }, + { + "epoch": 0.7930300973070831, + "grad_norm": 0.5670749951186008, + "learning_rate": 1.0823705200526325e-06, + "loss": 0.2865, + "step": 17522 + }, + { + "epoch": 0.7930753564154787, + "grad_norm": 0.5826516551992915, + "learning_rate": 1.0819151549756685e-06, + "loss": 0.3193, + "step": 17523 + }, + { + "epoch": 0.7931206155238741, + "grad_norm": 0.27305265116969735, + "learning_rate": 1.081459874086316e-06, + "loss": 0.4747, + "step": 17524 + }, + { + "epoch": 0.7931658746322697, + "grad_norm": 0.7086693801004199, + "learning_rate": 1.0810046773943544e-06, + "loss": 0.3022, + "step": 17525 + }, + { + "epoch": 0.7932111337406653, + "grad_norm": 0.5747501021359275, + "learning_rate": 1.0805495649095676e-06, + "loss": 0.2586, + "step": 17526 + }, + { + "epoch": 0.7932563928490609, + "grad_norm": 0.6506436875014426, + "learning_rate": 1.0800945366417316e-06, + "loss": 0.277, + "step": 17527 + }, + { + "epoch": 0.7933016519574564, + "grad_norm": 0.6422115703988123, + "learning_rate": 1.0796395926006258e-06, + "loss": 0.3037, + "step": 17528 + }, + { + "epoch": 0.793346911065852, + "grad_norm": 0.25125905669770676, + "learning_rate": 1.0791847327960236e-06, + "loss": 0.452, + "step": 17529 + }, + { + "epoch": 0.7933921701742476, + "grad_norm": 0.719021657172756, + "learning_rate": 1.0787299572377015e-06, + "loss": 0.2964, + "step": 17530 + }, + { + "epoch": 0.7934374292826432, + "grad_norm": 0.28278646840808924, + "learning_rate": 1.078275265935429e-06, + "loss": 0.4507, + "step": 17531 + }, + { + "epoch": 0.7934826883910387, + "grad_norm": 0.6185083956022757, + "learning_rate": 1.0778206588989748e-06, + "loss": 0.268, + "step": 17532 + }, + { + "epoch": 0.7935279474994342, + "grad_norm": 0.6212316194701377, + "learning_rate": 1.0773661361381088e-06, + "loss": 0.3169, + "step": 17533 + }, + { + "epoch": 0.7935732066078298, + "grad_norm": 0.6590947742085103, + "learning_rate": 1.0769116976625998e-06, + "loss": 0.3188, + "step": 17534 + }, + { + "epoch": 0.7936184657162254, + "grad_norm": 0.5734631130915846, + "learning_rate": 1.0764573434822067e-06, + "loss": 0.2602, + "step": 17535 + }, + { + "epoch": 0.793663724824621, + "grad_norm": 0.5968917546790733, + "learning_rate": 1.0760030736066952e-06, + "loss": 0.2786, + "step": 17536 + }, + { + "epoch": 0.7937089839330165, + "grad_norm": 0.2466211171565022, + "learning_rate": 1.075548888045827e-06, + "loss": 0.4527, + "step": 17537 + }, + { + "epoch": 0.7937542430414121, + "grad_norm": 0.6092165463869262, + "learning_rate": 1.0750947868093608e-06, + "loss": 0.3244, + "step": 17538 + }, + { + "epoch": 0.7937995021498077, + "grad_norm": 0.6165602362370749, + "learning_rate": 1.0746407699070516e-06, + "loss": 0.2905, + "step": 17539 + }, + { + "epoch": 0.7938447612582032, + "grad_norm": 0.25864134317846904, + "learning_rate": 1.0741868373486564e-06, + "loss": 0.4509, + "step": 17540 + }, + { + "epoch": 0.7938900203665987, + "grad_norm": 0.6255255495885236, + "learning_rate": 1.0737329891439303e-06, + "loss": 0.3143, + "step": 17541 + }, + { + "epoch": 0.7939352794749943, + "grad_norm": 0.5879664093941299, + "learning_rate": 1.0732792253026231e-06, + "loss": 0.2957, + "step": 17542 + }, + { + "epoch": 0.7939805385833899, + "grad_norm": 0.7776972902240687, + "learning_rate": 1.0728255458344843e-06, + "loss": 0.3047, + "step": 17543 + }, + { + "epoch": 0.7940257976917855, + "grad_norm": 0.6120374867402096, + "learning_rate": 1.0723719507492648e-06, + "loss": 0.3092, + "step": 17544 + }, + { + "epoch": 0.7940710568001811, + "grad_norm": 0.6156840091395118, + "learning_rate": 1.0719184400567078e-06, + "loss": 0.3109, + "step": 17545 + }, + { + "epoch": 0.7941163159085766, + "grad_norm": 0.6428936730218494, + "learning_rate": 1.0714650137665604e-06, + "loss": 0.3113, + "step": 17546 + }, + { + "epoch": 0.7941615750169722, + "grad_norm": 0.6156742053106594, + "learning_rate": 1.071011671888565e-06, + "loss": 0.3354, + "step": 17547 + }, + { + "epoch": 0.7942068341253677, + "grad_norm": 0.7783029452965077, + "learning_rate": 1.07055841443246e-06, + "loss": 0.3165, + "step": 17548 + }, + { + "epoch": 0.7942520932337633, + "grad_norm": 0.6133884955674895, + "learning_rate": 1.070105241407986e-06, + "loss": 0.2703, + "step": 17549 + }, + { + "epoch": 0.7942973523421588, + "grad_norm": 0.26731785140084197, + "learning_rate": 1.0696521528248822e-06, + "loss": 0.4519, + "step": 17550 + }, + { + "epoch": 0.7943426114505544, + "grad_norm": 0.7314874059177867, + "learning_rate": 1.0691991486928826e-06, + "loss": 0.2993, + "step": 17551 + }, + { + "epoch": 0.79438787055895, + "grad_norm": 0.5885684214964538, + "learning_rate": 1.0687462290217193e-06, + "loss": 0.3224, + "step": 17552 + }, + { + "epoch": 0.7944331296673456, + "grad_norm": 0.6464653570859419, + "learning_rate": 1.0682933938211272e-06, + "loss": 0.3066, + "step": 17553 + }, + { + "epoch": 0.7944783887757412, + "grad_norm": 0.6038104443874369, + "learning_rate": 1.067840643100833e-06, + "loss": 0.2899, + "step": 17554 + }, + { + "epoch": 0.7945236478841367, + "grad_norm": 0.26682934326948565, + "learning_rate": 1.0673879768705681e-06, + "loss": 0.4692, + "step": 17555 + }, + { + "epoch": 0.7945689069925322, + "grad_norm": 0.582118110369442, + "learning_rate": 1.0669353951400563e-06, + "loss": 0.31, + "step": 17556 + }, + { + "epoch": 0.7946141661009278, + "grad_norm": 1.0647840750997166, + "learning_rate": 1.066482897919025e-06, + "loss": 0.2951, + "step": 17557 + }, + { + "epoch": 0.7946594252093234, + "grad_norm": 0.569722159501484, + "learning_rate": 1.0660304852171932e-06, + "loss": 0.3115, + "step": 17558 + }, + { + "epoch": 0.7947046843177189, + "grad_norm": 0.5872530009817197, + "learning_rate": 1.0655781570442864e-06, + "loss": 0.2882, + "step": 17559 + }, + { + "epoch": 0.7947499434261145, + "grad_norm": 0.6712127830923466, + "learning_rate": 1.0651259134100205e-06, + "loss": 0.2829, + "step": 17560 + }, + { + "epoch": 0.7947952025345101, + "grad_norm": 0.6292811854227739, + "learning_rate": 1.0646737543241125e-06, + "loss": 0.2982, + "step": 17561 + }, + { + "epoch": 0.7948404616429057, + "grad_norm": 0.5772369823628817, + "learning_rate": 1.0642216797962795e-06, + "loss": 0.2927, + "step": 17562 + }, + { + "epoch": 0.7948857207513012, + "grad_norm": 0.270961324115532, + "learning_rate": 1.063769689836237e-06, + "loss": 0.4438, + "step": 17563 + }, + { + "epoch": 0.7949309798596967, + "grad_norm": 0.8262430223157452, + "learning_rate": 1.0633177844536924e-06, + "loss": 0.3384, + "step": 17564 + }, + { + "epoch": 0.7949762389680923, + "grad_norm": 0.3042669338076642, + "learning_rate": 1.0628659636583577e-06, + "loss": 0.4954, + "step": 17565 + }, + { + "epoch": 0.7950214980764879, + "grad_norm": 0.6161863340896231, + "learning_rate": 1.0624142274599425e-06, + "loss": 0.2735, + "step": 17566 + }, + { + "epoch": 0.7950667571848835, + "grad_norm": 1.033689895662228, + "learning_rate": 1.061962575868153e-06, + "loss": 0.2773, + "step": 17567 + }, + { + "epoch": 0.795112016293279, + "grad_norm": 0.3166101438817103, + "learning_rate": 1.061511008892691e-06, + "loss": 0.4553, + "step": 17568 + }, + { + "epoch": 0.7951572754016746, + "grad_norm": 0.6893218205777645, + "learning_rate": 1.0610595265432615e-06, + "loss": 0.2797, + "step": 17569 + }, + { + "epoch": 0.7952025345100702, + "grad_norm": 0.65132750453131, + "learning_rate": 1.0606081288295666e-06, + "loss": 0.2648, + "step": 17570 + }, + { + "epoch": 0.7952477936184658, + "grad_norm": 0.700838971389305, + "learning_rate": 1.060156815761304e-06, + "loss": 0.2626, + "step": 17571 + }, + { + "epoch": 0.7952930527268612, + "grad_norm": 0.7335130980432949, + "learning_rate": 1.05970558734817e-06, + "loss": 0.2909, + "step": 17572 + }, + { + "epoch": 0.7953383118352568, + "grad_norm": 0.25704225033621225, + "learning_rate": 1.059254443599862e-06, + "loss": 0.4322, + "step": 17573 + }, + { + "epoch": 0.7953835709436524, + "grad_norm": 0.6633779095086854, + "learning_rate": 1.058803384526072e-06, + "loss": 0.2754, + "step": 17574 + }, + { + "epoch": 0.795428830052048, + "grad_norm": 0.5887178006824763, + "learning_rate": 1.0583524101364945e-06, + "loss": 0.2911, + "step": 17575 + }, + { + "epoch": 0.7954740891604435, + "grad_norm": 0.6230379356817728, + "learning_rate": 1.0579015204408172e-06, + "loss": 0.2468, + "step": 17576 + }, + { + "epoch": 0.7955193482688391, + "grad_norm": 0.27580698258235353, + "learning_rate": 1.0574507154487279e-06, + "loss": 0.4821, + "step": 17577 + }, + { + "epoch": 0.7955646073772347, + "grad_norm": 0.7505786064339385, + "learning_rate": 1.0569999951699145e-06, + "loss": 0.355, + "step": 17578 + }, + { + "epoch": 0.7956098664856303, + "grad_norm": 0.5648279984944123, + "learning_rate": 1.056549359614062e-06, + "loss": 0.2797, + "step": 17579 + }, + { + "epoch": 0.7956551255940258, + "grad_norm": 0.2638095897738326, + "learning_rate": 1.0560988087908525e-06, + "loss": 0.4642, + "step": 17580 + }, + { + "epoch": 0.7957003847024213, + "grad_norm": 0.6005269717461627, + "learning_rate": 1.0556483427099656e-06, + "loss": 0.2654, + "step": 17581 + }, + { + "epoch": 0.7957456438108169, + "grad_norm": 0.6777576064315107, + "learning_rate": 1.0551979613810814e-06, + "loss": 0.2949, + "step": 17582 + }, + { + "epoch": 0.7957909029192125, + "grad_norm": 0.599855625140221, + "learning_rate": 1.0547476648138794e-06, + "loss": 0.2811, + "step": 17583 + }, + { + "epoch": 0.7958361620276081, + "grad_norm": 0.25174656246019217, + "learning_rate": 1.0542974530180327e-06, + "loss": 0.45, + "step": 17584 + }, + { + "epoch": 0.7958814211360036, + "grad_norm": 0.7033764647057604, + "learning_rate": 1.053847326003214e-06, + "loss": 0.2858, + "step": 17585 + }, + { + "epoch": 0.7959266802443992, + "grad_norm": 0.5999488290277657, + "learning_rate": 1.0533972837790985e-06, + "loss": 0.2377, + "step": 17586 + }, + { + "epoch": 0.7959719393527948, + "grad_norm": 0.5964618331830029, + "learning_rate": 1.0529473263553524e-06, + "loss": 0.2866, + "step": 17587 + }, + { + "epoch": 0.7960171984611903, + "grad_norm": 0.6255752090349693, + "learning_rate": 1.052497453741647e-06, + "loss": 0.2431, + "step": 17588 + }, + { + "epoch": 0.7960624575695858, + "grad_norm": 0.6280639343828609, + "learning_rate": 1.052047665947648e-06, + "loss": 0.2714, + "step": 17589 + }, + { + "epoch": 0.7961077166779814, + "grad_norm": 0.6175233941700501, + "learning_rate": 1.051597962983018e-06, + "loss": 0.3195, + "step": 17590 + }, + { + "epoch": 0.796152975786377, + "grad_norm": 0.595813377188827, + "learning_rate": 1.0511483448574212e-06, + "loss": 0.2878, + "step": 17591 + }, + { + "epoch": 0.7961982348947726, + "grad_norm": 0.6555253013213158, + "learning_rate": 1.0506988115805212e-06, + "loss": 0.2934, + "step": 17592 + }, + { + "epoch": 0.7962434940031682, + "grad_norm": 0.6763322039481978, + "learning_rate": 1.0502493631619715e-06, + "loss": 0.2639, + "step": 17593 + }, + { + "epoch": 0.7962887531115637, + "grad_norm": 0.6351978850263815, + "learning_rate": 1.0497999996114322e-06, + "loss": 0.2782, + "step": 17594 + }, + { + "epoch": 0.7963340122199593, + "grad_norm": 0.4986814228547615, + "learning_rate": 1.0493507209385606e-06, + "loss": 0.4738, + "step": 17595 + }, + { + "epoch": 0.7963792713283548, + "grad_norm": 0.2834511240637836, + "learning_rate": 1.0489015271530084e-06, + "loss": 0.4641, + "step": 17596 + }, + { + "epoch": 0.7964245304367504, + "grad_norm": 0.6958528444526049, + "learning_rate": 1.0484524182644257e-06, + "loss": 0.3247, + "step": 17597 + }, + { + "epoch": 0.7964697895451459, + "grad_norm": 0.6465101177250668, + "learning_rate": 1.0480033942824647e-06, + "loss": 0.293, + "step": 17598 + }, + { + "epoch": 0.7965150486535415, + "grad_norm": 0.6326649070370953, + "learning_rate": 1.0475544552167744e-06, + "loss": 0.3043, + "step": 17599 + }, + { + "epoch": 0.7965603077619371, + "grad_norm": 0.5894944985589974, + "learning_rate": 1.0471056010769997e-06, + "loss": 0.3071, + "step": 17600 + }, + { + "epoch": 0.7966055668703327, + "grad_norm": 0.2670228912312689, + "learning_rate": 1.0466568318727837e-06, + "loss": 0.458, + "step": 17601 + }, + { + "epoch": 0.7966508259787283, + "grad_norm": 0.5992312412861361, + "learning_rate": 1.0462081476137726e-06, + "loss": 0.3272, + "step": 17602 + }, + { + "epoch": 0.7966960850871238, + "grad_norm": 0.7801764524156193, + "learning_rate": 1.0457595483096033e-06, + "loss": 0.2947, + "step": 17603 + }, + { + "epoch": 0.7967413441955193, + "grad_norm": 0.608868802735606, + "learning_rate": 1.0453110339699184e-06, + "loss": 0.3329, + "step": 17604 + }, + { + "epoch": 0.7967866033039149, + "grad_norm": 0.7777214442910269, + "learning_rate": 1.0448626046043536e-06, + "loss": 0.2705, + "step": 17605 + }, + { + "epoch": 0.7968318624123105, + "grad_norm": 0.6140735157977182, + "learning_rate": 1.0444142602225426e-06, + "loss": 0.29, + "step": 17606 + }, + { + "epoch": 0.796877121520706, + "grad_norm": 0.6711582640372278, + "learning_rate": 1.0439660008341208e-06, + "loss": 0.3553, + "step": 17607 + }, + { + "epoch": 0.7969223806291016, + "grad_norm": 0.6043826392582125, + "learning_rate": 1.0435178264487205e-06, + "loss": 0.3375, + "step": 17608 + }, + { + "epoch": 0.7969676397374972, + "grad_norm": 0.2688407526676726, + "learning_rate": 1.0430697370759706e-06, + "loss": 0.4748, + "step": 17609 + }, + { + "epoch": 0.7970128988458928, + "grad_norm": 0.6964940237730056, + "learning_rate": 1.0426217327254984e-06, + "loss": 0.257, + "step": 17610 + }, + { + "epoch": 0.7970581579542882, + "grad_norm": 0.6460807926911792, + "learning_rate": 1.0421738134069309e-06, + "loss": 0.2833, + "step": 17611 + }, + { + "epoch": 0.7971034170626838, + "grad_norm": 0.6468028147962669, + "learning_rate": 1.041725979129894e-06, + "loss": 0.3434, + "step": 17612 + }, + { + "epoch": 0.7971486761710794, + "grad_norm": 0.2694178792341854, + "learning_rate": 1.0412782299040086e-06, + "loss": 0.4458, + "step": 17613 + }, + { + "epoch": 0.797193935279475, + "grad_norm": 0.296431835651895, + "learning_rate": 1.040830565738895e-06, + "loss": 0.4712, + "step": 17614 + }, + { + "epoch": 0.7972391943878706, + "grad_norm": 0.6637816879674655, + "learning_rate": 1.0403829866441734e-06, + "loss": 0.2945, + "step": 17615 + }, + { + "epoch": 0.7972844534962661, + "grad_norm": 0.5937931470412802, + "learning_rate": 1.0399354926294596e-06, + "loss": 0.2797, + "step": 17616 + }, + { + "epoch": 0.7973297126046617, + "grad_norm": 0.5951980187062453, + "learning_rate": 1.0394880837043708e-06, + "loss": 0.2686, + "step": 17617 + }, + { + "epoch": 0.7973749717130573, + "grad_norm": 0.6100560066934261, + "learning_rate": 1.0390407598785196e-06, + "loss": 0.2934, + "step": 17618 + }, + { + "epoch": 0.7974202308214529, + "grad_norm": 0.6433777122358736, + "learning_rate": 1.0385935211615156e-06, + "loss": 0.3198, + "step": 17619 + }, + { + "epoch": 0.7974654899298483, + "grad_norm": 0.5906931436439543, + "learning_rate": 1.0381463675629705e-06, + "loss": 0.3051, + "step": 17620 + }, + { + "epoch": 0.7975107490382439, + "grad_norm": 0.6337864263053438, + "learning_rate": 1.0376992990924934e-06, + "loss": 0.2657, + "step": 17621 + }, + { + "epoch": 0.7975560081466395, + "grad_norm": 0.6088029034611621, + "learning_rate": 1.0372523157596892e-06, + "loss": 0.3196, + "step": 17622 + }, + { + "epoch": 0.7976012672550351, + "grad_norm": 0.599341414845403, + "learning_rate": 1.0368054175741605e-06, + "loss": 0.2832, + "step": 17623 + }, + { + "epoch": 0.7976465263634306, + "grad_norm": 0.6090217413328994, + "learning_rate": 1.0363586045455116e-06, + "loss": 0.3151, + "step": 17624 + }, + { + "epoch": 0.7976917854718262, + "grad_norm": 0.7199767068371429, + "learning_rate": 1.0359118766833449e-06, + "loss": 0.2896, + "step": 17625 + }, + { + "epoch": 0.7977370445802218, + "grad_norm": 0.6920834954483712, + "learning_rate": 1.0354652339972554e-06, + "loss": 0.2849, + "step": 17626 + }, + { + "epoch": 0.7977823036886174, + "grad_norm": 0.6620269494371919, + "learning_rate": 1.0350186764968412e-06, + "loss": 0.2696, + "step": 17627 + }, + { + "epoch": 0.797827562797013, + "grad_norm": 0.43858767360734446, + "learning_rate": 1.0345722041917e-06, + "loss": 0.4844, + "step": 17628 + }, + { + "epoch": 0.7978728219054084, + "grad_norm": 0.6894559615869037, + "learning_rate": 1.0341258170914232e-06, + "loss": 0.2617, + "step": 17629 + }, + { + "epoch": 0.797918081013804, + "grad_norm": 0.605112420834483, + "learning_rate": 1.0336795152056006e-06, + "loss": 0.3109, + "step": 17630 + }, + { + "epoch": 0.7979633401221996, + "grad_norm": 0.7878539249551214, + "learning_rate": 1.0332332985438248e-06, + "loss": 0.3024, + "step": 17631 + }, + { + "epoch": 0.7980085992305952, + "grad_norm": 0.5933180263637753, + "learning_rate": 1.0327871671156814e-06, + "loss": 0.2722, + "step": 17632 + }, + { + "epoch": 0.7980538583389907, + "grad_norm": 0.5777220334047763, + "learning_rate": 1.0323411209307587e-06, + "loss": 0.2742, + "step": 17633 + }, + { + "epoch": 0.7980991174473863, + "grad_norm": 0.5305555011830864, + "learning_rate": 1.03189515999864e-06, + "loss": 0.2725, + "step": 17634 + }, + { + "epoch": 0.7981443765557819, + "grad_norm": 0.6397495079379393, + "learning_rate": 1.0314492843289053e-06, + "loss": 0.3186, + "step": 17635 + }, + { + "epoch": 0.7981896356641774, + "grad_norm": 0.5854544091507691, + "learning_rate": 1.0310034939311376e-06, + "loss": 0.2468, + "step": 17636 + }, + { + "epoch": 0.798234894772573, + "grad_norm": 0.6268437136490664, + "learning_rate": 1.030557788814916e-06, + "loss": 0.2811, + "step": 17637 + }, + { + "epoch": 0.7982801538809685, + "grad_norm": 0.61366665863228, + "learning_rate": 1.0301121689898158e-06, + "loss": 0.3128, + "step": 17638 + }, + { + "epoch": 0.7983254129893641, + "grad_norm": 0.6636046824576902, + "learning_rate": 1.0296666344654115e-06, + "loss": 0.3215, + "step": 17639 + }, + { + "epoch": 0.7983706720977597, + "grad_norm": 0.6466243191249332, + "learning_rate": 1.029221185251278e-06, + "loss": 0.312, + "step": 17640 + }, + { + "epoch": 0.7984159312061553, + "grad_norm": 0.6267688365230768, + "learning_rate": 1.0287758213569865e-06, + "loss": 0.2783, + "step": 17641 + }, + { + "epoch": 0.7984611903145508, + "grad_norm": 0.6067358900754252, + "learning_rate": 1.0283305427921058e-06, + "loss": 0.2886, + "step": 17642 + }, + { + "epoch": 0.7985064494229464, + "grad_norm": 0.6130856239014243, + "learning_rate": 1.0278853495662028e-06, + "loss": 0.269, + "step": 17643 + }, + { + "epoch": 0.7985517085313419, + "grad_norm": 0.6277865285234768, + "learning_rate": 1.0274402416888452e-06, + "loss": 0.2817, + "step": 17644 + }, + { + "epoch": 0.7985969676397375, + "grad_norm": 0.26982262436094706, + "learning_rate": 1.0269952191695948e-06, + "loss": 0.4845, + "step": 17645 + }, + { + "epoch": 0.798642226748133, + "grad_norm": 0.29022274123693, + "learning_rate": 1.0265502820180167e-06, + "loss": 0.486, + "step": 17646 + }, + { + "epoch": 0.7986874858565286, + "grad_norm": 0.27151041749692006, + "learning_rate": 1.026105430243669e-06, + "loss": 0.472, + "step": 17647 + }, + { + "epoch": 0.7987327449649242, + "grad_norm": 0.6086692202481577, + "learning_rate": 1.0256606638561094e-06, + "loss": 0.2523, + "step": 17648 + }, + { + "epoch": 0.7987780040733198, + "grad_norm": 0.6459863488738338, + "learning_rate": 1.0252159828648961e-06, + "loss": 0.3177, + "step": 17649 + }, + { + "epoch": 0.7988232631817154, + "grad_norm": 0.6423110726927362, + "learning_rate": 1.024771387279585e-06, + "loss": 0.3043, + "step": 17650 + }, + { + "epoch": 0.7988685222901108, + "grad_norm": 0.6897569820488314, + "learning_rate": 1.024326877109728e-06, + "loss": 0.29, + "step": 17651 + }, + { + "epoch": 0.7989137813985064, + "grad_norm": 0.6781956667845884, + "learning_rate": 1.0238824523648744e-06, + "loss": 0.2848, + "step": 17652 + }, + { + "epoch": 0.798959040506902, + "grad_norm": 0.6070597055985785, + "learning_rate": 1.0234381130545757e-06, + "loss": 0.2834, + "step": 17653 + }, + { + "epoch": 0.7990042996152976, + "grad_norm": 0.6186120877052891, + "learning_rate": 1.0229938591883798e-06, + "loss": 0.2665, + "step": 17654 + }, + { + "epoch": 0.7990495587236931, + "grad_norm": 0.6134788953343151, + "learning_rate": 1.0225496907758314e-06, + "loss": 0.2771, + "step": 17655 + }, + { + "epoch": 0.7990948178320887, + "grad_norm": 0.6517754167771941, + "learning_rate": 1.022105607826473e-06, + "loss": 0.3166, + "step": 17656 + }, + { + "epoch": 0.7991400769404843, + "grad_norm": 0.672306692935697, + "learning_rate": 1.0216616103498494e-06, + "loss": 0.2806, + "step": 17657 + }, + { + "epoch": 0.7991853360488799, + "grad_norm": 0.6391489481315076, + "learning_rate": 1.021217698355499e-06, + "loss": 0.3117, + "step": 17658 + }, + { + "epoch": 0.7992305951572753, + "grad_norm": 0.6034580647484019, + "learning_rate": 1.0207738718529592e-06, + "loss": 0.2954, + "step": 17659 + }, + { + "epoch": 0.7992758542656709, + "grad_norm": 0.601464046082302, + "learning_rate": 1.0203301308517687e-06, + "loss": 0.2504, + "step": 17660 + }, + { + "epoch": 0.7993211133740665, + "grad_norm": 0.6401166761859084, + "learning_rate": 1.0198864753614602e-06, + "loss": 0.2727, + "step": 17661 + }, + { + "epoch": 0.7993663724824621, + "grad_norm": 0.6225955998793017, + "learning_rate": 1.0194429053915683e-06, + "loss": 0.2765, + "step": 17662 + }, + { + "epoch": 0.7994116315908577, + "grad_norm": 0.26537395003035563, + "learning_rate": 1.0189994209516234e-06, + "loss": 0.4749, + "step": 17663 + }, + { + "epoch": 0.7994568906992532, + "grad_norm": 0.6540042865726453, + "learning_rate": 1.0185560220511525e-06, + "loss": 0.2843, + "step": 17664 + }, + { + "epoch": 0.7995021498076488, + "grad_norm": 0.6185663619453415, + "learning_rate": 1.018112708699685e-06, + "loss": 0.3233, + "step": 17665 + }, + { + "epoch": 0.7995474089160444, + "grad_norm": 0.647486381785589, + "learning_rate": 1.0176694809067471e-06, + "loss": 0.2752, + "step": 17666 + }, + { + "epoch": 0.79959266802444, + "grad_norm": 0.6502279765653171, + "learning_rate": 1.0172263386818615e-06, + "loss": 0.3064, + "step": 17667 + }, + { + "epoch": 0.7996379271328354, + "grad_norm": 0.2757197327489894, + "learning_rate": 1.016783282034548e-06, + "loss": 0.471, + "step": 17668 + }, + { + "epoch": 0.799683186241231, + "grad_norm": 0.7585339264168486, + "learning_rate": 1.0163403109743287e-06, + "loss": 0.3186, + "step": 17669 + }, + { + "epoch": 0.7997284453496266, + "grad_norm": 0.5495025188913338, + "learning_rate": 1.0158974255107223e-06, + "loss": 0.3192, + "step": 17670 + }, + { + "epoch": 0.7997737044580222, + "grad_norm": 0.6535560242829833, + "learning_rate": 1.0154546256532438e-06, + "loss": 0.295, + "step": 17671 + }, + { + "epoch": 0.7998189635664178, + "grad_norm": 0.6595014318560279, + "learning_rate": 1.0150119114114066e-06, + "loss": 0.3458, + "step": 17672 + }, + { + "epoch": 0.7998642226748133, + "grad_norm": 0.5929165171952434, + "learning_rate": 1.0145692827947256e-06, + "loss": 0.3189, + "step": 17673 + }, + { + "epoch": 0.7999094817832089, + "grad_norm": 0.6466146823306345, + "learning_rate": 1.0141267398127098e-06, + "loss": 0.3333, + "step": 17674 + }, + { + "epoch": 0.7999547408916045, + "grad_norm": 0.7863825353875846, + "learning_rate": 1.0136842824748694e-06, + "loss": 0.303, + "step": 17675 + }, + { + "epoch": 0.8, + "grad_norm": 0.6008716630456132, + "learning_rate": 1.013241910790711e-06, + "loss": 0.3102, + "step": 17676 + }, + { + "epoch": 0.8000452591083955, + "grad_norm": 0.5469202165957461, + "learning_rate": 1.012799624769738e-06, + "loss": 0.2713, + "step": 17677 + }, + { + "epoch": 0.8000905182167911, + "grad_norm": 0.6538977964631789, + "learning_rate": 1.0123574244214552e-06, + "loss": 0.2669, + "step": 17678 + }, + { + "epoch": 0.8001357773251867, + "grad_norm": 0.6411931463000914, + "learning_rate": 1.0119153097553657e-06, + "loss": 0.2752, + "step": 17679 + }, + { + "epoch": 0.8001810364335823, + "grad_norm": 0.6266810385085103, + "learning_rate": 1.011473280780968e-06, + "loss": 0.3539, + "step": 17680 + }, + { + "epoch": 0.8002262955419778, + "grad_norm": 0.2999474320900906, + "learning_rate": 1.011031337507758e-06, + "loss": 0.4766, + "step": 17681 + }, + { + "epoch": 0.8002715546503734, + "grad_norm": 0.7185236225481391, + "learning_rate": 1.0105894799452337e-06, + "loss": 0.3193, + "step": 17682 + }, + { + "epoch": 0.800316813758769, + "grad_norm": 0.6359176456441936, + "learning_rate": 1.0101477081028899e-06, + "loss": 0.282, + "step": 17683 + }, + { + "epoch": 0.8003620728671645, + "grad_norm": 0.6174531661683802, + "learning_rate": 1.0097060219902183e-06, + "loss": 0.2772, + "step": 17684 + }, + { + "epoch": 0.8004073319755601, + "grad_norm": 0.26616502028666844, + "learning_rate": 1.0092644216167076e-06, + "loss": 0.454, + "step": 17685 + }, + { + "epoch": 0.8004525910839556, + "grad_norm": 0.6308937760368052, + "learning_rate": 1.0088229069918488e-06, + "loss": 0.2659, + "step": 17686 + }, + { + "epoch": 0.8004978501923512, + "grad_norm": 0.5928485973391232, + "learning_rate": 1.0083814781251266e-06, + "loss": 0.2866, + "step": 17687 + }, + { + "epoch": 0.8005431093007468, + "grad_norm": 0.6026542050506672, + "learning_rate": 1.0079401350260288e-06, + "loss": 0.2788, + "step": 17688 + }, + { + "epoch": 0.8005883684091424, + "grad_norm": 0.6765753998077264, + "learning_rate": 1.0074988777040368e-06, + "loss": 0.3273, + "step": 17689 + }, + { + "epoch": 0.8006336275175379, + "grad_norm": 0.7063825924900976, + "learning_rate": 1.0070577061686305e-06, + "loss": 0.3075, + "step": 17690 + }, + { + "epoch": 0.8006788866259335, + "grad_norm": 0.6579882369889726, + "learning_rate": 1.0066166204292915e-06, + "loss": 0.3028, + "step": 17691 + }, + { + "epoch": 0.800724145734329, + "grad_norm": 0.6188920702015304, + "learning_rate": 1.006175620495497e-06, + "loss": 0.282, + "step": 17692 + }, + { + "epoch": 0.8007694048427246, + "grad_norm": 0.7598314254003824, + "learning_rate": 1.005734706376721e-06, + "loss": 0.284, + "step": 17693 + }, + { + "epoch": 0.8008146639511201, + "grad_norm": 0.6781608359130874, + "learning_rate": 1.005293878082439e-06, + "loss": 0.2815, + "step": 17694 + }, + { + "epoch": 0.8008599230595157, + "grad_norm": 0.6378508227692651, + "learning_rate": 1.0048531356221235e-06, + "loss": 0.3225, + "step": 17695 + }, + { + "epoch": 0.8009051821679113, + "grad_norm": 0.5678500878789736, + "learning_rate": 1.0044124790052445e-06, + "loss": 0.2734, + "step": 17696 + }, + { + "epoch": 0.8009504412763069, + "grad_norm": 0.5803032201494306, + "learning_rate": 1.003971908241268e-06, + "loss": 0.324, + "step": 17697 + }, + { + "epoch": 0.8009957003847025, + "grad_norm": 0.612816553250393, + "learning_rate": 1.0035314233396625e-06, + "loss": 0.3237, + "step": 17698 + }, + { + "epoch": 0.801040959493098, + "grad_norm": 0.30486019227051825, + "learning_rate": 1.003091024309894e-06, + "loss": 0.4817, + "step": 17699 + }, + { + "epoch": 0.8010862186014935, + "grad_norm": 0.5718601567690808, + "learning_rate": 1.0026507111614237e-06, + "loss": 0.3197, + "step": 17700 + }, + { + "epoch": 0.8011314777098891, + "grad_norm": 0.6975670743286432, + "learning_rate": 1.0022104839037117e-06, + "loss": 0.3026, + "step": 17701 + }, + { + "epoch": 0.8011767368182847, + "grad_norm": 0.698243244761055, + "learning_rate": 1.0017703425462188e-06, + "loss": 0.2704, + "step": 17702 + }, + { + "epoch": 0.8012219959266802, + "grad_norm": 0.6034277810329546, + "learning_rate": 1.001330287098401e-06, + "loss": 0.2848, + "step": 17703 + }, + { + "epoch": 0.8012672550350758, + "grad_norm": 0.6413270440601546, + "learning_rate": 1.000890317569715e-06, + "loss": 0.2948, + "step": 17704 + }, + { + "epoch": 0.8013125141434714, + "grad_norm": 0.5807050571774788, + "learning_rate": 1.0004504339696142e-06, + "loss": 0.2834, + "step": 17705 + }, + { + "epoch": 0.801357773251867, + "grad_norm": 0.5554722470951098, + "learning_rate": 1.0000106363075486e-06, + "loss": 0.2843, + "step": 17706 + }, + { + "epoch": 0.8014030323602626, + "grad_norm": 0.6102523337797331, + "learning_rate": 9.995709245929691e-07, + "loss": 0.3053, + "step": 17707 + }, + { + "epoch": 0.801448291468658, + "grad_norm": 0.5993098468757605, + "learning_rate": 9.991312988353252e-07, + "loss": 0.297, + "step": 17708 + }, + { + "epoch": 0.8014935505770536, + "grad_norm": 0.25248078783050365, + "learning_rate": 9.986917590440626e-07, + "loss": 0.4618, + "step": 17709 + }, + { + "epoch": 0.8015388096854492, + "grad_norm": 0.6236113679626196, + "learning_rate": 9.98252305228623e-07, + "loss": 0.3025, + "step": 17710 + }, + { + "epoch": 0.8015840687938448, + "grad_norm": 0.652868395328871, + "learning_rate": 9.978129373984513e-07, + "loss": 0.2469, + "step": 17711 + }, + { + "epoch": 0.8016293279022403, + "grad_norm": 0.8312087621953048, + "learning_rate": 9.973736555629894e-07, + "loss": 0.3167, + "step": 17712 + }, + { + "epoch": 0.8016745870106359, + "grad_norm": 0.5410252483694974, + "learning_rate": 9.969344597316737e-07, + "loss": 0.2976, + "step": 17713 + }, + { + "epoch": 0.8017198461190315, + "grad_norm": 0.5911874079384539, + "learning_rate": 9.964953499139412e-07, + "loss": 0.265, + "step": 17714 + }, + { + "epoch": 0.801765105227427, + "grad_norm": 0.6134724463121841, + "learning_rate": 9.96056326119229e-07, + "loss": 0.2728, + "step": 17715 + }, + { + "epoch": 0.8018103643358225, + "grad_norm": 0.6130417169184403, + "learning_rate": 9.95617388356968e-07, + "loss": 0.3108, + "step": 17716 + }, + { + "epoch": 0.8018556234442181, + "grad_norm": 0.5780465373609368, + "learning_rate": 9.951785366365924e-07, + "loss": 0.2989, + "step": 17717 + }, + { + "epoch": 0.8019008825526137, + "grad_norm": 0.2673010955946115, + "learning_rate": 9.9473977096753e-07, + "loss": 0.4656, + "step": 17718 + }, + { + "epoch": 0.8019461416610093, + "grad_norm": 0.6280599241854697, + "learning_rate": 9.943010913592072e-07, + "loss": 0.296, + "step": 17719 + }, + { + "epoch": 0.8019914007694049, + "grad_norm": 0.2905817135983755, + "learning_rate": 9.938624978210514e-07, + "loss": 0.4676, + "step": 17720 + }, + { + "epoch": 0.8020366598778004, + "grad_norm": 0.2699402741418339, + "learning_rate": 9.934239903624893e-07, + "loss": 0.4627, + "step": 17721 + }, + { + "epoch": 0.802081918986196, + "grad_norm": 0.5640631750848742, + "learning_rate": 9.929855689929374e-07, + "loss": 0.2849, + "step": 17722 + }, + { + "epoch": 0.8021271780945916, + "grad_norm": 0.6868637680081962, + "learning_rate": 9.925472337218194e-07, + "loss": 0.2839, + "step": 17723 + }, + { + "epoch": 0.8021724372029871, + "grad_norm": 0.26501366463231285, + "learning_rate": 9.921089845585536e-07, + "loss": 0.4631, + "step": 17724 + }, + { + "epoch": 0.8022176963113826, + "grad_norm": 0.6741465898250314, + "learning_rate": 9.916708215125586e-07, + "loss": 0.2915, + "step": 17725 + }, + { + "epoch": 0.8022629554197782, + "grad_norm": 0.6280940510386683, + "learning_rate": 9.912327445932446e-07, + "loss": 0.299, + "step": 17726 + }, + { + "epoch": 0.8023082145281738, + "grad_norm": 0.6069141990952394, + "learning_rate": 9.907947538100265e-07, + "loss": 0.2883, + "step": 17727 + }, + { + "epoch": 0.8023534736365694, + "grad_norm": 0.6037705382049574, + "learning_rate": 9.903568491723176e-07, + "loss": 0.33, + "step": 17728 + }, + { + "epoch": 0.8023987327449649, + "grad_norm": 0.6288078967755846, + "learning_rate": 9.899190306895257e-07, + "loss": 0.2898, + "step": 17729 + }, + { + "epoch": 0.8024439918533605, + "grad_norm": 0.555905713304811, + "learning_rate": 9.894812983710556e-07, + "loss": 0.2756, + "step": 17730 + }, + { + "epoch": 0.802489250961756, + "grad_norm": 0.2952776150612605, + "learning_rate": 9.89043652226317e-07, + "loss": 0.4919, + "step": 17731 + }, + { + "epoch": 0.8025345100701516, + "grad_norm": 0.5651942536592243, + "learning_rate": 9.8860609226471e-07, + "loss": 0.2967, + "step": 17732 + }, + { + "epoch": 0.8025797691785472, + "grad_norm": 0.6505822448237075, + "learning_rate": 9.881686184956396e-07, + "loss": 0.3383, + "step": 17733 + }, + { + "epoch": 0.8026250282869427, + "grad_norm": 0.5949901731512393, + "learning_rate": 9.877312309285036e-07, + "loss": 0.3293, + "step": 17734 + }, + { + "epoch": 0.8026702873953383, + "grad_norm": 0.6151881571192074, + "learning_rate": 9.872939295726997e-07, + "loss": 0.2992, + "step": 17735 + }, + { + "epoch": 0.8027155465037339, + "grad_norm": 0.621174808148157, + "learning_rate": 9.868567144376256e-07, + "loss": 0.3155, + "step": 17736 + }, + { + "epoch": 0.8027608056121295, + "grad_norm": 0.6111958778982538, + "learning_rate": 9.864195855326764e-07, + "loss": 0.2803, + "step": 17737 + }, + { + "epoch": 0.802806064720525, + "grad_norm": 0.5843529790163214, + "learning_rate": 9.85982542867243e-07, + "loss": 0.2744, + "step": 17738 + }, + { + "epoch": 0.8028513238289205, + "grad_norm": 0.26551540249461303, + "learning_rate": 9.855455864507157e-07, + "loss": 0.4794, + "step": 17739 + }, + { + "epoch": 0.8028965829373161, + "grad_norm": 0.6113881148320287, + "learning_rate": 9.851087162924845e-07, + "loss": 0.2857, + "step": 17740 + }, + { + "epoch": 0.8029418420457117, + "grad_norm": 0.6239502664209545, + "learning_rate": 9.846719324019372e-07, + "loss": 0.2742, + "step": 17741 + }, + { + "epoch": 0.8029871011541073, + "grad_norm": 0.6533691162116929, + "learning_rate": 9.842352347884582e-07, + "loss": 0.3235, + "step": 17742 + }, + { + "epoch": 0.8030323602625028, + "grad_norm": 1.5388800022815832, + "learning_rate": 9.837986234614288e-07, + "loss": 0.2605, + "step": 17743 + }, + { + "epoch": 0.8030776193708984, + "grad_norm": 0.543305453627655, + "learning_rate": 9.833620984302338e-07, + "loss": 0.2682, + "step": 17744 + }, + { + "epoch": 0.803122878479294, + "grad_norm": 0.6244755921904002, + "learning_rate": 9.829256597042496e-07, + "loss": 0.2777, + "step": 17745 + }, + { + "epoch": 0.8031681375876896, + "grad_norm": 0.6570277246440114, + "learning_rate": 9.824893072928572e-07, + "loss": 0.2946, + "step": 17746 + }, + { + "epoch": 0.803213396696085, + "grad_norm": 0.6215458936046111, + "learning_rate": 9.820530412054302e-07, + "loss": 0.2848, + "step": 17747 + }, + { + "epoch": 0.8032586558044806, + "grad_norm": 0.6040671769548295, + "learning_rate": 9.816168614513423e-07, + "loss": 0.2676, + "step": 17748 + }, + { + "epoch": 0.8033039149128762, + "grad_norm": 0.6104696414519816, + "learning_rate": 9.81180768039966e-07, + "loss": 0.2659, + "step": 17749 + }, + { + "epoch": 0.8033491740212718, + "grad_norm": 0.5967745873187079, + "learning_rate": 9.807447609806752e-07, + "loss": 0.2717, + "step": 17750 + }, + { + "epoch": 0.8033944331296673, + "grad_norm": 0.6607978228742731, + "learning_rate": 9.803088402828326e-07, + "loss": 0.316, + "step": 17751 + }, + { + "epoch": 0.8034396922380629, + "grad_norm": 0.659657357573865, + "learning_rate": 9.798730059558076e-07, + "loss": 0.2631, + "step": 17752 + }, + { + "epoch": 0.8034849513464585, + "grad_norm": 0.6103075212447557, + "learning_rate": 9.794372580089645e-07, + "loss": 0.2906, + "step": 17753 + }, + { + "epoch": 0.8035302104548541, + "grad_norm": 0.6494566495041401, + "learning_rate": 9.790015964516692e-07, + "loss": 0.3029, + "step": 17754 + }, + { + "epoch": 0.8035754695632497, + "grad_norm": 1.0423514431242897, + "learning_rate": 9.785660212932775e-07, + "loss": 0.3044, + "step": 17755 + }, + { + "epoch": 0.8036207286716451, + "grad_norm": 0.5784660241782171, + "learning_rate": 9.781305325431512e-07, + "loss": 0.2676, + "step": 17756 + }, + { + "epoch": 0.8036659877800407, + "grad_norm": 0.6102474087109717, + "learning_rate": 9.776951302106485e-07, + "loss": 0.266, + "step": 17757 + }, + { + "epoch": 0.8037112468884363, + "grad_norm": 0.6286956970183284, + "learning_rate": 9.772598143051242e-07, + "loss": 0.3052, + "step": 17758 + }, + { + "epoch": 0.8037565059968319, + "grad_norm": 0.6239997771945285, + "learning_rate": 9.768245848359304e-07, + "loss": 0.2955, + "step": 17759 + }, + { + "epoch": 0.8038017651052274, + "grad_norm": 0.6649553793926286, + "learning_rate": 9.763894418124215e-07, + "loss": 0.3546, + "step": 17760 + }, + { + "epoch": 0.803847024213623, + "grad_norm": 0.28664882723210144, + "learning_rate": 9.75954385243944e-07, + "loss": 0.4974, + "step": 17761 + }, + { + "epoch": 0.8038922833220186, + "grad_norm": 0.5878687259601594, + "learning_rate": 9.755194151398494e-07, + "loss": 0.3236, + "step": 17762 + }, + { + "epoch": 0.8039375424304142, + "grad_norm": 0.7083215294265206, + "learning_rate": 9.750845315094826e-07, + "loss": 0.26, + "step": 17763 + }, + { + "epoch": 0.8039828015388096, + "grad_norm": 0.2928910819210211, + "learning_rate": 9.746497343621857e-07, + "loss": 0.4376, + "step": 17764 + }, + { + "epoch": 0.8040280606472052, + "grad_norm": 0.24709386014473778, + "learning_rate": 9.74215023707304e-07, + "loss": 0.4392, + "step": 17765 + }, + { + "epoch": 0.8040733197556008, + "grad_norm": 0.26616740028184244, + "learning_rate": 9.737803995541777e-07, + "loss": 0.4614, + "step": 17766 + }, + { + "epoch": 0.8041185788639964, + "grad_norm": 0.7103385064900106, + "learning_rate": 9.733458619121449e-07, + "loss": 0.351, + "step": 17767 + }, + { + "epoch": 0.804163837972392, + "grad_norm": 0.6863027935741712, + "learning_rate": 9.72911410790542e-07, + "loss": 0.2807, + "step": 17768 + }, + { + "epoch": 0.8042090970807875, + "grad_norm": 0.2519538309492859, + "learning_rate": 9.724770461987044e-07, + "loss": 0.4491, + "step": 17769 + }, + { + "epoch": 0.8042543561891831, + "grad_norm": 0.614031610054373, + "learning_rate": 9.720427681459665e-07, + "loss": 0.2903, + "step": 17770 + }, + { + "epoch": 0.8042996152975787, + "grad_norm": 0.6690508718365019, + "learning_rate": 9.71608576641659e-07, + "loss": 0.3037, + "step": 17771 + }, + { + "epoch": 0.8043448744059742, + "grad_norm": 0.7043153878217588, + "learning_rate": 9.711744716951093e-07, + "loss": 0.3126, + "step": 17772 + }, + { + "epoch": 0.8043901335143697, + "grad_norm": 0.5684993411944421, + "learning_rate": 9.707404533156479e-07, + "loss": 0.2348, + "step": 17773 + }, + { + "epoch": 0.8044353926227653, + "grad_norm": 0.625394348488827, + "learning_rate": 9.703065215125978e-07, + "loss": 0.3292, + "step": 17774 + }, + { + "epoch": 0.8044806517311609, + "grad_norm": 0.612393036000987, + "learning_rate": 9.698726762952859e-07, + "loss": 0.2888, + "step": 17775 + }, + { + "epoch": 0.8045259108395565, + "grad_norm": 0.6443397133260294, + "learning_rate": 9.69438917673033e-07, + "loss": 0.3287, + "step": 17776 + }, + { + "epoch": 0.8045711699479521, + "grad_norm": 0.2731566439388685, + "learning_rate": 9.69005245655157e-07, + "loss": 0.4616, + "step": 17777 + }, + { + "epoch": 0.8046164290563476, + "grad_norm": 0.6635968408950185, + "learning_rate": 9.685716602509782e-07, + "loss": 0.3266, + "step": 17778 + }, + { + "epoch": 0.8046616881647431, + "grad_norm": 0.6750096154295964, + "learning_rate": 9.681381614698148e-07, + "loss": 0.2842, + "step": 17779 + }, + { + "epoch": 0.8047069472731387, + "grad_norm": 0.5740177530113943, + "learning_rate": 9.677047493209775e-07, + "loss": 0.3, + "step": 17780 + }, + { + "epoch": 0.8047522063815343, + "grad_norm": 0.59785691959512, + "learning_rate": 9.67271423813781e-07, + "loss": 0.3076, + "step": 17781 + }, + { + "epoch": 0.8047974654899298, + "grad_norm": 0.6427667444325058, + "learning_rate": 9.668381849575354e-07, + "loss": 0.3088, + "step": 17782 + }, + { + "epoch": 0.8048427245983254, + "grad_norm": 0.27035245054586593, + "learning_rate": 9.664050327615531e-07, + "loss": 0.4606, + "step": 17783 + }, + { + "epoch": 0.804887983706721, + "grad_norm": 0.5900749386428757, + "learning_rate": 9.659719672351363e-07, + "loss": 0.2815, + "step": 17784 + }, + { + "epoch": 0.8049332428151166, + "grad_norm": 0.6792514119123676, + "learning_rate": 9.65538988387592e-07, + "loss": 0.2977, + "step": 17785 + }, + { + "epoch": 0.8049785019235121, + "grad_norm": 0.6449374754101008, + "learning_rate": 9.65106096228225e-07, + "loss": 0.2898, + "step": 17786 + }, + { + "epoch": 0.8050237610319076, + "grad_norm": 0.5739704721959326, + "learning_rate": 9.646732907663358e-07, + "loss": 0.2707, + "step": 17787 + }, + { + "epoch": 0.8050690201403032, + "grad_norm": 0.27968337547443045, + "learning_rate": 9.64240572011223e-07, + "loss": 0.4649, + "step": 17788 + }, + { + "epoch": 0.8051142792486988, + "grad_norm": 0.6508549364566079, + "learning_rate": 9.638079399721866e-07, + "loss": 0.2833, + "step": 17789 + }, + { + "epoch": 0.8051595383570944, + "grad_norm": 0.6567856991621365, + "learning_rate": 9.633753946585201e-07, + "loss": 0.2914, + "step": 17790 + }, + { + "epoch": 0.8052047974654899, + "grad_norm": 0.5991907161446376, + "learning_rate": 9.629429360795201e-07, + "loss": 0.29, + "step": 17791 + }, + { + "epoch": 0.8052500565738855, + "grad_norm": 0.6436835596344362, + "learning_rate": 9.625105642444777e-07, + "loss": 0.3134, + "step": 17792 + }, + { + "epoch": 0.8052953156822811, + "grad_norm": 0.26121420030905806, + "learning_rate": 9.620782791626815e-07, + "loss": 0.4442, + "step": 17793 + }, + { + "epoch": 0.8053405747906767, + "grad_norm": 0.7029702963957971, + "learning_rate": 9.616460808434213e-07, + "loss": 0.3315, + "step": 17794 + }, + { + "epoch": 0.8053858338990721, + "grad_norm": 0.5785032749920184, + "learning_rate": 9.612139692959859e-07, + "loss": 0.2775, + "step": 17795 + }, + { + "epoch": 0.8054310930074677, + "grad_norm": 0.6740378749138689, + "learning_rate": 9.607819445296579e-07, + "loss": 0.323, + "step": 17796 + }, + { + "epoch": 0.8054763521158633, + "grad_norm": 0.3714646127639946, + "learning_rate": 9.60350006553719e-07, + "loss": 0.463, + "step": 17797 + }, + { + "epoch": 0.8055216112242589, + "grad_norm": 0.6722240591447426, + "learning_rate": 9.599181553774517e-07, + "loss": 0.2908, + "step": 17798 + }, + { + "epoch": 0.8055668703326544, + "grad_norm": 0.2518733239893241, + "learning_rate": 9.59486391010136e-07, + "loss": 0.4378, + "step": 17799 + }, + { + "epoch": 0.80561212944105, + "grad_norm": 0.5960606271702927, + "learning_rate": 9.59054713461049e-07, + "loss": 0.258, + "step": 17800 + }, + { + "epoch": 0.8056573885494456, + "grad_norm": 0.5225734865181609, + "learning_rate": 9.586231227394632e-07, + "loss": 0.2191, + "step": 17801 + }, + { + "epoch": 0.8057026476578412, + "grad_norm": 0.30134938053213317, + "learning_rate": 9.581916188546563e-07, + "loss": 0.455, + "step": 17802 + }, + { + "epoch": 0.8057479067662368, + "grad_norm": 0.2919711089673619, + "learning_rate": 9.577602018158966e-07, + "loss": 0.462, + "step": 17803 + }, + { + "epoch": 0.8057931658746322, + "grad_norm": 0.5348588791690345, + "learning_rate": 9.57328871632457e-07, + "loss": 0.2597, + "step": 17804 + }, + { + "epoch": 0.8058384249830278, + "grad_norm": 0.26899116106159493, + "learning_rate": 9.568976283136033e-07, + "loss": 0.4445, + "step": 17805 + }, + { + "epoch": 0.8058836840914234, + "grad_norm": 0.6010819484837083, + "learning_rate": 9.564664718686006e-07, + "loss": 0.2669, + "step": 17806 + }, + { + "epoch": 0.805928943199819, + "grad_norm": 0.5988338193919135, + "learning_rate": 9.560354023067154e-07, + "loss": 0.3217, + "step": 17807 + }, + { + "epoch": 0.8059742023082145, + "grad_norm": 0.6031911398914606, + "learning_rate": 9.556044196372117e-07, + "loss": 0.2926, + "step": 17808 + }, + { + "epoch": 0.8060194614166101, + "grad_norm": 0.6186691343631564, + "learning_rate": 9.551735238693448e-07, + "loss": 0.2715, + "step": 17809 + }, + { + "epoch": 0.8060647205250057, + "grad_norm": 0.6173799694688191, + "learning_rate": 9.547427150123762e-07, + "loss": 0.2874, + "step": 17810 + }, + { + "epoch": 0.8061099796334013, + "grad_norm": 0.6022156353413017, + "learning_rate": 9.543119930755622e-07, + "loss": 0.2956, + "step": 17811 + }, + { + "epoch": 0.8061552387417967, + "grad_norm": 0.5927472848824709, + "learning_rate": 9.538813580681616e-07, + "loss": 0.2715, + "step": 17812 + }, + { + "epoch": 0.8062004978501923, + "grad_norm": 0.6148306294036522, + "learning_rate": 9.534508099994206e-07, + "loss": 0.3106, + "step": 17813 + }, + { + "epoch": 0.8062457569585879, + "grad_norm": 0.7236385379377256, + "learning_rate": 9.530203488785939e-07, + "loss": 0.3033, + "step": 17814 + }, + { + "epoch": 0.8062910160669835, + "grad_norm": 0.6026043617790225, + "learning_rate": 9.52589974714932e-07, + "loss": 0.2908, + "step": 17815 + }, + { + "epoch": 0.8063362751753791, + "grad_norm": 0.2657865844437305, + "learning_rate": 9.521596875176803e-07, + "loss": 0.4463, + "step": 17816 + }, + { + "epoch": 0.8063815342837746, + "grad_norm": 0.664579620544881, + "learning_rate": 9.517294872960841e-07, + "loss": 0.2808, + "step": 17817 + }, + { + "epoch": 0.8064267933921702, + "grad_norm": 0.62770544763799, + "learning_rate": 9.51299374059389e-07, + "loss": 0.2625, + "step": 17818 + }, + { + "epoch": 0.8064720525005658, + "grad_norm": 0.6491865545045638, + "learning_rate": 9.508693478168346e-07, + "loss": 0.265, + "step": 17819 + }, + { + "epoch": 0.8065173116089613, + "grad_norm": 0.6222501394219117, + "learning_rate": 9.504394085776636e-07, + "loss": 0.289, + "step": 17820 + }, + { + "epoch": 0.8065625707173568, + "grad_norm": 0.5920336723940774, + "learning_rate": 9.500095563511119e-07, + "loss": 0.3043, + "step": 17821 + }, + { + "epoch": 0.8066078298257524, + "grad_norm": 0.6828166339707074, + "learning_rate": 9.49579791146415e-07, + "loss": 0.2999, + "step": 17822 + }, + { + "epoch": 0.806653088934148, + "grad_norm": 0.8659427099989623, + "learning_rate": 9.491501129728087e-07, + "loss": 0.3415, + "step": 17823 + }, + { + "epoch": 0.8066983480425436, + "grad_norm": 0.6783573168092197, + "learning_rate": 9.487205218395262e-07, + "loss": 0.3317, + "step": 17824 + }, + { + "epoch": 0.8067436071509392, + "grad_norm": 0.569484306281922, + "learning_rate": 9.482910177557975e-07, + "loss": 0.2727, + "step": 17825 + }, + { + "epoch": 0.8067888662593347, + "grad_norm": 0.6803035123904807, + "learning_rate": 9.478616007308495e-07, + "loss": 0.3235, + "step": 17826 + }, + { + "epoch": 0.8068341253677302, + "grad_norm": 0.34306276811634023, + "learning_rate": 9.474322707739103e-07, + "loss": 0.4562, + "step": 17827 + }, + { + "epoch": 0.8068793844761258, + "grad_norm": 0.6123543709961801, + "learning_rate": 9.470030278942066e-07, + "loss": 0.3109, + "step": 17828 + }, + { + "epoch": 0.8069246435845214, + "grad_norm": 0.2548844258247678, + "learning_rate": 9.465738721009598e-07, + "loss": 0.4591, + "step": 17829 + }, + { + "epoch": 0.8069699026929169, + "grad_norm": 0.26506859233429114, + "learning_rate": 9.461448034033905e-07, + "loss": 0.4505, + "step": 17830 + }, + { + "epoch": 0.8070151618013125, + "grad_norm": 0.6048925450365089, + "learning_rate": 9.457158218107198e-07, + "loss": 0.2945, + "step": 17831 + }, + { + "epoch": 0.8070604209097081, + "grad_norm": 0.6257261716315243, + "learning_rate": 9.45286927332163e-07, + "loss": 0.2926, + "step": 17832 + }, + { + "epoch": 0.8071056800181037, + "grad_norm": 0.6735816668686118, + "learning_rate": 9.448581199769385e-07, + "loss": 0.2789, + "step": 17833 + }, + { + "epoch": 0.8071509391264992, + "grad_norm": 0.2743845808695478, + "learning_rate": 9.444293997542586e-07, + "loss": 0.4636, + "step": 17834 + }, + { + "epoch": 0.8071961982348947, + "grad_norm": 0.28865643557615306, + "learning_rate": 9.440007666733336e-07, + "loss": 0.4819, + "step": 17835 + }, + { + "epoch": 0.8072414573432903, + "grad_norm": 0.5745541285151884, + "learning_rate": 9.43572220743375e-07, + "loss": 0.2733, + "step": 17836 + }, + { + "epoch": 0.8072867164516859, + "grad_norm": 0.24529223471523312, + "learning_rate": 9.431437619735928e-07, + "loss": 0.436, + "step": 17837 + }, + { + "epoch": 0.8073319755600815, + "grad_norm": 0.591013245264191, + "learning_rate": 9.427153903731912e-07, + "loss": 0.2857, + "step": 17838 + }, + { + "epoch": 0.807377234668477, + "grad_norm": 0.5672598214064283, + "learning_rate": 9.422871059513738e-07, + "loss": 0.2954, + "step": 17839 + }, + { + "epoch": 0.8074224937768726, + "grad_norm": 0.2732986318260846, + "learning_rate": 9.418589087173441e-07, + "loss": 0.4775, + "step": 17840 + }, + { + "epoch": 0.8074677528852682, + "grad_norm": 0.6033196974822153, + "learning_rate": 9.414307986803051e-07, + "loss": 0.2578, + "step": 17841 + }, + { + "epoch": 0.8075130119936638, + "grad_norm": 0.6700641066968276, + "learning_rate": 9.410027758494511e-07, + "loss": 0.2617, + "step": 17842 + }, + { + "epoch": 0.8075582711020592, + "grad_norm": 0.6062704598820653, + "learning_rate": 9.405748402339809e-07, + "loss": 0.2894, + "step": 17843 + }, + { + "epoch": 0.8076035302104548, + "grad_norm": 0.29249875010539206, + "learning_rate": 9.401469918430911e-07, + "loss": 0.4807, + "step": 17844 + }, + { + "epoch": 0.8076487893188504, + "grad_norm": 0.5552771925540408, + "learning_rate": 9.397192306859737e-07, + "loss": 0.253, + "step": 17845 + }, + { + "epoch": 0.807694048427246, + "grad_norm": 0.729487092010495, + "learning_rate": 9.392915567718186e-07, + "loss": 0.3286, + "step": 17846 + }, + { + "epoch": 0.8077393075356415, + "grad_norm": 0.6632461653794297, + "learning_rate": 9.388639701098174e-07, + "loss": 0.3288, + "step": 17847 + }, + { + "epoch": 0.8077845666440371, + "grad_norm": 0.2959198050260553, + "learning_rate": 9.384364707091559e-07, + "loss": 0.4735, + "step": 17848 + }, + { + "epoch": 0.8078298257524327, + "grad_norm": 0.5870934956027272, + "learning_rate": 9.380090585790213e-07, + "loss": 0.3435, + "step": 17849 + }, + { + "epoch": 0.8078750848608283, + "grad_norm": 0.5788613590093816, + "learning_rate": 9.375817337285969e-07, + "loss": 0.2863, + "step": 17850 + }, + { + "epoch": 0.8079203439692239, + "grad_norm": 0.6004090232292422, + "learning_rate": 9.371544961670625e-07, + "loss": 0.2654, + "step": 17851 + }, + { + "epoch": 0.8079656030776193, + "grad_norm": 0.6542045633090844, + "learning_rate": 9.367273459036003e-07, + "loss": 0.2952, + "step": 17852 + }, + { + "epoch": 0.8080108621860149, + "grad_norm": 0.6154991149598072, + "learning_rate": 9.363002829473894e-07, + "loss": 0.2866, + "step": 17853 + }, + { + "epoch": 0.8080561212944105, + "grad_norm": 0.6160983703257152, + "learning_rate": 9.358733073076048e-07, + "loss": 0.2862, + "step": 17854 + }, + { + "epoch": 0.8081013804028061, + "grad_norm": 0.5935234985164778, + "learning_rate": 9.354464189934193e-07, + "loss": 0.298, + "step": 17855 + }, + { + "epoch": 0.8081466395112016, + "grad_norm": 0.6286116231527318, + "learning_rate": 9.35019618014007e-07, + "loss": 0.3286, + "step": 17856 + }, + { + "epoch": 0.8081918986195972, + "grad_norm": 0.6033556103336765, + "learning_rate": 9.345929043785396e-07, + "loss": 0.2455, + "step": 17857 + }, + { + "epoch": 0.8082371577279928, + "grad_norm": 0.2754283766879807, + "learning_rate": 9.341662780961847e-07, + "loss": 0.458, + "step": 17858 + }, + { + "epoch": 0.8082824168363884, + "grad_norm": 0.6388948865050439, + "learning_rate": 9.337397391761083e-07, + "loss": 0.3176, + "step": 17859 + }, + { + "epoch": 0.8083276759447839, + "grad_norm": 0.7290291472085741, + "learning_rate": 9.333132876274775e-07, + "loss": 0.314, + "step": 17860 + }, + { + "epoch": 0.8083729350531794, + "grad_norm": 0.6223888135457937, + "learning_rate": 9.328869234594529e-07, + "loss": 0.2904, + "step": 17861 + }, + { + "epoch": 0.808418194161575, + "grad_norm": 0.681702030465637, + "learning_rate": 9.32460646681198e-07, + "loss": 0.3566, + "step": 17862 + }, + { + "epoch": 0.8084634532699706, + "grad_norm": 0.642590939024477, + "learning_rate": 9.320344573018719e-07, + "loss": 0.2918, + "step": 17863 + }, + { + "epoch": 0.8085087123783662, + "grad_norm": 0.3022485742167686, + "learning_rate": 9.316083553306299e-07, + "loss": 0.4925, + "step": 17864 + }, + { + "epoch": 0.8085539714867617, + "grad_norm": 0.6056913667244028, + "learning_rate": 9.311823407766297e-07, + "loss": 0.3207, + "step": 17865 + }, + { + "epoch": 0.8085992305951573, + "grad_norm": 0.6621139643820714, + "learning_rate": 9.307564136490255e-07, + "loss": 0.3178, + "step": 17866 + }, + { + "epoch": 0.8086444897035528, + "grad_norm": 0.6056796351091303, + "learning_rate": 9.303305739569685e-07, + "loss": 0.2781, + "step": 17867 + }, + { + "epoch": 0.8086897488119484, + "grad_norm": 0.26125384139409213, + "learning_rate": 9.299048217096068e-07, + "loss": 0.463, + "step": 17868 + }, + { + "epoch": 0.8087350079203439, + "grad_norm": 0.6179247104927748, + "learning_rate": 9.294791569160899e-07, + "loss": 0.2742, + "step": 17869 + }, + { + "epoch": 0.8087802670287395, + "grad_norm": 0.5790561879273974, + "learning_rate": 9.290535795855659e-07, + "loss": 0.3285, + "step": 17870 + }, + { + "epoch": 0.8088255261371351, + "grad_norm": 0.25355361987720526, + "learning_rate": 9.286280897271777e-07, + "loss": 0.4392, + "step": 17871 + }, + { + "epoch": 0.8088707852455307, + "grad_norm": 0.5659073976736065, + "learning_rate": 9.282026873500666e-07, + "loss": 0.263, + "step": 17872 + }, + { + "epoch": 0.8089160443539263, + "grad_norm": 0.5744715963109575, + "learning_rate": 9.277773724633749e-07, + "loss": 0.3104, + "step": 17873 + }, + { + "epoch": 0.8089613034623218, + "grad_norm": 0.6116546412618016, + "learning_rate": 9.273521450762391e-07, + "loss": 0.309, + "step": 17874 + }, + { + "epoch": 0.8090065625707173, + "grad_norm": 0.5956581699580364, + "learning_rate": 9.269270051977991e-07, + "loss": 0.3248, + "step": 17875 + }, + { + "epoch": 0.8090518216791129, + "grad_norm": 0.5953470393565178, + "learning_rate": 9.265019528371882e-07, + "loss": 0.2871, + "step": 17876 + }, + { + "epoch": 0.8090970807875085, + "grad_norm": 0.6054083677074037, + "learning_rate": 9.260769880035387e-07, + "loss": 0.2765, + "step": 17877 + }, + { + "epoch": 0.809142339895904, + "grad_norm": 0.5646933571107533, + "learning_rate": 9.256521107059834e-07, + "loss": 0.2642, + "step": 17878 + }, + { + "epoch": 0.8091875990042996, + "grad_norm": 0.5796948761794014, + "learning_rate": 9.25227320953651e-07, + "loss": 0.2792, + "step": 17879 + }, + { + "epoch": 0.8092328581126952, + "grad_norm": 0.5997158177178545, + "learning_rate": 9.248026187556674e-07, + "loss": 0.2902, + "step": 17880 + }, + { + "epoch": 0.8092781172210908, + "grad_norm": 0.5903397760622614, + "learning_rate": 9.243780041211597e-07, + "loss": 0.277, + "step": 17881 + }, + { + "epoch": 0.8093233763294863, + "grad_norm": 0.6532787133554799, + "learning_rate": 9.239534770592529e-07, + "loss": 0.2828, + "step": 17882 + }, + { + "epoch": 0.8093686354378818, + "grad_norm": 0.5876285887382989, + "learning_rate": 9.235290375790668e-07, + "loss": 0.3298, + "step": 17883 + }, + { + "epoch": 0.8094138945462774, + "grad_norm": 0.28400911286173425, + "learning_rate": 9.231046856897202e-07, + "loss": 0.4658, + "step": 17884 + }, + { + "epoch": 0.809459153654673, + "grad_norm": 0.6034911675444754, + "learning_rate": 9.226804214003332e-07, + "loss": 0.3213, + "step": 17885 + }, + { + "epoch": 0.8095044127630686, + "grad_norm": 0.5545357819570826, + "learning_rate": 9.222562447200228e-07, + "loss": 0.2834, + "step": 17886 + }, + { + "epoch": 0.8095496718714641, + "grad_norm": 0.5909633921075437, + "learning_rate": 9.218321556579013e-07, + "loss": 0.3009, + "step": 17887 + }, + { + "epoch": 0.8095949309798597, + "grad_norm": 0.6095861341205993, + "learning_rate": 9.214081542230808e-07, + "loss": 0.2812, + "step": 17888 + }, + { + "epoch": 0.8096401900882553, + "grad_norm": 0.6267122163248784, + "learning_rate": 9.209842404246738e-07, + "loss": 0.3018, + "step": 17889 + }, + { + "epoch": 0.8096854491966509, + "grad_norm": 0.6088538323220015, + "learning_rate": 9.205604142717866e-07, + "loss": 0.3304, + "step": 17890 + }, + { + "epoch": 0.8097307083050463, + "grad_norm": 0.6242827380667848, + "learning_rate": 9.201366757735281e-07, + "loss": 0.261, + "step": 17891 + }, + { + "epoch": 0.8097759674134419, + "grad_norm": 0.686089695766711, + "learning_rate": 9.197130249390019e-07, + "loss": 0.2722, + "step": 17892 + }, + { + "epoch": 0.8098212265218375, + "grad_norm": 0.2739949605184704, + "learning_rate": 9.192894617773102e-07, + "loss": 0.4771, + "step": 17893 + }, + { + "epoch": 0.8098664856302331, + "grad_norm": 0.5891830152785924, + "learning_rate": 9.188659862975552e-07, + "loss": 0.2851, + "step": 17894 + }, + { + "epoch": 0.8099117447386287, + "grad_norm": 0.6435546338845332, + "learning_rate": 9.184425985088368e-07, + "loss": 0.2699, + "step": 17895 + }, + { + "epoch": 0.8099570038470242, + "grad_norm": 0.6427501974253429, + "learning_rate": 9.180192984202513e-07, + "loss": 0.2669, + "step": 17896 + }, + { + "epoch": 0.8100022629554198, + "grad_norm": 0.6225840811639484, + "learning_rate": 9.175960860408934e-07, + "loss": 0.2409, + "step": 17897 + }, + { + "epoch": 0.8100475220638154, + "grad_norm": 0.6270124468852579, + "learning_rate": 9.171729613798575e-07, + "loss": 0.2685, + "step": 17898 + }, + { + "epoch": 0.810092781172211, + "grad_norm": 0.6317719814591691, + "learning_rate": 9.167499244462358e-07, + "loss": 0.3228, + "step": 17899 + }, + { + "epoch": 0.8101380402806064, + "grad_norm": 0.677980534030642, + "learning_rate": 9.163269752491183e-07, + "loss": 0.2426, + "step": 17900 + }, + { + "epoch": 0.810183299389002, + "grad_norm": 0.28151380028711165, + "learning_rate": 9.159041137975904e-07, + "loss": 0.4515, + "step": 17901 + }, + { + "epoch": 0.8102285584973976, + "grad_norm": 0.6332702542591043, + "learning_rate": 9.154813401007406e-07, + "loss": 0.3118, + "step": 17902 + }, + { + "epoch": 0.8102738176057932, + "grad_norm": 0.6925917382896261, + "learning_rate": 9.150586541676515e-07, + "loss": 0.312, + "step": 17903 + }, + { + "epoch": 0.8103190767141887, + "grad_norm": 0.5881584189149481, + "learning_rate": 9.146360560074074e-07, + "loss": 0.3068, + "step": 17904 + }, + { + "epoch": 0.8103643358225843, + "grad_norm": 0.6392350598878368, + "learning_rate": 9.142135456290868e-07, + "loss": 0.2829, + "step": 17905 + }, + { + "epoch": 0.8104095949309799, + "grad_norm": 0.6827851072609606, + "learning_rate": 9.137911230417673e-07, + "loss": 0.3119, + "step": 17906 + }, + { + "epoch": 0.8104548540393754, + "grad_norm": 0.6056204064823583, + "learning_rate": 9.133687882545267e-07, + "loss": 0.2369, + "step": 17907 + }, + { + "epoch": 0.810500113147771, + "grad_norm": 0.6214795257359838, + "learning_rate": 9.12946541276441e-07, + "loss": 0.2816, + "step": 17908 + }, + { + "epoch": 0.8105453722561665, + "grad_norm": 0.6456268891092399, + "learning_rate": 9.125243821165819e-07, + "loss": 0.2977, + "step": 17909 + }, + { + "epoch": 0.8105906313645621, + "grad_norm": 0.5826360599394663, + "learning_rate": 9.121023107840188e-07, + "loss": 0.283, + "step": 17910 + }, + { + "epoch": 0.8106358904729577, + "grad_norm": 0.6909731471921312, + "learning_rate": 9.116803272878233e-07, + "loss": 0.3122, + "step": 17911 + }, + { + "epoch": 0.8106811495813533, + "grad_norm": 0.6152573399790072, + "learning_rate": 9.112584316370615e-07, + "loss": 0.2973, + "step": 17912 + }, + { + "epoch": 0.8107264086897488, + "grad_norm": 0.6196372725926754, + "learning_rate": 9.108366238407968e-07, + "loss": 0.302, + "step": 17913 + }, + { + "epoch": 0.8107716677981444, + "grad_norm": 0.6446896084894796, + "learning_rate": 9.104149039080939e-07, + "loss": 0.2914, + "step": 17914 + }, + { + "epoch": 0.81081692690654, + "grad_norm": 0.5551934075128154, + "learning_rate": 9.099932718480158e-07, + "loss": 0.2629, + "step": 17915 + }, + { + "epoch": 0.8108621860149355, + "grad_norm": 0.6081600900581837, + "learning_rate": 9.095717276696214e-07, + "loss": 0.2885, + "step": 17916 + }, + { + "epoch": 0.810907445123331, + "grad_norm": 0.2681156513940687, + "learning_rate": 9.091502713819661e-07, + "loss": 0.4772, + "step": 17917 + }, + { + "epoch": 0.8109527042317266, + "grad_norm": 0.2478478171090661, + "learning_rate": 9.087289029941088e-07, + "loss": 0.4607, + "step": 17918 + }, + { + "epoch": 0.8109979633401222, + "grad_norm": 0.25789240256355805, + "learning_rate": 9.083076225151005e-07, + "loss": 0.457, + "step": 17919 + }, + { + "epoch": 0.8110432224485178, + "grad_norm": 0.6053463787096428, + "learning_rate": 9.078864299539963e-07, + "loss": 0.2934, + "step": 17920 + }, + { + "epoch": 0.8110884815569134, + "grad_norm": 0.73569061476308, + "learning_rate": 9.074653253198445e-07, + "loss": 0.3346, + "step": 17921 + }, + { + "epoch": 0.8111337406653089, + "grad_norm": 0.6063644964412455, + "learning_rate": 9.070443086216924e-07, + "loss": 0.3413, + "step": 17922 + }, + { + "epoch": 0.8111789997737044, + "grad_norm": 0.5351965867554764, + "learning_rate": 9.066233798685875e-07, + "loss": 0.3067, + "step": 17923 + }, + { + "epoch": 0.8112242588821, + "grad_norm": 0.28645027424828934, + "learning_rate": 9.062025390695756e-07, + "loss": 0.4717, + "step": 17924 + }, + { + "epoch": 0.8112695179904956, + "grad_norm": 0.2566172115969165, + "learning_rate": 9.057817862336982e-07, + "loss": 0.4457, + "step": 17925 + }, + { + "epoch": 0.8113147770988911, + "grad_norm": 0.5957504613420535, + "learning_rate": 9.053611213699942e-07, + "loss": 0.2781, + "step": 17926 + }, + { + "epoch": 0.8113600362072867, + "grad_norm": 0.6092725014501065, + "learning_rate": 9.049405444875042e-07, + "loss": 0.3022, + "step": 17927 + }, + { + "epoch": 0.8114052953156823, + "grad_norm": 0.5947499132955205, + "learning_rate": 9.04520055595266e-07, + "loss": 0.2739, + "step": 17928 + }, + { + "epoch": 0.8114505544240779, + "grad_norm": 0.6436976100680032, + "learning_rate": 9.040996547023134e-07, + "loss": 0.2487, + "step": 17929 + }, + { + "epoch": 0.8114958135324735, + "grad_norm": 0.5838302765186906, + "learning_rate": 9.036793418176786e-07, + "loss": 0.3405, + "step": 17930 + }, + { + "epoch": 0.8115410726408689, + "grad_norm": 0.6307928816587278, + "learning_rate": 9.032591169503951e-07, + "loss": 0.2652, + "step": 17931 + }, + { + "epoch": 0.8115863317492645, + "grad_norm": 0.6497305944441155, + "learning_rate": 9.028389801094895e-07, + "loss": 0.2793, + "step": 17932 + }, + { + "epoch": 0.8116315908576601, + "grad_norm": 0.5786671075336611, + "learning_rate": 9.024189313039922e-07, + "loss": 0.2842, + "step": 17933 + }, + { + "epoch": 0.8116768499660557, + "grad_norm": 0.28544876263977276, + "learning_rate": 9.019989705429271e-07, + "loss": 0.4457, + "step": 17934 + }, + { + "epoch": 0.8117221090744512, + "grad_norm": 0.6562782485085401, + "learning_rate": 9.015790978353173e-07, + "loss": 0.2923, + "step": 17935 + }, + { + "epoch": 0.8117673681828468, + "grad_norm": 0.6443448934260734, + "learning_rate": 9.011593131901852e-07, + "loss": 0.3118, + "step": 17936 + }, + { + "epoch": 0.8118126272912424, + "grad_norm": 0.3076745719943253, + "learning_rate": 9.007396166165516e-07, + "loss": 0.4967, + "step": 17937 + }, + { + "epoch": 0.811857886399638, + "grad_norm": 0.6008853865334476, + "learning_rate": 9.003200081234342e-07, + "loss": 0.3007, + "step": 17938 + }, + { + "epoch": 0.8119031455080334, + "grad_norm": 0.5958514186274408, + "learning_rate": 8.999004877198475e-07, + "loss": 0.2712, + "step": 17939 + }, + { + "epoch": 0.811948404616429, + "grad_norm": 0.604126124888519, + "learning_rate": 8.994810554148065e-07, + "loss": 0.297, + "step": 17940 + }, + { + "epoch": 0.8119936637248246, + "grad_norm": 0.5785700048548983, + "learning_rate": 8.990617112173261e-07, + "loss": 0.2805, + "step": 17941 + }, + { + "epoch": 0.8120389228332202, + "grad_norm": 0.6076645267963782, + "learning_rate": 8.986424551364126e-07, + "loss": 0.3081, + "step": 17942 + }, + { + "epoch": 0.8120841819416158, + "grad_norm": 0.71513808602186, + "learning_rate": 8.982232871810759e-07, + "loss": 0.2825, + "step": 17943 + }, + { + "epoch": 0.8121294410500113, + "grad_norm": 0.2832504811945113, + "learning_rate": 8.978042073603243e-07, + "loss": 0.4603, + "step": 17944 + }, + { + "epoch": 0.8121747001584069, + "grad_norm": 0.5795502680430666, + "learning_rate": 8.97385215683162e-07, + "loss": 0.2935, + "step": 17945 + }, + { + "epoch": 0.8122199592668025, + "grad_norm": 0.6008787860153728, + "learning_rate": 8.969663121585892e-07, + "loss": 0.2908, + "step": 17946 + }, + { + "epoch": 0.812265218375198, + "grad_norm": 0.5873590487284043, + "learning_rate": 8.965474967956106e-07, + "loss": 0.2657, + "step": 17947 + }, + { + "epoch": 0.8123104774835935, + "grad_norm": 0.6827117507553307, + "learning_rate": 8.961287696032217e-07, + "loss": 0.3314, + "step": 17948 + }, + { + "epoch": 0.8123557365919891, + "grad_norm": 0.6190725778935383, + "learning_rate": 8.957101305904231e-07, + "loss": 0.2584, + "step": 17949 + }, + { + "epoch": 0.8124009957003847, + "grad_norm": 0.635167529439559, + "learning_rate": 8.95291579766207e-07, + "loss": 0.3212, + "step": 17950 + }, + { + "epoch": 0.8124462548087803, + "grad_norm": 0.6384375583049835, + "learning_rate": 8.948731171395697e-07, + "loss": 0.295, + "step": 17951 + }, + { + "epoch": 0.8124915139171758, + "grad_norm": 0.6788234412119452, + "learning_rate": 8.944547427195e-07, + "loss": 0.3228, + "step": 17952 + }, + { + "epoch": 0.8125367730255714, + "grad_norm": 0.5621391389443448, + "learning_rate": 8.940364565149895e-07, + "loss": 0.2866, + "step": 17953 + }, + { + "epoch": 0.812582032133967, + "grad_norm": 0.641359077850069, + "learning_rate": 8.936182585350256e-07, + "loss": 0.2987, + "step": 17954 + }, + { + "epoch": 0.8126272912423625, + "grad_norm": 0.6266553026836502, + "learning_rate": 8.932001487885916e-07, + "loss": 0.2646, + "step": 17955 + }, + { + "epoch": 0.8126725503507581, + "grad_norm": 0.5493797054964148, + "learning_rate": 8.927821272846737e-07, + "loss": 0.3098, + "step": 17956 + }, + { + "epoch": 0.8127178094591536, + "grad_norm": 0.7846153097565445, + "learning_rate": 8.923641940322547e-07, + "loss": 0.2677, + "step": 17957 + }, + { + "epoch": 0.8127630685675492, + "grad_norm": 0.5725269933339863, + "learning_rate": 8.919463490403141e-07, + "loss": 0.299, + "step": 17958 + }, + { + "epoch": 0.8128083276759448, + "grad_norm": 0.5885140435649727, + "learning_rate": 8.915285923178274e-07, + "loss": 0.2828, + "step": 17959 + }, + { + "epoch": 0.8128535867843404, + "grad_norm": 0.5613966687381595, + "learning_rate": 8.911109238737748e-07, + "loss": 0.2552, + "step": 17960 + }, + { + "epoch": 0.8128988458927359, + "grad_norm": 0.5813063519597989, + "learning_rate": 8.906933437171278e-07, + "loss": 0.2803, + "step": 17961 + }, + { + "epoch": 0.8129441050011315, + "grad_norm": 0.2914511549712682, + "learning_rate": 8.90275851856861e-07, + "loss": 0.4879, + "step": 17962 + }, + { + "epoch": 0.812989364109527, + "grad_norm": 0.5898202553287785, + "learning_rate": 8.89858448301944e-07, + "loss": 0.2995, + "step": 17963 + }, + { + "epoch": 0.8130346232179226, + "grad_norm": 0.6590792125265483, + "learning_rate": 8.894411330613445e-07, + "loss": 0.3256, + "step": 17964 + }, + { + "epoch": 0.8130798823263182, + "grad_norm": 0.27424966293835146, + "learning_rate": 8.890239061440303e-07, + "loss": 0.4525, + "step": 17965 + }, + { + "epoch": 0.8131251414347137, + "grad_norm": 0.5828946147301375, + "learning_rate": 8.886067675589682e-07, + "loss": 0.2955, + "step": 17966 + }, + { + "epoch": 0.8131704005431093, + "grad_norm": 0.5702092181704951, + "learning_rate": 8.881897173151188e-07, + "loss": 0.315, + "step": 17967 + }, + { + "epoch": 0.8132156596515049, + "grad_norm": 0.2968341038768363, + "learning_rate": 8.877727554214432e-07, + "loss": 0.4809, + "step": 17968 + }, + { + "epoch": 0.8132609187599005, + "grad_norm": 0.2735751270105653, + "learning_rate": 8.87355881886901e-07, + "loss": 0.4748, + "step": 17969 + }, + { + "epoch": 0.813306177868296, + "grad_norm": 0.5959178434447817, + "learning_rate": 8.869390967204527e-07, + "loss": 0.2481, + "step": 17970 + }, + { + "epoch": 0.8133514369766915, + "grad_norm": 0.581536219848497, + "learning_rate": 8.865223999310485e-07, + "loss": 0.2885, + "step": 17971 + }, + { + "epoch": 0.8133966960850871, + "grad_norm": 0.25640784722105925, + "learning_rate": 8.861057915276438e-07, + "loss": 0.4554, + "step": 17972 + }, + { + "epoch": 0.8134419551934827, + "grad_norm": 0.6235145197841093, + "learning_rate": 8.856892715191929e-07, + "loss": 0.2738, + "step": 17973 + }, + { + "epoch": 0.8134872143018782, + "grad_norm": 0.2672252289127243, + "learning_rate": 8.852728399146427e-07, + "loss": 0.4696, + "step": 17974 + }, + { + "epoch": 0.8135324734102738, + "grad_norm": 0.7879556336384355, + "learning_rate": 8.848564967229407e-07, + "loss": 0.2915, + "step": 17975 + }, + { + "epoch": 0.8135777325186694, + "grad_norm": 0.6113467717808888, + "learning_rate": 8.844402419530346e-07, + "loss": 0.25, + "step": 17976 + }, + { + "epoch": 0.813622991627065, + "grad_norm": 0.6012089521551512, + "learning_rate": 8.840240756138673e-07, + "loss": 0.2584, + "step": 17977 + }, + { + "epoch": 0.8136682507354606, + "grad_norm": 0.6901266025938774, + "learning_rate": 8.836079977143819e-07, + "loss": 0.3234, + "step": 17978 + }, + { + "epoch": 0.813713509843856, + "grad_norm": 0.266879344823511, + "learning_rate": 8.831920082635175e-07, + "loss": 0.4446, + "step": 17979 + }, + { + "epoch": 0.8137587689522516, + "grad_norm": 0.2728346231036277, + "learning_rate": 8.82776107270214e-07, + "loss": 0.458, + "step": 17980 + }, + { + "epoch": 0.8138040280606472, + "grad_norm": 0.5920782461326178, + "learning_rate": 8.823602947434056e-07, + "loss": 0.2792, + "step": 17981 + }, + { + "epoch": 0.8138492871690428, + "grad_norm": 0.6327987553773374, + "learning_rate": 8.819445706920293e-07, + "loss": 0.3333, + "step": 17982 + }, + { + "epoch": 0.8138945462774383, + "grad_norm": 0.6320931870141716, + "learning_rate": 8.815289351250166e-07, + "loss": 0.2912, + "step": 17983 + }, + { + "epoch": 0.8139398053858339, + "grad_norm": 0.6171374305864387, + "learning_rate": 8.811133880512967e-07, + "loss": 0.2914, + "step": 17984 + }, + { + "epoch": 0.8139850644942295, + "grad_norm": 0.5839525656588083, + "learning_rate": 8.806979294798001e-07, + "loss": 0.3362, + "step": 17985 + }, + { + "epoch": 0.8140303236026251, + "grad_norm": 0.6262579128109615, + "learning_rate": 8.802825594194553e-07, + "loss": 0.2797, + "step": 17986 + }, + { + "epoch": 0.8140755827110205, + "grad_norm": 0.6644319477416321, + "learning_rate": 8.798672778791851e-07, + "loss": 0.2652, + "step": 17987 + }, + { + "epoch": 0.8141208418194161, + "grad_norm": 0.5892605327097258, + "learning_rate": 8.794520848679117e-07, + "loss": 0.3056, + "step": 17988 + }, + { + "epoch": 0.8141661009278117, + "grad_norm": 0.651773532486608, + "learning_rate": 8.790369803945586e-07, + "loss": 0.3171, + "step": 17989 + }, + { + "epoch": 0.8142113600362073, + "grad_norm": 0.6321790753968162, + "learning_rate": 8.786219644680433e-07, + "loss": 0.3445, + "step": 17990 + }, + { + "epoch": 0.8142566191446029, + "grad_norm": 0.7756419867480293, + "learning_rate": 8.782070370972856e-07, + "loss": 0.308, + "step": 17991 + }, + { + "epoch": 0.8143018782529984, + "grad_norm": 0.6363231400815615, + "learning_rate": 8.777921982911996e-07, + "loss": 0.2983, + "step": 17992 + }, + { + "epoch": 0.814347137361394, + "grad_norm": 0.6368075871813724, + "learning_rate": 8.773774480586972e-07, + "loss": 0.2611, + "step": 17993 + }, + { + "epoch": 0.8143923964697896, + "grad_norm": 0.6556289472461144, + "learning_rate": 8.769627864086922e-07, + "loss": 0.3046, + "step": 17994 + }, + { + "epoch": 0.8144376555781851, + "grad_norm": 0.2811520282166436, + "learning_rate": 8.765482133500952e-07, + "loss": 0.4674, + "step": 17995 + }, + { + "epoch": 0.8144829146865806, + "grad_norm": 0.6395634551090364, + "learning_rate": 8.761337288918126e-07, + "loss": 0.3681, + "step": 17996 + }, + { + "epoch": 0.8145281737949762, + "grad_norm": 0.5727505237743223, + "learning_rate": 8.757193330427494e-07, + "loss": 0.2535, + "step": 17997 + }, + { + "epoch": 0.8145734329033718, + "grad_norm": 0.6768003967586744, + "learning_rate": 8.753050258118112e-07, + "loss": 0.3083, + "step": 17998 + }, + { + "epoch": 0.8146186920117674, + "grad_norm": 0.6193381144097816, + "learning_rate": 8.748908072079021e-07, + "loss": 0.3023, + "step": 17999 + }, + { + "epoch": 0.814663951120163, + "grad_norm": 0.3014669651232553, + "learning_rate": 8.744766772399182e-07, + "loss": 0.4727, + "step": 18000 + }, + { + "epoch": 0.8147092102285585, + "grad_norm": 0.29125316456076983, + "learning_rate": 8.740626359167598e-07, + "loss": 0.463, + "step": 18001 + }, + { + "epoch": 0.8147544693369541, + "grad_norm": 0.5728738895218383, + "learning_rate": 8.736486832473246e-07, + "loss": 0.2559, + "step": 18002 + }, + { + "epoch": 0.8147997284453496, + "grad_norm": 0.592202199487586, + "learning_rate": 8.732348192405061e-07, + "loss": 0.2874, + "step": 18003 + }, + { + "epoch": 0.8148449875537452, + "grad_norm": 0.6240452002246657, + "learning_rate": 8.72821043905196e-07, + "loss": 0.3314, + "step": 18004 + }, + { + "epoch": 0.8148902466621407, + "grad_norm": 0.27107133608396483, + "learning_rate": 8.724073572502867e-07, + "loss": 0.4812, + "step": 18005 + }, + { + "epoch": 0.8149355057705363, + "grad_norm": 0.26570821313541987, + "learning_rate": 8.719937592846655e-07, + "loss": 0.4683, + "step": 18006 + }, + { + "epoch": 0.8149807648789319, + "grad_norm": 0.6297787448641845, + "learning_rate": 8.715802500172215e-07, + "loss": 0.3069, + "step": 18007 + }, + { + "epoch": 0.8150260239873275, + "grad_norm": 1.1029666181074564, + "learning_rate": 8.71166829456837e-07, + "loss": 0.2747, + "step": 18008 + }, + { + "epoch": 0.815071283095723, + "grad_norm": 0.6509827180821017, + "learning_rate": 8.707534976123982e-07, + "loss": 0.2896, + "step": 18009 + }, + { + "epoch": 0.8151165422041186, + "grad_norm": 0.6877266774259756, + "learning_rate": 8.70340254492783e-07, + "loss": 0.3169, + "step": 18010 + }, + { + "epoch": 0.8151618013125141, + "grad_norm": 0.6522964469108322, + "learning_rate": 8.699271001068737e-07, + "loss": 0.3197, + "step": 18011 + }, + { + "epoch": 0.8152070604209097, + "grad_norm": 0.607681550138856, + "learning_rate": 8.695140344635472e-07, + "loss": 0.3409, + "step": 18012 + }, + { + "epoch": 0.8152523195293053, + "grad_norm": 0.28011214100128695, + "learning_rate": 8.691010575716763e-07, + "loss": 0.4695, + "step": 18013 + }, + { + "epoch": 0.8152975786377008, + "grad_norm": 0.6319478865347986, + "learning_rate": 8.686881694401366e-07, + "loss": 0.3129, + "step": 18014 + }, + { + "epoch": 0.8153428377460964, + "grad_norm": 0.6545870615655542, + "learning_rate": 8.682753700778013e-07, + "loss": 0.2748, + "step": 18015 + }, + { + "epoch": 0.815388096854492, + "grad_norm": 0.5383505723290435, + "learning_rate": 8.678626594935385e-07, + "loss": 0.2814, + "step": 18016 + }, + { + "epoch": 0.8154333559628876, + "grad_norm": 0.6287309199313841, + "learning_rate": 8.674500376962153e-07, + "loss": 0.3214, + "step": 18017 + }, + { + "epoch": 0.815478615071283, + "grad_norm": 0.6113330668405157, + "learning_rate": 8.670375046946999e-07, + "loss": 0.2629, + "step": 18018 + }, + { + "epoch": 0.8155238741796786, + "grad_norm": 0.6091712904104718, + "learning_rate": 8.666250604978532e-07, + "loss": 0.2794, + "step": 18019 + }, + { + "epoch": 0.8155691332880742, + "grad_norm": 0.5902500448470669, + "learning_rate": 8.662127051145414e-07, + "loss": 0.2868, + "step": 18020 + }, + { + "epoch": 0.8156143923964698, + "grad_norm": 0.852530710451694, + "learning_rate": 8.658004385536207e-07, + "loss": 0.3043, + "step": 18021 + }, + { + "epoch": 0.8156596515048653, + "grad_norm": 0.662962701091303, + "learning_rate": 8.653882608239528e-07, + "loss": 0.287, + "step": 18022 + }, + { + "epoch": 0.8157049106132609, + "grad_norm": 0.6398341194492669, + "learning_rate": 8.649761719343913e-07, + "loss": 0.2593, + "step": 18023 + }, + { + "epoch": 0.8157501697216565, + "grad_norm": 0.6288687742255715, + "learning_rate": 8.645641718937936e-07, + "loss": 0.3002, + "step": 18024 + }, + { + "epoch": 0.8157954288300521, + "grad_norm": 0.6466427119006023, + "learning_rate": 8.641522607110108e-07, + "loss": 0.2877, + "step": 18025 + }, + { + "epoch": 0.8158406879384477, + "grad_norm": 0.5266603977129436, + "learning_rate": 8.637404383948922e-07, + "loss": 0.2459, + "step": 18026 + }, + { + "epoch": 0.8158859470468431, + "grad_norm": 0.5882476530914964, + "learning_rate": 8.633287049542882e-07, + "loss": 0.2877, + "step": 18027 + }, + { + "epoch": 0.8159312061552387, + "grad_norm": 0.6191750205576243, + "learning_rate": 8.62917060398048e-07, + "loss": 0.3038, + "step": 18028 + }, + { + "epoch": 0.8159764652636343, + "grad_norm": 0.5662399271037007, + "learning_rate": 8.625055047350117e-07, + "loss": 0.2968, + "step": 18029 + }, + { + "epoch": 0.8160217243720299, + "grad_norm": 0.6095585560534345, + "learning_rate": 8.620940379740245e-07, + "loss": 0.2823, + "step": 18030 + }, + { + "epoch": 0.8160669834804254, + "grad_norm": 0.658250569960579, + "learning_rate": 8.616826601239292e-07, + "loss": 0.2968, + "step": 18031 + }, + { + "epoch": 0.816112242588821, + "grad_norm": 0.6742898954206353, + "learning_rate": 8.612713711935633e-07, + "loss": 0.3158, + "step": 18032 + }, + { + "epoch": 0.8161575016972166, + "grad_norm": 0.5886347097456335, + "learning_rate": 8.608601711917635e-07, + "loss": 0.2991, + "step": 18033 + }, + { + "epoch": 0.8162027608056122, + "grad_norm": 0.5730933904332982, + "learning_rate": 8.60449060127368e-07, + "loss": 0.277, + "step": 18034 + }, + { + "epoch": 0.8162480199140076, + "grad_norm": 0.314793965110205, + "learning_rate": 8.600380380092066e-07, + "loss": 0.4785, + "step": 18035 + }, + { + "epoch": 0.8162932790224032, + "grad_norm": 0.6019555970834, + "learning_rate": 8.596271048461141e-07, + "loss": 0.2944, + "step": 18036 + }, + { + "epoch": 0.8163385381307988, + "grad_norm": 0.6373598092176254, + "learning_rate": 8.592162606469179e-07, + "loss": 0.3038, + "step": 18037 + }, + { + "epoch": 0.8163837972391944, + "grad_norm": 0.2930173461656693, + "learning_rate": 8.588055054204481e-07, + "loss": 0.4772, + "step": 18038 + }, + { + "epoch": 0.81642905634759, + "grad_norm": 0.5853816249810239, + "learning_rate": 8.583948391755281e-07, + "loss": 0.297, + "step": 18039 + }, + { + "epoch": 0.8164743154559855, + "grad_norm": 0.6216333902770305, + "learning_rate": 8.579842619209844e-07, + "loss": 0.3457, + "step": 18040 + }, + { + "epoch": 0.8165195745643811, + "grad_norm": 0.5844381819470246, + "learning_rate": 8.575737736656376e-07, + "loss": 0.2948, + "step": 18041 + }, + { + "epoch": 0.8165648336727767, + "grad_norm": 0.5545450711342551, + "learning_rate": 8.571633744183061e-07, + "loss": 0.2926, + "step": 18042 + }, + { + "epoch": 0.8166100927811722, + "grad_norm": 0.27769105334571487, + "learning_rate": 8.567530641878103e-07, + "loss": 0.4673, + "step": 18043 + }, + { + "epoch": 0.8166553518895677, + "grad_norm": 0.6955840626435977, + "learning_rate": 8.563428429829674e-07, + "loss": 0.3118, + "step": 18044 + }, + { + "epoch": 0.8167006109979633, + "grad_norm": 0.7041439251924754, + "learning_rate": 8.559327108125909e-07, + "loss": 0.3299, + "step": 18045 + }, + { + "epoch": 0.8167458701063589, + "grad_norm": 0.6386908175674691, + "learning_rate": 8.555226676854911e-07, + "loss": 0.2401, + "step": 18046 + }, + { + "epoch": 0.8167911292147545, + "grad_norm": 0.2727069733305922, + "learning_rate": 8.55112713610482e-07, + "loss": 0.4705, + "step": 18047 + }, + { + "epoch": 0.8168363883231501, + "grad_norm": 0.2700845917645277, + "learning_rate": 8.547028485963693e-07, + "loss": 0.4624, + "step": 18048 + }, + { + "epoch": 0.8168816474315456, + "grad_norm": 0.6460354403133349, + "learning_rate": 8.542930726519622e-07, + "loss": 0.3294, + "step": 18049 + }, + { + "epoch": 0.8169269065399412, + "grad_norm": 0.6197181489036633, + "learning_rate": 8.538833857860635e-07, + "loss": 0.2709, + "step": 18050 + }, + { + "epoch": 0.8169721656483367, + "grad_norm": 0.735925769436235, + "learning_rate": 8.534737880074778e-07, + "loss": 0.2896, + "step": 18051 + }, + { + "epoch": 0.8170174247567323, + "grad_norm": 0.6346324401263916, + "learning_rate": 8.530642793250044e-07, + "loss": 0.2803, + "step": 18052 + }, + { + "epoch": 0.8170626838651278, + "grad_norm": 0.5409400804031699, + "learning_rate": 8.526548597474444e-07, + "loss": 0.2843, + "step": 18053 + }, + { + "epoch": 0.8171079429735234, + "grad_norm": 0.5605393417963239, + "learning_rate": 8.522455292835935e-07, + "loss": 0.2799, + "step": 18054 + }, + { + "epoch": 0.817153202081919, + "grad_norm": 0.6013058872868924, + "learning_rate": 8.518362879422465e-07, + "loss": 0.3166, + "step": 18055 + }, + { + "epoch": 0.8171984611903146, + "grad_norm": 0.5732754320023369, + "learning_rate": 8.514271357321974e-07, + "loss": 0.3177, + "step": 18056 + }, + { + "epoch": 0.8172437202987101, + "grad_norm": 0.6212655502781934, + "learning_rate": 8.510180726622392e-07, + "loss": 0.2924, + "step": 18057 + }, + { + "epoch": 0.8172889794071057, + "grad_norm": 0.30100841815431545, + "learning_rate": 8.506090987411603e-07, + "loss": 0.4635, + "step": 18058 + }, + { + "epoch": 0.8173342385155012, + "grad_norm": 0.6259434935033461, + "learning_rate": 8.50200213977746e-07, + "loss": 0.3195, + "step": 18059 + }, + { + "epoch": 0.8173794976238968, + "grad_norm": 0.611446509904659, + "learning_rate": 8.49791418380786e-07, + "loss": 0.2448, + "step": 18060 + }, + { + "epoch": 0.8174247567322924, + "grad_norm": 0.6051093462489923, + "learning_rate": 8.493827119590615e-07, + "loss": 0.2709, + "step": 18061 + }, + { + "epoch": 0.8174700158406879, + "grad_norm": 0.6503222997170561, + "learning_rate": 8.489740947213537e-07, + "loss": 0.328, + "step": 18062 + }, + { + "epoch": 0.8175152749490835, + "grad_norm": 0.6214017202106538, + "learning_rate": 8.485655666764448e-07, + "loss": 0.2965, + "step": 18063 + }, + { + "epoch": 0.8175605340574791, + "grad_norm": 0.5921564688005043, + "learning_rate": 8.481571278331108e-07, + "loss": 0.2627, + "step": 18064 + }, + { + "epoch": 0.8176057931658747, + "grad_norm": 0.6325059930394564, + "learning_rate": 8.477487782001298e-07, + "loss": 0.2888, + "step": 18065 + }, + { + "epoch": 0.8176510522742702, + "grad_norm": 0.579900367318165, + "learning_rate": 8.473405177862737e-07, + "loss": 0.3454, + "step": 18066 + }, + { + "epoch": 0.8176963113826657, + "grad_norm": 0.51058712141538, + "learning_rate": 8.46932346600317e-07, + "loss": 0.4872, + "step": 18067 + }, + { + "epoch": 0.8177415704910613, + "grad_norm": 0.285936508840432, + "learning_rate": 8.46524264651028e-07, + "loss": 0.5006, + "step": 18068 + }, + { + "epoch": 0.8177868295994569, + "grad_norm": 0.6130167016167806, + "learning_rate": 8.461162719471772e-07, + "loss": 0.2589, + "step": 18069 + }, + { + "epoch": 0.8178320887078524, + "grad_norm": 0.5926511878837478, + "learning_rate": 8.457083684975298e-07, + "loss": 0.2764, + "step": 18070 + }, + { + "epoch": 0.817877347816248, + "grad_norm": 0.5814554261878402, + "learning_rate": 8.453005543108501e-07, + "loss": 0.3133, + "step": 18071 + }, + { + "epoch": 0.8179226069246436, + "grad_norm": 0.5453222644749728, + "learning_rate": 8.448928293959007e-07, + "loss": 0.2403, + "step": 18072 + }, + { + "epoch": 0.8179678660330392, + "grad_norm": 0.6616303384602777, + "learning_rate": 8.444851937614446e-07, + "loss": 0.3076, + "step": 18073 + }, + { + "epoch": 0.8180131251414348, + "grad_norm": 0.2775563857111059, + "learning_rate": 8.440776474162388e-07, + "loss": 0.4685, + "step": 18074 + }, + { + "epoch": 0.8180583842498302, + "grad_norm": 0.5854024135886258, + "learning_rate": 8.436701903690392e-07, + "loss": 0.2993, + "step": 18075 + }, + { + "epoch": 0.8181036433582258, + "grad_norm": 0.6477255114728717, + "learning_rate": 8.432628226286032e-07, + "loss": 0.3135, + "step": 18076 + }, + { + "epoch": 0.8181489024666214, + "grad_norm": 0.27798679897661827, + "learning_rate": 8.428555442036812e-07, + "loss": 0.4663, + "step": 18077 + }, + { + "epoch": 0.818194161575017, + "grad_norm": 0.5661684252041219, + "learning_rate": 8.424483551030277e-07, + "loss": 0.3152, + "step": 18078 + }, + { + "epoch": 0.8182394206834125, + "grad_norm": 0.616865906589501, + "learning_rate": 8.420412553353885e-07, + "loss": 0.2672, + "step": 18079 + }, + { + "epoch": 0.8182846797918081, + "grad_norm": 0.2682258287041038, + "learning_rate": 8.416342449095138e-07, + "loss": 0.4634, + "step": 18080 + }, + { + "epoch": 0.8183299389002037, + "grad_norm": 0.6944034869375798, + "learning_rate": 8.412273238341462e-07, + "loss": 0.306, + "step": 18081 + }, + { + "epoch": 0.8183751980085993, + "grad_norm": 0.632263742360841, + "learning_rate": 8.408204921180324e-07, + "loss": 0.28, + "step": 18082 + }, + { + "epoch": 0.8184204571169948, + "grad_norm": 0.626627799718189, + "learning_rate": 8.404137497699122e-07, + "loss": 0.2901, + "step": 18083 + }, + { + "epoch": 0.8184657162253903, + "grad_norm": 0.6525676731594316, + "learning_rate": 8.400070967985241e-07, + "loss": 0.3531, + "step": 18084 + }, + { + "epoch": 0.8185109753337859, + "grad_norm": 0.7172573954857759, + "learning_rate": 8.396005332126068e-07, + "loss": 0.3061, + "step": 18085 + }, + { + "epoch": 0.8185562344421815, + "grad_norm": 0.2704532318775338, + "learning_rate": 8.391940590208975e-07, + "loss": 0.4708, + "step": 18086 + }, + { + "epoch": 0.8186014935505771, + "grad_norm": 0.24641823451323808, + "learning_rate": 8.387876742321294e-07, + "loss": 0.4428, + "step": 18087 + }, + { + "epoch": 0.8186467526589726, + "grad_norm": 0.6891107336428738, + "learning_rate": 8.383813788550326e-07, + "loss": 0.3161, + "step": 18088 + }, + { + "epoch": 0.8186920117673682, + "grad_norm": 0.6139209157814352, + "learning_rate": 8.379751728983399e-07, + "loss": 0.3436, + "step": 18089 + }, + { + "epoch": 0.8187372708757638, + "grad_norm": 0.595071574398606, + "learning_rate": 8.375690563707761e-07, + "loss": 0.2992, + "step": 18090 + }, + { + "epoch": 0.8187825299841593, + "grad_norm": 0.770051033788159, + "learning_rate": 8.371630292810712e-07, + "loss": 0.2521, + "step": 18091 + }, + { + "epoch": 0.8188277890925548, + "grad_norm": 1.1735476464817132, + "learning_rate": 8.367570916379464e-07, + "loss": 0.3236, + "step": 18092 + }, + { + "epoch": 0.8188730482009504, + "grad_norm": 0.7713069831108617, + "learning_rate": 8.363512434501264e-07, + "loss": 0.3005, + "step": 18093 + }, + { + "epoch": 0.818918307309346, + "grad_norm": 0.2847719199034834, + "learning_rate": 8.359454847263293e-07, + "loss": 0.4746, + "step": 18094 + }, + { + "epoch": 0.8189635664177416, + "grad_norm": 0.552169758517861, + "learning_rate": 8.355398154752759e-07, + "loss": 0.2622, + "step": 18095 + }, + { + "epoch": 0.8190088255261372, + "grad_norm": 0.6249685361362285, + "learning_rate": 8.351342357056818e-07, + "loss": 0.2641, + "step": 18096 + }, + { + "epoch": 0.8190540846345327, + "grad_norm": 0.6003519390627243, + "learning_rate": 8.347287454262603e-07, + "loss": 0.265, + "step": 18097 + }, + { + "epoch": 0.8190993437429283, + "grad_norm": 0.5991625551143557, + "learning_rate": 8.343233446457272e-07, + "loss": 0.2519, + "step": 18098 + }, + { + "epoch": 0.8191446028513238, + "grad_norm": 0.2810411008882779, + "learning_rate": 8.339180333727909e-07, + "loss": 0.4893, + "step": 18099 + }, + { + "epoch": 0.8191898619597194, + "grad_norm": 0.5922428037797257, + "learning_rate": 8.335128116161595e-07, + "loss": 0.314, + "step": 18100 + }, + { + "epoch": 0.8192351210681149, + "grad_norm": 0.7208442527501777, + "learning_rate": 8.331076793845422e-07, + "loss": 0.2865, + "step": 18101 + }, + { + "epoch": 0.8192803801765105, + "grad_norm": 0.2829411112652835, + "learning_rate": 8.327026366866437e-07, + "loss": 0.4443, + "step": 18102 + }, + { + "epoch": 0.8193256392849061, + "grad_norm": 0.6211237231506862, + "learning_rate": 8.322976835311669e-07, + "loss": 0.292, + "step": 18103 + }, + { + "epoch": 0.8193708983933017, + "grad_norm": 0.26641329034091676, + "learning_rate": 8.318928199268117e-07, + "loss": 0.4805, + "step": 18104 + }, + { + "epoch": 0.8194161575016972, + "grad_norm": 0.5954674577374436, + "learning_rate": 8.314880458822794e-07, + "loss": 0.3213, + "step": 18105 + }, + { + "epoch": 0.8194614166100928, + "grad_norm": 0.6591766551015656, + "learning_rate": 8.310833614062652e-07, + "loss": 0.3267, + "step": 18106 + }, + { + "epoch": 0.8195066757184883, + "grad_norm": 0.634582558619486, + "learning_rate": 8.306787665074673e-07, + "loss": 0.2646, + "step": 18107 + }, + { + "epoch": 0.8195519348268839, + "grad_norm": 0.591732723888874, + "learning_rate": 8.302742611945758e-07, + "loss": 0.2812, + "step": 18108 + }, + { + "epoch": 0.8195971939352795, + "grad_norm": 0.5876345405105374, + "learning_rate": 8.298698454762854e-07, + "loss": 0.2814, + "step": 18109 + }, + { + "epoch": 0.819642453043675, + "grad_norm": 0.2784159769170307, + "learning_rate": 8.294655193612838e-07, + "loss": 0.4747, + "step": 18110 + }, + { + "epoch": 0.8196877121520706, + "grad_norm": 0.6053642942795595, + "learning_rate": 8.2906128285826e-07, + "loss": 0.3417, + "step": 18111 + }, + { + "epoch": 0.8197329712604662, + "grad_norm": 0.3103793994779556, + "learning_rate": 8.286571359758993e-07, + "loss": 0.4722, + "step": 18112 + }, + { + "epoch": 0.8197782303688618, + "grad_norm": 0.6863803379605348, + "learning_rate": 8.282530787228848e-07, + "loss": 0.2578, + "step": 18113 + }, + { + "epoch": 0.8198234894772573, + "grad_norm": 0.286499942924758, + "learning_rate": 8.278491111078984e-07, + "loss": 0.4758, + "step": 18114 + }, + { + "epoch": 0.8198687485856528, + "grad_norm": 0.28709485782608885, + "learning_rate": 8.274452331396221e-07, + "loss": 0.4866, + "step": 18115 + }, + { + "epoch": 0.8199140076940484, + "grad_norm": 0.2764893701502648, + "learning_rate": 8.270414448267333e-07, + "loss": 0.4694, + "step": 18116 + }, + { + "epoch": 0.819959266802444, + "grad_norm": 0.8317042355162554, + "learning_rate": 8.266377461779057e-07, + "loss": 0.3021, + "step": 18117 + }, + { + "epoch": 0.8200045259108396, + "grad_norm": 0.5772234597460723, + "learning_rate": 8.262341372018168e-07, + "loss": 0.2858, + "step": 18118 + }, + { + "epoch": 0.8200497850192351, + "grad_norm": 0.5982028186899865, + "learning_rate": 8.258306179071368e-07, + "loss": 0.3145, + "step": 18119 + }, + { + "epoch": 0.8200950441276307, + "grad_norm": 0.6180362368064066, + "learning_rate": 8.254271883025377e-07, + "loss": 0.2632, + "step": 18120 + }, + { + "epoch": 0.8201403032360263, + "grad_norm": 0.6745805870388583, + "learning_rate": 8.250238483966855e-07, + "loss": 0.2411, + "step": 18121 + }, + { + "epoch": 0.8201855623444219, + "grad_norm": 0.5797029407307055, + "learning_rate": 8.246205981982503e-07, + "loss": 0.3088, + "step": 18122 + }, + { + "epoch": 0.8202308214528173, + "grad_norm": 0.5830337703042072, + "learning_rate": 8.242174377158929e-07, + "loss": 0.2584, + "step": 18123 + }, + { + "epoch": 0.8202760805612129, + "grad_norm": 0.8578833069169375, + "learning_rate": 8.238143669582794e-07, + "loss": 0.348, + "step": 18124 + }, + { + "epoch": 0.8203213396696085, + "grad_norm": 0.5814887271750685, + "learning_rate": 8.234113859340687e-07, + "loss": 0.2835, + "step": 18125 + }, + { + "epoch": 0.8203665987780041, + "grad_norm": 0.994467368324847, + "learning_rate": 8.23008494651919e-07, + "loss": 0.3162, + "step": 18126 + }, + { + "epoch": 0.8204118578863996, + "grad_norm": 0.6106514026492696, + "learning_rate": 8.226056931204879e-07, + "loss": 0.2825, + "step": 18127 + }, + { + "epoch": 0.8204571169947952, + "grad_norm": 0.6112142660925025, + "learning_rate": 8.222029813484333e-07, + "loss": 0.3031, + "step": 18128 + }, + { + "epoch": 0.8205023761031908, + "grad_norm": 0.6635302954408059, + "learning_rate": 8.218003593444029e-07, + "loss": 0.3145, + "step": 18129 + }, + { + "epoch": 0.8205476352115864, + "grad_norm": 0.6091885895922927, + "learning_rate": 8.213978271170503e-07, + "loss": 0.2666, + "step": 18130 + }, + { + "epoch": 0.820592894319982, + "grad_norm": 0.6142540711638156, + "learning_rate": 8.209953846750257e-07, + "loss": 0.3235, + "step": 18131 + }, + { + "epoch": 0.8206381534283774, + "grad_norm": 0.32349620026022546, + "learning_rate": 8.205930320269762e-07, + "loss": 0.4826, + "step": 18132 + }, + { + "epoch": 0.820683412536773, + "grad_norm": 0.6565366682003055, + "learning_rate": 8.201907691815448e-07, + "loss": 0.3094, + "step": 18133 + }, + { + "epoch": 0.8207286716451686, + "grad_norm": 1.3502509981198274, + "learning_rate": 8.197885961473773e-07, + "loss": 0.3032, + "step": 18134 + }, + { + "epoch": 0.8207739307535642, + "grad_norm": 0.5785720017965967, + "learning_rate": 8.193865129331136e-07, + "loss": 0.2772, + "step": 18135 + }, + { + "epoch": 0.8208191898619597, + "grad_norm": 0.28259587502298983, + "learning_rate": 8.18984519547395e-07, + "loss": 0.4586, + "step": 18136 + }, + { + "epoch": 0.8208644489703553, + "grad_norm": 0.5966135804840258, + "learning_rate": 8.18582615998857e-07, + "loss": 0.2603, + "step": 18137 + }, + { + "epoch": 0.8209097080787509, + "grad_norm": 0.6561140381210594, + "learning_rate": 8.181808022961374e-07, + "loss": 0.3201, + "step": 18138 + }, + { + "epoch": 0.8209549671871464, + "grad_norm": 0.6647527581768201, + "learning_rate": 8.177790784478679e-07, + "loss": 0.2884, + "step": 18139 + }, + { + "epoch": 0.8210002262955419, + "grad_norm": 0.24971824884844132, + "learning_rate": 8.173774444626819e-07, + "loss": 0.4565, + "step": 18140 + }, + { + "epoch": 0.8210454854039375, + "grad_norm": 0.6438764482155982, + "learning_rate": 8.169759003492095e-07, + "loss": 0.3287, + "step": 18141 + }, + { + "epoch": 0.8210907445123331, + "grad_norm": 0.6143142139977565, + "learning_rate": 8.165744461160763e-07, + "loss": 0.3141, + "step": 18142 + }, + { + "epoch": 0.8211360036207287, + "grad_norm": 0.25061996236946416, + "learning_rate": 8.161730817719094e-07, + "loss": 0.4639, + "step": 18143 + }, + { + "epoch": 0.8211812627291243, + "grad_norm": 0.26770998240193866, + "learning_rate": 8.157718073253351e-07, + "loss": 0.461, + "step": 18144 + }, + { + "epoch": 0.8212265218375198, + "grad_norm": 0.6405285204560663, + "learning_rate": 8.153706227849734e-07, + "loss": 0.2556, + "step": 18145 + }, + { + "epoch": 0.8212717809459154, + "grad_norm": 0.637428253048071, + "learning_rate": 8.149695281594438e-07, + "loss": 0.2818, + "step": 18146 + }, + { + "epoch": 0.8213170400543109, + "grad_norm": 0.6201304150041989, + "learning_rate": 8.145685234573675e-07, + "loss": 0.3443, + "step": 18147 + }, + { + "epoch": 0.8213622991627065, + "grad_norm": 0.5821698916057503, + "learning_rate": 8.141676086873574e-07, + "loss": 0.3329, + "step": 18148 + }, + { + "epoch": 0.821407558271102, + "grad_norm": 0.2703313986797029, + "learning_rate": 8.137667838580304e-07, + "loss": 0.4701, + "step": 18149 + }, + { + "epoch": 0.8214528173794976, + "grad_norm": 0.5871374932622914, + "learning_rate": 8.13366048977997e-07, + "loss": 0.252, + "step": 18150 + }, + { + "epoch": 0.8214980764878932, + "grad_norm": 0.6248557357627881, + "learning_rate": 8.12965404055871e-07, + "loss": 0.3217, + "step": 18151 + }, + { + "epoch": 0.8215433355962888, + "grad_norm": 0.6547924784275919, + "learning_rate": 8.125648491002569e-07, + "loss": 0.2855, + "step": 18152 + }, + { + "epoch": 0.8215885947046844, + "grad_norm": 0.6527228580605702, + "learning_rate": 8.121643841197652e-07, + "loss": 0.29, + "step": 18153 + }, + { + "epoch": 0.8216338538130799, + "grad_norm": 0.6755647792905147, + "learning_rate": 8.117640091229984e-07, + "loss": 0.3293, + "step": 18154 + }, + { + "epoch": 0.8216791129214754, + "grad_norm": 0.5999022378057997, + "learning_rate": 8.11363724118559e-07, + "loss": 0.2874, + "step": 18155 + }, + { + "epoch": 0.821724372029871, + "grad_norm": 0.6007954263800855, + "learning_rate": 8.109635291150492e-07, + "loss": 0.252, + "step": 18156 + }, + { + "epoch": 0.8217696311382666, + "grad_norm": 0.7865647067064967, + "learning_rate": 8.105634241210692e-07, + "loss": 0.3278, + "step": 18157 + }, + { + "epoch": 0.8218148902466621, + "grad_norm": 0.27734227101871806, + "learning_rate": 8.101634091452121e-07, + "loss": 0.4878, + "step": 18158 + }, + { + "epoch": 0.8218601493550577, + "grad_norm": 0.5600608454366888, + "learning_rate": 8.097634841960756e-07, + "loss": 0.2536, + "step": 18159 + }, + { + "epoch": 0.8219054084634533, + "grad_norm": 0.623917531673197, + "learning_rate": 8.093636492822532e-07, + "loss": 0.3144, + "step": 18160 + }, + { + "epoch": 0.8219506675718489, + "grad_norm": 0.623452221799919, + "learning_rate": 8.089639044123354e-07, + "loss": 0.2931, + "step": 18161 + }, + { + "epoch": 0.8219959266802443, + "grad_norm": 0.6132492355170043, + "learning_rate": 8.085642495949108e-07, + "loss": 0.2827, + "step": 18162 + }, + { + "epoch": 0.8220411857886399, + "grad_norm": 0.6219717134997269, + "learning_rate": 8.081646848385671e-07, + "loss": 0.3229, + "step": 18163 + }, + { + "epoch": 0.8220864448970355, + "grad_norm": 0.5882007765149354, + "learning_rate": 8.077652101518918e-07, + "loss": 0.2871, + "step": 18164 + }, + { + "epoch": 0.8221317040054311, + "grad_norm": 0.2626698915552704, + "learning_rate": 8.073658255434658e-07, + "loss": 0.4568, + "step": 18165 + }, + { + "epoch": 0.8221769631138267, + "grad_norm": 0.8530835081708823, + "learning_rate": 8.06966531021871e-07, + "loss": 0.3057, + "step": 18166 + }, + { + "epoch": 0.8222222222222222, + "grad_norm": 0.6556701708777884, + "learning_rate": 8.065673265956886e-07, + "loss": 0.3907, + "step": 18167 + }, + { + "epoch": 0.8222674813306178, + "grad_norm": 0.27094993368097087, + "learning_rate": 8.061682122734937e-07, + "loss": 0.4701, + "step": 18168 + }, + { + "epoch": 0.8223127404390134, + "grad_norm": 0.5941525019158841, + "learning_rate": 8.057691880638651e-07, + "loss": 0.3179, + "step": 18169 + }, + { + "epoch": 0.822357999547409, + "grad_norm": 0.5982256887096105, + "learning_rate": 8.053702539753749e-07, + "loss": 0.2853, + "step": 18170 + }, + { + "epoch": 0.8224032586558044, + "grad_norm": 0.7014758040783442, + "learning_rate": 8.04971410016594e-07, + "loss": 0.2905, + "step": 18171 + }, + { + "epoch": 0.8224485177642, + "grad_norm": 0.28190956210995716, + "learning_rate": 8.045726561960931e-07, + "loss": 0.4598, + "step": 18172 + }, + { + "epoch": 0.8224937768725956, + "grad_norm": 0.6631737483533378, + "learning_rate": 8.041739925224424e-07, + "loss": 0.2901, + "step": 18173 + }, + { + "epoch": 0.8225390359809912, + "grad_norm": 0.643318251146491, + "learning_rate": 8.037754190042058e-07, + "loss": 0.2919, + "step": 18174 + }, + { + "epoch": 0.8225842950893867, + "grad_norm": 0.2627393483539942, + "learning_rate": 8.033769356499466e-07, + "loss": 0.4767, + "step": 18175 + }, + { + "epoch": 0.8226295541977823, + "grad_norm": 0.5960163959853573, + "learning_rate": 8.029785424682291e-07, + "loss": 0.2922, + "step": 18176 + }, + { + "epoch": 0.8226748133061779, + "grad_norm": 0.6325423313576644, + "learning_rate": 8.025802394676114e-07, + "loss": 0.3352, + "step": 18177 + }, + { + "epoch": 0.8227200724145735, + "grad_norm": 0.6717469644769754, + "learning_rate": 8.021820266566538e-07, + "loss": 0.3123, + "step": 18178 + }, + { + "epoch": 0.822765331522969, + "grad_norm": 0.6987405573728781, + "learning_rate": 8.017839040439113e-07, + "loss": 0.2657, + "step": 18179 + }, + { + "epoch": 0.8228105906313645, + "grad_norm": 0.6914251246915453, + "learning_rate": 8.013858716379396e-07, + "loss": 0.3038, + "step": 18180 + }, + { + "epoch": 0.8228558497397601, + "grad_norm": 0.6329902662785449, + "learning_rate": 8.009879294472894e-07, + "loss": 0.3245, + "step": 18181 + }, + { + "epoch": 0.8229011088481557, + "grad_norm": 0.6271602808615151, + "learning_rate": 8.005900774805137e-07, + "loss": 0.282, + "step": 18182 + }, + { + "epoch": 0.8229463679565513, + "grad_norm": 0.5855788353703362, + "learning_rate": 8.001923157461594e-07, + "loss": 0.2784, + "step": 18183 + }, + { + "epoch": 0.8229916270649468, + "grad_norm": 0.661459765686884, + "learning_rate": 7.997946442527726e-07, + "loss": 0.2903, + "step": 18184 + }, + { + "epoch": 0.8230368861733424, + "grad_norm": 0.6626994250988717, + "learning_rate": 7.993970630088988e-07, + "loss": 0.2791, + "step": 18185 + }, + { + "epoch": 0.823082145281738, + "grad_norm": 0.2745844389633259, + "learning_rate": 7.989995720230837e-07, + "loss": 0.4706, + "step": 18186 + }, + { + "epoch": 0.8231274043901335, + "grad_norm": 0.6205753959267856, + "learning_rate": 7.986021713038627e-07, + "loss": 0.3139, + "step": 18187 + }, + { + "epoch": 0.8231726634985291, + "grad_norm": 0.2632761372907264, + "learning_rate": 7.982048608597776e-07, + "loss": 0.4659, + "step": 18188 + }, + { + "epoch": 0.8232179226069246, + "grad_norm": 0.7485575335159796, + "learning_rate": 7.978076406993662e-07, + "loss": 0.2786, + "step": 18189 + }, + { + "epoch": 0.8232631817153202, + "grad_norm": 0.6566044462595303, + "learning_rate": 7.974105108311625e-07, + "loss": 0.2959, + "step": 18190 + }, + { + "epoch": 0.8233084408237158, + "grad_norm": 0.2937101749404086, + "learning_rate": 7.970134712636984e-07, + "loss": 0.473, + "step": 18191 + }, + { + "epoch": 0.8233536999321114, + "grad_norm": 0.6265125798422845, + "learning_rate": 7.966165220055067e-07, + "loss": 0.3067, + "step": 18192 + }, + { + "epoch": 0.8233989590405069, + "grad_norm": 0.6204538426926942, + "learning_rate": 7.96219663065117e-07, + "loss": 0.2693, + "step": 18193 + }, + { + "epoch": 0.8234442181489025, + "grad_norm": 0.6668241049027904, + "learning_rate": 7.95822894451056e-07, + "loss": 0.2908, + "step": 18194 + }, + { + "epoch": 0.823489477257298, + "grad_norm": 0.6167276572327692, + "learning_rate": 7.954262161718479e-07, + "loss": 0.2979, + "step": 18195 + }, + { + "epoch": 0.8235347363656936, + "grad_norm": 0.263679280167042, + "learning_rate": 7.950296282360181e-07, + "loss": 0.4892, + "step": 18196 + }, + { + "epoch": 0.8235799954740891, + "grad_norm": 0.7312264021619062, + "learning_rate": 7.946331306520854e-07, + "loss": 0.2942, + "step": 18197 + }, + { + "epoch": 0.8236252545824847, + "grad_norm": 0.6937632900134934, + "learning_rate": 7.942367234285725e-07, + "loss": 0.2761, + "step": 18198 + }, + { + "epoch": 0.8236705136908803, + "grad_norm": 0.6389653819189878, + "learning_rate": 7.938404065739952e-07, + "loss": 0.3461, + "step": 18199 + }, + { + "epoch": 0.8237157727992759, + "grad_norm": 0.7198943347630627, + "learning_rate": 7.934441800968684e-07, + "loss": 0.3216, + "step": 18200 + }, + { + "epoch": 0.8237610319076715, + "grad_norm": 0.9748183117086185, + "learning_rate": 7.93048044005707e-07, + "loss": 0.2941, + "step": 18201 + }, + { + "epoch": 0.823806291016067, + "grad_norm": 0.26246050734572146, + "learning_rate": 7.92651998309023e-07, + "loss": 0.4613, + "step": 18202 + }, + { + "epoch": 0.8238515501244625, + "grad_norm": 0.2857559137902954, + "learning_rate": 7.922560430153259e-07, + "loss": 0.471, + "step": 18203 + }, + { + "epoch": 0.8238968092328581, + "grad_norm": 0.6119988564709138, + "learning_rate": 7.918601781331225e-07, + "loss": 0.2734, + "step": 18204 + }, + { + "epoch": 0.8239420683412537, + "grad_norm": 0.6360295830543973, + "learning_rate": 7.914644036709202e-07, + "loss": 0.3015, + "step": 18205 + }, + { + "epoch": 0.8239873274496492, + "grad_norm": 0.5614432284169272, + "learning_rate": 7.910687196372214e-07, + "loss": 0.3613, + "step": 18206 + }, + { + "epoch": 0.8240325865580448, + "grad_norm": 0.28829720900091843, + "learning_rate": 7.906731260405304e-07, + "loss": 0.4515, + "step": 18207 + }, + { + "epoch": 0.8240778456664404, + "grad_norm": 0.5525539167791396, + "learning_rate": 7.902776228893444e-07, + "loss": 0.2823, + "step": 18208 + }, + { + "epoch": 0.824123104774836, + "grad_norm": 0.6762310852060991, + "learning_rate": 7.898822101921644e-07, + "loss": 0.3265, + "step": 18209 + }, + { + "epoch": 0.8241683638832314, + "grad_norm": 0.6259275549416197, + "learning_rate": 7.894868879574847e-07, + "loss": 0.2474, + "step": 18210 + }, + { + "epoch": 0.824213622991627, + "grad_norm": 0.6182305289073031, + "learning_rate": 7.890916561938006e-07, + "loss": 0.2945, + "step": 18211 + }, + { + "epoch": 0.8242588821000226, + "grad_norm": 0.634163999330956, + "learning_rate": 7.886965149096044e-07, + "loss": 0.3004, + "step": 18212 + }, + { + "epoch": 0.8243041412084182, + "grad_norm": 0.613473307222863, + "learning_rate": 7.883014641133846e-07, + "loss": 0.2223, + "step": 18213 + }, + { + "epoch": 0.8243494003168138, + "grad_norm": 0.590548117790914, + "learning_rate": 7.879065038136314e-07, + "loss": 0.2979, + "step": 18214 + }, + { + "epoch": 0.8243946594252093, + "grad_norm": 0.6453971910214398, + "learning_rate": 7.875116340188333e-07, + "loss": 0.3004, + "step": 18215 + }, + { + "epoch": 0.8244399185336049, + "grad_norm": 0.2649864985329331, + "learning_rate": 7.871168547374697e-07, + "loss": 0.4756, + "step": 18216 + }, + { + "epoch": 0.8244851776420005, + "grad_norm": 0.6405631892725506, + "learning_rate": 7.867221659780267e-07, + "loss": 0.2831, + "step": 18217 + }, + { + "epoch": 0.8245304367503961, + "grad_norm": 0.6321704014564995, + "learning_rate": 7.863275677489851e-07, + "loss": 0.308, + "step": 18218 + }, + { + "epoch": 0.8245756958587915, + "grad_norm": 0.6509092342644126, + "learning_rate": 7.859330600588228e-07, + "loss": 0.2833, + "step": 18219 + }, + { + "epoch": 0.8246209549671871, + "grad_norm": 0.6273536804779368, + "learning_rate": 7.85538642916015e-07, + "loss": 0.2995, + "step": 18220 + }, + { + "epoch": 0.8246662140755827, + "grad_norm": 0.587258924680402, + "learning_rate": 7.851443163290385e-07, + "loss": 0.2725, + "step": 18221 + }, + { + "epoch": 0.8247114731839783, + "grad_norm": 0.6799857601085152, + "learning_rate": 7.847500803063668e-07, + "loss": 0.2936, + "step": 18222 + }, + { + "epoch": 0.8247567322923739, + "grad_norm": 0.2675036031094028, + "learning_rate": 7.843559348564694e-07, + "loss": 0.458, + "step": 18223 + }, + { + "epoch": 0.8248019914007694, + "grad_norm": 0.6276520182225253, + "learning_rate": 7.839618799878146e-07, + "loss": 0.3493, + "step": 18224 + }, + { + "epoch": 0.824847250509165, + "grad_norm": 0.6380138615809682, + "learning_rate": 7.835679157088716e-07, + "loss": 0.3136, + "step": 18225 + }, + { + "epoch": 0.8248925096175606, + "grad_norm": 0.5879257908734836, + "learning_rate": 7.831740420281031e-07, + "loss": 0.2854, + "step": 18226 + }, + { + "epoch": 0.8249377687259561, + "grad_norm": 0.6046994935357449, + "learning_rate": 7.827802589539751e-07, + "loss": 0.2752, + "step": 18227 + }, + { + "epoch": 0.8249830278343516, + "grad_norm": 0.2677090660018164, + "learning_rate": 7.823865664949464e-07, + "loss": 0.4613, + "step": 18228 + }, + { + "epoch": 0.8250282869427472, + "grad_norm": 0.672067112995326, + "learning_rate": 7.819929646594765e-07, + "loss": 0.2667, + "step": 18229 + }, + { + "epoch": 0.8250735460511428, + "grad_norm": 0.6228260122442586, + "learning_rate": 7.815994534560228e-07, + "loss": 0.2792, + "step": 18230 + }, + { + "epoch": 0.8251188051595384, + "grad_norm": 0.6467169822046223, + "learning_rate": 7.812060328930421e-07, + "loss": 0.3122, + "step": 18231 + }, + { + "epoch": 0.8251640642679339, + "grad_norm": 0.5694912430159714, + "learning_rate": 7.808127029789869e-07, + "loss": 0.2974, + "step": 18232 + }, + { + "epoch": 0.8252093233763295, + "grad_norm": 0.2691041012160917, + "learning_rate": 7.804194637223073e-07, + "loss": 0.4857, + "step": 18233 + }, + { + "epoch": 0.825254582484725, + "grad_norm": 0.6448056928408912, + "learning_rate": 7.800263151314536e-07, + "loss": 0.2878, + "step": 18234 + }, + { + "epoch": 0.8252998415931206, + "grad_norm": 0.5742698318047919, + "learning_rate": 7.796332572148752e-07, + "loss": 0.2713, + "step": 18235 + }, + { + "epoch": 0.8253451007015162, + "grad_norm": 0.6351411007375356, + "learning_rate": 7.792402899810164e-07, + "loss": 0.2851, + "step": 18236 + }, + { + "epoch": 0.8253903598099117, + "grad_norm": 0.6244020735748892, + "learning_rate": 7.788474134383195e-07, + "loss": 0.2896, + "step": 18237 + }, + { + "epoch": 0.8254356189183073, + "grad_norm": 0.27950882607118965, + "learning_rate": 7.784546275952281e-07, + "loss": 0.4723, + "step": 18238 + }, + { + "epoch": 0.8254808780267029, + "grad_norm": 0.622105002869538, + "learning_rate": 7.780619324601807e-07, + "loss": 0.2944, + "step": 18239 + }, + { + "epoch": 0.8255261371350985, + "grad_norm": 0.6414907773269223, + "learning_rate": 7.776693280416164e-07, + "loss": 0.2871, + "step": 18240 + }, + { + "epoch": 0.825571396243494, + "grad_norm": 0.9654656942328771, + "learning_rate": 7.772768143479703e-07, + "loss": 0.3158, + "step": 18241 + }, + { + "epoch": 0.8256166553518896, + "grad_norm": 0.7863181674165229, + "learning_rate": 7.768843913876756e-07, + "loss": 0.3062, + "step": 18242 + }, + { + "epoch": 0.8256619144602851, + "grad_norm": 0.5856414481972946, + "learning_rate": 7.76492059169165e-07, + "loss": 0.2808, + "step": 18243 + }, + { + "epoch": 0.8257071735686807, + "grad_norm": 0.7651089549539092, + "learning_rate": 7.760998177008694e-07, + "loss": 0.2964, + "step": 18244 + }, + { + "epoch": 0.8257524326770762, + "grad_norm": 0.987347191861977, + "learning_rate": 7.757076669912162e-07, + "loss": 0.3085, + "step": 18245 + }, + { + "epoch": 0.8257976917854718, + "grad_norm": 0.6591369804984707, + "learning_rate": 7.7531560704863e-07, + "loss": 0.3031, + "step": 18246 + }, + { + "epoch": 0.8258429508938674, + "grad_norm": 0.5720351463462361, + "learning_rate": 7.749236378815372e-07, + "loss": 0.2856, + "step": 18247 + }, + { + "epoch": 0.825888210002263, + "grad_norm": 0.6792636477123376, + "learning_rate": 7.745317594983598e-07, + "loss": 0.3181, + "step": 18248 + }, + { + "epoch": 0.8259334691106586, + "grad_norm": 0.2756247302938806, + "learning_rate": 7.741399719075154e-07, + "loss": 0.4846, + "step": 18249 + }, + { + "epoch": 0.825978728219054, + "grad_norm": 0.6683871282988517, + "learning_rate": 7.737482751174247e-07, + "loss": 0.3261, + "step": 18250 + }, + { + "epoch": 0.8260239873274496, + "grad_norm": 0.5851850370664479, + "learning_rate": 7.733566691365047e-07, + "loss": 0.2598, + "step": 18251 + }, + { + "epoch": 0.8260692464358452, + "grad_norm": 0.6077307922996963, + "learning_rate": 7.729651539731686e-07, + "loss": 0.3272, + "step": 18252 + }, + { + "epoch": 0.8261145055442408, + "grad_norm": 0.6496080829350848, + "learning_rate": 7.725737296358283e-07, + "loss": 0.3178, + "step": 18253 + }, + { + "epoch": 0.8261597646526363, + "grad_norm": 0.6895525446763424, + "learning_rate": 7.721823961328955e-07, + "loss": 0.3069, + "step": 18254 + }, + { + "epoch": 0.8262050237610319, + "grad_norm": 0.6007041300072309, + "learning_rate": 7.717911534727778e-07, + "loss": 0.3144, + "step": 18255 + }, + { + "epoch": 0.8262502828694275, + "grad_norm": 0.6795520769191841, + "learning_rate": 7.714000016638829e-07, + "loss": 0.3323, + "step": 18256 + }, + { + "epoch": 0.8262955419778231, + "grad_norm": 0.5915455209682717, + "learning_rate": 7.710089407146154e-07, + "loss": 0.2825, + "step": 18257 + }, + { + "epoch": 0.8263408010862185, + "grad_norm": 0.24806576764877158, + "learning_rate": 7.706179706333755e-07, + "loss": 0.4318, + "step": 18258 + }, + { + "epoch": 0.8263860601946141, + "grad_norm": 0.6538030184224267, + "learning_rate": 7.702270914285664e-07, + "loss": 0.2767, + "step": 18259 + }, + { + "epoch": 0.8264313193030097, + "grad_norm": 0.6195635754908934, + "learning_rate": 7.698363031085871e-07, + "loss": 0.2771, + "step": 18260 + }, + { + "epoch": 0.8264765784114053, + "grad_norm": 0.6598416768005659, + "learning_rate": 7.694456056818339e-07, + "loss": 0.2988, + "step": 18261 + }, + { + "epoch": 0.8265218375198009, + "grad_norm": 0.28054173240776076, + "learning_rate": 7.690549991567004e-07, + "loss": 0.4652, + "step": 18262 + }, + { + "epoch": 0.8265670966281964, + "grad_norm": 0.6181440261644516, + "learning_rate": 7.686644835415808e-07, + "loss": 0.2588, + "step": 18263 + }, + { + "epoch": 0.826612355736592, + "grad_norm": 0.6146368913346695, + "learning_rate": 7.682740588448667e-07, + "loss": 0.3479, + "step": 18264 + }, + { + "epoch": 0.8266576148449876, + "grad_norm": 0.25630777827620876, + "learning_rate": 7.67883725074946e-07, + "loss": 0.4507, + "step": 18265 + }, + { + "epoch": 0.8267028739533832, + "grad_norm": 0.5530316672521514, + "learning_rate": 7.674934822402052e-07, + "loss": 0.3121, + "step": 18266 + }, + { + "epoch": 0.8267481330617786, + "grad_norm": 1.172841472236167, + "learning_rate": 7.671033303490321e-07, + "loss": 0.3163, + "step": 18267 + }, + { + "epoch": 0.8267933921701742, + "grad_norm": 0.5813172977654625, + "learning_rate": 7.667132694098061e-07, + "loss": 0.3352, + "step": 18268 + }, + { + "epoch": 0.8268386512785698, + "grad_norm": 0.678137676491045, + "learning_rate": 7.663232994309122e-07, + "loss": 0.281, + "step": 18269 + }, + { + "epoch": 0.8268839103869654, + "grad_norm": 0.6224508351144359, + "learning_rate": 7.659334204207275e-07, + "loss": 0.3451, + "step": 18270 + }, + { + "epoch": 0.826929169495361, + "grad_norm": 0.6507117854871701, + "learning_rate": 7.655436323876286e-07, + "loss": 0.2906, + "step": 18271 + }, + { + "epoch": 0.8269744286037565, + "grad_norm": 0.6180031129751423, + "learning_rate": 7.651539353399917e-07, + "loss": 0.3121, + "step": 18272 + }, + { + "epoch": 0.8270196877121521, + "grad_norm": 0.7227123766917248, + "learning_rate": 7.647643292861917e-07, + "loss": 0.3068, + "step": 18273 + }, + { + "epoch": 0.8270649468205477, + "grad_norm": 0.6440853563337421, + "learning_rate": 7.643748142345985e-07, + "loss": 0.2751, + "step": 18274 + }, + { + "epoch": 0.8271102059289432, + "grad_norm": 0.6618308941143313, + "learning_rate": 7.639853901935812e-07, + "loss": 0.2505, + "step": 18275 + }, + { + "epoch": 0.8271554650373387, + "grad_norm": 0.6067429456867909, + "learning_rate": 7.635960571715073e-07, + "loss": 0.2894, + "step": 18276 + }, + { + "epoch": 0.8272007241457343, + "grad_norm": 0.6330449486181741, + "learning_rate": 7.632068151767447e-07, + "loss": 0.2784, + "step": 18277 + }, + { + "epoch": 0.8272459832541299, + "grad_norm": 0.6117914158146655, + "learning_rate": 7.628176642176549e-07, + "loss": 0.2569, + "step": 18278 + }, + { + "epoch": 0.8272912423625255, + "grad_norm": 0.5901946253104174, + "learning_rate": 7.624286043025991e-07, + "loss": 0.2808, + "step": 18279 + }, + { + "epoch": 0.827336501470921, + "grad_norm": 0.6189254349398654, + "learning_rate": 7.62039635439939e-07, + "loss": 0.3258, + "step": 18280 + }, + { + "epoch": 0.8273817605793166, + "grad_norm": 0.5864574378011577, + "learning_rate": 7.616507576380311e-07, + "loss": 0.2848, + "step": 18281 + }, + { + "epoch": 0.8274270196877122, + "grad_norm": 0.6289188315276548, + "learning_rate": 7.612619709052305e-07, + "loss": 0.3302, + "step": 18282 + }, + { + "epoch": 0.8274722787961077, + "grad_norm": 0.6599194063754822, + "learning_rate": 7.608732752498926e-07, + "loss": 0.2773, + "step": 18283 + }, + { + "epoch": 0.8275175379045033, + "grad_norm": 0.7143180101946018, + "learning_rate": 7.604846706803676e-07, + "loss": 0.306, + "step": 18284 + }, + { + "epoch": 0.8275627970128988, + "grad_norm": 0.5686795089354336, + "learning_rate": 7.600961572050076e-07, + "loss": 0.2777, + "step": 18285 + }, + { + "epoch": 0.8276080561212944, + "grad_norm": 0.5544043684290126, + "learning_rate": 7.59707734832159e-07, + "loss": 0.3026, + "step": 18286 + }, + { + "epoch": 0.82765331522969, + "grad_norm": 0.26360211053535226, + "learning_rate": 7.593194035701667e-07, + "loss": 0.4679, + "step": 18287 + }, + { + "epoch": 0.8276985743380856, + "grad_norm": 0.6223757948396181, + "learning_rate": 7.589311634273766e-07, + "loss": 0.3124, + "step": 18288 + }, + { + "epoch": 0.8277438334464811, + "grad_norm": 0.7011240909472778, + "learning_rate": 7.585430144121319e-07, + "loss": 0.2925, + "step": 18289 + }, + { + "epoch": 0.8277890925548766, + "grad_norm": 0.2836839740412367, + "learning_rate": 7.581549565327706e-07, + "loss": 0.4616, + "step": 18290 + }, + { + "epoch": 0.8278343516632722, + "grad_norm": 0.5970258776833657, + "learning_rate": 7.577669897976303e-07, + "loss": 0.2795, + "step": 18291 + }, + { + "epoch": 0.8278796107716678, + "grad_norm": 0.7240878255723272, + "learning_rate": 7.573791142150488e-07, + "loss": 0.2582, + "step": 18292 + }, + { + "epoch": 0.8279248698800633, + "grad_norm": 0.2803187872537055, + "learning_rate": 7.569913297933606e-07, + "loss": 0.4656, + "step": 18293 + }, + { + "epoch": 0.8279701289884589, + "grad_norm": 0.6074138860738469, + "learning_rate": 7.566036365408974e-07, + "loss": 0.3276, + "step": 18294 + }, + { + "epoch": 0.8280153880968545, + "grad_norm": 0.6355019438669003, + "learning_rate": 7.562160344659886e-07, + "loss": 0.2956, + "step": 18295 + }, + { + "epoch": 0.8280606472052501, + "grad_norm": 0.6349727647123978, + "learning_rate": 7.558285235769647e-07, + "loss": 0.3196, + "step": 18296 + }, + { + "epoch": 0.8281059063136457, + "grad_norm": 0.24541544171528995, + "learning_rate": 7.55441103882149e-07, + "loss": 0.4601, + "step": 18297 + }, + { + "epoch": 0.8281511654220411, + "grad_norm": 0.5882825549019338, + "learning_rate": 7.550537753898696e-07, + "loss": 0.2831, + "step": 18298 + }, + { + "epoch": 0.8281964245304367, + "grad_norm": 0.6734086488781459, + "learning_rate": 7.546665381084467e-07, + "loss": 0.2905, + "step": 18299 + }, + { + "epoch": 0.8282416836388323, + "grad_norm": 0.6355215550001918, + "learning_rate": 7.542793920462005e-07, + "loss": 0.2904, + "step": 18300 + }, + { + "epoch": 0.8282869427472279, + "grad_norm": 0.6804674889052729, + "learning_rate": 7.538923372114504e-07, + "loss": 0.3429, + "step": 18301 + }, + { + "epoch": 0.8283322018556234, + "grad_norm": 0.6514486631880623, + "learning_rate": 7.535053736125142e-07, + "loss": 0.296, + "step": 18302 + }, + { + "epoch": 0.828377460964019, + "grad_norm": 0.2913101001381168, + "learning_rate": 7.531185012577052e-07, + "loss": 0.4688, + "step": 18303 + }, + { + "epoch": 0.8284227200724146, + "grad_norm": 0.6438046832602575, + "learning_rate": 7.527317201553358e-07, + "loss": 0.2999, + "step": 18304 + }, + { + "epoch": 0.8284679791808102, + "grad_norm": 0.6767431632880623, + "learning_rate": 7.523450303137164e-07, + "loss": 0.2919, + "step": 18305 + }, + { + "epoch": 0.8285132382892058, + "grad_norm": 0.5343222301415081, + "learning_rate": 7.519584317411582e-07, + "loss": 0.2844, + "step": 18306 + }, + { + "epoch": 0.8285584973976012, + "grad_norm": 0.2829817591705852, + "learning_rate": 7.515719244459668e-07, + "loss": 0.4714, + "step": 18307 + }, + { + "epoch": 0.8286037565059968, + "grad_norm": 0.6824523978433078, + "learning_rate": 7.51185508436445e-07, + "loss": 0.3009, + "step": 18308 + }, + { + "epoch": 0.8286490156143924, + "grad_norm": 0.5993978929754125, + "learning_rate": 7.507991837208989e-07, + "loss": 0.2714, + "step": 18309 + }, + { + "epoch": 0.828694274722788, + "grad_norm": 0.5934461993346714, + "learning_rate": 7.504129503076263e-07, + "loss": 0.2783, + "step": 18310 + }, + { + "epoch": 0.8287395338311835, + "grad_norm": 0.6268985757761065, + "learning_rate": 7.500268082049294e-07, + "loss": 0.3129, + "step": 18311 + }, + { + "epoch": 0.8287847929395791, + "grad_norm": 0.630009354553089, + "learning_rate": 7.496407574211034e-07, + "loss": 0.2818, + "step": 18312 + }, + { + "epoch": 0.8288300520479747, + "grad_norm": 0.5945913692915438, + "learning_rate": 7.492547979644421e-07, + "loss": 0.2825, + "step": 18313 + }, + { + "epoch": 0.8288753111563703, + "grad_norm": 0.6051115804362879, + "learning_rate": 7.488689298432406e-07, + "loss": 0.2916, + "step": 18314 + }, + { + "epoch": 0.8289205702647657, + "grad_norm": 0.6861148878623919, + "learning_rate": 7.484831530657916e-07, + "loss": 0.3228, + "step": 18315 + }, + { + "epoch": 0.8289658293731613, + "grad_norm": 2.0859298361444627, + "learning_rate": 7.480974676403796e-07, + "loss": 0.2573, + "step": 18316 + }, + { + "epoch": 0.8290110884815569, + "grad_norm": 0.6780884526798012, + "learning_rate": 7.477118735752942e-07, + "loss": 0.3188, + "step": 18317 + }, + { + "epoch": 0.8290563475899525, + "grad_norm": 0.6263272543005641, + "learning_rate": 7.47326370878822e-07, + "loss": 0.2828, + "step": 18318 + }, + { + "epoch": 0.8291016066983481, + "grad_norm": 0.27210140659531634, + "learning_rate": 7.469409595592453e-07, + "loss": 0.4676, + "step": 18319 + }, + { + "epoch": 0.8291468658067436, + "grad_norm": 0.27514811354097995, + "learning_rate": 7.465556396248436e-07, + "loss": 0.4613, + "step": 18320 + }, + { + "epoch": 0.8291921249151392, + "grad_norm": 0.6326532116835614, + "learning_rate": 7.461704110838974e-07, + "loss": 0.2831, + "step": 18321 + }, + { + "epoch": 0.8292373840235348, + "grad_norm": 0.5798521668499196, + "learning_rate": 7.457852739446864e-07, + "loss": 0.2694, + "step": 18322 + }, + { + "epoch": 0.8292826431319303, + "grad_norm": 0.6280232175895027, + "learning_rate": 7.454002282154838e-07, + "loss": 0.3344, + "step": 18323 + }, + { + "epoch": 0.8293279022403258, + "grad_norm": 0.6556717243167894, + "learning_rate": 7.450152739045618e-07, + "loss": 0.2764, + "step": 18324 + }, + { + "epoch": 0.8293731613487214, + "grad_norm": 0.6230288274977733, + "learning_rate": 7.446304110201947e-07, + "loss": 0.2853, + "step": 18325 + }, + { + "epoch": 0.829418420457117, + "grad_norm": 1.2027587348251012, + "learning_rate": 7.442456395706493e-07, + "loss": 0.3202, + "step": 18326 + }, + { + "epoch": 0.8294636795655126, + "grad_norm": 0.5805160457644685, + "learning_rate": 7.43860959564196e-07, + "loss": 0.273, + "step": 18327 + }, + { + "epoch": 0.8295089386739081, + "grad_norm": 0.6256624824026773, + "learning_rate": 7.434763710090991e-07, + "loss": 0.2808, + "step": 18328 + }, + { + "epoch": 0.8295541977823037, + "grad_norm": 0.2540345517953192, + "learning_rate": 7.430918739136206e-07, + "loss": 0.4638, + "step": 18329 + }, + { + "epoch": 0.8295994568906992, + "grad_norm": 0.627344755177281, + "learning_rate": 7.427074682860242e-07, + "loss": 0.3041, + "step": 18330 + }, + { + "epoch": 0.8296447159990948, + "grad_norm": 0.5883979495725812, + "learning_rate": 7.423231541345694e-07, + "loss": 0.3091, + "step": 18331 + }, + { + "epoch": 0.8296899751074904, + "grad_norm": 0.6278410167294457, + "learning_rate": 7.41938931467514e-07, + "loss": 0.2699, + "step": 18332 + }, + { + "epoch": 0.8297352342158859, + "grad_norm": 0.2937449638440562, + "learning_rate": 7.415548002931122e-07, + "loss": 0.4733, + "step": 18333 + }, + { + "epoch": 0.8297804933242815, + "grad_norm": 0.2606301138768154, + "learning_rate": 7.411707606196189e-07, + "loss": 0.4704, + "step": 18334 + }, + { + "epoch": 0.8298257524326771, + "grad_norm": 0.6100340455285485, + "learning_rate": 7.40786812455287e-07, + "loss": 0.2954, + "step": 18335 + }, + { + "epoch": 0.8298710115410727, + "grad_norm": 0.2608413139470748, + "learning_rate": 7.404029558083653e-07, + "loss": 0.4779, + "step": 18336 + }, + { + "epoch": 0.8299162706494682, + "grad_norm": 0.6010332039258852, + "learning_rate": 7.400191906871007e-07, + "loss": 0.2791, + "step": 18337 + }, + { + "epoch": 0.8299615297578637, + "grad_norm": 0.6136009454379556, + "learning_rate": 7.396355170997411e-07, + "loss": 0.2787, + "step": 18338 + }, + { + "epoch": 0.8300067888662593, + "grad_norm": 0.6287725134443892, + "learning_rate": 7.392519350545286e-07, + "loss": 0.2675, + "step": 18339 + }, + { + "epoch": 0.8300520479746549, + "grad_norm": 0.6205791387005112, + "learning_rate": 7.388684445597072e-07, + "loss": 0.2872, + "step": 18340 + }, + { + "epoch": 0.8300973070830505, + "grad_norm": 0.6596207022524252, + "learning_rate": 7.384850456235154e-07, + "loss": 0.289, + "step": 18341 + }, + { + "epoch": 0.830142566191446, + "grad_norm": 0.6058336742195085, + "learning_rate": 7.38101738254191e-07, + "loss": 0.2587, + "step": 18342 + }, + { + "epoch": 0.8301878252998416, + "grad_norm": 0.6265365425373657, + "learning_rate": 7.377185224599709e-07, + "loss": 0.308, + "step": 18343 + }, + { + "epoch": 0.8302330844082372, + "grad_norm": 0.6376954651218508, + "learning_rate": 7.373353982490916e-07, + "loss": 0.3419, + "step": 18344 + }, + { + "epoch": 0.8302783435166328, + "grad_norm": 0.6332723305238434, + "learning_rate": 7.369523656297805e-07, + "loss": 0.2814, + "step": 18345 + }, + { + "epoch": 0.8303236026250282, + "grad_norm": 0.6078735243851932, + "learning_rate": 7.3656942461027e-07, + "loss": 0.2584, + "step": 18346 + }, + { + "epoch": 0.8303688617334238, + "grad_norm": 0.619477069126535, + "learning_rate": 7.361865751987879e-07, + "loss": 0.2874, + "step": 18347 + }, + { + "epoch": 0.8304141208418194, + "grad_norm": 0.5551233601014256, + "learning_rate": 7.358038174035642e-07, + "loss": 0.2428, + "step": 18348 + }, + { + "epoch": 0.830459379950215, + "grad_norm": 0.6291363041257584, + "learning_rate": 7.354211512328169e-07, + "loss": 0.3064, + "step": 18349 + }, + { + "epoch": 0.8305046390586105, + "grad_norm": 0.5688927307438404, + "learning_rate": 7.350385766947721e-07, + "loss": 0.2716, + "step": 18350 + }, + { + "epoch": 0.8305498981670061, + "grad_norm": 0.2701299884443355, + "learning_rate": 7.346560937976499e-07, + "loss": 0.4701, + "step": 18351 + }, + { + "epoch": 0.8305951572754017, + "grad_norm": 0.5826385085192366, + "learning_rate": 7.342737025496688e-07, + "loss": 0.3139, + "step": 18352 + }, + { + "epoch": 0.8306404163837973, + "grad_norm": 0.5941275237233942, + "learning_rate": 7.338914029590432e-07, + "loss": 0.3124, + "step": 18353 + }, + { + "epoch": 0.8306856754921929, + "grad_norm": 0.661803715012073, + "learning_rate": 7.335091950339901e-07, + "loss": 0.2959, + "step": 18354 + }, + { + "epoch": 0.8307309346005883, + "grad_norm": 0.5813518898375629, + "learning_rate": 7.3312707878272e-07, + "loss": 0.2954, + "step": 18355 + }, + { + "epoch": 0.8307761937089839, + "grad_norm": 0.6580523114531696, + "learning_rate": 7.327450542134457e-07, + "loss": 0.3009, + "step": 18356 + }, + { + "epoch": 0.8308214528173795, + "grad_norm": 0.6537994532148272, + "learning_rate": 7.323631213343735e-07, + "loss": 0.3442, + "step": 18357 + }, + { + "epoch": 0.8308667119257751, + "grad_norm": 0.26140961085679265, + "learning_rate": 7.319812801537101e-07, + "loss": 0.4757, + "step": 18358 + }, + { + "epoch": 0.8309119710341706, + "grad_norm": 0.2807159183875202, + "learning_rate": 7.315995306796608e-07, + "loss": 0.4598, + "step": 18359 + }, + { + "epoch": 0.8309572301425662, + "grad_norm": 0.635033300667888, + "learning_rate": 7.312178729204294e-07, + "loss": 0.2972, + "step": 18360 + }, + { + "epoch": 0.8310024892509618, + "grad_norm": 0.2736155481440812, + "learning_rate": 7.30836306884215e-07, + "loss": 0.4711, + "step": 18361 + }, + { + "epoch": 0.8310477483593574, + "grad_norm": 0.6417947220928917, + "learning_rate": 7.304548325792154e-07, + "loss": 0.2794, + "step": 18362 + }, + { + "epoch": 0.8310930074677528, + "grad_norm": 0.5762612450271447, + "learning_rate": 7.300734500136291e-07, + "loss": 0.3218, + "step": 18363 + }, + { + "epoch": 0.8311382665761484, + "grad_norm": 0.2596337538859704, + "learning_rate": 7.296921591956513e-07, + "loss": 0.457, + "step": 18364 + }, + { + "epoch": 0.831183525684544, + "grad_norm": 0.5954486161827727, + "learning_rate": 7.293109601334735e-07, + "loss": 0.308, + "step": 18365 + }, + { + "epoch": 0.8312287847929396, + "grad_norm": 0.6269519114385219, + "learning_rate": 7.289298528352857e-07, + "loss": 0.2823, + "step": 18366 + }, + { + "epoch": 0.8312740439013352, + "grad_norm": 0.518563367356704, + "learning_rate": 7.285488373092792e-07, + "loss": 0.2188, + "step": 18367 + }, + { + "epoch": 0.8313193030097307, + "grad_norm": 0.6214800550202545, + "learning_rate": 7.281679135636377e-07, + "loss": 0.346, + "step": 18368 + }, + { + "epoch": 0.8313645621181263, + "grad_norm": 0.5959904463385176, + "learning_rate": 7.27787081606549e-07, + "loss": 0.2907, + "step": 18369 + }, + { + "epoch": 0.8314098212265219, + "grad_norm": 0.6622710687024219, + "learning_rate": 7.274063414461952e-07, + "loss": 0.263, + "step": 18370 + }, + { + "epoch": 0.8314550803349174, + "grad_norm": 0.5778474098970238, + "learning_rate": 7.270256930907555e-07, + "loss": 0.3058, + "step": 18371 + }, + { + "epoch": 0.8315003394433129, + "grad_norm": 0.5545792197697489, + "learning_rate": 7.266451365484106e-07, + "loss": 0.274, + "step": 18372 + }, + { + "epoch": 0.8315455985517085, + "grad_norm": 0.8707822719027274, + "learning_rate": 7.262646718273392e-07, + "loss": 0.3256, + "step": 18373 + }, + { + "epoch": 0.8315908576601041, + "grad_norm": 0.6130864604779631, + "learning_rate": 7.258842989357118e-07, + "loss": 0.3162, + "step": 18374 + }, + { + "epoch": 0.8316361167684997, + "grad_norm": 0.6267185140516509, + "learning_rate": 7.255040178817035e-07, + "loss": 0.3024, + "step": 18375 + }, + { + "epoch": 0.8316813758768953, + "grad_norm": 0.2661307336173574, + "learning_rate": 7.251238286734863e-07, + "loss": 0.4653, + "step": 18376 + }, + { + "epoch": 0.8317266349852908, + "grad_norm": 0.5452519945094221, + "learning_rate": 7.247437313192307e-07, + "loss": 0.2627, + "step": 18377 + }, + { + "epoch": 0.8317718940936863, + "grad_norm": 0.597310206470709, + "learning_rate": 7.243637258270996e-07, + "loss": 0.332, + "step": 18378 + }, + { + "epoch": 0.8318171532020819, + "grad_norm": 0.6262737359671798, + "learning_rate": 7.239838122052612e-07, + "loss": 0.2937, + "step": 18379 + }, + { + "epoch": 0.8318624123104775, + "grad_norm": 0.2774610859586583, + "learning_rate": 7.23603990461878e-07, + "loss": 0.4686, + "step": 18380 + }, + { + "epoch": 0.831907671418873, + "grad_norm": 0.6236227179505209, + "learning_rate": 7.232242606051115e-07, + "loss": 0.2775, + "step": 18381 + }, + { + "epoch": 0.8319529305272686, + "grad_norm": 0.7734252543559725, + "learning_rate": 7.228446226431196e-07, + "loss": 0.2429, + "step": 18382 + }, + { + "epoch": 0.8319981896356642, + "grad_norm": 0.5598087332181514, + "learning_rate": 7.224650765840613e-07, + "loss": 0.2224, + "step": 18383 + }, + { + "epoch": 0.8320434487440598, + "grad_norm": 0.2714013413523971, + "learning_rate": 7.2208562243609e-07, + "loss": 0.4499, + "step": 18384 + }, + { + "epoch": 0.8320887078524553, + "grad_norm": 0.627473107254701, + "learning_rate": 7.21706260207361e-07, + "loss": 0.2592, + "step": 18385 + }, + { + "epoch": 0.8321339669608508, + "grad_norm": 0.5923881783519446, + "learning_rate": 7.213269899060249e-07, + "loss": 0.2811, + "step": 18386 + }, + { + "epoch": 0.8321792260692464, + "grad_norm": 0.26871978050997436, + "learning_rate": 7.209478115402302e-07, + "loss": 0.4794, + "step": 18387 + }, + { + "epoch": 0.832224485177642, + "grad_norm": 0.6224968148808394, + "learning_rate": 7.205687251181242e-07, + "loss": 0.3104, + "step": 18388 + }, + { + "epoch": 0.8322697442860376, + "grad_norm": 0.29372753562995985, + "learning_rate": 7.201897306478544e-07, + "loss": 0.4785, + "step": 18389 + }, + { + "epoch": 0.8323150033944331, + "grad_norm": 0.5670068481317462, + "learning_rate": 7.198108281375627e-07, + "loss": 0.2776, + "step": 18390 + }, + { + "epoch": 0.8323602625028287, + "grad_norm": 0.633384202144468, + "learning_rate": 7.194320175953901e-07, + "loss": 0.319, + "step": 18391 + }, + { + "epoch": 0.8324055216112243, + "grad_norm": 0.6352015901863839, + "learning_rate": 7.190532990294762e-07, + "loss": 0.3073, + "step": 18392 + }, + { + "epoch": 0.8324507807196199, + "grad_norm": 0.6621876582354893, + "learning_rate": 7.186746724479599e-07, + "loss": 0.3043, + "step": 18393 + }, + { + "epoch": 0.8324960398280153, + "grad_norm": 1.1329216180159083, + "learning_rate": 7.182961378589765e-07, + "loss": 0.2757, + "step": 18394 + }, + { + "epoch": 0.8325412989364109, + "grad_norm": 0.6590145185584861, + "learning_rate": 7.179176952706574e-07, + "loss": 0.2705, + "step": 18395 + }, + { + "epoch": 0.8325865580448065, + "grad_norm": 0.710785836852594, + "learning_rate": 7.175393446911366e-07, + "loss": 0.3282, + "step": 18396 + }, + { + "epoch": 0.8326318171532021, + "grad_norm": 0.5993552009304864, + "learning_rate": 7.171610861285417e-07, + "loss": 0.3095, + "step": 18397 + }, + { + "epoch": 0.8326770762615976, + "grad_norm": 0.5895560821888629, + "learning_rate": 7.167829195910026e-07, + "loss": 0.26, + "step": 18398 + }, + { + "epoch": 0.8327223353699932, + "grad_norm": 0.7544662890759368, + "learning_rate": 7.164048450866435e-07, + "loss": 0.2786, + "step": 18399 + }, + { + "epoch": 0.8327675944783888, + "grad_norm": 0.6287372418726318, + "learning_rate": 7.160268626235866e-07, + "loss": 0.2823, + "step": 18400 + }, + { + "epoch": 0.8328128535867844, + "grad_norm": 0.30292889477882584, + "learning_rate": 7.156489722099558e-07, + "loss": 0.4802, + "step": 18401 + }, + { + "epoch": 0.83285811269518, + "grad_norm": 0.26542531608177117, + "learning_rate": 7.152711738538725e-07, + "loss": 0.4563, + "step": 18402 + }, + { + "epoch": 0.8329033718035754, + "grad_norm": 0.6015363738942859, + "learning_rate": 7.148934675634494e-07, + "loss": 0.3128, + "step": 18403 + }, + { + "epoch": 0.832948630911971, + "grad_norm": 0.5985106227128415, + "learning_rate": 7.145158533468055e-07, + "loss": 0.2569, + "step": 18404 + }, + { + "epoch": 0.8329938900203666, + "grad_norm": 0.28834188236230374, + "learning_rate": 7.141383312120536e-07, + "loss": 0.4644, + "step": 18405 + }, + { + "epoch": 0.8330391491287622, + "grad_norm": 0.5680174625657594, + "learning_rate": 7.137609011673086e-07, + "loss": 0.3215, + "step": 18406 + }, + { + "epoch": 0.8330844082371577, + "grad_norm": 0.3533673673809758, + "learning_rate": 7.133835632206754e-07, + "loss": 0.4899, + "step": 18407 + }, + { + "epoch": 0.8331296673455533, + "grad_norm": 0.5735933751162591, + "learning_rate": 7.130063173802637e-07, + "loss": 0.2644, + "step": 18408 + }, + { + "epoch": 0.8331749264539489, + "grad_norm": 0.3244715465410745, + "learning_rate": 7.126291636541815e-07, + "loss": 0.4551, + "step": 18409 + }, + { + "epoch": 0.8332201855623445, + "grad_norm": 0.2608028807397206, + "learning_rate": 7.122521020505302e-07, + "loss": 0.4724, + "step": 18410 + }, + { + "epoch": 0.83326544467074, + "grad_norm": 0.6129342907073956, + "learning_rate": 7.11875132577412e-07, + "loss": 0.298, + "step": 18411 + }, + { + "epoch": 0.8333107037791355, + "grad_norm": 0.6824953976909873, + "learning_rate": 7.114982552429278e-07, + "loss": 0.2856, + "step": 18412 + }, + { + "epoch": 0.8333559628875311, + "grad_norm": 0.6085744465612274, + "learning_rate": 7.111214700551738e-07, + "loss": 0.271, + "step": 18413 + }, + { + "epoch": 0.8334012219959267, + "grad_norm": 0.7326685463051631, + "learning_rate": 7.107447770222486e-07, + "loss": 0.2786, + "step": 18414 + }, + { + "epoch": 0.8334464811043223, + "grad_norm": 0.6808975607293165, + "learning_rate": 7.103681761522446e-07, + "loss": 0.2859, + "step": 18415 + }, + { + "epoch": 0.8334917402127178, + "grad_norm": 0.5929595113087428, + "learning_rate": 7.099916674532526e-07, + "loss": 0.3479, + "step": 18416 + }, + { + "epoch": 0.8335369993211134, + "grad_norm": 0.2701159437656839, + "learning_rate": 7.096152509333642e-07, + "loss": 0.4868, + "step": 18417 + }, + { + "epoch": 0.833582258429509, + "grad_norm": 0.5661430809971738, + "learning_rate": 7.092389266006683e-07, + "loss": 0.2754, + "step": 18418 + }, + { + "epoch": 0.8336275175379045, + "grad_norm": 0.6159875929159425, + "learning_rate": 7.088626944632493e-07, + "loss": 0.3262, + "step": 18419 + }, + { + "epoch": 0.8336727766463, + "grad_norm": 0.6080998733893245, + "learning_rate": 7.084865545291914e-07, + "loss": 0.2933, + "step": 18420 + }, + { + "epoch": 0.8337180357546956, + "grad_norm": 0.6126167266070449, + "learning_rate": 7.081105068065764e-07, + "loss": 0.3477, + "step": 18421 + }, + { + "epoch": 0.8337632948630912, + "grad_norm": 0.2536339863608969, + "learning_rate": 7.077345513034861e-07, + "loss": 0.4426, + "step": 18422 + }, + { + "epoch": 0.8338085539714868, + "grad_norm": 0.2617414439907864, + "learning_rate": 7.073586880279981e-07, + "loss": 0.4655, + "step": 18423 + }, + { + "epoch": 0.8338538130798824, + "grad_norm": 0.5566006485219414, + "learning_rate": 7.06982916988187e-07, + "loss": 0.2721, + "step": 18424 + }, + { + "epoch": 0.8338990721882779, + "grad_norm": 0.6710321305525754, + "learning_rate": 7.066072381921285e-07, + "loss": 0.3213, + "step": 18425 + }, + { + "epoch": 0.8339443312966734, + "grad_norm": 0.6511717324963595, + "learning_rate": 7.06231651647894e-07, + "loss": 0.331, + "step": 18426 + }, + { + "epoch": 0.833989590405069, + "grad_norm": 0.5692816316713081, + "learning_rate": 7.058561573635548e-07, + "loss": 0.2835, + "step": 18427 + }, + { + "epoch": 0.8340348495134646, + "grad_norm": 0.663359168877623, + "learning_rate": 7.054807553471782e-07, + "loss": 0.278, + "step": 18428 + }, + { + "epoch": 0.8340801086218601, + "grad_norm": 0.5552336909462475, + "learning_rate": 7.05105445606829e-07, + "loss": 0.2733, + "step": 18429 + }, + { + "epoch": 0.8341253677302557, + "grad_norm": 0.5845264375176329, + "learning_rate": 7.047302281505735e-07, + "loss": 0.3261, + "step": 18430 + }, + { + "epoch": 0.8341706268386513, + "grad_norm": 0.6262122985968133, + "learning_rate": 7.043551029864759e-07, + "loss": 0.319, + "step": 18431 + }, + { + "epoch": 0.8342158859470469, + "grad_norm": 0.6622169142228267, + "learning_rate": 7.039800701225918e-07, + "loss": 0.2854, + "step": 18432 + }, + { + "epoch": 0.8342611450554424, + "grad_norm": 0.27194861515127755, + "learning_rate": 7.036051295669816e-07, + "loss": 0.461, + "step": 18433 + }, + { + "epoch": 0.834306404163838, + "grad_norm": 0.25211593145997274, + "learning_rate": 7.03230281327702e-07, + "loss": 0.4504, + "step": 18434 + }, + { + "epoch": 0.8343516632722335, + "grad_norm": 0.6034517931757836, + "learning_rate": 7.028555254128089e-07, + "loss": 0.2867, + "step": 18435 + }, + { + "epoch": 0.8343969223806291, + "grad_norm": 0.6218373751755876, + "learning_rate": 7.024808618303508e-07, + "loss": 0.2809, + "step": 18436 + }, + { + "epoch": 0.8344421814890247, + "grad_norm": 0.622619872023975, + "learning_rate": 7.021062905883802e-07, + "loss": 0.3138, + "step": 18437 + }, + { + "epoch": 0.8344874405974202, + "grad_norm": 0.25726287792657343, + "learning_rate": 7.017318116949468e-07, + "loss": 0.4385, + "step": 18438 + }, + { + "epoch": 0.8345326997058158, + "grad_norm": 0.5635510149757561, + "learning_rate": 7.013574251580956e-07, + "loss": 0.2942, + "step": 18439 + }, + { + "epoch": 0.8345779588142114, + "grad_norm": 0.5767968519994132, + "learning_rate": 7.009831309858701e-07, + "loss": 0.2885, + "step": 18440 + }, + { + "epoch": 0.834623217922607, + "grad_norm": 0.6454957968254394, + "learning_rate": 7.006089291863144e-07, + "loss": 0.2802, + "step": 18441 + }, + { + "epoch": 0.8346684770310024, + "grad_norm": 0.6431056047668252, + "learning_rate": 7.002348197674669e-07, + "loss": 0.2863, + "step": 18442 + }, + { + "epoch": 0.834713736139398, + "grad_norm": 0.6548391597943949, + "learning_rate": 6.998608027373694e-07, + "loss": 0.2709, + "step": 18443 + }, + { + "epoch": 0.8347589952477936, + "grad_norm": 0.5851244717476433, + "learning_rate": 6.994868781040553e-07, + "loss": 0.2773, + "step": 18444 + }, + { + "epoch": 0.8348042543561892, + "grad_norm": 0.6108327610865241, + "learning_rate": 6.991130458755596e-07, + "loss": 0.3102, + "step": 18445 + }, + { + "epoch": 0.8348495134645848, + "grad_norm": 0.5969419647703962, + "learning_rate": 6.987393060599157e-07, + "loss": 0.287, + "step": 18446 + }, + { + "epoch": 0.8348947725729803, + "grad_norm": 0.7424954024336571, + "learning_rate": 6.983656586651543e-07, + "loss": 0.2959, + "step": 18447 + }, + { + "epoch": 0.8349400316813759, + "grad_norm": 0.32286385121349703, + "learning_rate": 6.979921036993042e-07, + "loss": 0.4771, + "step": 18448 + }, + { + "epoch": 0.8349852907897715, + "grad_norm": 0.577601691006914, + "learning_rate": 6.976186411703894e-07, + "loss": 0.2719, + "step": 18449 + }, + { + "epoch": 0.835030549898167, + "grad_norm": 0.6326586227520709, + "learning_rate": 6.972452710864364e-07, + "loss": 0.3351, + "step": 18450 + }, + { + "epoch": 0.8350758090065625, + "grad_norm": 0.6604271876887587, + "learning_rate": 6.968719934554691e-07, + "loss": 0.3225, + "step": 18451 + }, + { + "epoch": 0.8351210681149581, + "grad_norm": 0.589944492600839, + "learning_rate": 6.964988082855062e-07, + "loss": 0.2977, + "step": 18452 + }, + { + "epoch": 0.8351663272233537, + "grad_norm": 0.5965548477237558, + "learning_rate": 6.961257155845658e-07, + "loss": 0.2546, + "step": 18453 + }, + { + "epoch": 0.8352115863317493, + "grad_norm": 0.6621594533928651, + "learning_rate": 6.957527153606664e-07, + "loss": 0.2908, + "step": 18454 + }, + { + "epoch": 0.8352568454401448, + "grad_norm": 0.5712805422352859, + "learning_rate": 6.953798076218204e-07, + "loss": 0.2905, + "step": 18455 + }, + { + "epoch": 0.8353021045485404, + "grad_norm": 0.5798015410898242, + "learning_rate": 6.950069923760433e-07, + "loss": 0.2914, + "step": 18456 + }, + { + "epoch": 0.835347363656936, + "grad_norm": 0.26862311862827815, + "learning_rate": 6.946342696313435e-07, + "loss": 0.4883, + "step": 18457 + }, + { + "epoch": 0.8353926227653315, + "grad_norm": 0.2655924882413599, + "learning_rate": 6.942616393957297e-07, + "loss": 0.4824, + "step": 18458 + }, + { + "epoch": 0.8354378818737271, + "grad_norm": 0.828216103418796, + "learning_rate": 6.938891016772092e-07, + "loss": 0.2974, + "step": 18459 + }, + { + "epoch": 0.8354831409821226, + "grad_norm": 0.6503665178507614, + "learning_rate": 6.935166564837875e-07, + "loss": 0.315, + "step": 18460 + }, + { + "epoch": 0.8355284000905182, + "grad_norm": 0.809480352605823, + "learning_rate": 6.93144303823467e-07, + "loss": 0.2828, + "step": 18461 + }, + { + "epoch": 0.8355736591989138, + "grad_norm": 0.6732385764784247, + "learning_rate": 6.927720437042462e-07, + "loss": 0.3042, + "step": 18462 + }, + { + "epoch": 0.8356189183073094, + "grad_norm": 0.29168950544423544, + "learning_rate": 6.923998761341261e-07, + "loss": 0.4947, + "step": 18463 + }, + { + "epoch": 0.8356641774157049, + "grad_norm": 0.6211694790944401, + "learning_rate": 6.920278011211034e-07, + "loss": 0.2732, + "step": 18464 + }, + { + "epoch": 0.8357094365241005, + "grad_norm": 0.6205506528108833, + "learning_rate": 6.916558186731726e-07, + "loss": 0.2932, + "step": 18465 + }, + { + "epoch": 0.835754695632496, + "grad_norm": 0.6014775172694962, + "learning_rate": 6.912839287983253e-07, + "loss": 0.2827, + "step": 18466 + }, + { + "epoch": 0.8357999547408916, + "grad_norm": 0.2592762946173183, + "learning_rate": 6.909121315045541e-07, + "loss": 0.4542, + "step": 18467 + }, + { + "epoch": 0.8358452138492871, + "grad_norm": 0.9117098601539213, + "learning_rate": 6.905404267998466e-07, + "loss": 0.3685, + "step": 18468 + }, + { + "epoch": 0.8358904729576827, + "grad_norm": 0.7034910533099269, + "learning_rate": 6.901688146921892e-07, + "loss": 0.2811, + "step": 18469 + }, + { + "epoch": 0.8359357320660783, + "grad_norm": 0.6019756167831373, + "learning_rate": 6.897972951895682e-07, + "loss": 0.3007, + "step": 18470 + }, + { + "epoch": 0.8359809911744739, + "grad_norm": 0.6456353538014693, + "learning_rate": 6.894258682999644e-07, + "loss": 0.3, + "step": 18471 + }, + { + "epoch": 0.8360262502828695, + "grad_norm": 0.2788759898919128, + "learning_rate": 6.890545340313609e-07, + "loss": 0.4847, + "step": 18472 + }, + { + "epoch": 0.836071509391265, + "grad_norm": 0.7049032865204843, + "learning_rate": 6.886832923917358e-07, + "loss": 0.3145, + "step": 18473 + }, + { + "epoch": 0.8361167684996605, + "grad_norm": 0.5249122167302466, + "learning_rate": 6.883121433890639e-07, + "loss": 0.2557, + "step": 18474 + }, + { + "epoch": 0.8361620276080561, + "grad_norm": 0.6918651384335092, + "learning_rate": 6.879410870313219e-07, + "loss": 0.3068, + "step": 18475 + }, + { + "epoch": 0.8362072867164517, + "grad_norm": 0.5832527914148368, + "learning_rate": 6.875701233264837e-07, + "loss": 0.251, + "step": 18476 + }, + { + "epoch": 0.8362525458248472, + "grad_norm": 0.6140431784941516, + "learning_rate": 6.871992522825183e-07, + "loss": 0.2806, + "step": 18477 + }, + { + "epoch": 0.8362978049332428, + "grad_norm": 0.26624596622234775, + "learning_rate": 6.868284739073949e-07, + "loss": 0.4759, + "step": 18478 + }, + { + "epoch": 0.8363430640416384, + "grad_norm": 0.6660657563262626, + "learning_rate": 6.8645778820908e-07, + "loss": 0.2673, + "step": 18479 + }, + { + "epoch": 0.836388323150034, + "grad_norm": 0.6050959441806862, + "learning_rate": 6.860871951955412e-07, + "loss": 0.2871, + "step": 18480 + }, + { + "epoch": 0.8364335822584296, + "grad_norm": 0.6525082970979413, + "learning_rate": 6.857166948747385e-07, + "loss": 0.3073, + "step": 18481 + }, + { + "epoch": 0.836478841366825, + "grad_norm": 0.2765877051458427, + "learning_rate": 6.853462872546329e-07, + "loss": 0.4547, + "step": 18482 + }, + { + "epoch": 0.8365241004752206, + "grad_norm": 0.6329547373858155, + "learning_rate": 6.849759723431853e-07, + "loss": 0.3209, + "step": 18483 + }, + { + "epoch": 0.8365693595836162, + "grad_norm": 0.6013514520423907, + "learning_rate": 6.846057501483505e-07, + "loss": 0.317, + "step": 18484 + }, + { + "epoch": 0.8366146186920118, + "grad_norm": 0.6708127579750481, + "learning_rate": 6.842356206780853e-07, + "loss": 0.267, + "step": 18485 + }, + { + "epoch": 0.8366598778004073, + "grad_norm": 0.6180890666149683, + "learning_rate": 6.838655839403419e-07, + "loss": 0.2944, + "step": 18486 + }, + { + "epoch": 0.8367051369088029, + "grad_norm": 0.5812224184554288, + "learning_rate": 6.834956399430703e-07, + "loss": 0.3012, + "step": 18487 + }, + { + "epoch": 0.8367503960171985, + "grad_norm": 0.2711922549276204, + "learning_rate": 6.8312578869422e-07, + "loss": 0.4628, + "step": 18488 + }, + { + "epoch": 0.8367956551255941, + "grad_norm": 0.797307266433614, + "learning_rate": 6.827560302017389e-07, + "loss": 0.3233, + "step": 18489 + }, + { + "epoch": 0.8368409142339895, + "grad_norm": 0.6030944382919147, + "learning_rate": 6.823863644735718e-07, + "loss": 0.2964, + "step": 18490 + }, + { + "epoch": 0.8368861733423851, + "grad_norm": 0.7432983858204644, + "learning_rate": 6.820167915176601e-07, + "loss": 0.3053, + "step": 18491 + }, + { + "epoch": 0.8369314324507807, + "grad_norm": 0.7667762688745203, + "learning_rate": 6.816473113419459e-07, + "loss": 0.2915, + "step": 18492 + }, + { + "epoch": 0.8369766915591763, + "grad_norm": 0.6022476658119068, + "learning_rate": 6.812779239543688e-07, + "loss": 0.2945, + "step": 18493 + }, + { + "epoch": 0.8370219506675719, + "grad_norm": 0.5689870681284912, + "learning_rate": 6.809086293628658e-07, + "loss": 0.2849, + "step": 18494 + }, + { + "epoch": 0.8370672097759674, + "grad_norm": 0.6094370358822103, + "learning_rate": 6.805394275753696e-07, + "loss": 0.2985, + "step": 18495 + }, + { + "epoch": 0.837112468884363, + "grad_norm": 0.6096821133744085, + "learning_rate": 6.801703185998165e-07, + "loss": 0.309, + "step": 18496 + }, + { + "epoch": 0.8371577279927586, + "grad_norm": 0.6210190486095353, + "learning_rate": 6.798013024441346e-07, + "loss": 0.2717, + "step": 18497 + }, + { + "epoch": 0.8372029871011542, + "grad_norm": 0.5832405292000097, + "learning_rate": 6.794323791162549e-07, + "loss": 0.2883, + "step": 18498 + }, + { + "epoch": 0.8372482462095496, + "grad_norm": 0.6939500706572298, + "learning_rate": 6.790635486241043e-07, + "loss": 0.2912, + "step": 18499 + }, + { + "epoch": 0.8372935053179452, + "grad_norm": 0.5921908749665488, + "learning_rate": 6.786948109756064e-07, + "loss": 0.3227, + "step": 18500 + }, + { + "epoch": 0.8373387644263408, + "grad_norm": 0.2750326264546234, + "learning_rate": 6.783261661786855e-07, + "loss": 0.479, + "step": 18501 + }, + { + "epoch": 0.8373840235347364, + "grad_norm": 0.24164130552241625, + "learning_rate": 6.77957614241263e-07, + "loss": 0.4474, + "step": 18502 + }, + { + "epoch": 0.8374292826431319, + "grad_norm": 0.5861892387606339, + "learning_rate": 6.775891551712555e-07, + "loss": 0.2594, + "step": 18503 + }, + { + "epoch": 0.8374745417515275, + "grad_norm": 0.7380838640426645, + "learning_rate": 6.77220788976582e-07, + "loss": 0.3095, + "step": 18504 + }, + { + "epoch": 0.8375198008599231, + "grad_norm": 0.6415416692617534, + "learning_rate": 6.768525156651589e-07, + "loss": 0.257, + "step": 18505 + }, + { + "epoch": 0.8375650599683186, + "grad_norm": 0.6310572300126577, + "learning_rate": 6.764843352448974e-07, + "loss": 0.297, + "step": 18506 + }, + { + "epoch": 0.8376103190767142, + "grad_norm": 0.5824094431531872, + "learning_rate": 6.761162477237076e-07, + "loss": 0.266, + "step": 18507 + }, + { + "epoch": 0.8376555781851097, + "grad_norm": 0.5821116481571228, + "learning_rate": 6.757482531094999e-07, + "loss": 0.2683, + "step": 18508 + }, + { + "epoch": 0.8377008372935053, + "grad_norm": 0.6269496246057881, + "learning_rate": 6.753803514101826e-07, + "loss": 0.2824, + "step": 18509 + }, + { + "epoch": 0.8377460964019009, + "grad_norm": 0.6786039172697631, + "learning_rate": 6.75012542633659e-07, + "loss": 0.3464, + "step": 18510 + }, + { + "epoch": 0.8377913555102965, + "grad_norm": 0.2701244215128959, + "learning_rate": 6.74644826787832e-07, + "loss": 0.4969, + "step": 18511 + }, + { + "epoch": 0.837836614618692, + "grad_norm": 0.5815568096960195, + "learning_rate": 6.742772038806045e-07, + "loss": 0.3244, + "step": 18512 + }, + { + "epoch": 0.8378818737270876, + "grad_norm": 0.6155053020966658, + "learning_rate": 6.739096739198731e-07, + "loss": 0.3038, + "step": 18513 + }, + { + "epoch": 0.8379271328354831, + "grad_norm": 0.2623644484991377, + "learning_rate": 6.735422369135375e-07, + "loss": 0.4636, + "step": 18514 + }, + { + "epoch": 0.8379723919438787, + "grad_norm": 0.9128337737816552, + "learning_rate": 6.731748928694914e-07, + "loss": 0.3144, + "step": 18515 + }, + { + "epoch": 0.8380176510522742, + "grad_norm": 0.6336593216480945, + "learning_rate": 6.72807641795627e-07, + "loss": 0.2951, + "step": 18516 + }, + { + "epoch": 0.8380629101606698, + "grad_norm": 0.6044869413049455, + "learning_rate": 6.724404836998366e-07, + "loss": 0.3192, + "step": 18517 + }, + { + "epoch": 0.8381081692690654, + "grad_norm": 0.6188016034575473, + "learning_rate": 6.720734185900101e-07, + "loss": 0.2872, + "step": 18518 + }, + { + "epoch": 0.838153428377461, + "grad_norm": 0.6214542611014593, + "learning_rate": 6.717064464740336e-07, + "loss": 0.2403, + "step": 18519 + }, + { + "epoch": 0.8381986874858566, + "grad_norm": 0.7963334781011263, + "learning_rate": 6.713395673597911e-07, + "loss": 0.2977, + "step": 18520 + }, + { + "epoch": 0.838243946594252, + "grad_norm": 0.28427234355197933, + "learning_rate": 6.709727812551669e-07, + "loss": 0.4759, + "step": 18521 + }, + { + "epoch": 0.8382892057026476, + "grad_norm": 0.6178113786648095, + "learning_rate": 6.706060881680432e-07, + "loss": 0.324, + "step": 18522 + }, + { + "epoch": 0.8383344648110432, + "grad_norm": 0.27607898173326817, + "learning_rate": 6.702394881062974e-07, + "loss": 0.4676, + "step": 18523 + }, + { + "epoch": 0.8383797239194388, + "grad_norm": 0.6041009697924854, + "learning_rate": 6.698729810778065e-07, + "loss": 0.3249, + "step": 18524 + }, + { + "epoch": 0.8384249830278343, + "grad_norm": 0.6633498696162133, + "learning_rate": 6.695065670904477e-07, + "loss": 0.2971, + "step": 18525 + }, + { + "epoch": 0.8384702421362299, + "grad_norm": 0.5935907065614485, + "learning_rate": 6.691402461520913e-07, + "loss": 0.2991, + "step": 18526 + }, + { + "epoch": 0.8385155012446255, + "grad_norm": 0.25082580423110495, + "learning_rate": 6.687740182706103e-07, + "loss": 0.4612, + "step": 18527 + }, + { + "epoch": 0.8385607603530211, + "grad_norm": 0.6078515373628862, + "learning_rate": 6.684078834538743e-07, + "loss": 0.2703, + "step": 18528 + }, + { + "epoch": 0.8386060194614167, + "grad_norm": 0.6176734156235348, + "learning_rate": 6.680418417097478e-07, + "loss": 0.3062, + "step": 18529 + }, + { + "epoch": 0.8386512785698121, + "grad_norm": 0.2644695950699044, + "learning_rate": 6.676758930460975e-07, + "loss": 0.4671, + "step": 18530 + }, + { + "epoch": 0.8386965376782077, + "grad_norm": 0.2695615966163874, + "learning_rate": 6.673100374707886e-07, + "loss": 0.4595, + "step": 18531 + }, + { + "epoch": 0.8387417967866033, + "grad_norm": 0.6319710434142184, + "learning_rate": 6.669442749916782e-07, + "loss": 0.2783, + "step": 18532 + }, + { + "epoch": 0.8387870558949989, + "grad_norm": 0.27180888854367324, + "learning_rate": 6.665786056166274e-07, + "loss": 0.4834, + "step": 18533 + }, + { + "epoch": 0.8388323150033944, + "grad_norm": 0.263547407515624, + "learning_rate": 6.662130293534941e-07, + "loss": 0.4602, + "step": 18534 + }, + { + "epoch": 0.83887757411179, + "grad_norm": 0.28470399398942675, + "learning_rate": 6.658475462101327e-07, + "loss": 0.4475, + "step": 18535 + }, + { + "epoch": 0.8389228332201856, + "grad_norm": 0.594393630301785, + "learning_rate": 6.654821561943953e-07, + "loss": 0.2538, + "step": 18536 + }, + { + "epoch": 0.8389680923285812, + "grad_norm": 0.6516799206285943, + "learning_rate": 6.651168593141339e-07, + "loss": 0.28, + "step": 18537 + }, + { + "epoch": 0.8390133514369766, + "grad_norm": 0.5994769333314872, + "learning_rate": 6.647516555771988e-07, + "loss": 0.3085, + "step": 18538 + }, + { + "epoch": 0.8390586105453722, + "grad_norm": 0.3360962215042519, + "learning_rate": 6.643865449914355e-07, + "loss": 0.4624, + "step": 18539 + }, + { + "epoch": 0.8391038696537678, + "grad_norm": 0.553898450685638, + "learning_rate": 6.640215275646889e-07, + "loss": 0.2594, + "step": 18540 + }, + { + "epoch": 0.8391491287621634, + "grad_norm": 0.6146330382727423, + "learning_rate": 6.636566033048037e-07, + "loss": 0.2977, + "step": 18541 + }, + { + "epoch": 0.839194387870559, + "grad_norm": 0.6534901474622999, + "learning_rate": 6.632917722196186e-07, + "loss": 0.3527, + "step": 18542 + }, + { + "epoch": 0.8392396469789545, + "grad_norm": 0.6374263507876112, + "learning_rate": 6.629270343169752e-07, + "loss": 0.2602, + "step": 18543 + }, + { + "epoch": 0.8392849060873501, + "grad_norm": 0.9073245039519338, + "learning_rate": 6.625623896047101e-07, + "loss": 0.2771, + "step": 18544 + }, + { + "epoch": 0.8393301651957457, + "grad_norm": 0.5576977394379002, + "learning_rate": 6.621978380906563e-07, + "loss": 0.2847, + "step": 18545 + }, + { + "epoch": 0.8393754243041412, + "grad_norm": 0.6261781506288243, + "learning_rate": 6.618333797826487e-07, + "loss": 0.3053, + "step": 18546 + }, + { + "epoch": 0.8394206834125367, + "grad_norm": 0.2783462372789997, + "learning_rate": 6.614690146885189e-07, + "loss": 0.4567, + "step": 18547 + }, + { + "epoch": 0.8394659425209323, + "grad_norm": 0.5963827788955498, + "learning_rate": 6.611047428160954e-07, + "loss": 0.3023, + "step": 18548 + }, + { + "epoch": 0.8395112016293279, + "grad_norm": 0.5959170023377508, + "learning_rate": 6.60740564173204e-07, + "loss": 0.2707, + "step": 18549 + }, + { + "epoch": 0.8395564607377235, + "grad_norm": 0.25713733466545213, + "learning_rate": 6.603764787676703e-07, + "loss": 0.4457, + "step": 18550 + }, + { + "epoch": 0.839601719846119, + "grad_norm": 0.38063115712027656, + "learning_rate": 6.600124866073199e-07, + "loss": 0.4678, + "step": 18551 + }, + { + "epoch": 0.8396469789545146, + "grad_norm": 0.5948071627450263, + "learning_rate": 6.596485876999714e-07, + "loss": 0.3188, + "step": 18552 + }, + { + "epoch": 0.8396922380629102, + "grad_norm": 0.5947740569630174, + "learning_rate": 6.592847820534432e-07, + "loss": 0.2585, + "step": 18553 + }, + { + "epoch": 0.8397374971713057, + "grad_norm": 0.26618875327704994, + "learning_rate": 6.589210696755549e-07, + "loss": 0.4601, + "step": 18554 + }, + { + "epoch": 0.8397827562797013, + "grad_norm": 0.3881434164089876, + "learning_rate": 6.585574505741188e-07, + "loss": 0.4836, + "step": 18555 + }, + { + "epoch": 0.8398280153880968, + "grad_norm": 0.6202801403288642, + "learning_rate": 6.581939247569508e-07, + "loss": 0.2967, + "step": 18556 + }, + { + "epoch": 0.8398732744964924, + "grad_norm": 0.5730519871310629, + "learning_rate": 6.578304922318607e-07, + "loss": 0.2491, + "step": 18557 + }, + { + "epoch": 0.839918533604888, + "grad_norm": 0.6228626600596571, + "learning_rate": 6.574671530066557e-07, + "loss": 0.3151, + "step": 18558 + }, + { + "epoch": 0.8399637927132836, + "grad_norm": 0.28480024563607853, + "learning_rate": 6.571039070891449e-07, + "loss": 0.4816, + "step": 18559 + }, + { + "epoch": 0.8400090518216791, + "grad_norm": 0.7009703026905671, + "learning_rate": 6.567407544871341e-07, + "loss": 0.2483, + "step": 18560 + }, + { + "epoch": 0.8400543109300747, + "grad_norm": 0.6737484038016858, + "learning_rate": 6.56377695208425e-07, + "loss": 0.2819, + "step": 18561 + }, + { + "epoch": 0.8400995700384702, + "grad_norm": 0.6495359229581547, + "learning_rate": 6.560147292608177e-07, + "loss": 0.3284, + "step": 18562 + }, + { + "epoch": 0.8401448291468658, + "grad_norm": 0.7227420956022628, + "learning_rate": 6.556518566521125e-07, + "loss": 0.2784, + "step": 18563 + }, + { + "epoch": 0.8401900882552614, + "grad_norm": 0.2921568050989983, + "learning_rate": 6.552890773901083e-07, + "loss": 0.4648, + "step": 18564 + }, + { + "epoch": 0.8402353473636569, + "grad_norm": 0.9524537477492414, + "learning_rate": 6.54926391482596e-07, + "loss": 0.2818, + "step": 18565 + }, + { + "epoch": 0.8402806064720525, + "grad_norm": 0.5970172164361539, + "learning_rate": 6.545637989373704e-07, + "loss": 0.3514, + "step": 18566 + }, + { + "epoch": 0.8403258655804481, + "grad_norm": 0.6591626744459652, + "learning_rate": 6.542012997622238e-07, + "loss": 0.3053, + "step": 18567 + }, + { + "epoch": 0.8403711246888437, + "grad_norm": 0.6245199254392384, + "learning_rate": 6.538388939649442e-07, + "loss": 0.263, + "step": 18568 + }, + { + "epoch": 0.8404163837972392, + "grad_norm": 0.6100214864717944, + "learning_rate": 6.534765815533179e-07, + "loss": 0.2426, + "step": 18569 + }, + { + "epoch": 0.8404616429056347, + "grad_norm": 0.6087616477739742, + "learning_rate": 6.531143625351316e-07, + "loss": 0.2755, + "step": 18570 + }, + { + "epoch": 0.8405069020140303, + "grad_norm": 0.5870832214071411, + "learning_rate": 6.527522369181655e-07, + "loss": 0.2807, + "step": 18571 + }, + { + "epoch": 0.8405521611224259, + "grad_norm": 0.26530881471943285, + "learning_rate": 6.523902047102038e-07, + "loss": 0.4648, + "step": 18572 + }, + { + "epoch": 0.8405974202308214, + "grad_norm": 0.6748885856919552, + "learning_rate": 6.520282659190241e-07, + "loss": 0.333, + "step": 18573 + }, + { + "epoch": 0.840642679339217, + "grad_norm": 0.6525134433216766, + "learning_rate": 6.516664205524021e-07, + "loss": 0.3334, + "step": 18574 + }, + { + "epoch": 0.8406879384476126, + "grad_norm": 0.626439705015834, + "learning_rate": 6.513046686181135e-07, + "loss": 0.3059, + "step": 18575 + }, + { + "epoch": 0.8407331975560082, + "grad_norm": 0.6414577042974596, + "learning_rate": 6.509430101239328e-07, + "loss": 0.3226, + "step": 18576 + }, + { + "epoch": 0.8407784566644038, + "grad_norm": 0.6366298350867605, + "learning_rate": 6.505814450776299e-07, + "loss": 0.3111, + "step": 18577 + }, + { + "epoch": 0.8408237157727992, + "grad_norm": 0.6135687883135332, + "learning_rate": 6.502199734869718e-07, + "loss": 0.3032, + "step": 18578 + }, + { + "epoch": 0.8408689748811948, + "grad_norm": 0.6618915855858729, + "learning_rate": 6.498585953597275e-07, + "loss": 0.2993, + "step": 18579 + }, + { + "epoch": 0.8409142339895904, + "grad_norm": 0.5810166039177667, + "learning_rate": 6.494973107036628e-07, + "loss": 0.2969, + "step": 18580 + }, + { + "epoch": 0.840959493097986, + "grad_norm": 0.6190871177789631, + "learning_rate": 6.491361195265394e-07, + "loss": 0.2933, + "step": 18581 + }, + { + "epoch": 0.8410047522063815, + "grad_norm": 0.571417004000721, + "learning_rate": 6.487750218361172e-07, + "loss": 0.2754, + "step": 18582 + }, + { + "epoch": 0.8410500113147771, + "grad_norm": 0.26255393521512566, + "learning_rate": 6.484140176401565e-07, + "loss": 0.45, + "step": 18583 + }, + { + "epoch": 0.8410952704231727, + "grad_norm": 0.5854899811507577, + "learning_rate": 6.48053106946413e-07, + "loss": 0.2464, + "step": 18584 + }, + { + "epoch": 0.8411405295315683, + "grad_norm": 0.685442932062727, + "learning_rate": 6.476922897626431e-07, + "loss": 0.2958, + "step": 18585 + }, + { + "epoch": 0.8411857886399637, + "grad_norm": 0.25921140126472897, + "learning_rate": 6.47331566096599e-07, + "loss": 0.465, + "step": 18586 + }, + { + "epoch": 0.8412310477483593, + "grad_norm": 0.6404645261086098, + "learning_rate": 6.4697093595603e-07, + "loss": 0.3098, + "step": 18587 + }, + { + "epoch": 0.8412763068567549, + "grad_norm": 0.2909442563168596, + "learning_rate": 6.466103993486866e-07, + "loss": 0.4794, + "step": 18588 + }, + { + "epoch": 0.8413215659651505, + "grad_norm": 0.6357938988339054, + "learning_rate": 6.462499562823166e-07, + "loss": 0.2952, + "step": 18589 + }, + { + "epoch": 0.8413668250735461, + "grad_norm": 0.5806777395557357, + "learning_rate": 6.45889606764663e-07, + "loss": 0.2687, + "step": 18590 + }, + { + "epoch": 0.8414120841819416, + "grad_norm": 0.2657439730935918, + "learning_rate": 6.455293508034682e-07, + "loss": 0.4687, + "step": 18591 + }, + { + "epoch": 0.8414573432903372, + "grad_norm": 1.2369104990125643, + "learning_rate": 6.451691884064737e-07, + "loss": 0.2837, + "step": 18592 + }, + { + "epoch": 0.8415026023987328, + "grad_norm": 0.5736522130921489, + "learning_rate": 6.44809119581421e-07, + "loss": 0.3056, + "step": 18593 + }, + { + "epoch": 0.8415478615071283, + "grad_norm": 0.5971583599445661, + "learning_rate": 6.444491443360423e-07, + "loss": 0.2915, + "step": 18594 + }, + { + "epoch": 0.8415931206155238, + "grad_norm": 0.26921985013098143, + "learning_rate": 6.440892626780742e-07, + "loss": 0.4945, + "step": 18595 + }, + { + "epoch": 0.8416383797239194, + "grad_norm": 0.702059806929883, + "learning_rate": 6.437294746152506e-07, + "loss": 0.3517, + "step": 18596 + }, + { + "epoch": 0.841683638832315, + "grad_norm": 0.6959560991057743, + "learning_rate": 6.433697801553018e-07, + "loss": 0.2978, + "step": 18597 + }, + { + "epoch": 0.8417288979407106, + "grad_norm": 0.6424117920937337, + "learning_rate": 6.430101793059545e-07, + "loss": 0.3127, + "step": 18598 + }, + { + "epoch": 0.8417741570491062, + "grad_norm": 0.6470073503248186, + "learning_rate": 6.426506720749382e-07, + "loss": 0.2602, + "step": 18599 + }, + { + "epoch": 0.8418194161575017, + "grad_norm": 0.5961597104114085, + "learning_rate": 6.422912584699753e-07, + "loss": 0.2543, + "step": 18600 + }, + { + "epoch": 0.8418646752658973, + "grad_norm": 0.6579959921697064, + "learning_rate": 6.41931938498791e-07, + "loss": 0.2955, + "step": 18601 + }, + { + "epoch": 0.8419099343742928, + "grad_norm": 0.5739176031467433, + "learning_rate": 6.415727121691029e-07, + "loss": 0.2896, + "step": 18602 + }, + { + "epoch": 0.8419551934826884, + "grad_norm": 0.7145701489626878, + "learning_rate": 6.412135794886326e-07, + "loss": 0.3284, + "step": 18603 + }, + { + "epoch": 0.8420004525910839, + "grad_norm": 0.6603734675555657, + "learning_rate": 6.408545404650945e-07, + "loss": 0.3477, + "step": 18604 + }, + { + "epoch": 0.8420457116994795, + "grad_norm": 0.60232612770229, + "learning_rate": 6.404955951062058e-07, + "loss": 0.3122, + "step": 18605 + }, + { + "epoch": 0.8420909708078751, + "grad_norm": 0.5689447404173003, + "learning_rate": 6.40136743419677e-07, + "loss": 0.2737, + "step": 18606 + }, + { + "epoch": 0.8421362299162707, + "grad_norm": 0.5942540051798831, + "learning_rate": 6.39777985413218e-07, + "loss": 0.2842, + "step": 18607 + }, + { + "epoch": 0.8421814890246662, + "grad_norm": 0.5761743364454467, + "learning_rate": 6.394193210945393e-07, + "loss": 0.236, + "step": 18608 + }, + { + "epoch": 0.8422267481330618, + "grad_norm": 0.6744896562351126, + "learning_rate": 6.390607504713476e-07, + "loss": 0.3192, + "step": 18609 + }, + { + "epoch": 0.8422720072414573, + "grad_norm": 0.5559132233571377, + "learning_rate": 6.387022735513465e-07, + "loss": 0.2759, + "step": 18610 + }, + { + "epoch": 0.8423172663498529, + "grad_norm": 0.5737130358392764, + "learning_rate": 6.383438903422384e-07, + "loss": 0.2971, + "step": 18611 + }, + { + "epoch": 0.8423625254582485, + "grad_norm": 0.5801767481601431, + "learning_rate": 6.379856008517249e-07, + "loss": 0.3357, + "step": 18612 + }, + { + "epoch": 0.842407784566644, + "grad_norm": 0.6083233227030705, + "learning_rate": 6.376274050875031e-07, + "loss": 0.302, + "step": 18613 + }, + { + "epoch": 0.8424530436750396, + "grad_norm": 0.644686585245738, + "learning_rate": 6.372693030572713e-07, + "loss": 0.3155, + "step": 18614 + }, + { + "epoch": 0.8424983027834352, + "grad_norm": 0.2862529176463453, + "learning_rate": 6.369112947687228e-07, + "loss": 0.5087, + "step": 18615 + }, + { + "epoch": 0.8425435618918308, + "grad_norm": 0.6002537971676198, + "learning_rate": 6.365533802295498e-07, + "loss": 0.2779, + "step": 18616 + }, + { + "epoch": 0.8425888210002263, + "grad_norm": 0.6794693559548842, + "learning_rate": 6.361955594474434e-07, + "loss": 0.2924, + "step": 18617 + }, + { + "epoch": 0.8426340801086218, + "grad_norm": 0.6557389050187346, + "learning_rate": 6.358378324300929e-07, + "loss": 0.2633, + "step": 18618 + }, + { + "epoch": 0.8426793392170174, + "grad_norm": 0.5913684893750959, + "learning_rate": 6.354801991851839e-07, + "loss": 0.2793, + "step": 18619 + }, + { + "epoch": 0.842724598325413, + "grad_norm": 0.56507780537573, + "learning_rate": 6.351226597203996e-07, + "loss": 0.2813, + "step": 18620 + }, + { + "epoch": 0.8427698574338085, + "grad_norm": 0.6543157854903586, + "learning_rate": 6.347652140434235e-07, + "loss": 0.3006, + "step": 18621 + }, + { + "epoch": 0.8428151165422041, + "grad_norm": 0.27062195826958063, + "learning_rate": 6.344078621619388e-07, + "loss": 0.4696, + "step": 18622 + }, + { + "epoch": 0.8428603756505997, + "grad_norm": 0.5956050072863, + "learning_rate": 6.340506040836186e-07, + "loss": 0.3229, + "step": 18623 + }, + { + "epoch": 0.8429056347589953, + "grad_norm": 0.6072215620693544, + "learning_rate": 6.336934398161421e-07, + "loss": 0.2641, + "step": 18624 + }, + { + "epoch": 0.8429508938673909, + "grad_norm": 0.6481129870782569, + "learning_rate": 6.333363693671846e-07, + "loss": 0.2568, + "step": 18625 + }, + { + "epoch": 0.8429961529757863, + "grad_norm": 0.6048251027030022, + "learning_rate": 6.329793927444178e-07, + "loss": 0.311, + "step": 18626 + }, + { + "epoch": 0.8430414120841819, + "grad_norm": 0.6737990008870608, + "learning_rate": 6.3262250995551e-07, + "loss": 0.3054, + "step": 18627 + }, + { + "epoch": 0.8430866711925775, + "grad_norm": 0.27320233517049275, + "learning_rate": 6.322657210081318e-07, + "loss": 0.4757, + "step": 18628 + }, + { + "epoch": 0.8431319303009731, + "grad_norm": 0.5951501062135266, + "learning_rate": 6.319090259099486e-07, + "loss": 0.2854, + "step": 18629 + }, + { + "epoch": 0.8431771894093686, + "grad_norm": 0.6803703893987928, + "learning_rate": 6.31552424668625e-07, + "loss": 0.2763, + "step": 18630 + }, + { + "epoch": 0.8432224485177642, + "grad_norm": 0.606461679888621, + "learning_rate": 6.311959172918225e-07, + "loss": 0.3098, + "step": 18631 + }, + { + "epoch": 0.8432677076261598, + "grad_norm": 0.6402462679139579, + "learning_rate": 6.308395037872034e-07, + "loss": 0.3023, + "step": 18632 + }, + { + "epoch": 0.8433129667345554, + "grad_norm": 0.6456806940445331, + "learning_rate": 6.304831841624231e-07, + "loss": 0.3131, + "step": 18633 + }, + { + "epoch": 0.843358225842951, + "grad_norm": 0.6475293952371274, + "learning_rate": 6.301269584251402e-07, + "loss": 0.3454, + "step": 18634 + }, + { + "epoch": 0.8434034849513464, + "grad_norm": 0.6217532563366727, + "learning_rate": 6.297708265830083e-07, + "loss": 0.3182, + "step": 18635 + }, + { + "epoch": 0.843448744059742, + "grad_norm": 0.624619552227697, + "learning_rate": 6.294147886436774e-07, + "loss": 0.2799, + "step": 18636 + }, + { + "epoch": 0.8434940031681376, + "grad_norm": 0.6023133975470476, + "learning_rate": 6.290588446148005e-07, + "loss": 0.2987, + "step": 18637 + }, + { + "epoch": 0.8435392622765332, + "grad_norm": 0.6966724938516231, + "learning_rate": 6.287029945040251e-07, + "loss": 0.2694, + "step": 18638 + }, + { + "epoch": 0.8435845213849287, + "grad_norm": 0.9611111517466708, + "learning_rate": 6.28347238318997e-07, + "loss": 0.2905, + "step": 18639 + }, + { + "epoch": 0.8436297804933243, + "grad_norm": 0.6493403691536899, + "learning_rate": 6.279915760673593e-07, + "loss": 0.3332, + "step": 18640 + }, + { + "epoch": 0.8436750396017199, + "grad_norm": 0.5719059711061655, + "learning_rate": 6.276360077567556e-07, + "loss": 0.286, + "step": 18641 + }, + { + "epoch": 0.8437202987101154, + "grad_norm": 0.6307456637814162, + "learning_rate": 6.27280533394825e-07, + "loss": 0.3522, + "step": 18642 + }, + { + "epoch": 0.8437655578185109, + "grad_norm": 0.6154099960134782, + "learning_rate": 6.269251529892067e-07, + "loss": 0.3227, + "step": 18643 + }, + { + "epoch": 0.8438108169269065, + "grad_norm": 0.5971840758665974, + "learning_rate": 6.265698665475362e-07, + "loss": 0.2806, + "step": 18644 + }, + { + "epoch": 0.8438560760353021, + "grad_norm": 0.5660815219881201, + "learning_rate": 6.26214674077446e-07, + "loss": 0.262, + "step": 18645 + }, + { + "epoch": 0.8439013351436977, + "grad_norm": 0.5582881587328437, + "learning_rate": 6.258595755865693e-07, + "loss": 0.2644, + "step": 18646 + }, + { + "epoch": 0.8439465942520933, + "grad_norm": 0.270000743015364, + "learning_rate": 6.255045710825375e-07, + "loss": 0.4854, + "step": 18647 + }, + { + "epoch": 0.8439918533604888, + "grad_norm": 0.5930373376426961, + "learning_rate": 6.251496605729773e-07, + "loss": 0.2698, + "step": 18648 + }, + { + "epoch": 0.8440371124688844, + "grad_norm": 0.2799876243783329, + "learning_rate": 6.247948440655133e-07, + "loss": 0.4542, + "step": 18649 + }, + { + "epoch": 0.8440823715772799, + "grad_norm": 0.5833763261664848, + "learning_rate": 6.244401215677709e-07, + "loss": 0.2915, + "step": 18650 + }, + { + "epoch": 0.8441276306856755, + "grad_norm": 0.6616761716049248, + "learning_rate": 6.240854930873735e-07, + "loss": 0.3143, + "step": 18651 + }, + { + "epoch": 0.844172889794071, + "grad_norm": 0.5763756407201951, + "learning_rate": 6.237309586319378e-07, + "loss": 0.3434, + "step": 18652 + }, + { + "epoch": 0.8442181489024666, + "grad_norm": 0.6994970687999154, + "learning_rate": 6.233765182090829e-07, + "loss": 0.2785, + "step": 18653 + }, + { + "epoch": 0.8442634080108622, + "grad_norm": 0.5907761492683824, + "learning_rate": 6.230221718264257e-07, + "loss": 0.2635, + "step": 18654 + }, + { + "epoch": 0.8443086671192578, + "grad_norm": 0.6022140883838824, + "learning_rate": 6.226679194915791e-07, + "loss": 0.3221, + "step": 18655 + }, + { + "epoch": 0.8443539262276533, + "grad_norm": 0.6181478771378632, + "learning_rate": 6.223137612121538e-07, + "loss": 0.2902, + "step": 18656 + }, + { + "epoch": 0.8443991853360489, + "grad_norm": 0.6570667306521998, + "learning_rate": 6.219596969957619e-07, + "loss": 0.2774, + "step": 18657 + }, + { + "epoch": 0.8444444444444444, + "grad_norm": 0.26911455724039257, + "learning_rate": 6.216057268500092e-07, + "loss": 0.4801, + "step": 18658 + }, + { + "epoch": 0.84448970355284, + "grad_norm": 0.28515342343215133, + "learning_rate": 6.212518507825027e-07, + "loss": 0.4859, + "step": 18659 + }, + { + "epoch": 0.8445349626612356, + "grad_norm": 0.6390100496113705, + "learning_rate": 6.208980688008453e-07, + "loss": 0.3107, + "step": 18660 + }, + { + "epoch": 0.8445802217696311, + "grad_norm": 0.6064606481601544, + "learning_rate": 6.205443809126399e-07, + "loss": 0.2957, + "step": 18661 + }, + { + "epoch": 0.8446254808780267, + "grad_norm": 0.6515192999358557, + "learning_rate": 6.201907871254836e-07, + "loss": 0.3138, + "step": 18662 + }, + { + "epoch": 0.8446707399864223, + "grad_norm": 0.5526030700262841, + "learning_rate": 6.198372874469777e-07, + "loss": 0.2815, + "step": 18663 + }, + { + "epoch": 0.8447159990948179, + "grad_norm": 0.27052209067226685, + "learning_rate": 6.194838818847155e-07, + "loss": 0.4647, + "step": 18664 + }, + { + "epoch": 0.8447612582032133, + "grad_norm": 0.6545133811559477, + "learning_rate": 6.191305704462897e-07, + "loss": 0.2587, + "step": 18665 + }, + { + "epoch": 0.8448065173116089, + "grad_norm": 0.6538530551934313, + "learning_rate": 6.187773531392932e-07, + "loss": 0.3043, + "step": 18666 + }, + { + "epoch": 0.8448517764200045, + "grad_norm": 0.640001109669132, + "learning_rate": 6.184242299713162e-07, + "loss": 0.282, + "step": 18667 + }, + { + "epoch": 0.8448970355284001, + "grad_norm": 0.25687967698350045, + "learning_rate": 6.180712009499462e-07, + "loss": 0.4548, + "step": 18668 + }, + { + "epoch": 0.8449422946367957, + "grad_norm": 0.27272353640801006, + "learning_rate": 6.177182660827664e-07, + "loss": 0.4615, + "step": 18669 + }, + { + "epoch": 0.8449875537451912, + "grad_norm": 0.561871817929382, + "learning_rate": 6.173654253773631e-07, + "loss": 0.2885, + "step": 18670 + }, + { + "epoch": 0.8450328128535868, + "grad_norm": 0.6008742399480839, + "learning_rate": 6.170126788413156e-07, + "loss": 0.2925, + "step": 18671 + }, + { + "epoch": 0.8450780719619824, + "grad_norm": 0.6377610207407267, + "learning_rate": 6.166600264822054e-07, + "loss": 0.3027, + "step": 18672 + }, + { + "epoch": 0.845123331070378, + "grad_norm": 0.625818270946855, + "learning_rate": 6.163074683076081e-07, + "loss": 0.2892, + "step": 18673 + }, + { + "epoch": 0.8451685901787734, + "grad_norm": 0.8744926735664028, + "learning_rate": 6.159550043251006e-07, + "loss": 0.3326, + "step": 18674 + }, + { + "epoch": 0.845213849287169, + "grad_norm": 0.6022902779539743, + "learning_rate": 6.156026345422539e-07, + "loss": 0.2761, + "step": 18675 + }, + { + "epoch": 0.8452591083955646, + "grad_norm": 0.7967324838798232, + "learning_rate": 6.152503589666426e-07, + "loss": 0.2864, + "step": 18676 + }, + { + "epoch": 0.8453043675039602, + "grad_norm": 0.7098888607833619, + "learning_rate": 6.148981776058344e-07, + "loss": 0.2881, + "step": 18677 + }, + { + "epoch": 0.8453496266123557, + "grad_norm": 0.6371350015862445, + "learning_rate": 6.14546090467395e-07, + "loss": 0.2999, + "step": 18678 + }, + { + "epoch": 0.8453948857207513, + "grad_norm": 0.649784256762448, + "learning_rate": 6.141940975588917e-07, + "loss": 0.312, + "step": 18679 + }, + { + "epoch": 0.8454401448291469, + "grad_norm": 0.6282063633236151, + "learning_rate": 6.138421988878884e-07, + "loss": 0.2819, + "step": 18680 + }, + { + "epoch": 0.8454854039375425, + "grad_norm": 0.600461964804082, + "learning_rate": 6.134903944619447e-07, + "loss": 0.2983, + "step": 18681 + }, + { + "epoch": 0.845530663045938, + "grad_norm": 0.24963576277311014, + "learning_rate": 6.131386842886194e-07, + "loss": 0.4629, + "step": 18682 + }, + { + "epoch": 0.8455759221543335, + "grad_norm": 0.6689409334850602, + "learning_rate": 6.127870683754717e-07, + "loss": 0.234, + "step": 18683 + }, + { + "epoch": 0.8456211812627291, + "grad_norm": 0.671592567687221, + "learning_rate": 6.124355467300558e-07, + "loss": 0.3267, + "step": 18684 + }, + { + "epoch": 0.8456664403711247, + "grad_norm": 0.6113351501273673, + "learning_rate": 6.120841193599231e-07, + "loss": 0.3232, + "step": 18685 + }, + { + "epoch": 0.8457116994795203, + "grad_norm": 0.9509336152397326, + "learning_rate": 6.11732786272628e-07, + "loss": 0.3147, + "step": 18686 + }, + { + "epoch": 0.8457569585879158, + "grad_norm": 0.6433261864621458, + "learning_rate": 6.113815474757162e-07, + "loss": 0.2984, + "step": 18687 + }, + { + "epoch": 0.8458022176963114, + "grad_norm": 0.6026784560014598, + "learning_rate": 6.110304029767372e-07, + "loss": 0.3001, + "step": 18688 + }, + { + "epoch": 0.845847476804707, + "grad_norm": 0.2685922247370795, + "learning_rate": 6.106793527832344e-07, + "loss": 0.4963, + "step": 18689 + }, + { + "epoch": 0.8458927359131025, + "grad_norm": 0.26093778596229505, + "learning_rate": 6.103283969027524e-07, + "loss": 0.4648, + "step": 18690 + }, + { + "epoch": 0.845937995021498, + "grad_norm": 0.6104503654019943, + "learning_rate": 6.099775353428306e-07, + "loss": 0.2752, + "step": 18691 + }, + { + "epoch": 0.8459832541298936, + "grad_norm": 0.6807608671938857, + "learning_rate": 6.096267681110097e-07, + "loss": 0.2844, + "step": 18692 + }, + { + "epoch": 0.8460285132382892, + "grad_norm": 0.27051557821575795, + "learning_rate": 6.092760952148253e-07, + "loss": 0.4885, + "step": 18693 + }, + { + "epoch": 0.8460737723466848, + "grad_norm": 0.26212738678058434, + "learning_rate": 6.089255166618113e-07, + "loss": 0.4542, + "step": 18694 + }, + { + "epoch": 0.8461190314550804, + "grad_norm": 0.57817303041535, + "learning_rate": 6.085750324595019e-07, + "loss": 0.3503, + "step": 18695 + }, + { + "epoch": 0.8461642905634759, + "grad_norm": 0.8624097034013176, + "learning_rate": 6.082246426154292e-07, + "loss": 0.2785, + "step": 18696 + }, + { + "epoch": 0.8462095496718715, + "grad_norm": 0.6297980038757982, + "learning_rate": 6.078743471371207e-07, + "loss": 0.3429, + "step": 18697 + }, + { + "epoch": 0.846254808780267, + "grad_norm": 0.28655257814501794, + "learning_rate": 6.075241460321013e-07, + "loss": 0.4822, + "step": 18698 + }, + { + "epoch": 0.8463000678886626, + "grad_norm": 0.6108057426107879, + "learning_rate": 6.071740393078995e-07, + "loss": 0.3127, + "step": 18699 + }, + { + "epoch": 0.8463453269970581, + "grad_norm": 0.5917538247552456, + "learning_rate": 6.068240269720343e-07, + "loss": 0.2779, + "step": 18700 + }, + { + "epoch": 0.8463905861054537, + "grad_norm": 0.5479848011861002, + "learning_rate": 6.064741090320297e-07, + "loss": 0.264, + "step": 18701 + }, + { + "epoch": 0.8464358452138493, + "grad_norm": 0.5713766082902766, + "learning_rate": 6.061242854954014e-07, + "loss": 0.302, + "step": 18702 + }, + { + "epoch": 0.8464811043222449, + "grad_norm": 0.641789067864487, + "learning_rate": 6.057745563696688e-07, + "loss": 0.334, + "step": 18703 + }, + { + "epoch": 0.8465263634306405, + "grad_norm": 0.6392789934629359, + "learning_rate": 6.054249216623437e-07, + "loss": 0.3085, + "step": 18704 + }, + { + "epoch": 0.846571622539036, + "grad_norm": 0.6277894109142051, + "learning_rate": 6.050753813809412e-07, + "loss": 0.2877, + "step": 18705 + }, + { + "epoch": 0.8466168816474315, + "grad_norm": 0.6102758916483524, + "learning_rate": 6.04725935532971e-07, + "loss": 0.2878, + "step": 18706 + }, + { + "epoch": 0.8466621407558271, + "grad_norm": 0.5987516515555169, + "learning_rate": 6.043765841259402e-07, + "loss": 0.2652, + "step": 18707 + }, + { + "epoch": 0.8467073998642227, + "grad_norm": 0.6540286604005203, + "learning_rate": 6.040273271673569e-07, + "loss": 0.2849, + "step": 18708 + }, + { + "epoch": 0.8467526589726182, + "grad_norm": 0.2626634167184486, + "learning_rate": 6.036781646647261e-07, + "loss": 0.4783, + "step": 18709 + }, + { + "epoch": 0.8467979180810138, + "grad_norm": 0.2624905961997942, + "learning_rate": 6.03329096625549e-07, + "loss": 0.4351, + "step": 18710 + }, + { + "epoch": 0.8468431771894094, + "grad_norm": 0.665480812530786, + "learning_rate": 6.029801230573252e-07, + "loss": 0.2784, + "step": 18711 + }, + { + "epoch": 0.846888436297805, + "grad_norm": 0.25667545942246284, + "learning_rate": 6.026312439675553e-07, + "loss": 0.4464, + "step": 18712 + }, + { + "epoch": 0.8469336954062004, + "grad_norm": 0.6136753121244453, + "learning_rate": 6.022824593637334e-07, + "loss": 0.3045, + "step": 18713 + }, + { + "epoch": 0.846978954514596, + "grad_norm": 0.5794067426638368, + "learning_rate": 6.019337692533556e-07, + "loss": 0.2807, + "step": 18714 + }, + { + "epoch": 0.8470242136229916, + "grad_norm": 0.5829253591861213, + "learning_rate": 6.015851736439138e-07, + "loss": 0.2631, + "step": 18715 + }, + { + "epoch": 0.8470694727313872, + "grad_norm": 0.5811772762809982, + "learning_rate": 6.01236672542897e-07, + "loss": 0.2638, + "step": 18716 + }, + { + "epoch": 0.8471147318397828, + "grad_norm": 0.606423734682036, + "learning_rate": 6.008882659577942e-07, + "loss": 0.2473, + "step": 18717 + }, + { + "epoch": 0.8471599909481783, + "grad_norm": 0.6286834947259153, + "learning_rate": 6.005399538960927e-07, + "loss": 0.2999, + "step": 18718 + }, + { + "epoch": 0.8472052500565739, + "grad_norm": 0.6016500099893688, + "learning_rate": 6.001917363652759e-07, + "loss": 0.2997, + "step": 18719 + }, + { + "epoch": 0.8472505091649695, + "grad_norm": 0.8563674664688892, + "learning_rate": 5.998436133728247e-07, + "loss": 0.2978, + "step": 18720 + }, + { + "epoch": 0.8472957682733651, + "grad_norm": 0.6109050842316257, + "learning_rate": 5.994955849262207e-07, + "loss": 0.3001, + "step": 18721 + }, + { + "epoch": 0.8473410273817605, + "grad_norm": 0.6730770713324719, + "learning_rate": 5.991476510329419e-07, + "loss": 0.2903, + "step": 18722 + }, + { + "epoch": 0.8473862864901561, + "grad_norm": 0.6298080817289243, + "learning_rate": 5.987998117004628e-07, + "loss": 0.266, + "step": 18723 + }, + { + "epoch": 0.8474315455985517, + "grad_norm": 0.6843953732002055, + "learning_rate": 5.984520669362587e-07, + "loss": 0.2539, + "step": 18724 + }, + { + "epoch": 0.8474768047069473, + "grad_norm": 0.6452454414382873, + "learning_rate": 5.981044167478017e-07, + "loss": 0.3277, + "step": 18725 + }, + { + "epoch": 0.8475220638153428, + "grad_norm": 0.6321163182383871, + "learning_rate": 5.977568611425621e-07, + "loss": 0.2997, + "step": 18726 + }, + { + "epoch": 0.8475673229237384, + "grad_norm": 0.6705720958675141, + "learning_rate": 5.974094001280056e-07, + "loss": 0.3082, + "step": 18727 + }, + { + "epoch": 0.847612582032134, + "grad_norm": 0.7455710756474468, + "learning_rate": 5.970620337116012e-07, + "loss": 0.3023, + "step": 18728 + }, + { + "epoch": 0.8476578411405296, + "grad_norm": 1.577587002978819, + "learning_rate": 5.967147619008096e-07, + "loss": 0.2658, + "step": 18729 + }, + { + "epoch": 0.8477031002489251, + "grad_norm": 0.6551931720234587, + "learning_rate": 5.963675847030953e-07, + "loss": 0.2914, + "step": 18730 + }, + { + "epoch": 0.8477483593573206, + "grad_norm": 0.6306636264131432, + "learning_rate": 5.960205021259158e-07, + "loss": 0.2945, + "step": 18731 + }, + { + "epoch": 0.8477936184657162, + "grad_norm": 0.6513955703366753, + "learning_rate": 5.956735141767306e-07, + "loss": 0.3369, + "step": 18732 + }, + { + "epoch": 0.8478388775741118, + "grad_norm": 0.6427932218936455, + "learning_rate": 5.953266208629943e-07, + "loss": 0.2704, + "step": 18733 + }, + { + "epoch": 0.8478841366825074, + "grad_norm": 0.6024984714834914, + "learning_rate": 5.949798221921616e-07, + "loss": 0.2858, + "step": 18734 + }, + { + "epoch": 0.8479293957909029, + "grad_norm": 0.6340217838797076, + "learning_rate": 5.946331181716836e-07, + "loss": 0.2667, + "step": 18735 + }, + { + "epoch": 0.8479746548992985, + "grad_norm": 0.6800193111456344, + "learning_rate": 5.942865088090088e-07, + "loss": 0.2957, + "step": 18736 + }, + { + "epoch": 0.848019914007694, + "grad_norm": 0.6755842385766843, + "learning_rate": 5.939399941115859e-07, + "loss": 0.2763, + "step": 18737 + }, + { + "epoch": 0.8480651731160896, + "grad_norm": 0.5660382128890884, + "learning_rate": 5.935935740868614e-07, + "loss": 0.3231, + "step": 18738 + }, + { + "epoch": 0.8481104322244851, + "grad_norm": 0.8721972221487893, + "learning_rate": 5.93247248742278e-07, + "loss": 0.289, + "step": 18739 + }, + { + "epoch": 0.8481556913328807, + "grad_norm": 0.5421885890616007, + "learning_rate": 5.929010180852756e-07, + "loss": 0.2935, + "step": 18740 + }, + { + "epoch": 0.8482009504412763, + "grad_norm": 0.2620228273329047, + "learning_rate": 5.925548821232957e-07, + "loss": 0.4406, + "step": 18741 + }, + { + "epoch": 0.8482462095496719, + "grad_norm": 0.7154912165594549, + "learning_rate": 5.922088408637743e-07, + "loss": 0.2586, + "step": 18742 + }, + { + "epoch": 0.8482914686580675, + "grad_norm": 0.5712037351530368, + "learning_rate": 5.918628943141486e-07, + "loss": 0.2971, + "step": 18743 + }, + { + "epoch": 0.848336727766463, + "grad_norm": 0.6005964579283544, + "learning_rate": 5.915170424818495e-07, + "loss": 0.2986, + "step": 18744 + }, + { + "epoch": 0.8483819868748586, + "grad_norm": 0.6268217753093963, + "learning_rate": 5.911712853743101e-07, + "loss": 0.3353, + "step": 18745 + }, + { + "epoch": 0.8484272459832541, + "grad_norm": 0.6444851868878317, + "learning_rate": 5.90825622998959e-07, + "loss": 0.2985, + "step": 18746 + }, + { + "epoch": 0.8484725050916497, + "grad_norm": 0.619428402824257, + "learning_rate": 5.90480055363224e-07, + "loss": 0.3326, + "step": 18747 + }, + { + "epoch": 0.8485177642000452, + "grad_norm": 0.5733968766422702, + "learning_rate": 5.901345824745297e-07, + "loss": 0.2932, + "step": 18748 + }, + { + "epoch": 0.8485630233084408, + "grad_norm": 0.6356372521086717, + "learning_rate": 5.897892043402986e-07, + "loss": 0.297, + "step": 18749 + }, + { + "epoch": 0.8486082824168364, + "grad_norm": 0.7481215559015781, + "learning_rate": 5.89443920967952e-07, + "loss": 0.2889, + "step": 18750 + }, + { + "epoch": 0.848653541525232, + "grad_norm": 0.7661646171965207, + "learning_rate": 5.890987323649122e-07, + "loss": 0.2871, + "step": 18751 + }, + { + "epoch": 0.8486988006336276, + "grad_norm": 0.565257020815254, + "learning_rate": 5.887536385385917e-07, + "loss": 0.2719, + "step": 18752 + }, + { + "epoch": 0.848744059742023, + "grad_norm": 0.2543059215630452, + "learning_rate": 5.884086394964067e-07, + "loss": 0.4449, + "step": 18753 + }, + { + "epoch": 0.8487893188504186, + "grad_norm": 0.6522712431560104, + "learning_rate": 5.880637352457724e-07, + "loss": 0.2935, + "step": 18754 + }, + { + "epoch": 0.8488345779588142, + "grad_norm": 0.7678483583609577, + "learning_rate": 5.87718925794098e-07, + "loss": 0.2842, + "step": 18755 + }, + { + "epoch": 0.8488798370672098, + "grad_norm": 0.8928166842517539, + "learning_rate": 5.873742111487917e-07, + "loss": 0.2841, + "step": 18756 + }, + { + "epoch": 0.8489250961756053, + "grad_norm": 0.5877417914147925, + "learning_rate": 5.870295913172625e-07, + "loss": 0.2586, + "step": 18757 + }, + { + "epoch": 0.8489703552840009, + "grad_norm": 0.6971168951778712, + "learning_rate": 5.866850663069124e-07, + "loss": 0.2773, + "step": 18758 + }, + { + "epoch": 0.8490156143923965, + "grad_norm": 0.7007595502752746, + "learning_rate": 5.863406361251472e-07, + "loss": 0.3014, + "step": 18759 + }, + { + "epoch": 0.8490608735007921, + "grad_norm": 0.6213943742869298, + "learning_rate": 5.859963007793651e-07, + "loss": 0.3526, + "step": 18760 + }, + { + "epoch": 0.8491061326091875, + "grad_norm": 0.5838266017627748, + "learning_rate": 5.856520602769667e-07, + "loss": 0.2767, + "step": 18761 + }, + { + "epoch": 0.8491513917175831, + "grad_norm": 0.5926360558327918, + "learning_rate": 5.853079146253471e-07, + "loss": 0.2932, + "step": 18762 + }, + { + "epoch": 0.8491966508259787, + "grad_norm": 0.5563599425447716, + "learning_rate": 5.849638638319027e-07, + "loss": 0.2734, + "step": 18763 + }, + { + "epoch": 0.8492419099343743, + "grad_norm": 0.2510181119494583, + "learning_rate": 5.846199079040249e-07, + "loss": 0.4497, + "step": 18764 + }, + { + "epoch": 0.8492871690427699, + "grad_norm": 0.6454068837621051, + "learning_rate": 5.842760468491037e-07, + "loss": 0.3179, + "step": 18765 + }, + { + "epoch": 0.8493324281511654, + "grad_norm": 0.6272581164047886, + "learning_rate": 5.839322806745285e-07, + "loss": 0.3043, + "step": 18766 + }, + { + "epoch": 0.849377687259561, + "grad_norm": 0.654346951581988, + "learning_rate": 5.835886093876863e-07, + "loss": 0.2909, + "step": 18767 + }, + { + "epoch": 0.8494229463679566, + "grad_norm": 0.5879851108229344, + "learning_rate": 5.832450329959616e-07, + "loss": 0.3006, + "step": 18768 + }, + { + "epoch": 0.8494682054763522, + "grad_norm": 0.7232430310009791, + "learning_rate": 5.829015515067344e-07, + "loss": 0.3012, + "step": 18769 + }, + { + "epoch": 0.8495134645847476, + "grad_norm": 0.6953028335043215, + "learning_rate": 5.825581649273881e-07, + "loss": 0.2543, + "step": 18770 + }, + { + "epoch": 0.8495587236931432, + "grad_norm": 0.6051757432812306, + "learning_rate": 5.822148732652988e-07, + "loss": 0.2915, + "step": 18771 + }, + { + "epoch": 0.8496039828015388, + "grad_norm": 0.5621090053762571, + "learning_rate": 5.818716765278443e-07, + "loss": 0.2894, + "step": 18772 + }, + { + "epoch": 0.8496492419099344, + "grad_norm": 0.6685361801495828, + "learning_rate": 5.815285747223975e-07, + "loss": 0.3113, + "step": 18773 + }, + { + "epoch": 0.8496945010183299, + "grad_norm": 0.6262082362319686, + "learning_rate": 5.811855678563322e-07, + "loss": 0.3037, + "step": 18774 + }, + { + "epoch": 0.8497397601267255, + "grad_norm": 0.27065950072618833, + "learning_rate": 5.808426559370172e-07, + "loss": 0.4543, + "step": 18775 + }, + { + "epoch": 0.8497850192351211, + "grad_norm": 0.6036759949570691, + "learning_rate": 5.804998389718214e-07, + "loss": 0.3205, + "step": 18776 + }, + { + "epoch": 0.8498302783435167, + "grad_norm": 0.5681712093767958, + "learning_rate": 5.801571169681108e-07, + "loss": 0.2772, + "step": 18777 + }, + { + "epoch": 0.8498755374519122, + "grad_norm": 0.5971504050818677, + "learning_rate": 5.798144899332486e-07, + "loss": 0.2948, + "step": 18778 + }, + { + "epoch": 0.8499207965603077, + "grad_norm": 0.7151176048313252, + "learning_rate": 5.794719578745972e-07, + "loss": 0.2643, + "step": 18779 + }, + { + "epoch": 0.8499660556687033, + "grad_norm": 0.684751460375087, + "learning_rate": 5.79129520799519e-07, + "loss": 0.299, + "step": 18780 + }, + { + "epoch": 0.8500113147770989, + "grad_norm": 0.2779298704574024, + "learning_rate": 5.787871787153676e-07, + "loss": 0.4719, + "step": 18781 + }, + { + "epoch": 0.8500565738854945, + "grad_norm": 0.599828675778426, + "learning_rate": 5.784449316295005e-07, + "loss": 0.306, + "step": 18782 + }, + { + "epoch": 0.85010183299389, + "grad_norm": 0.6811163672881649, + "learning_rate": 5.781027795492738e-07, + "loss": 0.2583, + "step": 18783 + }, + { + "epoch": 0.8501470921022856, + "grad_norm": 0.26523875058027263, + "learning_rate": 5.77760722482037e-07, + "loss": 0.4424, + "step": 18784 + }, + { + "epoch": 0.8501923512106812, + "grad_norm": 0.5872002054895826, + "learning_rate": 5.7741876043514e-07, + "loss": 0.3334, + "step": 18785 + }, + { + "epoch": 0.8502376103190767, + "grad_norm": 0.2891465936058678, + "learning_rate": 5.770768934159315e-07, + "loss": 0.4533, + "step": 18786 + }, + { + "epoch": 0.8502828694274723, + "grad_norm": 0.27265696093260977, + "learning_rate": 5.767351214317557e-07, + "loss": 0.46, + "step": 18787 + }, + { + "epoch": 0.8503281285358678, + "grad_norm": 0.606245514986393, + "learning_rate": 5.763934444899577e-07, + "loss": 0.2617, + "step": 18788 + }, + { + "epoch": 0.8503733876442634, + "grad_norm": 0.5748858767760663, + "learning_rate": 5.760518625978778e-07, + "loss": 0.358, + "step": 18789 + }, + { + "epoch": 0.850418646752659, + "grad_norm": 0.6154813445986127, + "learning_rate": 5.757103757628573e-07, + "loss": 0.28, + "step": 18790 + }, + { + "epoch": 0.8504639058610546, + "grad_norm": 0.5935345821580245, + "learning_rate": 5.753689839922321e-07, + "loss": 0.2952, + "step": 18791 + }, + { + "epoch": 0.8505091649694501, + "grad_norm": 0.6015336243939349, + "learning_rate": 5.750276872933386e-07, + "loss": 0.3154, + "step": 18792 + }, + { + "epoch": 0.8505544240778456, + "grad_norm": 0.6187644617250292, + "learning_rate": 5.746864856735102e-07, + "loss": 0.3443, + "step": 18793 + }, + { + "epoch": 0.8505996831862412, + "grad_norm": 0.24585577370749073, + "learning_rate": 5.743453791400766e-07, + "loss": 0.461, + "step": 18794 + }, + { + "epoch": 0.8506449422946368, + "grad_norm": 0.6568526354093772, + "learning_rate": 5.740043677003688e-07, + "loss": 0.2903, + "step": 18795 + }, + { + "epoch": 0.8506902014030323, + "grad_norm": 0.6077793753624469, + "learning_rate": 5.736634513617145e-07, + "loss": 0.2996, + "step": 18796 + }, + { + "epoch": 0.8507354605114279, + "grad_norm": 0.27141939267184767, + "learning_rate": 5.733226301314381e-07, + "loss": 0.4758, + "step": 18797 + }, + { + "epoch": 0.8507807196198235, + "grad_norm": 0.2860800943684531, + "learning_rate": 5.729819040168622e-07, + "loss": 0.4898, + "step": 18798 + }, + { + "epoch": 0.8508259787282191, + "grad_norm": 0.2722142642852313, + "learning_rate": 5.72641273025309e-07, + "loss": 0.4675, + "step": 18799 + }, + { + "epoch": 0.8508712378366147, + "grad_norm": 0.6485039319914844, + "learning_rate": 5.723007371640965e-07, + "loss": 0.317, + "step": 18800 + }, + { + "epoch": 0.8509164969450101, + "grad_norm": 0.6123542113321424, + "learning_rate": 5.719602964405441e-07, + "loss": 0.2897, + "step": 18801 + }, + { + "epoch": 0.8509617560534057, + "grad_norm": 0.6664532371643351, + "learning_rate": 5.716199508619635e-07, + "loss": 0.2893, + "step": 18802 + }, + { + "epoch": 0.8510070151618013, + "grad_norm": 0.6345223426266785, + "learning_rate": 5.712797004356707e-07, + "loss": 0.3076, + "step": 18803 + }, + { + "epoch": 0.8510522742701969, + "grad_norm": 0.6016173433152902, + "learning_rate": 5.709395451689748e-07, + "loss": 0.2654, + "step": 18804 + }, + { + "epoch": 0.8510975333785924, + "grad_norm": 0.5833362501878927, + "learning_rate": 5.705994850691854e-07, + "loss": 0.2816, + "step": 18805 + }, + { + "epoch": 0.851142792486988, + "grad_norm": 0.6211038318307641, + "learning_rate": 5.702595201436101e-07, + "loss": 0.3097, + "step": 18806 + }, + { + "epoch": 0.8511880515953836, + "grad_norm": 0.6724082872979813, + "learning_rate": 5.699196503995513e-07, + "loss": 0.3061, + "step": 18807 + }, + { + "epoch": 0.8512333107037792, + "grad_norm": 0.6125127055127121, + "learning_rate": 5.695798758443133e-07, + "loss": 0.3068, + "step": 18808 + }, + { + "epoch": 0.8512785698121746, + "grad_norm": 0.656838136626938, + "learning_rate": 5.692401964851985e-07, + "loss": 0.2419, + "step": 18809 + }, + { + "epoch": 0.8513238289205702, + "grad_norm": 0.5754169823983257, + "learning_rate": 5.689006123295021e-07, + "loss": 0.3015, + "step": 18810 + }, + { + "epoch": 0.8513690880289658, + "grad_norm": 0.6049474580450107, + "learning_rate": 5.685611233845228e-07, + "loss": 0.2808, + "step": 18811 + }, + { + "epoch": 0.8514143471373614, + "grad_norm": 0.6100428373955716, + "learning_rate": 5.682217296575554e-07, + "loss": 0.3118, + "step": 18812 + }, + { + "epoch": 0.851459606245757, + "grad_norm": 0.6017512056907653, + "learning_rate": 5.678824311558923e-07, + "loss": 0.3305, + "step": 18813 + }, + { + "epoch": 0.8515048653541525, + "grad_norm": 0.6388400015965405, + "learning_rate": 5.675432278868221e-07, + "loss": 0.3152, + "step": 18814 + }, + { + "epoch": 0.8515501244625481, + "grad_norm": 0.5639933085295421, + "learning_rate": 5.672041198576345e-07, + "loss": 0.2422, + "step": 18815 + }, + { + "epoch": 0.8515953835709437, + "grad_norm": 0.27213150733841973, + "learning_rate": 5.668651070756176e-07, + "loss": 0.4692, + "step": 18816 + }, + { + "epoch": 0.8516406426793393, + "grad_norm": 0.6384639020043845, + "learning_rate": 5.66526189548054e-07, + "loss": 0.3034, + "step": 18817 + }, + { + "epoch": 0.8516859017877347, + "grad_norm": 0.6115731159495899, + "learning_rate": 5.661873672822249e-07, + "loss": 0.2785, + "step": 18818 + }, + { + "epoch": 0.8517311608961303, + "grad_norm": 0.6277971478222045, + "learning_rate": 5.658486402854136e-07, + "loss": 0.2857, + "step": 18819 + }, + { + "epoch": 0.8517764200045259, + "grad_norm": 0.6595635277979729, + "learning_rate": 5.655100085648945e-07, + "loss": 0.3337, + "step": 18820 + }, + { + "epoch": 0.8518216791129215, + "grad_norm": 0.27716929417854275, + "learning_rate": 5.651714721279478e-07, + "loss": 0.4378, + "step": 18821 + }, + { + "epoch": 0.8518669382213171, + "grad_norm": 0.6675023621782427, + "learning_rate": 5.648330309818451e-07, + "loss": 0.3588, + "step": 18822 + }, + { + "epoch": 0.8519121973297126, + "grad_norm": 0.6199954797671143, + "learning_rate": 5.644946851338584e-07, + "loss": 0.2927, + "step": 18823 + }, + { + "epoch": 0.8519574564381082, + "grad_norm": 0.6273237859124571, + "learning_rate": 5.641564345912581e-07, + "loss": 0.3057, + "step": 18824 + }, + { + "epoch": 0.8520027155465038, + "grad_norm": 0.6171373265787468, + "learning_rate": 5.638182793613134e-07, + "loss": 0.2555, + "step": 18825 + }, + { + "epoch": 0.8520479746548993, + "grad_norm": 0.2743814316281519, + "learning_rate": 5.634802194512889e-07, + "loss": 0.4488, + "step": 18826 + }, + { + "epoch": 0.8520932337632948, + "grad_norm": 0.6320920857305001, + "learning_rate": 5.631422548684479e-07, + "loss": 0.2773, + "step": 18827 + }, + { + "epoch": 0.8521384928716904, + "grad_norm": 0.5809184334187234, + "learning_rate": 5.628043856200543e-07, + "loss": 0.269, + "step": 18828 + }, + { + "epoch": 0.852183751980086, + "grad_norm": 0.636732723488326, + "learning_rate": 5.624666117133653e-07, + "loss": 0.2843, + "step": 18829 + }, + { + "epoch": 0.8522290110884816, + "grad_norm": 0.5666407427293152, + "learning_rate": 5.621289331556413e-07, + "loss": 0.2933, + "step": 18830 + }, + { + "epoch": 0.8522742701968771, + "grad_norm": 0.6216220501351425, + "learning_rate": 5.617913499541355e-07, + "loss": 0.3025, + "step": 18831 + }, + { + "epoch": 0.8523195293052727, + "grad_norm": 0.6165932173152734, + "learning_rate": 5.614538621161036e-07, + "loss": 0.2516, + "step": 18832 + }, + { + "epoch": 0.8523647884136683, + "grad_norm": 0.5415014831368999, + "learning_rate": 5.611164696487953e-07, + "loss": 0.2596, + "step": 18833 + }, + { + "epoch": 0.8524100475220638, + "grad_norm": 0.6254587466014427, + "learning_rate": 5.607791725594619e-07, + "loss": 0.2992, + "step": 18834 + }, + { + "epoch": 0.8524553066304594, + "grad_norm": 0.2603911234479618, + "learning_rate": 5.604419708553504e-07, + "loss": 0.4804, + "step": 18835 + }, + { + "epoch": 0.8525005657388549, + "grad_norm": 0.5863409592256777, + "learning_rate": 5.601048645437046e-07, + "loss": 0.2681, + "step": 18836 + }, + { + "epoch": 0.8525458248472505, + "grad_norm": 0.5850552431665895, + "learning_rate": 5.597678536317697e-07, + "loss": 0.3173, + "step": 18837 + }, + { + "epoch": 0.8525910839556461, + "grad_norm": 0.6171999858745233, + "learning_rate": 5.594309381267882e-07, + "loss": 0.2922, + "step": 18838 + }, + { + "epoch": 0.8526363430640417, + "grad_norm": 0.6624620574115613, + "learning_rate": 5.590941180359954e-07, + "loss": 0.3304, + "step": 18839 + }, + { + "epoch": 0.8526816021724372, + "grad_norm": 0.5769346122637378, + "learning_rate": 5.587573933666307e-07, + "loss": 0.282, + "step": 18840 + }, + { + "epoch": 0.8527268612808327, + "grad_norm": 0.5673563829975743, + "learning_rate": 5.584207641259309e-07, + "loss": 0.2493, + "step": 18841 + }, + { + "epoch": 0.8527721203892283, + "grad_norm": 0.2607286583379762, + "learning_rate": 5.580842303211275e-07, + "loss": 0.4406, + "step": 18842 + }, + { + "epoch": 0.8528173794976239, + "grad_norm": 0.6296153721567521, + "learning_rate": 5.577477919594504e-07, + "loss": 0.302, + "step": 18843 + }, + { + "epoch": 0.8528626386060194, + "grad_norm": 0.599328037123013, + "learning_rate": 5.574114490481303e-07, + "loss": 0.3279, + "step": 18844 + }, + { + "epoch": 0.852907897714415, + "grad_norm": 0.6831492461060868, + "learning_rate": 5.570752015943942e-07, + "loss": 0.3385, + "step": 18845 + }, + { + "epoch": 0.8529531568228106, + "grad_norm": 0.5819428657972001, + "learning_rate": 5.56739049605467e-07, + "loss": 0.2928, + "step": 18846 + }, + { + "epoch": 0.8529984159312062, + "grad_norm": 0.5730264752763495, + "learning_rate": 5.5640299308857e-07, + "loss": 0.2754, + "step": 18847 + }, + { + "epoch": 0.8530436750396018, + "grad_norm": 0.5948009048473426, + "learning_rate": 5.560670320509265e-07, + "loss": 0.3011, + "step": 18848 + }, + { + "epoch": 0.8530889341479972, + "grad_norm": 0.6239783490017728, + "learning_rate": 5.557311664997528e-07, + "loss": 0.265, + "step": 18849 + }, + { + "epoch": 0.8531341932563928, + "grad_norm": 0.6317259979875175, + "learning_rate": 5.553953964422681e-07, + "loss": 0.287, + "step": 18850 + }, + { + "epoch": 0.8531794523647884, + "grad_norm": 0.26053981077165644, + "learning_rate": 5.550597218856857e-07, + "loss": 0.4627, + "step": 18851 + }, + { + "epoch": 0.853224711473184, + "grad_norm": 0.5959948035580483, + "learning_rate": 5.547241428372169e-07, + "loss": 0.2711, + "step": 18852 + }, + { + "epoch": 0.8532699705815795, + "grad_norm": 0.605770568665207, + "learning_rate": 5.543886593040737e-07, + "loss": 0.2901, + "step": 18853 + }, + { + "epoch": 0.8533152296899751, + "grad_norm": 0.6075204232134758, + "learning_rate": 5.54053271293466e-07, + "loss": 0.2762, + "step": 18854 + }, + { + "epoch": 0.8533604887983707, + "grad_norm": 0.6230506570272137, + "learning_rate": 5.537179788125985e-07, + "loss": 0.2638, + "step": 18855 + }, + { + "epoch": 0.8534057479067663, + "grad_norm": 0.644161196347823, + "learning_rate": 5.533827818686749e-07, + "loss": 0.2977, + "step": 18856 + }, + { + "epoch": 0.8534510070151619, + "grad_norm": 0.2786588955899518, + "learning_rate": 5.530476804688994e-07, + "loss": 0.4676, + "step": 18857 + }, + { + "epoch": 0.8534962661235573, + "grad_norm": 0.6168369356365765, + "learning_rate": 5.527126746204708e-07, + "loss": 0.2872, + "step": 18858 + }, + { + "epoch": 0.8535415252319529, + "grad_norm": 0.568447828785355, + "learning_rate": 5.523777643305888e-07, + "loss": 0.2992, + "step": 18859 + }, + { + "epoch": 0.8535867843403485, + "grad_norm": 0.6224121972005846, + "learning_rate": 5.520429496064483e-07, + "loss": 0.2535, + "step": 18860 + }, + { + "epoch": 0.8536320434487441, + "grad_norm": 0.2702032240620627, + "learning_rate": 5.517082304552446e-07, + "loss": 0.4564, + "step": 18861 + }, + { + "epoch": 0.8536773025571396, + "grad_norm": 0.6557325387237996, + "learning_rate": 5.513736068841679e-07, + "loss": 0.2937, + "step": 18862 + }, + { + "epoch": 0.8537225616655352, + "grad_norm": 0.6332128454241116, + "learning_rate": 5.510390789004105e-07, + "loss": 0.3211, + "step": 18863 + }, + { + "epoch": 0.8537678207739308, + "grad_norm": 0.5827273766818591, + "learning_rate": 5.507046465111598e-07, + "loss": 0.3144, + "step": 18864 + }, + { + "epoch": 0.8538130798823264, + "grad_norm": 0.9306378137704379, + "learning_rate": 5.503703097236002e-07, + "loss": 0.2749, + "step": 18865 + }, + { + "epoch": 0.8538583389907218, + "grad_norm": 0.6298125599693024, + "learning_rate": 5.500360685449163e-07, + "loss": 0.2842, + "step": 18866 + }, + { + "epoch": 0.8539035980991174, + "grad_norm": 0.5290613603715577, + "learning_rate": 5.497019229822914e-07, + "loss": 0.2998, + "step": 18867 + }, + { + "epoch": 0.853948857207513, + "grad_norm": 0.6734853602452973, + "learning_rate": 5.493678730429041e-07, + "loss": 0.3622, + "step": 18868 + }, + { + "epoch": 0.8539941163159086, + "grad_norm": 0.6189222555767221, + "learning_rate": 5.490339187339317e-07, + "loss": 0.323, + "step": 18869 + }, + { + "epoch": 0.8540393754243042, + "grad_norm": 0.6925699517107268, + "learning_rate": 5.487000600625509e-07, + "loss": 0.2794, + "step": 18870 + }, + { + "epoch": 0.8540846345326997, + "grad_norm": 0.5761053277661985, + "learning_rate": 5.483662970359344e-07, + "loss": 0.3002, + "step": 18871 + }, + { + "epoch": 0.8541298936410953, + "grad_norm": 0.6464114513484791, + "learning_rate": 5.480326296612532e-07, + "loss": 0.3006, + "step": 18872 + }, + { + "epoch": 0.8541751527494909, + "grad_norm": 0.6209160449026865, + "learning_rate": 5.476990579456776e-07, + "loss": 0.2717, + "step": 18873 + }, + { + "epoch": 0.8542204118578864, + "grad_norm": 0.6411823424903216, + "learning_rate": 5.473655818963758e-07, + "loss": 0.2691, + "step": 18874 + }, + { + "epoch": 0.8542656709662819, + "grad_norm": 0.6069517485269621, + "learning_rate": 5.470322015205132e-07, + "loss": 0.2636, + "step": 18875 + }, + { + "epoch": 0.8543109300746775, + "grad_norm": 0.6509685693287112, + "learning_rate": 5.466989168252506e-07, + "loss": 0.2925, + "step": 18876 + }, + { + "epoch": 0.8543561891830731, + "grad_norm": 0.5782497626693441, + "learning_rate": 5.463657278177526e-07, + "loss": 0.3091, + "step": 18877 + }, + { + "epoch": 0.8544014482914687, + "grad_norm": 0.6516349786922266, + "learning_rate": 5.460326345051753e-07, + "loss": 0.3234, + "step": 18878 + }, + { + "epoch": 0.8544467073998642, + "grad_norm": 0.5935709697545233, + "learning_rate": 5.456996368946782e-07, + "loss": 0.2522, + "step": 18879 + }, + { + "epoch": 0.8544919665082598, + "grad_norm": 0.275509383061996, + "learning_rate": 5.45366734993416e-07, + "loss": 0.4659, + "step": 18880 + }, + { + "epoch": 0.8545372256166553, + "grad_norm": 0.5750454126126509, + "learning_rate": 5.450339288085404e-07, + "loss": 0.2656, + "step": 18881 + }, + { + "epoch": 0.8545824847250509, + "grad_norm": 0.6455719189966478, + "learning_rate": 5.447012183472027e-07, + "loss": 0.29, + "step": 18882 + }, + { + "epoch": 0.8546277438334465, + "grad_norm": 0.6337452815116204, + "learning_rate": 5.443686036165541e-07, + "loss": 0.2941, + "step": 18883 + }, + { + "epoch": 0.854673002941842, + "grad_norm": 0.6011439559025845, + "learning_rate": 5.440360846237397e-07, + "loss": 0.3315, + "step": 18884 + }, + { + "epoch": 0.8547182620502376, + "grad_norm": 0.6416022117789674, + "learning_rate": 5.437036613759028e-07, + "loss": 0.2904, + "step": 18885 + }, + { + "epoch": 0.8547635211586332, + "grad_norm": 0.5634699989693578, + "learning_rate": 5.433713338801883e-07, + "loss": 0.2722, + "step": 18886 + }, + { + "epoch": 0.8548087802670288, + "grad_norm": 0.614677168689508, + "learning_rate": 5.43039102143737e-07, + "loss": 0.3061, + "step": 18887 + }, + { + "epoch": 0.8548540393754243, + "grad_norm": 0.2506225998611218, + "learning_rate": 5.427069661736873e-07, + "loss": 0.4437, + "step": 18888 + }, + { + "epoch": 0.8548992984838198, + "grad_norm": 0.5797397847475295, + "learning_rate": 5.423749259771738e-07, + "loss": 0.3086, + "step": 18889 + }, + { + "epoch": 0.8549445575922154, + "grad_norm": 0.5874281031572004, + "learning_rate": 5.420429815613343e-07, + "loss": 0.2535, + "step": 18890 + }, + { + "epoch": 0.854989816700611, + "grad_norm": 0.6109100658942526, + "learning_rate": 5.41711132933298e-07, + "loss": 0.3137, + "step": 18891 + }, + { + "epoch": 0.8550350758090066, + "grad_norm": 0.2686910506299942, + "learning_rate": 5.413793801001981e-07, + "loss": 0.4826, + "step": 18892 + }, + { + "epoch": 0.8550803349174021, + "grad_norm": 0.6193657188011698, + "learning_rate": 5.410477230691618e-07, + "loss": 0.291, + "step": 18893 + }, + { + "epoch": 0.8551255940257977, + "grad_norm": 0.2658226084268709, + "learning_rate": 5.407161618473139e-07, + "loss": 0.4563, + "step": 18894 + }, + { + "epoch": 0.8551708531341933, + "grad_norm": 0.2628564515559104, + "learning_rate": 5.403846964417803e-07, + "loss": 0.4571, + "step": 18895 + }, + { + "epoch": 0.8552161122425889, + "grad_norm": 0.7782007656221628, + "learning_rate": 5.400533268596841e-07, + "loss": 0.3062, + "step": 18896 + }, + { + "epoch": 0.8552613713509843, + "grad_norm": 0.6839910168496154, + "learning_rate": 5.397220531081437e-07, + "loss": 0.3558, + "step": 18897 + }, + { + "epoch": 0.8553066304593799, + "grad_norm": 0.6251850507046652, + "learning_rate": 5.393908751942773e-07, + "loss": 0.2878, + "step": 18898 + }, + { + "epoch": 0.8553518895677755, + "grad_norm": 0.7957418093710049, + "learning_rate": 5.390597931252017e-07, + "loss": 0.3013, + "step": 18899 + }, + { + "epoch": 0.8553971486761711, + "grad_norm": 0.638593999873009, + "learning_rate": 5.387288069080298e-07, + "loss": 0.2899, + "step": 18900 + }, + { + "epoch": 0.8554424077845666, + "grad_norm": 0.6447274806725127, + "learning_rate": 5.383979165498748e-07, + "loss": 0.2584, + "step": 18901 + }, + { + "epoch": 0.8554876668929622, + "grad_norm": 0.6147292165958476, + "learning_rate": 5.380671220578454e-07, + "loss": 0.2995, + "step": 18902 + }, + { + "epoch": 0.8555329260013578, + "grad_norm": 0.5864371107534302, + "learning_rate": 5.377364234390503e-07, + "loss": 0.314, + "step": 18903 + }, + { + "epoch": 0.8555781851097534, + "grad_norm": 0.7160624991087194, + "learning_rate": 5.374058207005945e-07, + "loss": 0.2921, + "step": 18904 + }, + { + "epoch": 0.855623444218149, + "grad_norm": 0.5965671451551371, + "learning_rate": 5.37075313849581e-07, + "loss": 0.2886, + "step": 18905 + }, + { + "epoch": 0.8556687033265444, + "grad_norm": 0.25418867193021494, + "learning_rate": 5.367449028931133e-07, + "loss": 0.4619, + "step": 18906 + }, + { + "epoch": 0.85571396243494, + "grad_norm": 0.5835242042302965, + "learning_rate": 5.364145878382887e-07, + "loss": 0.3046, + "step": 18907 + }, + { + "epoch": 0.8557592215433356, + "grad_norm": 0.6649529932271935, + "learning_rate": 5.360843686922068e-07, + "loss": 0.2951, + "step": 18908 + }, + { + "epoch": 0.8558044806517312, + "grad_norm": 0.6184811126092957, + "learning_rate": 5.357542454619619e-07, + "loss": 0.3277, + "step": 18909 + }, + { + "epoch": 0.8558497397601267, + "grad_norm": 0.5683251110712597, + "learning_rate": 5.354242181546465e-07, + "loss": 0.2856, + "step": 18910 + }, + { + "epoch": 0.8558949988685223, + "grad_norm": 0.658689800101766, + "learning_rate": 5.350942867773523e-07, + "loss": 0.276, + "step": 18911 + }, + { + "epoch": 0.8559402579769179, + "grad_norm": 0.700714967968001, + "learning_rate": 5.347644513371702e-07, + "loss": 0.3159, + "step": 18912 + }, + { + "epoch": 0.8559855170853135, + "grad_norm": 0.2668507086692464, + "learning_rate": 5.344347118411863e-07, + "loss": 0.4583, + "step": 18913 + }, + { + "epoch": 0.8560307761937089, + "grad_norm": 0.6919274318584268, + "learning_rate": 5.341050682964844e-07, + "loss": 0.2797, + "step": 18914 + }, + { + "epoch": 0.8560760353021045, + "grad_norm": 0.6035576730213521, + "learning_rate": 5.337755207101486e-07, + "loss": 0.2587, + "step": 18915 + }, + { + "epoch": 0.8561212944105001, + "grad_norm": 0.6375513840137232, + "learning_rate": 5.334460690892613e-07, + "loss": 0.2689, + "step": 18916 + }, + { + "epoch": 0.8561665535188957, + "grad_norm": 0.7166262430313622, + "learning_rate": 5.331167134408994e-07, + "loss": 0.3083, + "step": 18917 + }, + { + "epoch": 0.8562118126272913, + "grad_norm": 0.5963358319668006, + "learning_rate": 5.327874537721395e-07, + "loss": 0.3031, + "step": 18918 + }, + { + "epoch": 0.8562570717356868, + "grad_norm": 0.6120859632707557, + "learning_rate": 5.324582900900587e-07, + "loss": 0.3269, + "step": 18919 + }, + { + "epoch": 0.8563023308440824, + "grad_norm": 0.6375785644361093, + "learning_rate": 5.321292224017266e-07, + "loss": 0.2969, + "step": 18920 + }, + { + "epoch": 0.856347589952478, + "grad_norm": 0.6517198634795168, + "learning_rate": 5.318002507142167e-07, + "loss": 0.3002, + "step": 18921 + }, + { + "epoch": 0.8563928490608735, + "grad_norm": 0.7554083589509392, + "learning_rate": 5.314713750345968e-07, + "loss": 0.295, + "step": 18922 + }, + { + "epoch": 0.856438108169269, + "grad_norm": 0.6673993566220832, + "learning_rate": 5.311425953699312e-07, + "loss": 0.331, + "step": 18923 + }, + { + "epoch": 0.8564833672776646, + "grad_norm": 0.278931041184877, + "learning_rate": 5.30813911727287e-07, + "loss": 0.452, + "step": 18924 + }, + { + "epoch": 0.8565286263860602, + "grad_norm": 0.6127972655988442, + "learning_rate": 5.304853241137264e-07, + "loss": 0.3178, + "step": 18925 + }, + { + "epoch": 0.8565738854944558, + "grad_norm": 0.61155340954273, + "learning_rate": 5.301568325363088e-07, + "loss": 0.2958, + "step": 18926 + }, + { + "epoch": 0.8566191446028514, + "grad_norm": 0.6030392436805826, + "learning_rate": 5.298284370020923e-07, + "loss": 0.304, + "step": 18927 + }, + { + "epoch": 0.8566644037112469, + "grad_norm": 0.2969858127149444, + "learning_rate": 5.295001375181336e-07, + "loss": 0.4643, + "step": 18928 + }, + { + "epoch": 0.8567096628196424, + "grad_norm": 0.5792598709562329, + "learning_rate": 5.291719340914875e-07, + "loss": 0.3374, + "step": 18929 + }, + { + "epoch": 0.856754921928038, + "grad_norm": 0.6383794283244996, + "learning_rate": 5.288438267292057e-07, + "loss": 0.3105, + "step": 18930 + }, + { + "epoch": 0.8568001810364336, + "grad_norm": 0.6203512639753918, + "learning_rate": 5.285158154383369e-07, + "loss": 0.3027, + "step": 18931 + }, + { + "epoch": 0.8568454401448291, + "grad_norm": 0.5828768141171531, + "learning_rate": 5.28187900225931e-07, + "loss": 0.2773, + "step": 18932 + }, + { + "epoch": 0.8568906992532247, + "grad_norm": 0.6918531275424276, + "learning_rate": 5.27860081099032e-07, + "loss": 0.2567, + "step": 18933 + }, + { + "epoch": 0.8569359583616203, + "grad_norm": 0.6421699593795226, + "learning_rate": 5.275323580646857e-07, + "loss": 0.2882, + "step": 18934 + }, + { + "epoch": 0.8569812174700159, + "grad_norm": 0.68124116019829, + "learning_rate": 5.272047311299333e-07, + "loss": 0.2321, + "step": 18935 + }, + { + "epoch": 0.8570264765784114, + "grad_norm": 1.7539131270402744, + "learning_rate": 5.268772003018124e-07, + "loss": 0.2825, + "step": 18936 + }, + { + "epoch": 0.857071735686807, + "grad_norm": 0.6545848551286552, + "learning_rate": 5.26549765587363e-07, + "loss": 0.2473, + "step": 18937 + }, + { + "epoch": 0.8571169947952025, + "grad_norm": 0.5745584200238323, + "learning_rate": 5.262224269936217e-07, + "loss": 0.2917, + "step": 18938 + }, + { + "epoch": 0.8571622539035981, + "grad_norm": 0.596404529852008, + "learning_rate": 5.258951845276178e-07, + "loss": 0.2557, + "step": 18939 + }, + { + "epoch": 0.8572075130119937, + "grad_norm": 0.6719309822426754, + "learning_rate": 5.255680381963856e-07, + "loss": 0.3002, + "step": 18940 + }, + { + "epoch": 0.8572527721203892, + "grad_norm": 0.30837623461057606, + "learning_rate": 5.252409880069553e-07, + "loss": 0.4646, + "step": 18941 + }, + { + "epoch": 0.8572980312287848, + "grad_norm": 0.6469260324986573, + "learning_rate": 5.249140339663533e-07, + "loss": 0.2939, + "step": 18942 + }, + { + "epoch": 0.8573432903371804, + "grad_norm": 0.2768016336533323, + "learning_rate": 5.245871760816029e-07, + "loss": 0.4531, + "step": 18943 + }, + { + "epoch": 0.857388549445576, + "grad_norm": 0.6037990091196405, + "learning_rate": 5.24260414359729e-07, + "loss": 0.3148, + "step": 18944 + }, + { + "epoch": 0.8574338085539714, + "grad_norm": 0.6346860481158199, + "learning_rate": 5.239337488077539e-07, + "loss": 0.3031, + "step": 18945 + }, + { + "epoch": 0.857479067662367, + "grad_norm": 0.6893880255876333, + "learning_rate": 5.236071794326952e-07, + "loss": 0.329, + "step": 18946 + }, + { + "epoch": 0.8575243267707626, + "grad_norm": 0.6232018323259965, + "learning_rate": 5.232807062415691e-07, + "loss": 0.2441, + "step": 18947 + }, + { + "epoch": 0.8575695858791582, + "grad_norm": 0.6015287791266947, + "learning_rate": 5.229543292413919e-07, + "loss": 0.2605, + "step": 18948 + }, + { + "epoch": 0.8576148449875537, + "grad_norm": 0.6469701975664196, + "learning_rate": 5.226280484391754e-07, + "loss": 0.2543, + "step": 18949 + }, + { + "epoch": 0.8576601040959493, + "grad_norm": 0.5637765135573233, + "learning_rate": 5.22301863841932e-07, + "loss": 0.2346, + "step": 18950 + }, + { + "epoch": 0.8577053632043449, + "grad_norm": 0.6134828769689903, + "learning_rate": 5.219757754566696e-07, + "loss": 0.3188, + "step": 18951 + }, + { + "epoch": 0.8577506223127405, + "grad_norm": 0.6391224852971039, + "learning_rate": 5.216497832903927e-07, + "loss": 0.2864, + "step": 18952 + }, + { + "epoch": 0.857795881421136, + "grad_norm": 0.5689721588401953, + "learning_rate": 5.213238873501086e-07, + "loss": 0.3206, + "step": 18953 + }, + { + "epoch": 0.8578411405295315, + "grad_norm": 0.7111119399235565, + "learning_rate": 5.209980876428195e-07, + "loss": 0.3183, + "step": 18954 + }, + { + "epoch": 0.8578863996379271, + "grad_norm": 0.7379886429307578, + "learning_rate": 5.206723841755257e-07, + "loss": 0.3008, + "step": 18955 + }, + { + "epoch": 0.8579316587463227, + "grad_norm": 0.6115833659110569, + "learning_rate": 5.203467769552239e-07, + "loss": 0.274, + "step": 18956 + }, + { + "epoch": 0.8579769178547183, + "grad_norm": 0.24484072528037706, + "learning_rate": 5.200212659889114e-07, + "loss": 0.4851, + "step": 18957 + }, + { + "epoch": 0.8580221769631138, + "grad_norm": 0.6280509556954103, + "learning_rate": 5.196958512835843e-07, + "loss": 0.257, + "step": 18958 + }, + { + "epoch": 0.8580674360715094, + "grad_norm": 0.588999369246937, + "learning_rate": 5.193705328462328e-07, + "loss": 0.2908, + "step": 18959 + }, + { + "epoch": 0.858112695179905, + "grad_norm": 0.6195333674917924, + "learning_rate": 5.190453106838461e-07, + "loss": 0.3257, + "step": 18960 + }, + { + "epoch": 0.8581579542883006, + "grad_norm": 0.6086824841574335, + "learning_rate": 5.187201848034146e-07, + "loss": 0.318, + "step": 18961 + }, + { + "epoch": 0.858203213396696, + "grad_norm": 0.58719677522352, + "learning_rate": 5.183951552119227e-07, + "loss": 0.2802, + "step": 18962 + }, + { + "epoch": 0.8582484725050916, + "grad_norm": 0.580374303078345, + "learning_rate": 5.180702219163552e-07, + "loss": 0.327, + "step": 18963 + }, + { + "epoch": 0.8582937316134872, + "grad_norm": 0.6288797524191543, + "learning_rate": 5.177453849236935e-07, + "loss": 0.3092, + "step": 18964 + }, + { + "epoch": 0.8583389907218828, + "grad_norm": 0.25583946705194716, + "learning_rate": 5.174206442409163e-07, + "loss": 0.4658, + "step": 18965 + }, + { + "epoch": 0.8583842498302784, + "grad_norm": 0.6087906512514828, + "learning_rate": 5.17095999875002e-07, + "loss": 0.2621, + "step": 18966 + }, + { + "epoch": 0.8584295089386739, + "grad_norm": 0.5619462201669044, + "learning_rate": 5.167714518329286e-07, + "loss": 0.2717, + "step": 18967 + }, + { + "epoch": 0.8584747680470695, + "grad_norm": 0.6351139673480072, + "learning_rate": 5.16447000121666e-07, + "loss": 0.3056, + "step": 18968 + }, + { + "epoch": 0.858520027155465, + "grad_norm": 0.5868098674192496, + "learning_rate": 5.161226447481865e-07, + "loss": 0.3107, + "step": 18969 + }, + { + "epoch": 0.8585652862638606, + "grad_norm": 0.2619592796412879, + "learning_rate": 5.157983857194615e-07, + "loss": 0.4457, + "step": 18970 + }, + { + "epoch": 0.8586105453722561, + "grad_norm": 0.6057152229425675, + "learning_rate": 5.154742230424575e-07, + "loss": 0.2846, + "step": 18971 + }, + { + "epoch": 0.8586558044806517, + "grad_norm": 0.6068278424363579, + "learning_rate": 5.151501567241373e-07, + "loss": 0.3123, + "step": 18972 + }, + { + "epoch": 0.8587010635890473, + "grad_norm": 0.6502329942997074, + "learning_rate": 5.148261867714671e-07, + "loss": 0.2686, + "step": 18973 + }, + { + "epoch": 0.8587463226974429, + "grad_norm": 0.6056539004633698, + "learning_rate": 5.145023131914074e-07, + "loss": 0.2948, + "step": 18974 + }, + { + "epoch": 0.8587915818058385, + "grad_norm": 0.6625029521242054, + "learning_rate": 5.141785359909168e-07, + "loss": 0.3182, + "step": 18975 + }, + { + "epoch": 0.858836840914234, + "grad_norm": 0.6228892249909714, + "learning_rate": 5.138548551769512e-07, + "loss": 0.2629, + "step": 18976 + }, + { + "epoch": 0.8588821000226295, + "grad_norm": 0.5779265648025561, + "learning_rate": 5.135312707564683e-07, + "loss": 0.2828, + "step": 18977 + }, + { + "epoch": 0.8589273591310251, + "grad_norm": 0.6126297100733569, + "learning_rate": 5.132077827364174e-07, + "loss": 0.3255, + "step": 18978 + }, + { + "epoch": 0.8589726182394207, + "grad_norm": 0.2882798253765335, + "learning_rate": 5.128843911237525e-07, + "loss": 0.4581, + "step": 18979 + }, + { + "epoch": 0.8590178773478162, + "grad_norm": 0.5635818665300483, + "learning_rate": 5.125610959254213e-07, + "loss": 0.2562, + "step": 18980 + }, + { + "epoch": 0.8590631364562118, + "grad_norm": 0.6155989790532402, + "learning_rate": 5.122378971483683e-07, + "loss": 0.305, + "step": 18981 + }, + { + "epoch": 0.8591083955646074, + "grad_norm": 0.6168694726468859, + "learning_rate": 5.119147947995401e-07, + "loss": 0.3282, + "step": 18982 + }, + { + "epoch": 0.859153654673003, + "grad_norm": 0.7878038210236072, + "learning_rate": 5.115917888858802e-07, + "loss": 0.324, + "step": 18983 + }, + { + "epoch": 0.8591989137813985, + "grad_norm": 0.2860775231703091, + "learning_rate": 5.112688794143273e-07, + "loss": 0.4733, + "step": 18984 + }, + { + "epoch": 0.859244172889794, + "grad_norm": 0.584825913982036, + "learning_rate": 5.109460663918192e-07, + "loss": 0.3034, + "step": 18985 + }, + { + "epoch": 0.8592894319981896, + "grad_norm": 0.634246711145265, + "learning_rate": 5.106233498252927e-07, + "loss": 0.3081, + "step": 18986 + }, + { + "epoch": 0.8593346911065852, + "grad_norm": 0.2571638233495209, + "learning_rate": 5.103007297216838e-07, + "loss": 0.4738, + "step": 18987 + }, + { + "epoch": 0.8593799502149808, + "grad_norm": 0.636572794194953, + "learning_rate": 5.099782060879227e-07, + "loss": 0.2935, + "step": 18988 + }, + { + "epoch": 0.8594252093233763, + "grad_norm": 0.8216739640425371, + "learning_rate": 5.096557789309392e-07, + "loss": 0.3017, + "step": 18989 + }, + { + "epoch": 0.8594704684317719, + "grad_norm": 0.5856909949197273, + "learning_rate": 5.093334482576634e-07, + "loss": 0.3003, + "step": 18990 + }, + { + "epoch": 0.8595157275401675, + "grad_norm": 0.6767030167755445, + "learning_rate": 5.09011214075018e-07, + "loss": 0.3364, + "step": 18991 + }, + { + "epoch": 0.8595609866485631, + "grad_norm": 0.26674590985470387, + "learning_rate": 5.086890763899299e-07, + "loss": 0.4833, + "step": 18992 + }, + { + "epoch": 0.8596062457569585, + "grad_norm": 0.5781600172894498, + "learning_rate": 5.083670352093196e-07, + "loss": 0.3348, + "step": 18993 + }, + { + "epoch": 0.8596515048653541, + "grad_norm": 0.6306420137429097, + "learning_rate": 5.080450905401057e-07, + "loss": 0.3246, + "step": 18994 + }, + { + "epoch": 0.8596967639737497, + "grad_norm": 0.6241465051839432, + "learning_rate": 5.07723242389207e-07, + "loss": 0.2773, + "step": 18995 + }, + { + "epoch": 0.8597420230821453, + "grad_norm": 0.6184597563032113, + "learning_rate": 5.074014907635405e-07, + "loss": 0.3036, + "step": 18996 + }, + { + "epoch": 0.8597872821905408, + "grad_norm": 0.5868468882841714, + "learning_rate": 5.070798356700163e-07, + "loss": 0.3068, + "step": 18997 + }, + { + "epoch": 0.8598325412989364, + "grad_norm": 0.6258664955028327, + "learning_rate": 5.067582771155472e-07, + "loss": 0.2949, + "step": 18998 + }, + { + "epoch": 0.859877800407332, + "grad_norm": 0.578382879309725, + "learning_rate": 5.064368151070431e-07, + "loss": 0.2924, + "step": 18999 + }, + { + "epoch": 0.8599230595157276, + "grad_norm": 0.2750712488483772, + "learning_rate": 5.061154496514125e-07, + "loss": 0.4532, + "step": 19000 + }, + { + "epoch": 0.8599683186241232, + "grad_norm": 0.6657087671047848, + "learning_rate": 5.057941807555571e-07, + "loss": 0.2935, + "step": 19001 + }, + { + "epoch": 0.8600135777325186, + "grad_norm": 0.6026886189492119, + "learning_rate": 5.05473008426382e-07, + "loss": 0.2981, + "step": 19002 + }, + { + "epoch": 0.8600588368409142, + "grad_norm": 0.6383401618594063, + "learning_rate": 5.051519326707893e-07, + "loss": 0.3204, + "step": 19003 + }, + { + "epoch": 0.8601040959493098, + "grad_norm": 0.6416246251796458, + "learning_rate": 5.048309534956763e-07, + "loss": 0.3278, + "step": 19004 + }, + { + "epoch": 0.8601493550577054, + "grad_norm": 0.6460942069471778, + "learning_rate": 5.045100709079393e-07, + "loss": 0.2715, + "step": 19005 + }, + { + "epoch": 0.8601946141661009, + "grad_norm": 0.40949862405358045, + "learning_rate": 5.041892849144753e-07, + "loss": 0.4784, + "step": 19006 + }, + { + "epoch": 0.8602398732744965, + "grad_norm": 0.5929098033750538, + "learning_rate": 5.038685955221745e-07, + "loss": 0.3326, + "step": 19007 + }, + { + "epoch": 0.8602851323828921, + "grad_norm": 0.61831375951075, + "learning_rate": 5.035480027379297e-07, + "loss": 0.3042, + "step": 19008 + }, + { + "epoch": 0.8603303914912876, + "grad_norm": 0.6487690460983344, + "learning_rate": 5.032275065686287e-07, + "loss": 0.2795, + "step": 19009 + }, + { + "epoch": 0.8603756505996832, + "grad_norm": 0.6187830430063399, + "learning_rate": 5.029071070211566e-07, + "loss": 0.3131, + "step": 19010 + }, + { + "epoch": 0.8604209097080787, + "grad_norm": 0.5865905552575247, + "learning_rate": 5.025868041023996e-07, + "loss": 0.3177, + "step": 19011 + }, + { + "epoch": 0.8604661688164743, + "grad_norm": 0.6062723886429946, + "learning_rate": 5.022665978192398e-07, + "loss": 0.2824, + "step": 19012 + }, + { + "epoch": 0.8605114279248699, + "grad_norm": 1.024759836194424, + "learning_rate": 5.019464881785569e-07, + "loss": 0.2734, + "step": 19013 + }, + { + "epoch": 0.8605566870332655, + "grad_norm": 0.599430239637231, + "learning_rate": 5.016264751872291e-07, + "loss": 0.2917, + "step": 19014 + }, + { + "epoch": 0.860601946141661, + "grad_norm": 0.5957761500719001, + "learning_rate": 5.013065588521321e-07, + "loss": 0.3019, + "step": 19015 + }, + { + "epoch": 0.8606472052500566, + "grad_norm": 0.6263540868459652, + "learning_rate": 5.009867391801415e-07, + "loss": 0.2628, + "step": 19016 + }, + { + "epoch": 0.8606924643584521, + "grad_norm": 0.6826419226058544, + "learning_rate": 5.00667016178128e-07, + "loss": 0.2809, + "step": 19017 + }, + { + "epoch": 0.8607377234668477, + "grad_norm": 0.2711226807270318, + "learning_rate": 5.00347389852961e-07, + "loss": 0.4614, + "step": 19018 + }, + { + "epoch": 0.8607829825752432, + "grad_norm": 0.5811627750482236, + "learning_rate": 5.0002786021151e-07, + "loss": 0.3382, + "step": 19019 + }, + { + "epoch": 0.8608282416836388, + "grad_norm": 0.674512844703926, + "learning_rate": 4.997084272606384e-07, + "loss": 0.3412, + "step": 19020 + }, + { + "epoch": 0.8608735007920344, + "grad_norm": 0.6337745654730212, + "learning_rate": 4.993890910072124e-07, + "loss": 0.2769, + "step": 19021 + }, + { + "epoch": 0.86091875990043, + "grad_norm": 0.5466312533797884, + "learning_rate": 4.990698514580922e-07, + "loss": 0.295, + "step": 19022 + }, + { + "epoch": 0.8609640190088256, + "grad_norm": 0.6220405566632338, + "learning_rate": 4.987507086201359e-07, + "loss": 0.2996, + "step": 19023 + }, + { + "epoch": 0.861009278117221, + "grad_norm": 0.5593024505119963, + "learning_rate": 4.984316625002029e-07, + "loss": 0.2729, + "step": 19024 + }, + { + "epoch": 0.8610545372256166, + "grad_norm": 0.597273768740094, + "learning_rate": 4.981127131051494e-07, + "loss": 0.2785, + "step": 19025 + }, + { + "epoch": 0.8610997963340122, + "grad_norm": 0.581083934494568, + "learning_rate": 4.977938604418259e-07, + "loss": 0.2976, + "step": 19026 + }, + { + "epoch": 0.8611450554424078, + "grad_norm": 0.8116799266557139, + "learning_rate": 4.974751045170845e-07, + "loss": 0.306, + "step": 19027 + }, + { + "epoch": 0.8611903145508033, + "grad_norm": 0.5998911145327874, + "learning_rate": 4.971564453377748e-07, + "loss": 0.2869, + "step": 19028 + }, + { + "epoch": 0.8612355736591989, + "grad_norm": 0.6301029855331602, + "learning_rate": 4.968378829107451e-07, + "loss": 0.2929, + "step": 19029 + }, + { + "epoch": 0.8612808327675945, + "grad_norm": 0.6169160539266364, + "learning_rate": 4.965194172428378e-07, + "loss": 0.3149, + "step": 19030 + }, + { + "epoch": 0.8613260918759901, + "grad_norm": 0.6082576689734003, + "learning_rate": 4.962010483408964e-07, + "loss": 0.2762, + "step": 19031 + }, + { + "epoch": 0.8613713509843856, + "grad_norm": 0.658833602087984, + "learning_rate": 4.95882776211763e-07, + "loss": 0.2853, + "step": 19032 + }, + { + "epoch": 0.8614166100927811, + "grad_norm": 0.2621143460969143, + "learning_rate": 4.955646008622755e-07, + "loss": 0.4381, + "step": 19033 + }, + { + "epoch": 0.8614618692011767, + "grad_norm": 0.7241578876038501, + "learning_rate": 4.952465222992692e-07, + "loss": 0.3295, + "step": 19034 + }, + { + "epoch": 0.8615071283095723, + "grad_norm": 0.5715552371285294, + "learning_rate": 4.949285405295812e-07, + "loss": 0.2792, + "step": 19035 + }, + { + "epoch": 0.8615523874179679, + "grad_norm": 0.627102608544472, + "learning_rate": 4.94610655560041e-07, + "loss": 0.2917, + "step": 19036 + }, + { + "epoch": 0.8615976465263634, + "grad_norm": 0.5924211155800102, + "learning_rate": 4.942928673974823e-07, + "loss": 0.2747, + "step": 19037 + }, + { + "epoch": 0.861642905634759, + "grad_norm": 0.6872239178605828, + "learning_rate": 4.93975176048731e-07, + "loss": 0.3035, + "step": 19038 + }, + { + "epoch": 0.8616881647431546, + "grad_norm": 0.6337648406533888, + "learning_rate": 4.936575815206134e-07, + "loss": 0.2408, + "step": 19039 + }, + { + "epoch": 0.8617334238515502, + "grad_norm": 0.600265567939054, + "learning_rate": 4.933400838199543e-07, + "loss": 0.3157, + "step": 19040 + }, + { + "epoch": 0.8617786829599456, + "grad_norm": 0.5771906865411033, + "learning_rate": 4.930226829535767e-07, + "loss": 0.2966, + "step": 19041 + }, + { + "epoch": 0.8618239420683412, + "grad_norm": 0.6112288585274278, + "learning_rate": 4.927053789282988e-07, + "loss": 0.3165, + "step": 19042 + }, + { + "epoch": 0.8618692011767368, + "grad_norm": 0.5944446545435385, + "learning_rate": 4.923881717509388e-07, + "loss": 0.2963, + "step": 19043 + }, + { + "epoch": 0.8619144602851324, + "grad_norm": 0.6289189375880971, + "learning_rate": 4.920710614283131e-07, + "loss": 0.2409, + "step": 19044 + }, + { + "epoch": 0.861959719393528, + "grad_norm": 0.599893130066422, + "learning_rate": 4.917540479672356e-07, + "loss": 0.2782, + "step": 19045 + }, + { + "epoch": 0.8620049785019235, + "grad_norm": 0.5993331924655295, + "learning_rate": 4.914371313745181e-07, + "loss": 0.2983, + "step": 19046 + }, + { + "epoch": 0.8620502376103191, + "grad_norm": 0.6404876556911271, + "learning_rate": 4.911203116569685e-07, + "loss": 0.315, + "step": 19047 + }, + { + "epoch": 0.8620954967187147, + "grad_norm": 0.5569564271980745, + "learning_rate": 4.908035888213964e-07, + "loss": 0.3065, + "step": 19048 + }, + { + "epoch": 0.8621407558271103, + "grad_norm": 0.6423297856896808, + "learning_rate": 4.904869628746051e-07, + "loss": 0.3015, + "step": 19049 + }, + { + "epoch": 0.8621860149355057, + "grad_norm": 0.6286364099047824, + "learning_rate": 4.901704338234004e-07, + "loss": 0.2633, + "step": 19050 + }, + { + "epoch": 0.8622312740439013, + "grad_norm": 0.5863196918495058, + "learning_rate": 4.898540016745818e-07, + "loss": 0.2815, + "step": 19051 + }, + { + "epoch": 0.8622765331522969, + "grad_norm": 0.5968521463422396, + "learning_rate": 4.895376664349482e-07, + "loss": 0.3161, + "step": 19052 + }, + { + "epoch": 0.8623217922606925, + "grad_norm": 0.6459674874191642, + "learning_rate": 4.892214281112973e-07, + "loss": 0.346, + "step": 19053 + }, + { + "epoch": 0.862367051369088, + "grad_norm": 0.6018249731642297, + "learning_rate": 4.88905286710426e-07, + "loss": 0.2461, + "step": 19054 + }, + { + "epoch": 0.8624123104774836, + "grad_norm": 0.646433803061555, + "learning_rate": 4.88589242239123e-07, + "loss": 0.2744, + "step": 19055 + }, + { + "epoch": 0.8624575695858792, + "grad_norm": 0.5662144953473035, + "learning_rate": 4.882732947041818e-07, + "loss": 0.2501, + "step": 19056 + }, + { + "epoch": 0.8625028286942747, + "grad_norm": 0.620217354053335, + "learning_rate": 4.879574441123907e-07, + "loss": 0.2622, + "step": 19057 + }, + { + "epoch": 0.8625480878026703, + "grad_norm": 0.5854897297948762, + "learning_rate": 4.876416904705384e-07, + "loss": 0.2975, + "step": 19058 + }, + { + "epoch": 0.8625933469110658, + "grad_norm": 0.26857698043195, + "learning_rate": 4.873260337854058e-07, + "loss": 0.4751, + "step": 19059 + }, + { + "epoch": 0.8626386060194614, + "grad_norm": 0.6472511347380544, + "learning_rate": 4.870104740637771e-07, + "loss": 0.2864, + "step": 19060 + }, + { + "epoch": 0.862683865127857, + "grad_norm": 0.7115035145085302, + "learning_rate": 4.866950113124335e-07, + "loss": 0.2362, + "step": 19061 + }, + { + "epoch": 0.8627291242362526, + "grad_norm": 0.5470744766222366, + "learning_rate": 4.863796455381525e-07, + "loss": 0.2481, + "step": 19062 + }, + { + "epoch": 0.8627743833446481, + "grad_norm": 0.5624476468618756, + "learning_rate": 4.860643767477097e-07, + "loss": 0.2607, + "step": 19063 + }, + { + "epoch": 0.8628196424530437, + "grad_norm": 0.6459982206651735, + "learning_rate": 4.857492049478807e-07, + "loss": 0.2816, + "step": 19064 + }, + { + "epoch": 0.8628649015614392, + "grad_norm": 0.7126904269804301, + "learning_rate": 4.854341301454357e-07, + "loss": 0.2854, + "step": 19065 + }, + { + "epoch": 0.8629101606698348, + "grad_norm": 0.6386738086723427, + "learning_rate": 4.851191523471465e-07, + "loss": 0.2792, + "step": 19066 + }, + { + "epoch": 0.8629554197782303, + "grad_norm": 0.26909620891534086, + "learning_rate": 4.848042715597811e-07, + "loss": 0.44, + "step": 19067 + }, + { + "epoch": 0.8630006788866259, + "grad_norm": 0.6898405599890808, + "learning_rate": 4.84489487790103e-07, + "loss": 0.2905, + "step": 19068 + }, + { + "epoch": 0.8630459379950215, + "grad_norm": 0.6080263444100681, + "learning_rate": 4.841748010448777e-07, + "loss": 0.2512, + "step": 19069 + }, + { + "epoch": 0.8630911971034171, + "grad_norm": 0.6142457407787201, + "learning_rate": 4.838602113308677e-07, + "loss": 0.2908, + "step": 19070 + }, + { + "epoch": 0.8631364562118127, + "grad_norm": 0.6201942893627198, + "learning_rate": 4.835457186548315e-07, + "loss": 0.2512, + "step": 19071 + }, + { + "epoch": 0.8631817153202082, + "grad_norm": 0.6845755794045176, + "learning_rate": 4.832313230235253e-07, + "loss": 0.2799, + "step": 19072 + }, + { + "epoch": 0.8632269744286037, + "grad_norm": 1.158425573208752, + "learning_rate": 4.829170244437064e-07, + "loss": 0.3014, + "step": 19073 + }, + { + "epoch": 0.8632722335369993, + "grad_norm": 0.6331246849845407, + "learning_rate": 4.82602822922128e-07, + "loss": 0.2885, + "step": 19074 + }, + { + "epoch": 0.8633174926453949, + "grad_norm": 0.5773053131214465, + "learning_rate": 4.822887184655406e-07, + "loss": 0.2824, + "step": 19075 + }, + { + "epoch": 0.8633627517537904, + "grad_norm": 0.6958738240209923, + "learning_rate": 4.819747110806928e-07, + "loss": 0.3117, + "step": 19076 + }, + { + "epoch": 0.863408010862186, + "grad_norm": 0.6302093385948211, + "learning_rate": 4.816608007743335e-07, + "loss": 0.3118, + "step": 19077 + }, + { + "epoch": 0.8634532699705816, + "grad_norm": 0.26448191342255795, + "learning_rate": 4.813469875532056e-07, + "loss": 0.479, + "step": 19078 + }, + { + "epoch": 0.8634985290789772, + "grad_norm": 0.6490925546607369, + "learning_rate": 4.810332714240534e-07, + "loss": 0.311, + "step": 19079 + }, + { + "epoch": 0.8635437881873728, + "grad_norm": 0.6298511384731518, + "learning_rate": 4.80719652393618e-07, + "loss": 0.3362, + "step": 19080 + }, + { + "epoch": 0.8635890472957682, + "grad_norm": 0.6095024090671313, + "learning_rate": 4.804061304686358e-07, + "loss": 0.2934, + "step": 19081 + }, + { + "epoch": 0.8636343064041638, + "grad_norm": 0.5775729096049567, + "learning_rate": 4.800927056558452e-07, + "loss": 0.3192, + "step": 19082 + }, + { + "epoch": 0.8636795655125594, + "grad_norm": 0.6587368973474238, + "learning_rate": 4.79779377961982e-07, + "loss": 0.3155, + "step": 19083 + }, + { + "epoch": 0.863724824620955, + "grad_norm": 0.6612433715020307, + "learning_rate": 4.794661473937761e-07, + "loss": 0.3334, + "step": 19084 + }, + { + "epoch": 0.8637700837293505, + "grad_norm": 0.41205551413684943, + "learning_rate": 4.791530139579586e-07, + "loss": 0.4772, + "step": 19085 + }, + { + "epoch": 0.8638153428377461, + "grad_norm": 0.6138513167648837, + "learning_rate": 4.788399776612584e-07, + "loss": 0.2692, + "step": 19086 + }, + { + "epoch": 0.8638606019461417, + "grad_norm": 0.6230080093900928, + "learning_rate": 4.785270385104018e-07, + "loss": 0.2664, + "step": 19087 + }, + { + "epoch": 0.8639058610545373, + "grad_norm": 0.6425162193038234, + "learning_rate": 4.782141965121129e-07, + "loss": 0.3199, + "step": 19088 + }, + { + "epoch": 0.8639511201629327, + "grad_norm": 0.28171184535994015, + "learning_rate": 4.779014516731123e-07, + "loss": 0.4722, + "step": 19089 + }, + { + "epoch": 0.8639963792713283, + "grad_norm": 0.5849269358010359, + "learning_rate": 4.775888040001214e-07, + "loss": 0.2832, + "step": 19090 + }, + { + "epoch": 0.8640416383797239, + "grad_norm": 0.6586879746802395, + "learning_rate": 4.772762534998582e-07, + "loss": 0.2955, + "step": 19091 + }, + { + "epoch": 0.8640868974881195, + "grad_norm": 0.642220205319096, + "learning_rate": 4.769638001790366e-07, + "loss": 0.2682, + "step": 19092 + }, + { + "epoch": 0.8641321565965151, + "grad_norm": 0.6185275549850182, + "learning_rate": 4.766514440443726e-07, + "loss": 0.2928, + "step": 19093 + }, + { + "epoch": 0.8641774157049106, + "grad_norm": 0.6533054865884008, + "learning_rate": 4.763391851025756e-07, + "loss": 0.2647, + "step": 19094 + }, + { + "epoch": 0.8642226748133062, + "grad_norm": 0.7854922368088226, + "learning_rate": 4.76027023360357e-07, + "loss": 0.304, + "step": 19095 + }, + { + "epoch": 0.8642679339217018, + "grad_norm": 0.643605119309677, + "learning_rate": 4.7571495882442363e-07, + "loss": 0.2787, + "step": 19096 + }, + { + "epoch": 0.8643131930300973, + "grad_norm": 0.2500595226999286, + "learning_rate": 4.7540299150147906e-07, + "loss": 0.4567, + "step": 19097 + }, + { + "epoch": 0.8643584521384928, + "grad_norm": 0.654488899090377, + "learning_rate": 4.7509112139822846e-07, + "loss": 0.2883, + "step": 19098 + }, + { + "epoch": 0.8644037112468884, + "grad_norm": 0.6683060747430509, + "learning_rate": 4.7477934852137306e-07, + "loss": 0.3268, + "step": 19099 + }, + { + "epoch": 0.864448970355284, + "grad_norm": 0.3008385202888705, + "learning_rate": 4.7446767287761154e-07, + "loss": 0.4767, + "step": 19100 + }, + { + "epoch": 0.8644942294636796, + "grad_norm": 0.6540711942901398, + "learning_rate": 4.741560944736395e-07, + "loss": 0.3294, + "step": 19101 + }, + { + "epoch": 0.8645394885720751, + "grad_norm": 0.6253051333606227, + "learning_rate": 4.7384461331615284e-07, + "loss": 0.2962, + "step": 19102 + }, + { + "epoch": 0.8645847476804707, + "grad_norm": 0.6168597078443402, + "learning_rate": 4.735332294118455e-07, + "loss": 0.2801, + "step": 19103 + }, + { + "epoch": 0.8646300067888663, + "grad_norm": 0.6142779127798528, + "learning_rate": 4.732219427674073e-07, + "loss": 0.3245, + "step": 19104 + }, + { + "epoch": 0.8646752658972618, + "grad_norm": 0.6313286970148196, + "learning_rate": 4.729107533895255e-07, + "loss": 0.298, + "step": 19105 + }, + { + "epoch": 0.8647205250056574, + "grad_norm": 0.5928257456260824, + "learning_rate": 4.7259966128488876e-07, + "loss": 0.3042, + "step": 19106 + }, + { + "epoch": 0.8647657841140529, + "grad_norm": 0.554898880701603, + "learning_rate": 4.722886664601795e-07, + "loss": 0.254, + "step": 19107 + }, + { + "epoch": 0.8648110432224485, + "grad_norm": 0.6208995354350286, + "learning_rate": 4.719777689220817e-07, + "loss": 0.3293, + "step": 19108 + }, + { + "epoch": 0.8648563023308441, + "grad_norm": 0.6028647084343479, + "learning_rate": 4.716669686772751e-07, + "loss": 0.2971, + "step": 19109 + }, + { + "epoch": 0.8649015614392397, + "grad_norm": 0.5898969298823253, + "learning_rate": 4.7135626573243607e-07, + "loss": 0.2651, + "step": 19110 + }, + { + "epoch": 0.8649468205476352, + "grad_norm": 0.6482671602848603, + "learning_rate": 4.710456600942431e-07, + "loss": 0.303, + "step": 19111 + }, + { + "epoch": 0.8649920796560308, + "grad_norm": 0.6068135500269236, + "learning_rate": 4.707351517693698e-07, + "loss": 0.2706, + "step": 19112 + }, + { + "epoch": 0.8650373387644263, + "grad_norm": 0.6790603909443061, + "learning_rate": 4.704247407644874e-07, + "loss": 0.2919, + "step": 19113 + }, + { + "epoch": 0.8650825978728219, + "grad_norm": 0.31337349374409723, + "learning_rate": 4.701144270862651e-07, + "loss": 0.4753, + "step": 19114 + }, + { + "epoch": 0.8651278569812175, + "grad_norm": 0.285205795606402, + "learning_rate": 4.6980421074137137e-07, + "loss": 0.4887, + "step": 19115 + }, + { + "epoch": 0.865173116089613, + "grad_norm": 0.6040188107489841, + "learning_rate": 4.6949409173647267e-07, + "loss": 0.2864, + "step": 19116 + }, + { + "epoch": 0.8652183751980086, + "grad_norm": 0.28986218884428416, + "learning_rate": 4.691840700782313e-07, + "loss": 0.4959, + "step": 19117 + }, + { + "epoch": 0.8652636343064042, + "grad_norm": 0.6388514118297662, + "learning_rate": 4.6887414577330814e-07, + "loss": 0.3104, + "step": 19118 + }, + { + "epoch": 0.8653088934147998, + "grad_norm": 0.620333813228641, + "learning_rate": 4.6856431882836397e-07, + "loss": 0.2906, + "step": 19119 + }, + { + "epoch": 0.8653541525231953, + "grad_norm": 0.604231251337452, + "learning_rate": 4.682545892500545e-07, + "loss": 0.2928, + "step": 19120 + }, + { + "epoch": 0.8653994116315908, + "grad_norm": 0.6711168213936696, + "learning_rate": 4.679449570450367e-07, + "loss": 0.3025, + "step": 19121 + }, + { + "epoch": 0.8654446707399864, + "grad_norm": 0.5841088916852475, + "learning_rate": 4.676354222199625e-07, + "loss": 0.3123, + "step": 19122 + }, + { + "epoch": 0.865489929848382, + "grad_norm": 0.6040892371615114, + "learning_rate": 4.6732598478148264e-07, + "loss": 0.29, + "step": 19123 + }, + { + "epoch": 0.8655351889567775, + "grad_norm": 0.26525114918965464, + "learning_rate": 4.6701664473624677e-07, + "loss": 0.454, + "step": 19124 + }, + { + "epoch": 0.8655804480651731, + "grad_norm": 0.6638484740908424, + "learning_rate": 4.667074020909013e-07, + "loss": 0.2827, + "step": 19125 + }, + { + "epoch": 0.8656257071735687, + "grad_norm": 0.6088432095068631, + "learning_rate": 4.663982568520897e-07, + "loss": 0.2832, + "step": 19126 + }, + { + "epoch": 0.8656709662819643, + "grad_norm": 0.606926588924379, + "learning_rate": 4.660892090264557e-07, + "loss": 0.3158, + "step": 19127 + }, + { + "epoch": 0.8657162253903599, + "grad_norm": 0.5796853979962601, + "learning_rate": 4.657802586206411e-07, + "loss": 0.2902, + "step": 19128 + }, + { + "epoch": 0.8657614844987553, + "grad_norm": 0.6563871862561939, + "learning_rate": 4.6547140564128236e-07, + "loss": 0.2957, + "step": 19129 + }, + { + "epoch": 0.8658067436071509, + "grad_norm": 0.6368524606296716, + "learning_rate": 4.651626500950157e-07, + "loss": 0.2634, + "step": 19130 + }, + { + "epoch": 0.8658520027155465, + "grad_norm": 0.6389499416717722, + "learning_rate": 4.648539919884759e-07, + "loss": 0.3179, + "step": 19131 + }, + { + "epoch": 0.8658972618239421, + "grad_norm": 0.5272471866796019, + "learning_rate": 4.6454543132829653e-07, + "loss": 0.2628, + "step": 19132 + }, + { + "epoch": 0.8659425209323376, + "grad_norm": 0.6371155766939642, + "learning_rate": 4.6423696812110564e-07, + "loss": 0.2985, + "step": 19133 + }, + { + "epoch": 0.8659877800407332, + "grad_norm": 0.5966897807770152, + "learning_rate": 4.639286023735312e-07, + "loss": 0.2576, + "step": 19134 + }, + { + "epoch": 0.8660330391491288, + "grad_norm": 0.6236122764447524, + "learning_rate": 4.6362033409220077e-07, + "loss": 0.2903, + "step": 19135 + }, + { + "epoch": 0.8660782982575244, + "grad_norm": 0.5832164595872447, + "learning_rate": 4.6331216328373565e-07, + "loss": 0.3142, + "step": 19136 + }, + { + "epoch": 0.8661235573659198, + "grad_norm": 0.6123744465516868, + "learning_rate": 4.6300408995476e-07, + "loss": 0.3063, + "step": 19137 + }, + { + "epoch": 0.8661688164743154, + "grad_norm": 0.5975181078415203, + "learning_rate": 4.6269611411189185e-07, + "loss": 0.3219, + "step": 19138 + }, + { + "epoch": 0.866214075582711, + "grad_norm": 0.5996937805734076, + "learning_rate": 4.6238823576174817e-07, + "loss": 0.3384, + "step": 19139 + }, + { + "epoch": 0.8662593346911066, + "grad_norm": 0.6347639658352031, + "learning_rate": 4.620804549109448e-07, + "loss": 0.2838, + "step": 19140 + }, + { + "epoch": 0.8663045937995022, + "grad_norm": 0.2863017410983737, + "learning_rate": 4.6177277156609634e-07, + "loss": 0.475, + "step": 19141 + }, + { + "epoch": 0.8663498529078977, + "grad_norm": 0.5374663072332243, + "learning_rate": 4.6146518573381314e-07, + "loss": 0.2555, + "step": 19142 + }, + { + "epoch": 0.8663951120162933, + "grad_norm": 0.6177097756381084, + "learning_rate": 4.6115769742070326e-07, + "loss": 0.2865, + "step": 19143 + }, + { + "epoch": 0.8664403711246889, + "grad_norm": 0.5915875014756634, + "learning_rate": 4.608503066333742e-07, + "loss": 0.3041, + "step": 19144 + }, + { + "epoch": 0.8664856302330844, + "grad_norm": 0.2633069387160369, + "learning_rate": 4.6054301337843165e-07, + "loss": 0.4299, + "step": 19145 + }, + { + "epoch": 0.8665308893414799, + "grad_norm": 0.2848193531287552, + "learning_rate": 4.6023581766247825e-07, + "loss": 0.4688, + "step": 19146 + }, + { + "epoch": 0.8665761484498755, + "grad_norm": 0.5622954185203691, + "learning_rate": 4.5992871949211373e-07, + "loss": 0.2719, + "step": 19147 + }, + { + "epoch": 0.8666214075582711, + "grad_norm": 0.2668497744033602, + "learning_rate": 4.596217188739377e-07, + "loss": 0.4457, + "step": 19148 + }, + { + "epoch": 0.8666666666666667, + "grad_norm": 0.6091572459376194, + "learning_rate": 4.593148158145455e-07, + "loss": 0.2476, + "step": 19149 + }, + { + "epoch": 0.8667119257750623, + "grad_norm": 0.5863981063835381, + "learning_rate": 4.59008010320533e-07, + "loss": 0.2957, + "step": 19150 + }, + { + "epoch": 0.8667571848834578, + "grad_norm": 0.7047953065256126, + "learning_rate": 4.587013023984921e-07, + "loss": 0.2619, + "step": 19151 + }, + { + "epoch": 0.8668024439918534, + "grad_norm": 0.7000958588469114, + "learning_rate": 4.583946920550114e-07, + "loss": 0.2535, + "step": 19152 + }, + { + "epoch": 0.866847703100249, + "grad_norm": 0.6598872071664383, + "learning_rate": 4.580881792966807e-07, + "loss": 0.2703, + "step": 19153 + }, + { + "epoch": 0.8668929622086445, + "grad_norm": 0.602439683038341, + "learning_rate": 4.577817641300869e-07, + "loss": 0.2874, + "step": 19154 + }, + { + "epoch": 0.86693822131704, + "grad_norm": 0.590883109590152, + "learning_rate": 4.574754465618114e-07, + "loss": 0.2972, + "step": 19155 + }, + { + "epoch": 0.8669834804254356, + "grad_norm": 0.6449816228421348, + "learning_rate": 4.571692265984368e-07, + "loss": 0.3008, + "step": 19156 + }, + { + "epoch": 0.8670287395338312, + "grad_norm": 0.6160323402068825, + "learning_rate": 4.5686310424654325e-07, + "loss": 0.28, + "step": 19157 + }, + { + "epoch": 0.8670739986422268, + "grad_norm": 0.6428005330677683, + "learning_rate": 4.565570795127106e-07, + "loss": 0.2698, + "step": 19158 + }, + { + "epoch": 0.8671192577506223, + "grad_norm": 0.5738385724075437, + "learning_rate": 4.5625115240351016e-07, + "loss": 0.3133, + "step": 19159 + }, + { + "epoch": 0.8671645168590179, + "grad_norm": 0.8398364145820625, + "learning_rate": 4.559453229255173e-07, + "loss": 0.2853, + "step": 19160 + }, + { + "epoch": 0.8672097759674134, + "grad_norm": 0.5485241443634394, + "learning_rate": 4.5563959108530455e-07, + "loss": 0.2243, + "step": 19161 + }, + { + "epoch": 0.867255035075809, + "grad_norm": 0.580763951997358, + "learning_rate": 4.553339568894399e-07, + "loss": 0.2844, + "step": 19162 + }, + { + "epoch": 0.8673002941842046, + "grad_norm": 0.6433553330581192, + "learning_rate": 4.550284203444899e-07, + "loss": 0.2904, + "step": 19163 + }, + { + "epoch": 0.8673455532926001, + "grad_norm": 0.4912122765876196, + "learning_rate": 4.5472298145702144e-07, + "loss": 0.4849, + "step": 19164 + }, + { + "epoch": 0.8673908124009957, + "grad_norm": 0.6330924353046441, + "learning_rate": 4.5441764023359483e-07, + "loss": 0.2417, + "step": 19165 + }, + { + "epoch": 0.8674360715093913, + "grad_norm": 0.6120571184065314, + "learning_rate": 4.5411239668077366e-07, + "loss": 0.3085, + "step": 19166 + }, + { + "epoch": 0.8674813306177869, + "grad_norm": 0.2666466628316768, + "learning_rate": 4.5380725080511555e-07, + "loss": 0.4664, + "step": 19167 + }, + { + "epoch": 0.8675265897261824, + "grad_norm": 0.6969170437946436, + "learning_rate": 4.5350220261317633e-07, + "loss": 0.2461, + "step": 19168 + }, + { + "epoch": 0.8675718488345779, + "grad_norm": 0.6219580735827156, + "learning_rate": 4.5319725211151077e-07, + "loss": 0.262, + "step": 19169 + }, + { + "epoch": 0.8676171079429735, + "grad_norm": 0.5670702283976174, + "learning_rate": 4.5289239930667304e-07, + "loss": 0.2742, + "step": 19170 + }, + { + "epoch": 0.8676623670513691, + "grad_norm": 0.6359844552906783, + "learning_rate": 4.525876442052124e-07, + "loss": 0.3061, + "step": 19171 + }, + { + "epoch": 0.8677076261597646, + "grad_norm": 0.7597180178156122, + "learning_rate": 4.522829868136758e-07, + "loss": 0.3253, + "step": 19172 + }, + { + "epoch": 0.8677528852681602, + "grad_norm": 0.5932075929375237, + "learning_rate": 4.519784271386107e-07, + "loss": 0.2991, + "step": 19173 + }, + { + "epoch": 0.8677981443765558, + "grad_norm": 0.6196670959860574, + "learning_rate": 4.516739651865615e-07, + "loss": 0.2778, + "step": 19174 + }, + { + "epoch": 0.8678434034849514, + "grad_norm": 0.6200868791032506, + "learning_rate": 4.5136960096407e-07, + "loss": 0.3191, + "step": 19175 + }, + { + "epoch": 0.867888662593347, + "grad_norm": 0.6199821020576625, + "learning_rate": 4.5106533447767496e-07, + "loss": 0.2861, + "step": 19176 + }, + { + "epoch": 0.8679339217017424, + "grad_norm": 0.6538168468033612, + "learning_rate": 4.507611657339156e-07, + "loss": 0.3108, + "step": 19177 + }, + { + "epoch": 0.867979180810138, + "grad_norm": 0.25948403348974586, + "learning_rate": 4.504570947393261e-07, + "loss": 0.4587, + "step": 19178 + }, + { + "epoch": 0.8680244399185336, + "grad_norm": 0.7001967108251121, + "learning_rate": 4.5015312150044177e-07, + "loss": 0.3174, + "step": 19179 + }, + { + "epoch": 0.8680696990269292, + "grad_norm": 0.6252283624355447, + "learning_rate": 4.49849246023793e-07, + "loss": 0.3012, + "step": 19180 + }, + { + "epoch": 0.8681149581353247, + "grad_norm": 0.2573979034500562, + "learning_rate": 4.4954546831590837e-07, + "loss": 0.4589, + "step": 19181 + }, + { + "epoch": 0.8681602172437203, + "grad_norm": 0.5719706327436379, + "learning_rate": 4.4924178838331554e-07, + "loss": 0.2555, + "step": 19182 + }, + { + "epoch": 0.8682054763521159, + "grad_norm": 0.6167229877308994, + "learning_rate": 4.4893820623254257e-07, + "loss": 0.2688, + "step": 19183 + }, + { + "epoch": 0.8682507354605115, + "grad_norm": 0.6365963953210592, + "learning_rate": 4.486347218701076e-07, + "loss": 0.3255, + "step": 19184 + }, + { + "epoch": 0.868295994568907, + "grad_norm": 0.6035459960710321, + "learning_rate": 4.4833133530253425e-07, + "loss": 0.2828, + "step": 19185 + }, + { + "epoch": 0.8683412536773025, + "grad_norm": 0.7515385783969697, + "learning_rate": 4.4802804653634124e-07, + "loss": 0.2625, + "step": 19186 + }, + { + "epoch": 0.8683865127856981, + "grad_norm": 0.6160544269891035, + "learning_rate": 4.477248555780467e-07, + "loss": 0.2819, + "step": 19187 + }, + { + "epoch": 0.8684317718940937, + "grad_norm": 0.2900133552382891, + "learning_rate": 4.4742176243416257e-07, + "loss": 0.4588, + "step": 19188 + }, + { + "epoch": 0.8684770310024893, + "grad_norm": 0.5679561213409132, + "learning_rate": 4.4711876711120206e-07, + "loss": 0.2858, + "step": 19189 + }, + { + "epoch": 0.8685222901108848, + "grad_norm": 2.1890383143218557, + "learning_rate": 4.4681586961567714e-07, + "loss": 0.3471, + "step": 19190 + }, + { + "epoch": 0.8685675492192804, + "grad_norm": 0.5816623983622343, + "learning_rate": 4.4651306995409485e-07, + "loss": 0.2829, + "step": 19191 + }, + { + "epoch": 0.868612808327676, + "grad_norm": 0.5621643901170886, + "learning_rate": 4.462103681329616e-07, + "loss": 0.31, + "step": 19192 + }, + { + "epoch": 0.8686580674360715, + "grad_norm": 0.6637047733200644, + "learning_rate": 4.4590776415878166e-07, + "loss": 0.3343, + "step": 19193 + }, + { + "epoch": 0.868703326544467, + "grad_norm": 0.6641349132062052, + "learning_rate": 4.4560525803805654e-07, + "loss": 0.2687, + "step": 19194 + }, + { + "epoch": 0.8687485856528626, + "grad_norm": 0.5905567406914118, + "learning_rate": 4.453028497772877e-07, + "loss": 0.2857, + "step": 19195 + }, + { + "epoch": 0.8687938447612582, + "grad_norm": 0.6181811323044204, + "learning_rate": 4.4500053938297205e-07, + "loss": 0.2685, + "step": 19196 + }, + { + "epoch": 0.8688391038696538, + "grad_norm": 0.5688526447255133, + "learning_rate": 4.4469832686160395e-07, + "loss": 0.3152, + "step": 19197 + }, + { + "epoch": 0.8688843629780494, + "grad_norm": 0.5609431518771413, + "learning_rate": 4.443962122196782e-07, + "loss": 0.2868, + "step": 19198 + }, + { + "epoch": 0.8689296220864449, + "grad_norm": 0.61520595196673, + "learning_rate": 4.4409419546368735e-07, + "loss": 0.2702, + "step": 19199 + }, + { + "epoch": 0.8689748811948405, + "grad_norm": 0.6052659169977241, + "learning_rate": 4.437922766001201e-07, + "loss": 0.2714, + "step": 19200 + }, + { + "epoch": 0.869020140303236, + "grad_norm": 0.27276169252499394, + "learning_rate": 4.4349045563546245e-07, + "loss": 0.4929, + "step": 19201 + }, + { + "epoch": 0.8690653994116316, + "grad_norm": 0.6297723203334212, + "learning_rate": 4.4318873257620077e-07, + "loss": 0.2841, + "step": 19202 + }, + { + "epoch": 0.8691106585200271, + "grad_norm": 0.5606502021223138, + "learning_rate": 4.428871074288188e-07, + "loss": 0.261, + "step": 19203 + }, + { + "epoch": 0.8691559176284227, + "grad_norm": 0.6184436659663045, + "learning_rate": 4.425855801997969e-07, + "loss": 0.2845, + "step": 19204 + }, + { + "epoch": 0.8692011767368183, + "grad_norm": 0.26524336851540486, + "learning_rate": 4.422841508956127e-07, + "loss": 0.4507, + "step": 19205 + }, + { + "epoch": 0.8692464358452139, + "grad_norm": 0.6086760919671172, + "learning_rate": 4.419828195227455e-07, + "loss": 0.2866, + "step": 19206 + }, + { + "epoch": 0.8692916949536094, + "grad_norm": 0.6494256573886917, + "learning_rate": 4.416815860876672e-07, + "loss": 0.2894, + "step": 19207 + }, + { + "epoch": 0.869336954062005, + "grad_norm": 0.6112122012915637, + "learning_rate": 4.413804505968533e-07, + "loss": 0.2499, + "step": 19208 + }, + { + "epoch": 0.8693822131704005, + "grad_norm": 0.6051172673894841, + "learning_rate": 4.410794130567725e-07, + "loss": 0.2771, + "step": 19209 + }, + { + "epoch": 0.8694274722787961, + "grad_norm": 0.5811739345315933, + "learning_rate": 4.4077847347389236e-07, + "loss": 0.28, + "step": 19210 + }, + { + "epoch": 0.8694727313871917, + "grad_norm": 0.6868280778020592, + "learning_rate": 4.404776318546805e-07, + "loss": 0.3089, + "step": 19211 + }, + { + "epoch": 0.8695179904955872, + "grad_norm": 0.5590297594558641, + "learning_rate": 4.401768882056012e-07, + "loss": 0.2917, + "step": 19212 + }, + { + "epoch": 0.8695632496039828, + "grad_norm": 0.6357604627136214, + "learning_rate": 4.3987624253311657e-07, + "loss": 0.2982, + "step": 19213 + }, + { + "epoch": 0.8696085087123784, + "grad_norm": 0.25405496257449234, + "learning_rate": 4.3957569484368523e-07, + "loss": 0.4706, + "step": 19214 + }, + { + "epoch": 0.869653767820774, + "grad_norm": 0.5730905648746117, + "learning_rate": 4.3927524514376596e-07, + "loss": 0.283, + "step": 19215 + }, + { + "epoch": 0.8696990269291694, + "grad_norm": 0.5808504897339036, + "learning_rate": 4.389748934398164e-07, + "loss": 0.2872, + "step": 19216 + }, + { + "epoch": 0.869744286037565, + "grad_norm": 0.5989775669737522, + "learning_rate": 4.386746397382863e-07, + "loss": 0.3183, + "step": 19217 + }, + { + "epoch": 0.8697895451459606, + "grad_norm": 0.2539415810006778, + "learning_rate": 4.3837448404562886e-07, + "loss": 0.4799, + "step": 19218 + }, + { + "epoch": 0.8698348042543562, + "grad_norm": 0.7084341348935672, + "learning_rate": 4.3807442636829513e-07, + "loss": 0.2673, + "step": 19219 + }, + { + "epoch": 0.8698800633627517, + "grad_norm": 0.6404269693956344, + "learning_rate": 4.3777446671273093e-07, + "loss": 0.2818, + "step": 19220 + }, + { + "epoch": 0.8699253224711473, + "grad_norm": 0.6818600806110079, + "learning_rate": 4.3747460508538064e-07, + "loss": 0.3055, + "step": 19221 + }, + { + "epoch": 0.8699705815795429, + "grad_norm": 0.5967480519356161, + "learning_rate": 4.371748414926896e-07, + "loss": 0.2804, + "step": 19222 + }, + { + "epoch": 0.8700158406879385, + "grad_norm": 0.6705196046683227, + "learning_rate": 4.3687517594109664e-07, + "loss": 0.3152, + "step": 19223 + }, + { + "epoch": 0.8700610997963341, + "grad_norm": 0.27213909087842897, + "learning_rate": 4.3657560843704207e-07, + "loss": 0.4877, + "step": 19224 + }, + { + "epoch": 0.8701063589047295, + "grad_norm": 0.5580890194538464, + "learning_rate": 4.362761389869624e-07, + "loss": 0.2902, + "step": 19225 + }, + { + "epoch": 0.8701516180131251, + "grad_norm": 0.6272575113776604, + "learning_rate": 4.3597676759729147e-07, + "loss": 0.2687, + "step": 19226 + }, + { + "epoch": 0.8701968771215207, + "grad_norm": 0.6501694422134591, + "learning_rate": 4.356774942744618e-07, + "loss": 0.3212, + "step": 19227 + }, + { + "epoch": 0.8702421362299163, + "grad_norm": 0.6227079354249133, + "learning_rate": 4.353783190249061e-07, + "loss": 0.3212, + "step": 19228 + }, + { + "epoch": 0.8702873953383118, + "grad_norm": 0.6774682709699127, + "learning_rate": 4.350792418550509e-07, + "loss": 0.3429, + "step": 19229 + }, + { + "epoch": 0.8703326544467074, + "grad_norm": 0.6162439245033716, + "learning_rate": 4.3478026277132157e-07, + "loss": 0.3038, + "step": 19230 + }, + { + "epoch": 0.870377913555103, + "grad_norm": 0.5618576367930656, + "learning_rate": 4.3448138178014354e-07, + "loss": 0.2752, + "step": 19231 + }, + { + "epoch": 0.8704231726634986, + "grad_norm": 0.8201144878058423, + "learning_rate": 4.3418259888794e-07, + "loss": 0.2934, + "step": 19232 + }, + { + "epoch": 0.8704684317718941, + "grad_norm": 0.3217543622772321, + "learning_rate": 4.338839141011292e-07, + "loss": 0.4781, + "step": 19233 + }, + { + "epoch": 0.8705136908802896, + "grad_norm": 0.5878816880054325, + "learning_rate": 4.3358532742612814e-07, + "loss": 0.2976, + "step": 19234 + }, + { + "epoch": 0.8705589499886852, + "grad_norm": 0.26947232868969295, + "learning_rate": 4.3328683886935507e-07, + "loss": 0.4722, + "step": 19235 + }, + { + "epoch": 0.8706042090970808, + "grad_norm": 0.7166495013877396, + "learning_rate": 4.329884484372215e-07, + "loss": 0.3097, + "step": 19236 + }, + { + "epoch": 0.8706494682054764, + "grad_norm": 0.6034099383936006, + "learning_rate": 4.326901561361402e-07, + "loss": 0.2757, + "step": 19237 + }, + { + "epoch": 0.8706947273138719, + "grad_norm": 0.5717583707219932, + "learning_rate": 4.3239196197252034e-07, + "loss": 0.2653, + "step": 19238 + }, + { + "epoch": 0.8707399864222675, + "grad_norm": 0.5895933010053086, + "learning_rate": 4.3209386595276737e-07, + "loss": 0.2792, + "step": 19239 + }, + { + "epoch": 0.870785245530663, + "grad_norm": 0.6151786474253428, + "learning_rate": 4.317958680832884e-07, + "loss": 0.2686, + "step": 19240 + }, + { + "epoch": 0.8708305046390586, + "grad_norm": 0.6573071928254116, + "learning_rate": 4.3149796837048677e-07, + "loss": 0.289, + "step": 19241 + }, + { + "epoch": 0.8708757637474541, + "grad_norm": 0.6535222992402957, + "learning_rate": 4.3120016682076324e-07, + "loss": 0.306, + "step": 19242 + }, + { + "epoch": 0.8709210228558497, + "grad_norm": 0.6144457344128956, + "learning_rate": 4.309024634405146e-07, + "loss": 0.2703, + "step": 19243 + }, + { + "epoch": 0.8709662819642453, + "grad_norm": 0.5685603172224465, + "learning_rate": 4.306048582361394e-07, + "loss": 0.2945, + "step": 19244 + }, + { + "epoch": 0.8710115410726409, + "grad_norm": 0.2646618931089399, + "learning_rate": 4.3030735121403376e-07, + "loss": 0.4831, + "step": 19245 + }, + { + "epoch": 0.8710568001810365, + "grad_norm": 0.2749930163517183, + "learning_rate": 4.300099423805865e-07, + "loss": 0.4574, + "step": 19246 + }, + { + "epoch": 0.871102059289432, + "grad_norm": 0.27635098523908386, + "learning_rate": 4.2971263174219014e-07, + "loss": 0.4804, + "step": 19247 + }, + { + "epoch": 0.8711473183978276, + "grad_norm": 0.5877024693849136, + "learning_rate": 4.2941541930523356e-07, + "loss": 0.2964, + "step": 19248 + }, + { + "epoch": 0.8711925775062231, + "grad_norm": 0.6122661833595895, + "learning_rate": 4.291183050761022e-07, + "loss": 0.2738, + "step": 19249 + }, + { + "epoch": 0.8712378366146187, + "grad_norm": 0.6142665018253046, + "learning_rate": 4.288212890611787e-07, + "loss": 0.2818, + "step": 19250 + }, + { + "epoch": 0.8712830957230142, + "grad_norm": 0.6032876851784271, + "learning_rate": 4.28524371266848e-07, + "loss": 0.3234, + "step": 19251 + }, + { + "epoch": 0.8713283548314098, + "grad_norm": 0.6290539406396431, + "learning_rate": 4.2822755169948714e-07, + "loss": 0.3027, + "step": 19252 + }, + { + "epoch": 0.8713736139398054, + "grad_norm": 0.5551021014013245, + "learning_rate": 4.2793083036547554e-07, + "loss": 0.2438, + "step": 19253 + }, + { + "epoch": 0.871418873048201, + "grad_norm": 0.7048705729104559, + "learning_rate": 4.276342072711881e-07, + "loss": 0.2844, + "step": 19254 + }, + { + "epoch": 0.8714641321565965, + "grad_norm": 0.28930790958547165, + "learning_rate": 4.273376824229991e-07, + "loss": 0.4484, + "step": 19255 + }, + { + "epoch": 0.871509391264992, + "grad_norm": 0.615694385686459, + "learning_rate": 4.270412558272785e-07, + "loss": 0.3384, + "step": 19256 + }, + { + "epoch": 0.8715546503733876, + "grad_norm": 0.2746311252453542, + "learning_rate": 4.267449274903979e-07, + "loss": 0.5046, + "step": 19257 + }, + { + "epoch": 0.8715999094817832, + "grad_norm": 0.25477527336527783, + "learning_rate": 4.2644869741872263e-07, + "loss": 0.4676, + "step": 19258 + }, + { + "epoch": 0.8716451685901788, + "grad_norm": 0.6237616386439251, + "learning_rate": 4.2615256561861773e-07, + "loss": 0.3168, + "step": 19259 + }, + { + "epoch": 0.8716904276985743, + "grad_norm": 0.5799414767663191, + "learning_rate": 4.258565320964464e-07, + "loss": 0.2646, + "step": 19260 + }, + { + "epoch": 0.8717356868069699, + "grad_norm": 0.5581433964083954, + "learning_rate": 4.2556059685857133e-07, + "loss": 0.2521, + "step": 19261 + }, + { + "epoch": 0.8717809459153655, + "grad_norm": 0.647202387736622, + "learning_rate": 4.252647599113491e-07, + "loss": 0.2704, + "step": 19262 + }, + { + "epoch": 0.8718262050237611, + "grad_norm": 0.608873076960802, + "learning_rate": 4.2496902126113626e-07, + "loss": 0.3396, + "step": 19263 + }, + { + "epoch": 0.8718714641321565, + "grad_norm": 0.689268710860395, + "learning_rate": 4.246733809142889e-07, + "loss": 0.3353, + "step": 19264 + }, + { + "epoch": 0.8719167232405521, + "grad_norm": 0.6566491745917127, + "learning_rate": 4.2437783887715745e-07, + "loss": 0.282, + "step": 19265 + }, + { + "epoch": 0.8719619823489477, + "grad_norm": 0.28732687237202653, + "learning_rate": 4.2408239515609407e-07, + "loss": 0.4821, + "step": 19266 + }, + { + "epoch": 0.8720072414573433, + "grad_norm": 0.6146920890965886, + "learning_rate": 4.2378704975744646e-07, + "loss": 0.2778, + "step": 19267 + }, + { + "epoch": 0.8720525005657389, + "grad_norm": 0.6326512945876744, + "learning_rate": 4.2349180268755953e-07, + "loss": 0.2898, + "step": 19268 + }, + { + "epoch": 0.8720977596741344, + "grad_norm": 0.6194643099384461, + "learning_rate": 4.231966539527782e-07, + "loss": 0.3457, + "step": 19269 + }, + { + "epoch": 0.87214301878253, + "grad_norm": 0.5949243243637904, + "learning_rate": 4.2290160355944467e-07, + "loss": 0.2819, + "step": 19270 + }, + { + "epoch": 0.8721882778909256, + "grad_norm": 0.6479036250261243, + "learning_rate": 4.2260665151389825e-07, + "loss": 0.2972, + "step": 19271 + }, + { + "epoch": 0.8722335369993212, + "grad_norm": 0.6817991465424627, + "learning_rate": 4.223117978224761e-07, + "loss": 0.2769, + "step": 19272 + }, + { + "epoch": 0.8722787961077166, + "grad_norm": 0.6523627812235554, + "learning_rate": 4.2201704249151377e-07, + "loss": 0.339, + "step": 19273 + }, + { + "epoch": 0.8723240552161122, + "grad_norm": 0.630393557338104, + "learning_rate": 4.217223855273467e-07, + "loss": 0.3178, + "step": 19274 + }, + { + "epoch": 0.8723693143245078, + "grad_norm": 0.6068565329435825, + "learning_rate": 4.214278269363026e-07, + "loss": 0.3071, + "step": 19275 + }, + { + "epoch": 0.8724145734329034, + "grad_norm": 0.26120053279786065, + "learning_rate": 4.211333667247125e-07, + "loss": 0.4875, + "step": 19276 + }, + { + "epoch": 0.8724598325412989, + "grad_norm": 0.6359553199205671, + "learning_rate": 4.208390048989047e-07, + "loss": 0.3256, + "step": 19277 + }, + { + "epoch": 0.8725050916496945, + "grad_norm": 0.28486232174258747, + "learning_rate": 4.2054474146520254e-07, + "loss": 0.449, + "step": 19278 + }, + { + "epoch": 0.8725503507580901, + "grad_norm": 0.5948076828239628, + "learning_rate": 4.202505764299286e-07, + "loss": 0.3179, + "step": 19279 + }, + { + "epoch": 0.8725956098664857, + "grad_norm": 0.683832652398244, + "learning_rate": 4.199565097994046e-07, + "loss": 0.2939, + "step": 19280 + }, + { + "epoch": 0.8726408689748812, + "grad_norm": 0.29879773745404237, + "learning_rate": 4.1966254157994826e-07, + "loss": 0.4816, + "step": 19281 + }, + { + "epoch": 0.8726861280832767, + "grad_norm": 0.27659396071200854, + "learning_rate": 4.1936867177787723e-07, + "loss": 0.4963, + "step": 19282 + }, + { + "epoch": 0.8727313871916723, + "grad_norm": 0.6063704551071534, + "learning_rate": 4.190749003995037e-07, + "loss": 0.3116, + "step": 19283 + }, + { + "epoch": 0.8727766463000679, + "grad_norm": 0.6478424532617484, + "learning_rate": 4.187812274511427e-07, + "loss": 0.2813, + "step": 19284 + }, + { + "epoch": 0.8728219054084635, + "grad_norm": 0.25002317875374175, + "learning_rate": 4.1848765293910187e-07, + "loss": 0.4547, + "step": 19285 + }, + { + "epoch": 0.872867164516859, + "grad_norm": 0.8394752438160599, + "learning_rate": 4.181941768696912e-07, + "loss": 0.3154, + "step": 19286 + }, + { + "epoch": 0.8729124236252546, + "grad_norm": 0.3167525239809427, + "learning_rate": 4.1790079924921625e-07, + "loss": 0.4757, + "step": 19287 + }, + { + "epoch": 0.8729576827336502, + "grad_norm": 0.6515488862914801, + "learning_rate": 4.176075200839791e-07, + "loss": 0.2937, + "step": 19288 + }, + { + "epoch": 0.8730029418420457, + "grad_norm": 0.6422687423007235, + "learning_rate": 4.173143393802825e-07, + "loss": 0.3317, + "step": 19289 + }, + { + "epoch": 0.8730482009504412, + "grad_norm": 0.6589076061519356, + "learning_rate": 4.170212571444271e-07, + "loss": 0.2833, + "step": 19290 + }, + { + "epoch": 0.8730934600588368, + "grad_norm": 0.26402214706993543, + "learning_rate": 4.1672827338270884e-07, + "loss": 0.4838, + "step": 19291 + }, + { + "epoch": 0.8731387191672324, + "grad_norm": 0.5423897183305791, + "learning_rate": 4.1643538810142324e-07, + "loss": 0.2827, + "step": 19292 + }, + { + "epoch": 0.873183978275628, + "grad_norm": 0.693746363923824, + "learning_rate": 4.1614260130686424e-07, + "loss": 0.29, + "step": 19293 + }, + { + "epoch": 0.8732292373840236, + "grad_norm": 0.6786270636978193, + "learning_rate": 4.158499130053223e-07, + "loss": 0.2907, + "step": 19294 + }, + { + "epoch": 0.8732744964924191, + "grad_norm": 0.5433855176325215, + "learning_rate": 4.155573232030868e-07, + "loss": 0.2811, + "step": 19295 + }, + { + "epoch": 0.8733197556008147, + "grad_norm": 0.6064155161405602, + "learning_rate": 4.152648319064445e-07, + "loss": 0.2744, + "step": 19296 + }, + { + "epoch": 0.8733650147092102, + "grad_norm": 0.4722005543780067, + "learning_rate": 4.1497243912167975e-07, + "loss": 0.4553, + "step": 19297 + }, + { + "epoch": 0.8734102738176058, + "grad_norm": 0.6905482528007223, + "learning_rate": 4.146801448550747e-07, + "loss": 0.2969, + "step": 19298 + }, + { + "epoch": 0.8734555329260013, + "grad_norm": 0.6159339429816463, + "learning_rate": 4.143879491129116e-07, + "loss": 0.2956, + "step": 19299 + }, + { + "epoch": 0.8735007920343969, + "grad_norm": 0.5837472702885921, + "learning_rate": 4.140958519014682e-07, + "loss": 0.2665, + "step": 19300 + }, + { + "epoch": 0.8735460511427925, + "grad_norm": 0.26396583160348003, + "learning_rate": 4.1380385322701945e-07, + "loss": 0.443, + "step": 19301 + }, + { + "epoch": 0.8735913102511881, + "grad_norm": 0.6096296037623206, + "learning_rate": 4.1351195309584034e-07, + "loss": 0.2684, + "step": 19302 + }, + { + "epoch": 0.8736365693595837, + "grad_norm": 0.5867769778710267, + "learning_rate": 4.132201515142037e-07, + "loss": 0.2921, + "step": 19303 + }, + { + "epoch": 0.8736818284679791, + "grad_norm": 0.7311603956808999, + "learning_rate": 4.129284484883789e-07, + "loss": 0.2702, + "step": 19304 + }, + { + "epoch": 0.8737270875763747, + "grad_norm": 0.5948717141358987, + "learning_rate": 4.126368440246331e-07, + "loss": 0.3367, + "step": 19305 + }, + { + "epoch": 0.8737723466847703, + "grad_norm": 0.572718298861418, + "learning_rate": 4.1234533812923307e-07, + "loss": 0.2991, + "step": 19306 + }, + { + "epoch": 0.8738176057931659, + "grad_norm": 0.5464262209786708, + "learning_rate": 4.120539308084409e-07, + "loss": 0.2663, + "step": 19307 + }, + { + "epoch": 0.8738628649015614, + "grad_norm": 0.6203983342770546, + "learning_rate": 4.1176262206852e-07, + "loss": 0.2939, + "step": 19308 + }, + { + "epoch": 0.873908124009957, + "grad_norm": 0.6275878683163069, + "learning_rate": 4.114714119157287e-07, + "loss": 0.2894, + "step": 19309 + }, + { + "epoch": 0.8739533831183526, + "grad_norm": 0.2656071642941677, + "learning_rate": 4.111803003563231e-07, + "loss": 0.4439, + "step": 19310 + }, + { + "epoch": 0.8739986422267482, + "grad_norm": 0.6453478987417932, + "learning_rate": 4.108892873965603e-07, + "loss": 0.2719, + "step": 19311 + }, + { + "epoch": 0.8740439013351436, + "grad_norm": 0.6799707695236196, + "learning_rate": 4.105983730426916e-07, + "loss": 0.3123, + "step": 19312 + }, + { + "epoch": 0.8740891604435392, + "grad_norm": 0.5816988735360314, + "learning_rate": 4.103075573009691e-07, + "loss": 0.2624, + "step": 19313 + }, + { + "epoch": 0.8741344195519348, + "grad_norm": 0.6331262659881548, + "learning_rate": 4.1001684017764053e-07, + "loss": 0.2969, + "step": 19314 + }, + { + "epoch": 0.8741796786603304, + "grad_norm": 0.7024568024365313, + "learning_rate": 4.097262216789538e-07, + "loss": 0.2979, + "step": 19315 + }, + { + "epoch": 0.874224937768726, + "grad_norm": 0.6067067548843695, + "learning_rate": 4.0943570181115275e-07, + "loss": 0.2764, + "step": 19316 + }, + { + "epoch": 0.8742701968771215, + "grad_norm": 0.608306134835602, + "learning_rate": 4.091452805804785e-07, + "loss": 0.3062, + "step": 19317 + }, + { + "epoch": 0.8743154559855171, + "grad_norm": 0.2563566022423515, + "learning_rate": 4.088549579931722e-07, + "loss": 0.4438, + "step": 19318 + }, + { + "epoch": 0.8743607150939127, + "grad_norm": 0.6088891635681201, + "learning_rate": 4.085647340554738e-07, + "loss": 0.2903, + "step": 19319 + }, + { + "epoch": 0.8744059742023083, + "grad_norm": 0.6325971987131987, + "learning_rate": 4.0827460877361724e-07, + "loss": 0.2875, + "step": 19320 + }, + { + "epoch": 0.8744512333107037, + "grad_norm": 0.5723939317045896, + "learning_rate": 4.079845821538364e-07, + "loss": 0.2751, + "step": 19321 + }, + { + "epoch": 0.8744964924190993, + "grad_norm": 0.7989220925329679, + "learning_rate": 4.0769465420236407e-07, + "loss": 0.281, + "step": 19322 + }, + { + "epoch": 0.8745417515274949, + "grad_norm": 0.5519022007177745, + "learning_rate": 4.0740482492542864e-07, + "loss": 0.3168, + "step": 19323 + }, + { + "epoch": 0.8745870106358905, + "grad_norm": 0.27196518760632776, + "learning_rate": 4.0711509432925955e-07, + "loss": 0.4728, + "step": 19324 + }, + { + "epoch": 0.874632269744286, + "grad_norm": 0.5852629861057203, + "learning_rate": 4.0682546242008017e-07, + "loss": 0.2973, + "step": 19325 + }, + { + "epoch": 0.8746775288526816, + "grad_norm": 0.8416817462412677, + "learning_rate": 4.0653592920411545e-07, + "loss": 0.3121, + "step": 19326 + }, + { + "epoch": 0.8747227879610772, + "grad_norm": 0.656747312664601, + "learning_rate": 4.0624649468758494e-07, + "loss": 0.2591, + "step": 19327 + }, + { + "epoch": 0.8747680470694728, + "grad_norm": 0.2852204573784242, + "learning_rate": 4.0595715887670973e-07, + "loss": 0.4515, + "step": 19328 + }, + { + "epoch": 0.8748133061778683, + "grad_norm": 0.5623803990170919, + "learning_rate": 4.056679217777054e-07, + "loss": 0.2606, + "step": 19329 + }, + { + "epoch": 0.8748585652862638, + "grad_norm": 0.5641726428377336, + "learning_rate": 4.0537878339678647e-07, + "loss": 0.2584, + "step": 19330 + }, + { + "epoch": 0.8749038243946594, + "grad_norm": 0.6174679946555726, + "learning_rate": 4.050897437401657e-07, + "loss": 0.2992, + "step": 19331 + }, + { + "epoch": 0.874949083503055, + "grad_norm": 0.2592118664339325, + "learning_rate": 4.0480080281405544e-07, + "loss": 0.4789, + "step": 19332 + }, + { + "epoch": 0.8749943426114506, + "grad_norm": 0.6222844653809816, + "learning_rate": 4.045119606246628e-07, + "loss": 0.2708, + "step": 19333 + }, + { + "epoch": 0.8750396017198461, + "grad_norm": 0.561262388965449, + "learning_rate": 4.0422321717819347e-07, + "loss": 0.2892, + "step": 19334 + }, + { + "epoch": 0.8750848608282417, + "grad_norm": 0.6080040410481603, + "learning_rate": 4.03934572480853e-07, + "loss": 0.277, + "step": 19335 + }, + { + "epoch": 0.8751301199366373, + "grad_norm": 0.6680865928353568, + "learning_rate": 4.03646026538842e-07, + "loss": 0.2712, + "step": 19336 + }, + { + "epoch": 0.8751753790450328, + "grad_norm": 0.2658129002143189, + "learning_rate": 4.0335757935836216e-07, + "loss": 0.4717, + "step": 19337 + }, + { + "epoch": 0.8752206381534284, + "grad_norm": 0.6268641775392694, + "learning_rate": 4.0306923094561025e-07, + "loss": 0.2591, + "step": 19338 + }, + { + "epoch": 0.8752658972618239, + "grad_norm": 0.6131332345601052, + "learning_rate": 4.027809813067812e-07, + "loss": 0.3132, + "step": 19339 + }, + { + "epoch": 0.8753111563702195, + "grad_norm": 0.6250152587687039, + "learning_rate": 4.024928304480696e-07, + "loss": 0.2804, + "step": 19340 + }, + { + "epoch": 0.8753564154786151, + "grad_norm": 0.6700185047153728, + "learning_rate": 4.022047783756683e-07, + "loss": 0.2782, + "step": 19341 + }, + { + "epoch": 0.8754016745870107, + "grad_norm": 0.5891930228963966, + "learning_rate": 4.0191682509576503e-07, + "loss": 0.2948, + "step": 19342 + }, + { + "epoch": 0.8754469336954062, + "grad_norm": 0.27279569033472617, + "learning_rate": 4.0162897061454596e-07, + "loss": 0.4634, + "step": 19343 + }, + { + "epoch": 0.8754921928038017, + "grad_norm": 0.6391335136643886, + "learning_rate": 4.0134121493819897e-07, + "loss": 0.3157, + "step": 19344 + }, + { + "epoch": 0.8755374519121973, + "grad_norm": 0.6616544774477524, + "learning_rate": 4.0105355807290523e-07, + "loss": 0.3173, + "step": 19345 + }, + { + "epoch": 0.8755827110205929, + "grad_norm": 0.2702210661503924, + "learning_rate": 4.0076600002484533e-07, + "loss": 0.4681, + "step": 19346 + }, + { + "epoch": 0.8756279701289884, + "grad_norm": 0.285967670957762, + "learning_rate": 4.004785408001982e-07, + "loss": 0.475, + "step": 19347 + }, + { + "epoch": 0.875673229237384, + "grad_norm": 0.6405104658919056, + "learning_rate": 4.001911804051417e-07, + "loss": 0.3145, + "step": 19348 + }, + { + "epoch": 0.8757184883457796, + "grad_norm": 0.6727554754483442, + "learning_rate": 3.999039188458498e-07, + "loss": 0.3267, + "step": 19349 + }, + { + "epoch": 0.8757637474541752, + "grad_norm": 0.6028274037616632, + "learning_rate": 3.996167561284936e-07, + "loss": 0.258, + "step": 19350 + }, + { + "epoch": 0.8758090065625708, + "grad_norm": 0.5946123279302, + "learning_rate": 3.9932969225924546e-07, + "loss": 0.2794, + "step": 19351 + }, + { + "epoch": 0.8758542656709662, + "grad_norm": 0.5871803342675288, + "learning_rate": 3.990427272442715e-07, + "loss": 0.2662, + "step": 19352 + }, + { + "epoch": 0.8758995247793618, + "grad_norm": 0.6323833531414866, + "learning_rate": 3.987558610897391e-07, + "loss": 0.2839, + "step": 19353 + }, + { + "epoch": 0.8759447838877574, + "grad_norm": 0.5265504978894515, + "learning_rate": 3.9846909380181096e-07, + "loss": 0.2938, + "step": 19354 + }, + { + "epoch": 0.875990042996153, + "grad_norm": 0.6319468212936, + "learning_rate": 3.981824253866501e-07, + "loss": 0.3392, + "step": 19355 + }, + { + "epoch": 0.8760353021045485, + "grad_norm": 0.6347599472287441, + "learning_rate": 3.978958558504148e-07, + "loss": 0.2472, + "step": 19356 + }, + { + "epoch": 0.8760805612129441, + "grad_norm": 0.29546727970639075, + "learning_rate": 3.9760938519926404e-07, + "loss": 0.4936, + "step": 19357 + }, + { + "epoch": 0.8761258203213397, + "grad_norm": 0.6320587601466439, + "learning_rate": 3.9732301343935243e-07, + "loss": 0.2786, + "step": 19358 + }, + { + "epoch": 0.8761710794297353, + "grad_norm": 0.63209996992386, + "learning_rate": 3.970367405768322e-07, + "loss": 0.2796, + "step": 19359 + }, + { + "epoch": 0.8762163385381307, + "grad_norm": 0.26120033002662857, + "learning_rate": 3.9675056661785563e-07, + "loss": 0.4559, + "step": 19360 + }, + { + "epoch": 0.8762615976465263, + "grad_norm": 0.5661813233089844, + "learning_rate": 3.964644915685728e-07, + "loss": 0.2847, + "step": 19361 + }, + { + "epoch": 0.8763068567549219, + "grad_norm": 0.6678577903977209, + "learning_rate": 3.961785154351289e-07, + "loss": 0.3123, + "step": 19362 + }, + { + "epoch": 0.8763521158633175, + "grad_norm": 0.6577484082745595, + "learning_rate": 3.9589263822366886e-07, + "loss": 0.3141, + "step": 19363 + }, + { + "epoch": 0.8763973749717131, + "grad_norm": 0.6469438211303374, + "learning_rate": 3.9560685994033566e-07, + "loss": 0.2685, + "step": 19364 + }, + { + "epoch": 0.8764426340801086, + "grad_norm": 0.6733623265000933, + "learning_rate": 3.9532118059126935e-07, + "loss": 0.2983, + "step": 19365 + }, + { + "epoch": 0.8764878931885042, + "grad_norm": 0.2691216368567256, + "learning_rate": 3.9503560018260945e-07, + "loss": 0.4546, + "step": 19366 + }, + { + "epoch": 0.8765331522968998, + "grad_norm": 0.631367657505299, + "learning_rate": 3.9475011872049164e-07, + "loss": 0.285, + "step": 19367 + }, + { + "epoch": 0.8765784114052954, + "grad_norm": 0.28257712047841743, + "learning_rate": 3.9446473621104877e-07, + "loss": 0.4689, + "step": 19368 + }, + { + "epoch": 0.8766236705136908, + "grad_norm": 0.6845795804766492, + "learning_rate": 3.9417945266041367e-07, + "loss": 0.3236, + "step": 19369 + }, + { + "epoch": 0.8766689296220864, + "grad_norm": 0.2629276730359694, + "learning_rate": 3.9389426807471764e-07, + "loss": 0.4324, + "step": 19370 + }, + { + "epoch": 0.876714188730482, + "grad_norm": 0.6353848202383956, + "learning_rate": 3.9360918246008684e-07, + "loss": 0.2742, + "step": 19371 + }, + { + "epoch": 0.8767594478388776, + "grad_norm": 0.5924153676161918, + "learning_rate": 3.933241958226469e-07, + "loss": 0.321, + "step": 19372 + }, + { + "epoch": 0.8768047069472732, + "grad_norm": 0.7712837707043331, + "learning_rate": 3.930393081685213e-07, + "loss": 0.2434, + "step": 19373 + }, + { + "epoch": 0.8768499660556687, + "grad_norm": 0.2724202673210525, + "learning_rate": 3.9275451950383346e-07, + "loss": 0.4528, + "step": 19374 + }, + { + "epoch": 0.8768952251640643, + "grad_norm": 0.6339317784538435, + "learning_rate": 3.924698298346996e-07, + "loss": 0.3075, + "step": 19375 + }, + { + "epoch": 0.8769404842724599, + "grad_norm": 0.6102452598889615, + "learning_rate": 3.9218523916723814e-07, + "loss": 0.3037, + "step": 19376 + }, + { + "epoch": 0.8769857433808554, + "grad_norm": 0.24234823449444134, + "learning_rate": 3.9190074750756424e-07, + "loss": 0.4438, + "step": 19377 + }, + { + "epoch": 0.8770310024892509, + "grad_norm": 0.7741991460040867, + "learning_rate": 3.916163548617913e-07, + "loss": 0.3213, + "step": 19378 + }, + { + "epoch": 0.8770762615976465, + "grad_norm": 0.5343018843761397, + "learning_rate": 3.913320612360283e-07, + "loss": 0.2576, + "step": 19379 + }, + { + "epoch": 0.8771215207060421, + "grad_norm": 0.6245680766490284, + "learning_rate": 3.9104786663638537e-07, + "loss": 0.2738, + "step": 19380 + }, + { + "epoch": 0.8771667798144377, + "grad_norm": 0.6527068583160411, + "learning_rate": 3.9076377106896765e-07, + "loss": 0.3479, + "step": 19381 + }, + { + "epoch": 0.8772120389228332, + "grad_norm": 0.5873223696868036, + "learning_rate": 3.904797745398814e-07, + "loss": 0.3232, + "step": 19382 + }, + { + "epoch": 0.8772572980312288, + "grad_norm": 0.8239279730871042, + "learning_rate": 3.901958770552272e-07, + "loss": 0.3038, + "step": 19383 + }, + { + "epoch": 0.8773025571396244, + "grad_norm": 0.2989201442753827, + "learning_rate": 3.899120786211058e-07, + "loss": 0.4633, + "step": 19384 + }, + { + "epoch": 0.8773478162480199, + "grad_norm": 0.588468140386913, + "learning_rate": 3.8962837924361454e-07, + "loss": 0.3101, + "step": 19385 + }, + { + "epoch": 0.8773930753564155, + "grad_norm": 0.6190450837806823, + "learning_rate": 3.893447789288507e-07, + "loss": 0.3124, + "step": 19386 + }, + { + "epoch": 0.877438334464811, + "grad_norm": 0.5657772448108447, + "learning_rate": 3.890612776829067e-07, + "loss": 0.3072, + "step": 19387 + }, + { + "epoch": 0.8774835935732066, + "grad_norm": 0.7836726961639346, + "learning_rate": 3.887778755118743e-07, + "loss": 0.2591, + "step": 19388 + }, + { + "epoch": 0.8775288526816022, + "grad_norm": 0.6392129475718579, + "learning_rate": 3.884945724218425e-07, + "loss": 0.3644, + "step": 19389 + }, + { + "epoch": 0.8775741117899978, + "grad_norm": 0.7234741241049042, + "learning_rate": 3.882113684188998e-07, + "loss": 0.2711, + "step": 19390 + }, + { + "epoch": 0.8776193708983933, + "grad_norm": 0.6200475117502785, + "learning_rate": 3.879282635091308e-07, + "loss": 0.3199, + "step": 19391 + }, + { + "epoch": 0.8776646300067888, + "grad_norm": 0.6234293255662298, + "learning_rate": 3.876452576986184e-07, + "loss": 0.3023, + "step": 19392 + }, + { + "epoch": 0.8777098891151844, + "grad_norm": 0.701509162656271, + "learning_rate": 3.8736235099344375e-07, + "loss": 0.2745, + "step": 19393 + }, + { + "epoch": 0.87775514822358, + "grad_norm": 0.6403016579755417, + "learning_rate": 3.870795433996849e-07, + "loss": 0.2669, + "step": 19394 + }, + { + "epoch": 0.8778004073319755, + "grad_norm": 0.6178465099533741, + "learning_rate": 3.8679683492342023e-07, + "loss": 0.3066, + "step": 19395 + }, + { + "epoch": 0.8778456664403711, + "grad_norm": 0.5918812381592131, + "learning_rate": 3.865142255707222e-07, + "loss": 0.3097, + "step": 19396 + }, + { + "epoch": 0.8778909255487667, + "grad_norm": 0.6154359170578279, + "learning_rate": 3.862317153476647e-07, + "loss": 0.2848, + "step": 19397 + }, + { + "epoch": 0.8779361846571623, + "grad_norm": 0.60710922438913, + "learning_rate": 3.859493042603174e-07, + "loss": 0.3003, + "step": 19398 + }, + { + "epoch": 0.8779814437655579, + "grad_norm": 0.6197900213750153, + "learning_rate": 3.856669923147488e-07, + "loss": 0.2623, + "step": 19399 + }, + { + "epoch": 0.8780267028739533, + "grad_norm": 0.6219230728627466, + "learning_rate": 3.8538477951702515e-07, + "loss": 0.2578, + "step": 19400 + }, + { + "epoch": 0.8780719619823489, + "grad_norm": 0.6047950271722398, + "learning_rate": 3.8510266587320876e-07, + "loss": 0.2988, + "step": 19401 + }, + { + "epoch": 0.8781172210907445, + "grad_norm": 0.6586513286928564, + "learning_rate": 3.8482065138936263e-07, + "loss": 0.3019, + "step": 19402 + }, + { + "epoch": 0.8781624801991401, + "grad_norm": 0.27443656822880147, + "learning_rate": 3.84538736071548e-07, + "loss": 0.4491, + "step": 19403 + }, + { + "epoch": 0.8782077393075356, + "grad_norm": 0.6154796542159318, + "learning_rate": 3.8425691992581836e-07, + "loss": 0.2536, + "step": 19404 + }, + { + "epoch": 0.8782529984159312, + "grad_norm": 0.6173411982057696, + "learning_rate": 3.839752029582322e-07, + "loss": 0.2785, + "step": 19405 + }, + { + "epoch": 0.8782982575243268, + "grad_norm": 0.6446679072516598, + "learning_rate": 3.836935851748419e-07, + "loss": 0.2677, + "step": 19406 + }, + { + "epoch": 0.8783435166327224, + "grad_norm": 0.6188887158583402, + "learning_rate": 3.834120665816993e-07, + "loss": 0.3072, + "step": 19407 + }, + { + "epoch": 0.878388775741118, + "grad_norm": 0.6223118704251513, + "learning_rate": 3.8313064718485116e-07, + "loss": 0.2933, + "step": 19408 + }, + { + "epoch": 0.8784340348495134, + "grad_norm": 0.6092096403353362, + "learning_rate": 3.8284932699034717e-07, + "loss": 0.2547, + "step": 19409 + }, + { + "epoch": 0.878479293957909, + "grad_norm": 0.2827938848992071, + "learning_rate": 3.825681060042297e-07, + "loss": 0.4956, + "step": 19410 + }, + { + "epoch": 0.8785245530663046, + "grad_norm": 0.6332323053744587, + "learning_rate": 3.822869842325427e-07, + "loss": 0.3117, + "step": 19411 + }, + { + "epoch": 0.8785698121747002, + "grad_norm": 0.5821023599323277, + "learning_rate": 3.8200596168132596e-07, + "loss": 0.3028, + "step": 19412 + }, + { + "epoch": 0.8786150712830957, + "grad_norm": 0.6325107408918255, + "learning_rate": 3.8172503835661846e-07, + "loss": 0.2867, + "step": 19413 + }, + { + "epoch": 0.8786603303914913, + "grad_norm": 0.2831159664124358, + "learning_rate": 3.814442142644548e-07, + "loss": 0.4556, + "step": 19414 + }, + { + "epoch": 0.8787055894998869, + "grad_norm": 0.5849533493887362, + "learning_rate": 3.8116348941087176e-07, + "loss": 0.2689, + "step": 19415 + }, + { + "epoch": 0.8787508486082825, + "grad_norm": 0.6519080783139413, + "learning_rate": 3.808828638018991e-07, + "loss": 0.2817, + "step": 19416 + }, + { + "epoch": 0.8787961077166779, + "grad_norm": 0.5594713028214605, + "learning_rate": 3.8060233744356634e-07, + "loss": 0.2636, + "step": 19417 + }, + { + "epoch": 0.8788413668250735, + "grad_norm": 0.5705113920596403, + "learning_rate": 3.8032191034190204e-07, + "loss": 0.2823, + "step": 19418 + }, + { + "epoch": 0.8788866259334691, + "grad_norm": 0.6227759180273239, + "learning_rate": 3.8004158250293246e-07, + "loss": 0.2877, + "step": 19419 + }, + { + "epoch": 0.8789318850418647, + "grad_norm": 0.6259684011915944, + "learning_rate": 3.7976135393268057e-07, + "loss": 0.2699, + "step": 19420 + }, + { + "epoch": 0.8789771441502603, + "grad_norm": 0.6387988070703673, + "learning_rate": 3.79481224637166e-07, + "loss": 0.2828, + "step": 19421 + }, + { + "epoch": 0.8790224032586558, + "grad_norm": 0.6254957096979336, + "learning_rate": 3.7920119462241e-07, + "loss": 0.3304, + "step": 19422 + }, + { + "epoch": 0.8790676623670514, + "grad_norm": 0.5490326198817921, + "learning_rate": 3.789212638944273e-07, + "loss": 0.2539, + "step": 19423 + }, + { + "epoch": 0.879112921475447, + "grad_norm": 0.25844367227032256, + "learning_rate": 3.786414324592358e-07, + "loss": 0.4545, + "step": 19424 + }, + { + "epoch": 0.8791581805838425, + "grad_norm": 0.707449751803239, + "learning_rate": 3.7836170032284516e-07, + "loss": 0.276, + "step": 19425 + }, + { + "epoch": 0.879203439692238, + "grad_norm": 0.7146855031244232, + "learning_rate": 3.7808206749126777e-07, + "loss": 0.2782, + "step": 19426 + }, + { + "epoch": 0.8792486988006336, + "grad_norm": 0.681521661479075, + "learning_rate": 3.778025339705116e-07, + "loss": 0.2449, + "step": 19427 + }, + { + "epoch": 0.8792939579090292, + "grad_norm": 0.624553488593751, + "learning_rate": 3.7752309976658295e-07, + "loss": 0.2698, + "step": 19428 + }, + { + "epoch": 0.8793392170174248, + "grad_norm": 0.5821597816164235, + "learning_rate": 3.7724376488548655e-07, + "loss": 0.2938, + "step": 19429 + }, + { + "epoch": 0.8793844761258203, + "grad_norm": 0.25441806517758003, + "learning_rate": 3.7696452933322305e-07, + "loss": 0.4679, + "step": 19430 + }, + { + "epoch": 0.8794297352342159, + "grad_norm": 0.24762157896644646, + "learning_rate": 3.766853931157932e-07, + "loss": 0.436, + "step": 19431 + }, + { + "epoch": 0.8794749943426114, + "grad_norm": 0.5959314807385596, + "learning_rate": 3.7640635623919674e-07, + "loss": 0.317, + "step": 19432 + }, + { + "epoch": 0.879520253451007, + "grad_norm": 0.6562698471158812, + "learning_rate": 3.761274187094255e-07, + "loss": 0.3062, + "step": 19433 + }, + { + "epoch": 0.8795655125594026, + "grad_norm": 0.27390885110652485, + "learning_rate": 3.758485805324746e-07, + "loss": 0.4677, + "step": 19434 + }, + { + "epoch": 0.8796107716677981, + "grad_norm": 0.692513863185441, + "learning_rate": 3.7556984171433663e-07, + "loss": 0.2797, + "step": 19435 + }, + { + "epoch": 0.8796560307761937, + "grad_norm": 0.6152037869565279, + "learning_rate": 3.752912022610006e-07, + "loss": 0.2814, + "step": 19436 + }, + { + "epoch": 0.8797012898845893, + "grad_norm": 0.24162605717765784, + "learning_rate": 3.750126621784511e-07, + "loss": 0.4398, + "step": 19437 + }, + { + "epoch": 0.8797465489929849, + "grad_norm": 0.6122811807147558, + "learning_rate": 3.7473422147267623e-07, + "loss": 0.285, + "step": 19438 + }, + { + "epoch": 0.8797918081013804, + "grad_norm": 0.5819518988029752, + "learning_rate": 3.744558801496567e-07, + "loss": 0.2805, + "step": 19439 + }, + { + "epoch": 0.879837067209776, + "grad_norm": 0.8562388874106288, + "learning_rate": 3.74177638215375e-07, + "loss": 0.2991, + "step": 19440 + }, + { + "epoch": 0.8798823263181715, + "grad_norm": 0.6023165883846132, + "learning_rate": 3.73899495675808e-07, + "loss": 0.2922, + "step": 19441 + }, + { + "epoch": 0.8799275854265671, + "grad_norm": 0.6116074594476207, + "learning_rate": 3.736214525369336e-07, + "loss": 0.3151, + "step": 19442 + }, + { + "epoch": 0.8799728445349626, + "grad_norm": 0.3015748179436036, + "learning_rate": 3.7334350880472434e-07, + "loss": 0.4811, + "step": 19443 + }, + { + "epoch": 0.8800181036433582, + "grad_norm": 0.5836461033173239, + "learning_rate": 3.730656644851538e-07, + "loss": 0.3192, + "step": 19444 + }, + { + "epoch": 0.8800633627517538, + "grad_norm": 1.1388168216379084, + "learning_rate": 3.727879195841921e-07, + "loss": 0.2662, + "step": 19445 + }, + { + "epoch": 0.8801086218601494, + "grad_norm": 0.6501681088951335, + "learning_rate": 3.7251027410780573e-07, + "loss": 0.3012, + "step": 19446 + }, + { + "epoch": 0.880153880968545, + "grad_norm": 0.6716599059645094, + "learning_rate": 3.722327280619614e-07, + "loss": 0.3035, + "step": 19447 + }, + { + "epoch": 0.8801991400769404, + "grad_norm": 0.6544174561508919, + "learning_rate": 3.7195528145262337e-07, + "loss": 0.3487, + "step": 19448 + }, + { + "epoch": 0.880244399185336, + "grad_norm": 0.6577095681684545, + "learning_rate": 3.7167793428575236e-07, + "loss": 0.2736, + "step": 19449 + }, + { + "epoch": 0.8802896582937316, + "grad_norm": 0.6075001612961495, + "learning_rate": 3.71400686567307e-07, + "loss": 0.2975, + "step": 19450 + }, + { + "epoch": 0.8803349174021272, + "grad_norm": 0.6407300018974064, + "learning_rate": 3.7112353830324576e-07, + "loss": 0.2914, + "step": 19451 + }, + { + "epoch": 0.8803801765105227, + "grad_norm": 0.6312364077947341, + "learning_rate": 3.7084648949952284e-07, + "loss": 0.2878, + "step": 19452 + }, + { + "epoch": 0.8804254356189183, + "grad_norm": 0.7431039517859039, + "learning_rate": 3.705695401620918e-07, + "loss": 0.3099, + "step": 19453 + }, + { + "epoch": 0.8804706947273139, + "grad_norm": 0.5882110778724492, + "learning_rate": 3.7029269029690287e-07, + "loss": 0.2982, + "step": 19454 + }, + { + "epoch": 0.8805159538357095, + "grad_norm": 0.8440818057248546, + "learning_rate": 3.700159399099057e-07, + "loss": 0.2796, + "step": 19455 + }, + { + "epoch": 0.880561212944105, + "grad_norm": 0.2834390846480046, + "learning_rate": 3.6973928900704503e-07, + "loss": 0.4566, + "step": 19456 + }, + { + "epoch": 0.8806064720525005, + "grad_norm": 0.5899447925861252, + "learning_rate": 3.6946273759426667e-07, + "loss": 0.3159, + "step": 19457 + }, + { + "epoch": 0.8806517311608961, + "grad_norm": 0.5717095051810808, + "learning_rate": 3.69186285677513e-07, + "loss": 0.2919, + "step": 19458 + }, + { + "epoch": 0.8806969902692917, + "grad_norm": 0.5766207199446806, + "learning_rate": 3.6890993326272273e-07, + "loss": 0.3135, + "step": 19459 + }, + { + "epoch": 0.8807422493776873, + "grad_norm": 0.6403324284212105, + "learning_rate": 3.6863368035583494e-07, + "loss": 0.2814, + "step": 19460 + }, + { + "epoch": 0.8807875084860828, + "grad_norm": 0.5780060358993144, + "learning_rate": 3.683575269627865e-07, + "loss": 0.3103, + "step": 19461 + }, + { + "epoch": 0.8808327675944784, + "grad_norm": 0.5773243391426244, + "learning_rate": 3.680814730895077e-07, + "loss": 0.2913, + "step": 19462 + }, + { + "epoch": 0.880878026702874, + "grad_norm": 0.2893917732617025, + "learning_rate": 3.6780551874193273e-07, + "loss": 0.4877, + "step": 19463 + }, + { + "epoch": 0.8809232858112696, + "grad_norm": 0.6755635937322024, + "learning_rate": 3.675296639259912e-07, + "loss": 0.3022, + "step": 19464 + }, + { + "epoch": 0.880968544919665, + "grad_norm": 1.2261435587924292, + "learning_rate": 3.672539086476101e-07, + "loss": 0.3154, + "step": 19465 + }, + { + "epoch": 0.8810138040280606, + "grad_norm": 0.593591631903637, + "learning_rate": 3.669782529127125e-07, + "loss": 0.2787, + "step": 19466 + }, + { + "epoch": 0.8810590631364562, + "grad_norm": 0.2859027748282125, + "learning_rate": 3.667026967272236e-07, + "loss": 0.4651, + "step": 19467 + }, + { + "epoch": 0.8811043222448518, + "grad_norm": 0.5509629077468778, + "learning_rate": 3.6642724009706423e-07, + "loss": 0.2995, + "step": 19468 + }, + { + "epoch": 0.8811495813532474, + "grad_norm": 0.552569680672477, + "learning_rate": 3.661518830281524e-07, + "loss": 0.3176, + "step": 19469 + }, + { + "epoch": 0.8811948404616429, + "grad_norm": 0.6266113022409174, + "learning_rate": 3.658766255264046e-07, + "loss": 0.3136, + "step": 19470 + }, + { + "epoch": 0.8812400995700385, + "grad_norm": 0.6089083806216865, + "learning_rate": 3.65601467597736e-07, + "loss": 0.3461, + "step": 19471 + }, + { + "epoch": 0.881285358678434, + "grad_norm": 0.2774525191944238, + "learning_rate": 3.653264092480574e-07, + "loss": 0.4639, + "step": 19472 + }, + { + "epoch": 0.8813306177868296, + "grad_norm": 0.2802605123799578, + "learning_rate": 3.650514504832808e-07, + "loss": 0.469, + "step": 19473 + }, + { + "epoch": 0.8813758768952251, + "grad_norm": 0.6079325883555515, + "learning_rate": 3.647765913093132e-07, + "loss": 0.2696, + "step": 19474 + }, + { + "epoch": 0.8814211360036207, + "grad_norm": 0.7064070655192801, + "learning_rate": 3.6450183173205975e-07, + "loss": 0.2796, + "step": 19475 + }, + { + "epoch": 0.8814663951120163, + "grad_norm": 0.6073929551673423, + "learning_rate": 3.6422717175742584e-07, + "loss": 0.3106, + "step": 19476 + }, + { + "epoch": 0.8815116542204119, + "grad_norm": 0.6969037107691501, + "learning_rate": 3.639526113913122e-07, + "loss": 0.3233, + "step": 19477 + }, + { + "epoch": 0.8815569133288074, + "grad_norm": 0.6067566723679195, + "learning_rate": 3.636781506396192e-07, + "loss": 0.2654, + "step": 19478 + }, + { + "epoch": 0.881602172437203, + "grad_norm": 0.27022736806054953, + "learning_rate": 3.634037895082421e-07, + "loss": 0.4727, + "step": 19479 + }, + { + "epoch": 0.8816474315455985, + "grad_norm": 0.2748053658632415, + "learning_rate": 3.631295280030783e-07, + "loss": 0.4623, + "step": 19480 + }, + { + "epoch": 0.8816926906539941, + "grad_norm": 0.6392027857838919, + "learning_rate": 3.628553661300194e-07, + "loss": 0.2831, + "step": 19481 + }, + { + "epoch": 0.8817379497623897, + "grad_norm": 0.2676240449487449, + "learning_rate": 3.6258130389495714e-07, + "loss": 0.4477, + "step": 19482 + }, + { + "epoch": 0.8817832088707852, + "grad_norm": 0.603529385483359, + "learning_rate": 3.623073413037792e-07, + "loss": 0.2927, + "step": 19483 + }, + { + "epoch": 0.8818284679791808, + "grad_norm": 0.5873583234591071, + "learning_rate": 3.620334783623736e-07, + "loss": 0.3097, + "step": 19484 + }, + { + "epoch": 0.8818737270875764, + "grad_norm": 0.6453616824837317, + "learning_rate": 3.6175971507662334e-07, + "loss": 0.2814, + "step": 19485 + }, + { + "epoch": 0.881918986195972, + "grad_norm": 0.7953146155133014, + "learning_rate": 3.6148605145241264e-07, + "loss": 0.2953, + "step": 19486 + }, + { + "epoch": 0.8819642453043675, + "grad_norm": 0.6501379276469679, + "learning_rate": 3.612124874956202e-07, + "loss": 0.3198, + "step": 19487 + }, + { + "epoch": 0.882009504412763, + "grad_norm": 0.6536634477685312, + "learning_rate": 3.6093902321212405e-07, + "loss": 0.3009, + "step": 19488 + }, + { + "epoch": 0.8820547635211586, + "grad_norm": 0.6306391823413456, + "learning_rate": 3.606656586078e-07, + "loss": 0.2996, + "step": 19489 + }, + { + "epoch": 0.8821000226295542, + "grad_norm": 0.6143080705714032, + "learning_rate": 3.603923936885234e-07, + "loss": 0.2883, + "step": 19490 + }, + { + "epoch": 0.8821452817379498, + "grad_norm": 0.5784749363086075, + "learning_rate": 3.6011922846016513e-07, + "loss": 0.2809, + "step": 19491 + }, + { + "epoch": 0.8821905408463453, + "grad_norm": 0.24860289722800014, + "learning_rate": 3.598461629285932e-07, + "loss": 0.4634, + "step": 19492 + }, + { + "epoch": 0.8822357999547409, + "grad_norm": 0.25458788983963304, + "learning_rate": 3.5957319709967686e-07, + "loss": 0.4477, + "step": 19493 + }, + { + "epoch": 0.8822810590631365, + "grad_norm": 0.5409817762850514, + "learning_rate": 3.5930033097928086e-07, + "loss": 0.2921, + "step": 19494 + }, + { + "epoch": 0.8823263181715321, + "grad_norm": 0.82768520924384, + "learning_rate": 3.590275645732666e-07, + "loss": 0.3658, + "step": 19495 + }, + { + "epoch": 0.8823715772799275, + "grad_norm": 0.6294611657462783, + "learning_rate": 3.5875489788749665e-07, + "loss": 0.271, + "step": 19496 + }, + { + "epoch": 0.8824168363883231, + "grad_norm": 0.6913804587027634, + "learning_rate": 3.5848233092783015e-07, + "loss": 0.362, + "step": 19497 + }, + { + "epoch": 0.8824620954967187, + "grad_norm": 0.25888332790044416, + "learning_rate": 3.5820986370012303e-07, + "loss": 0.473, + "step": 19498 + }, + { + "epoch": 0.8825073546051143, + "grad_norm": 0.6432899343617813, + "learning_rate": 3.579374962102289e-07, + "loss": 0.2763, + "step": 19499 + }, + { + "epoch": 0.8825526137135098, + "grad_norm": 0.6067096968977975, + "learning_rate": 3.57665228464002e-07, + "loss": 0.2607, + "step": 19500 + }, + { + "epoch": 0.8825978728219054, + "grad_norm": 0.6861618750869042, + "learning_rate": 3.573930604672904e-07, + "loss": 0.2651, + "step": 19501 + }, + { + "epoch": 0.882643131930301, + "grad_norm": 0.2943924522520371, + "learning_rate": 3.571209922259439e-07, + "loss": 0.4661, + "step": 19502 + }, + { + "epoch": 0.8826883910386966, + "grad_norm": 0.6177842227396511, + "learning_rate": 3.568490237458083e-07, + "loss": 0.303, + "step": 19503 + }, + { + "epoch": 0.8827336501470922, + "grad_norm": 0.2745503493714577, + "learning_rate": 3.5657715503272574e-07, + "loss": 0.4871, + "step": 19504 + }, + { + "epoch": 0.8827789092554876, + "grad_norm": 0.5571673918655595, + "learning_rate": 3.563053860925392e-07, + "loss": 0.302, + "step": 19505 + }, + { + "epoch": 0.8828241683638832, + "grad_norm": 0.6663631237912184, + "learning_rate": 3.5603371693108845e-07, + "loss": 0.3064, + "step": 19506 + }, + { + "epoch": 0.8828694274722788, + "grad_norm": 0.6260927130262418, + "learning_rate": 3.5576214755421e-07, + "loss": 0.2845, + "step": 19507 + }, + { + "epoch": 0.8829146865806744, + "grad_norm": 0.5917560520583667, + "learning_rate": 3.5549067796773915e-07, + "loss": 0.31, + "step": 19508 + }, + { + "epoch": 0.8829599456890699, + "grad_norm": 1.1379628888985436, + "learning_rate": 3.5521930817750963e-07, + "loss": 0.2842, + "step": 19509 + }, + { + "epoch": 0.8830052047974655, + "grad_norm": 0.6216868931141476, + "learning_rate": 3.549480381893505e-07, + "loss": 0.2603, + "step": 19510 + }, + { + "epoch": 0.8830504639058611, + "grad_norm": 0.6292864149594591, + "learning_rate": 3.546768680090934e-07, + "loss": 0.2915, + "step": 19511 + }, + { + "epoch": 0.8830957230142567, + "grad_norm": 0.5450364036985776, + "learning_rate": 3.544057976425619e-07, + "loss": 0.2967, + "step": 19512 + }, + { + "epoch": 0.8831409821226521, + "grad_norm": 0.6244991880782165, + "learning_rate": 3.5413482709558353e-07, + "loss": 0.297, + "step": 19513 + }, + { + "epoch": 0.8831862412310477, + "grad_norm": 0.6562732652908535, + "learning_rate": 3.538639563739776e-07, + "loss": 0.3106, + "step": 19514 + }, + { + "epoch": 0.8832315003394433, + "grad_norm": 0.24905978799813444, + "learning_rate": 3.535931854835667e-07, + "loss": 0.4413, + "step": 19515 + }, + { + "epoch": 0.8832767594478389, + "grad_norm": 0.6667836600959485, + "learning_rate": 3.533225144301683e-07, + "loss": 0.314, + "step": 19516 + }, + { + "epoch": 0.8833220185562345, + "grad_norm": 0.6324897324523692, + "learning_rate": 3.530519432195967e-07, + "loss": 0.269, + "step": 19517 + }, + { + "epoch": 0.88336727766463, + "grad_norm": 0.6004718094460242, + "learning_rate": 3.5278147185766665e-07, + "loss": 0.315, + "step": 19518 + }, + { + "epoch": 0.8834125367730256, + "grad_norm": 0.580260577387419, + "learning_rate": 3.525111003501908e-07, + "loss": 0.3061, + "step": 19519 + }, + { + "epoch": 0.8834577958814211, + "grad_norm": 0.5955680232408859, + "learning_rate": 3.522408287029783e-07, + "loss": 0.3236, + "step": 19520 + }, + { + "epoch": 0.8835030549898167, + "grad_norm": 0.5709331388756373, + "learning_rate": 3.519706569218345e-07, + "loss": 0.2763, + "step": 19521 + }, + { + "epoch": 0.8835483140982122, + "grad_norm": 0.7962422901003292, + "learning_rate": 3.517005850125671e-07, + "loss": 0.2955, + "step": 19522 + }, + { + "epoch": 0.8835935732066078, + "grad_norm": 0.5829977543303796, + "learning_rate": 3.5143061298097693e-07, + "loss": 0.2651, + "step": 19523 + }, + { + "epoch": 0.8836388323150034, + "grad_norm": 0.6197722715291717, + "learning_rate": 3.5116074083286655e-07, + "loss": 0.2744, + "step": 19524 + }, + { + "epoch": 0.883684091423399, + "grad_norm": 0.7073843878252019, + "learning_rate": 3.508909685740336e-07, + "loss": 0.2971, + "step": 19525 + }, + { + "epoch": 0.8837293505317946, + "grad_norm": 0.6411539261445458, + "learning_rate": 3.5062129621027565e-07, + "loss": 0.3349, + "step": 19526 + }, + { + "epoch": 0.8837746096401901, + "grad_norm": 0.5867776122798319, + "learning_rate": 3.5035172374738636e-07, + "loss": 0.3039, + "step": 19527 + }, + { + "epoch": 0.8838198687485856, + "grad_norm": 0.5909421682088367, + "learning_rate": 3.500822511911578e-07, + "loss": 0.3061, + "step": 19528 + }, + { + "epoch": 0.8838651278569812, + "grad_norm": 1.0457195445167602, + "learning_rate": 3.4981287854738143e-07, + "loss": 0.3391, + "step": 19529 + }, + { + "epoch": 0.8839103869653768, + "grad_norm": 0.6062906376849054, + "learning_rate": 3.495436058218432e-07, + "loss": 0.2978, + "step": 19530 + }, + { + "epoch": 0.8839556460737723, + "grad_norm": 0.6345821755484723, + "learning_rate": 3.4927443302033127e-07, + "loss": 0.298, + "step": 19531 + }, + { + "epoch": 0.8840009051821679, + "grad_norm": 0.5693391318164701, + "learning_rate": 3.4900536014862763e-07, + "loss": 0.2746, + "step": 19532 + }, + { + "epoch": 0.8840461642905635, + "grad_norm": 0.607301733592083, + "learning_rate": 3.487363872125138e-07, + "loss": 0.3167, + "step": 19533 + }, + { + "epoch": 0.8840914233989591, + "grad_norm": 0.6503070900326725, + "learning_rate": 3.4846751421777014e-07, + "loss": 0.2913, + "step": 19534 + }, + { + "epoch": 0.8841366825073546, + "grad_norm": 0.6221687774308418, + "learning_rate": 3.4819874117017373e-07, + "loss": 0.2766, + "step": 19535 + }, + { + "epoch": 0.8841819416157501, + "grad_norm": 0.5918727382827849, + "learning_rate": 3.479300680754999e-07, + "loss": 0.3141, + "step": 19536 + }, + { + "epoch": 0.8842272007241457, + "grad_norm": 0.7346385733269784, + "learning_rate": 3.4766149493952015e-07, + "loss": 0.3138, + "step": 19537 + }, + { + "epoch": 0.8842724598325413, + "grad_norm": 0.9691102569844686, + "learning_rate": 3.4739302176800603e-07, + "loss": 0.2779, + "step": 19538 + }, + { + "epoch": 0.8843177189409369, + "grad_norm": 0.5998689432100688, + "learning_rate": 3.471246485667279e-07, + "loss": 0.2885, + "step": 19539 + }, + { + "epoch": 0.8843629780493324, + "grad_norm": 0.5951426041184009, + "learning_rate": 3.468563753414506e-07, + "loss": 0.2637, + "step": 19540 + }, + { + "epoch": 0.884408237157728, + "grad_norm": 0.6478330409952806, + "learning_rate": 3.4658820209793773e-07, + "loss": 0.2924, + "step": 19541 + }, + { + "epoch": 0.8844534962661236, + "grad_norm": 0.6745078177873124, + "learning_rate": 3.463201288419532e-07, + "loss": 0.2855, + "step": 19542 + }, + { + "epoch": 0.8844987553745192, + "grad_norm": 0.2845659706695868, + "learning_rate": 3.460521555792562e-07, + "loss": 0.4745, + "step": 19543 + }, + { + "epoch": 0.8845440144829146, + "grad_norm": 0.588620732096941, + "learning_rate": 3.4578428231560547e-07, + "loss": 0.3066, + "step": 19544 + }, + { + "epoch": 0.8845892735913102, + "grad_norm": 0.6574074748283296, + "learning_rate": 3.4551650905675584e-07, + "loss": 0.3262, + "step": 19545 + }, + { + "epoch": 0.8846345326997058, + "grad_norm": 0.6045371469529954, + "learning_rate": 3.4524883580846045e-07, + "loss": 0.3199, + "step": 19546 + }, + { + "epoch": 0.8846797918081014, + "grad_norm": 0.5269759620850714, + "learning_rate": 3.44981262576472e-07, + "loss": 0.2819, + "step": 19547 + }, + { + "epoch": 0.8847250509164969, + "grad_norm": 0.5953117426683678, + "learning_rate": 3.4471378936654033e-07, + "loss": 0.2828, + "step": 19548 + }, + { + "epoch": 0.8847703100248925, + "grad_norm": 0.5621445856567112, + "learning_rate": 3.444464161844113e-07, + "loss": 0.2823, + "step": 19549 + }, + { + "epoch": 0.8848155691332881, + "grad_norm": 0.5682398961006359, + "learning_rate": 3.441791430358299e-07, + "loss": 0.2874, + "step": 19550 + }, + { + "epoch": 0.8848608282416837, + "grad_norm": 0.5835295496557882, + "learning_rate": 3.4391196992653976e-07, + "loss": 0.253, + "step": 19551 + }, + { + "epoch": 0.8849060873500793, + "grad_norm": 0.6077999682841784, + "learning_rate": 3.4364489686228076e-07, + "loss": 0.2733, + "step": 19552 + }, + { + "epoch": 0.8849513464584747, + "grad_norm": 0.6696188768672875, + "learning_rate": 3.4337792384879274e-07, + "loss": 0.2468, + "step": 19553 + }, + { + "epoch": 0.8849966055668703, + "grad_norm": 0.2749716275869113, + "learning_rate": 3.431110508918112e-07, + "loss": 0.4613, + "step": 19554 + }, + { + "epoch": 0.8850418646752659, + "grad_norm": 0.312283411686006, + "learning_rate": 3.428442779970709e-07, + "loss": 0.4337, + "step": 19555 + }, + { + "epoch": 0.8850871237836615, + "grad_norm": 0.2585317322224034, + "learning_rate": 3.425776051703028e-07, + "loss": 0.4855, + "step": 19556 + }, + { + "epoch": 0.885132382892057, + "grad_norm": 0.5817616751386044, + "learning_rate": 3.4231103241723904e-07, + "loss": 0.2766, + "step": 19557 + }, + { + "epoch": 0.8851776420004526, + "grad_norm": 0.657507339106589, + "learning_rate": 3.420445597436056e-07, + "loss": 0.3129, + "step": 19558 + }, + { + "epoch": 0.8852229011088482, + "grad_norm": 0.25678973673554567, + "learning_rate": 3.4177818715512844e-07, + "loss": 0.4524, + "step": 19559 + }, + { + "epoch": 0.8852681602172437, + "grad_norm": 0.6084533850542106, + "learning_rate": 3.415119146575313e-07, + "loss": 0.2883, + "step": 19560 + }, + { + "epoch": 0.8853134193256393, + "grad_norm": 0.6689930661361905, + "learning_rate": 3.412457422565368e-07, + "loss": 0.3022, + "step": 19561 + }, + { + "epoch": 0.8853586784340348, + "grad_norm": 0.6389051792047346, + "learning_rate": 3.409796699578621e-07, + "loss": 0.3097, + "step": 19562 + }, + { + "epoch": 0.8854039375424304, + "grad_norm": 0.6029784116061799, + "learning_rate": 3.4071369776722487e-07, + "loss": 0.2956, + "step": 19563 + }, + { + "epoch": 0.885449196650826, + "grad_norm": 0.2873501741847482, + "learning_rate": 3.4044782569034096e-07, + "loss": 0.4892, + "step": 19564 + }, + { + "epoch": 0.8854944557592216, + "grad_norm": 0.6136217708277859, + "learning_rate": 3.401820537329231e-07, + "loss": 0.2888, + "step": 19565 + }, + { + "epoch": 0.8855397148676171, + "grad_norm": 0.6195423898571132, + "learning_rate": 3.399163819006801e-07, + "loss": 0.2727, + "step": 19566 + }, + { + "epoch": 0.8855849739760127, + "grad_norm": 0.5806307328938312, + "learning_rate": 3.3965081019932176e-07, + "loss": 0.2648, + "step": 19567 + }, + { + "epoch": 0.8856302330844082, + "grad_norm": 1.002216804194398, + "learning_rate": 3.3938533863455526e-07, + "loss": 0.321, + "step": 19568 + }, + { + "epoch": 0.8856754921928038, + "grad_norm": 0.2583117712856083, + "learning_rate": 3.3911996721208373e-07, + "loss": 0.4512, + "step": 19569 + }, + { + "epoch": 0.8857207513011993, + "grad_norm": 0.6515063155660002, + "learning_rate": 3.388546959376088e-07, + "loss": 0.2698, + "step": 19570 + }, + { + "epoch": 0.8857660104095949, + "grad_norm": 0.6489297620437484, + "learning_rate": 3.385895248168314e-07, + "loss": 0.2663, + "step": 19571 + }, + { + "epoch": 0.8858112695179905, + "grad_norm": 0.6414007301380998, + "learning_rate": 3.383244538554481e-07, + "loss": 0.312, + "step": 19572 + }, + { + "epoch": 0.8858565286263861, + "grad_norm": 0.6456000031136664, + "learning_rate": 3.380594830591555e-07, + "loss": 0.311, + "step": 19573 + }, + { + "epoch": 0.8859017877347817, + "grad_norm": 0.6274904157405076, + "learning_rate": 3.3779461243364673e-07, + "loss": 0.3125, + "step": 19574 + }, + { + "epoch": 0.8859470468431772, + "grad_norm": 0.6555214158150692, + "learning_rate": 3.3752984198461236e-07, + "loss": 0.3102, + "step": 19575 + }, + { + "epoch": 0.8859923059515727, + "grad_norm": 0.6002976364382615, + "learning_rate": 3.3726517171774163e-07, + "loss": 0.251, + "step": 19576 + }, + { + "epoch": 0.8860375650599683, + "grad_norm": 0.6060495541921025, + "learning_rate": 3.3700060163872285e-07, + "loss": 0.2729, + "step": 19577 + }, + { + "epoch": 0.8860828241683639, + "grad_norm": 0.5862835298549485, + "learning_rate": 3.367361317532397e-07, + "loss": 0.2833, + "step": 19578 + }, + { + "epoch": 0.8861280832767594, + "grad_norm": 0.26710656491030127, + "learning_rate": 3.3647176206697387e-07, + "loss": 0.4968, + "step": 19579 + }, + { + "epoch": 0.886173342385155, + "grad_norm": 0.2666211536533626, + "learning_rate": 3.362074925856079e-07, + "loss": 0.431, + "step": 19580 + }, + { + "epoch": 0.8862186014935506, + "grad_norm": 0.5695538360488616, + "learning_rate": 3.359433233148185e-07, + "loss": 0.2709, + "step": 19581 + }, + { + "epoch": 0.8862638606019462, + "grad_norm": 0.59719229785415, + "learning_rate": 3.356792542602838e-07, + "loss": 0.2706, + "step": 19582 + }, + { + "epoch": 0.8863091197103417, + "grad_norm": 0.5836204564546559, + "learning_rate": 3.354152854276749e-07, + "loss": 0.2906, + "step": 19583 + }, + { + "epoch": 0.8863543788187372, + "grad_norm": 0.26799320129721166, + "learning_rate": 3.351514168226666e-07, + "loss": 0.4762, + "step": 19584 + }, + { + "epoch": 0.8863996379271328, + "grad_norm": 0.26377778286885706, + "learning_rate": 3.348876484509267e-07, + "loss": 0.4441, + "step": 19585 + }, + { + "epoch": 0.8864448970355284, + "grad_norm": 0.2715906803668781, + "learning_rate": 3.346239803181239e-07, + "loss": 0.4714, + "step": 19586 + }, + { + "epoch": 0.886490156143924, + "grad_norm": 0.6451659642294735, + "learning_rate": 3.343604124299232e-07, + "loss": 0.309, + "step": 19587 + }, + { + "epoch": 0.8865354152523195, + "grad_norm": 0.6017547316225268, + "learning_rate": 3.340969447919873e-07, + "loss": 0.2856, + "step": 19588 + }, + { + "epoch": 0.8865806743607151, + "grad_norm": 0.6555996985935236, + "learning_rate": 3.338335774099777e-07, + "loss": 0.3516, + "step": 19589 + }, + { + "epoch": 0.8866259334691107, + "grad_norm": 0.6593669661485506, + "learning_rate": 3.335703102895549e-07, + "loss": 0.3465, + "step": 19590 + }, + { + "epoch": 0.8866711925775063, + "grad_norm": 0.26153963630435606, + "learning_rate": 3.333071434363727e-07, + "loss": 0.4901, + "step": 19591 + }, + { + "epoch": 0.8867164516859017, + "grad_norm": 0.5857658939032128, + "learning_rate": 3.3304407685608777e-07, + "loss": 0.2507, + "step": 19592 + }, + { + "epoch": 0.8867617107942973, + "grad_norm": 0.26285765382768767, + "learning_rate": 3.3278111055435214e-07, + "loss": 0.4731, + "step": 19593 + }, + { + "epoch": 0.8868069699026929, + "grad_norm": 0.590814928105251, + "learning_rate": 3.325182445368169e-07, + "loss": 0.3197, + "step": 19594 + }, + { + "epoch": 0.8868522290110885, + "grad_norm": 0.6646910180079847, + "learning_rate": 3.322554788091287e-07, + "loss": 0.3039, + "step": 19595 + }, + { + "epoch": 0.8868974881194841, + "grad_norm": 0.6051269524511917, + "learning_rate": 3.31992813376934e-07, + "loss": 0.2911, + "step": 19596 + }, + { + "epoch": 0.8869427472278796, + "grad_norm": 0.6154458023837881, + "learning_rate": 3.3173024824587786e-07, + "loss": 0.2668, + "step": 19597 + }, + { + "epoch": 0.8869880063362752, + "grad_norm": 0.7649855997860965, + "learning_rate": 3.314677834216012e-07, + "loss": 0.2939, + "step": 19598 + }, + { + "epoch": 0.8870332654446708, + "grad_norm": 0.5910296190533375, + "learning_rate": 3.31205418909743e-07, + "loss": 0.3067, + "step": 19599 + }, + { + "epoch": 0.8870785245530663, + "grad_norm": 0.6687276660419679, + "learning_rate": 3.30943154715942e-07, + "loss": 0.384, + "step": 19600 + }, + { + "epoch": 0.8871237836614618, + "grad_norm": 0.26577943660214226, + "learning_rate": 3.3068099084583195e-07, + "loss": 0.4374, + "step": 19601 + }, + { + "epoch": 0.8871690427698574, + "grad_norm": 0.5708204411176053, + "learning_rate": 3.304189273050473e-07, + "loss": 0.2687, + "step": 19602 + }, + { + "epoch": 0.887214301878253, + "grad_norm": 0.28507712724926143, + "learning_rate": 3.301569640992186e-07, + "loss": 0.4675, + "step": 19603 + }, + { + "epoch": 0.8872595609866486, + "grad_norm": 0.6315800803320455, + "learning_rate": 3.298951012339735e-07, + "loss": 0.2979, + "step": 19604 + }, + { + "epoch": 0.8873048200950441, + "grad_norm": 0.6476269168568197, + "learning_rate": 3.2963333871493917e-07, + "loss": 0.2999, + "step": 19605 + }, + { + "epoch": 0.8873500792034397, + "grad_norm": 0.5974437769779533, + "learning_rate": 3.293716765477417e-07, + "loss": 0.2903, + "step": 19606 + }, + { + "epoch": 0.8873953383118353, + "grad_norm": 0.6080218454670456, + "learning_rate": 3.2911011473800213e-07, + "loss": 0.2874, + "step": 19607 + }, + { + "epoch": 0.8874405974202308, + "grad_norm": 0.24843599926496016, + "learning_rate": 3.2884865329133986e-07, + "loss": 0.4416, + "step": 19608 + }, + { + "epoch": 0.8874858565286264, + "grad_norm": 0.6070739732249638, + "learning_rate": 3.285872922133737e-07, + "loss": 0.264, + "step": 19609 + }, + { + "epoch": 0.8875311156370219, + "grad_norm": 0.61134458557448, + "learning_rate": 3.2832603150971974e-07, + "loss": 0.2928, + "step": 19610 + }, + { + "epoch": 0.8875763747454175, + "grad_norm": 0.590230363263004, + "learning_rate": 3.2806487118599237e-07, + "loss": 0.2799, + "step": 19611 + }, + { + "epoch": 0.8876216338538131, + "grad_norm": 0.5998546487827437, + "learning_rate": 3.2780381124780046e-07, + "loss": 0.306, + "step": 19612 + }, + { + "epoch": 0.8876668929622087, + "grad_norm": 0.6350566283688125, + "learning_rate": 3.275428517007562e-07, + "loss": 0.2695, + "step": 19613 + }, + { + "epoch": 0.8877121520706042, + "grad_norm": 0.607949377490081, + "learning_rate": 3.27281992550465e-07, + "loss": 0.3069, + "step": 19614 + }, + { + "epoch": 0.8877574111789998, + "grad_norm": 0.6484075872349462, + "learning_rate": 3.270212338025336e-07, + "loss": 0.2813, + "step": 19615 + }, + { + "epoch": 0.8878026702873953, + "grad_norm": 0.6570954002846704, + "learning_rate": 3.2676057546256354e-07, + "loss": 0.2596, + "step": 19616 + }, + { + "epoch": 0.8878479293957909, + "grad_norm": 0.6249630982237082, + "learning_rate": 3.2650001753615547e-07, + "loss": 0.2986, + "step": 19617 + }, + { + "epoch": 0.8878931885041864, + "grad_norm": 0.6217870094199994, + "learning_rate": 3.262395600289087e-07, + "loss": 0.3208, + "step": 19618 + }, + { + "epoch": 0.887938447612582, + "grad_norm": 0.5814716308866661, + "learning_rate": 3.259792029464204e-07, + "loss": 0.293, + "step": 19619 + }, + { + "epoch": 0.8879837067209776, + "grad_norm": 0.6060563502689635, + "learning_rate": 3.2571894629428224e-07, + "loss": 0.3349, + "step": 19620 + }, + { + "epoch": 0.8880289658293732, + "grad_norm": 0.6316516663442189, + "learning_rate": 3.2545879007808866e-07, + "loss": 0.2974, + "step": 19621 + }, + { + "epoch": 0.8880742249377688, + "grad_norm": 0.5989664177057166, + "learning_rate": 3.2519873430342905e-07, + "loss": 0.2803, + "step": 19622 + }, + { + "epoch": 0.8881194840461643, + "grad_norm": 0.650238534391762, + "learning_rate": 3.2493877897589123e-07, + "loss": 0.2616, + "step": 19623 + }, + { + "epoch": 0.8881647431545598, + "grad_norm": 0.5862701585975826, + "learning_rate": 3.2467892410106006e-07, + "loss": 0.3034, + "step": 19624 + }, + { + "epoch": 0.8882100022629554, + "grad_norm": 0.2878441759964836, + "learning_rate": 3.2441916968452003e-07, + "loss": 0.4626, + "step": 19625 + }, + { + "epoch": 0.888255261371351, + "grad_norm": 0.7429412990238157, + "learning_rate": 3.2415951573185224e-07, + "loss": 0.2857, + "step": 19626 + }, + { + "epoch": 0.8883005204797465, + "grad_norm": 0.5785353982968996, + "learning_rate": 3.2389996224863604e-07, + "loss": 0.271, + "step": 19627 + }, + { + "epoch": 0.8883457795881421, + "grad_norm": 0.282444870309688, + "learning_rate": 3.236405092404471e-07, + "loss": 0.4653, + "step": 19628 + }, + { + "epoch": 0.8883910386965377, + "grad_norm": 0.5859165748543158, + "learning_rate": 3.2338115671286254e-07, + "loss": 0.305, + "step": 19629 + }, + { + "epoch": 0.8884362978049333, + "grad_norm": 0.5722429759030558, + "learning_rate": 3.231219046714523e-07, + "loss": 0.2632, + "step": 19630 + }, + { + "epoch": 0.8884815569133289, + "grad_norm": 0.2766515224836743, + "learning_rate": 3.2286275312178984e-07, + "loss": 0.464, + "step": 19631 + }, + { + "epoch": 0.8885268160217243, + "grad_norm": 0.6600272957732479, + "learning_rate": 3.226037020694417e-07, + "loss": 0.2758, + "step": 19632 + }, + { + "epoch": 0.8885720751301199, + "grad_norm": 0.6126178906891568, + "learning_rate": 3.2234475151997345e-07, + "loss": 0.2981, + "step": 19633 + }, + { + "epoch": 0.8886173342385155, + "grad_norm": 0.4553668902972284, + "learning_rate": 3.220859014789507e-07, + "loss": 0.4847, + "step": 19634 + }, + { + "epoch": 0.8886625933469111, + "grad_norm": 0.6261440561231867, + "learning_rate": 3.21827151951935e-07, + "loss": 0.2758, + "step": 19635 + }, + { + "epoch": 0.8887078524553066, + "grad_norm": 0.6035397288881246, + "learning_rate": 3.215685029444865e-07, + "loss": 0.2576, + "step": 19636 + }, + { + "epoch": 0.8887531115637022, + "grad_norm": 0.6880253117826668, + "learning_rate": 3.213099544621612e-07, + "loss": 0.2752, + "step": 19637 + }, + { + "epoch": 0.8887983706720978, + "grad_norm": 0.5554182104281393, + "learning_rate": 3.210515065105152e-07, + "loss": 0.2761, + "step": 19638 + }, + { + "epoch": 0.8888436297804934, + "grad_norm": 0.7699347195496511, + "learning_rate": 3.20793159095103e-07, + "loss": 0.2992, + "step": 19639 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.674613342892287, + "learning_rate": 3.2053491222147514e-07, + "loss": 0.2525, + "step": 19640 + }, + { + "epoch": 0.8889341479972844, + "grad_norm": 0.6265349024511476, + "learning_rate": 3.2027676589517885e-07, + "loss": 0.3129, + "step": 19641 + }, + { + "epoch": 0.88897940710568, + "grad_norm": 0.6407998943248913, + "learning_rate": 3.2001872012176304e-07, + "loss": 0.3456, + "step": 19642 + }, + { + "epoch": 0.8890246662140756, + "grad_norm": 0.6249769827265742, + "learning_rate": 3.1976077490677106e-07, + "loss": 0.2703, + "step": 19643 + }, + { + "epoch": 0.8890699253224712, + "grad_norm": 0.2689051847218968, + "learning_rate": 3.195029302557462e-07, + "loss": 0.4517, + "step": 19644 + }, + { + "epoch": 0.8891151844308667, + "grad_norm": 0.8638207476845396, + "learning_rate": 3.1924518617422796e-07, + "loss": 0.2631, + "step": 19645 + }, + { + "epoch": 0.8891604435392623, + "grad_norm": 0.5727394461563624, + "learning_rate": 3.1898754266775467e-07, + "loss": 0.2959, + "step": 19646 + }, + { + "epoch": 0.8892057026476579, + "grad_norm": 0.6598354797321724, + "learning_rate": 3.1872999974186194e-07, + "loss": 0.2718, + "step": 19647 + }, + { + "epoch": 0.8892509617560534, + "grad_norm": 0.5766600113148568, + "learning_rate": 3.1847255740208636e-07, + "loss": 0.2871, + "step": 19648 + }, + { + "epoch": 0.8892962208644489, + "grad_norm": 0.2839105455499461, + "learning_rate": 3.182152156539553e-07, + "loss": 0.4891, + "step": 19649 + }, + { + "epoch": 0.8893414799728445, + "grad_norm": 0.6785505816778864, + "learning_rate": 3.179579745029998e-07, + "loss": 0.2825, + "step": 19650 + }, + { + "epoch": 0.8893867390812401, + "grad_norm": 0.5529712585838098, + "learning_rate": 3.1770083395474827e-07, + "loss": 0.2801, + "step": 19651 + }, + { + "epoch": 0.8894319981896357, + "grad_norm": 0.6154279850046951, + "learning_rate": 3.174437940147268e-07, + "loss": 0.3026, + "step": 19652 + }, + { + "epoch": 0.8894772572980312, + "grad_norm": 0.6320580096458656, + "learning_rate": 3.171868546884549e-07, + "loss": 0.2991, + "step": 19653 + }, + { + "epoch": 0.8895225164064268, + "grad_norm": 0.5896944818087667, + "learning_rate": 3.169300159814559e-07, + "loss": 0.2572, + "step": 19654 + }, + { + "epoch": 0.8895677755148224, + "grad_norm": 0.6314943278887438, + "learning_rate": 3.1667327789924815e-07, + "loss": 0.2978, + "step": 19655 + }, + { + "epoch": 0.889613034623218, + "grad_norm": 0.6240822715544811, + "learning_rate": 3.1641664044734786e-07, + "loss": 0.3456, + "step": 19656 + }, + { + "epoch": 0.8896582937316135, + "grad_norm": 0.5924244399609876, + "learning_rate": 3.1616010363126893e-07, + "loss": 0.3238, + "step": 19657 + }, + { + "epoch": 0.889703552840009, + "grad_norm": 0.6401946065654505, + "learning_rate": 3.159036674565247e-07, + "loss": 0.2926, + "step": 19658 + }, + { + "epoch": 0.8897488119484046, + "grad_norm": 0.6277174806465244, + "learning_rate": 3.156473319286241e-07, + "loss": 0.2609, + "step": 19659 + }, + { + "epoch": 0.8897940710568002, + "grad_norm": 0.6024028873920564, + "learning_rate": 3.15391097053076e-07, + "loss": 0.2848, + "step": 19660 + }, + { + "epoch": 0.8898393301651958, + "grad_norm": 0.6713968922655957, + "learning_rate": 3.151349628353856e-07, + "loss": 0.2723, + "step": 19661 + }, + { + "epoch": 0.8898845892735913, + "grad_norm": 0.28489424233366306, + "learning_rate": 3.1487892928105554e-07, + "loss": 0.4566, + "step": 19662 + }, + { + "epoch": 0.8899298483819869, + "grad_norm": 0.2687351928135246, + "learning_rate": 3.146229963955877e-07, + "loss": 0.4745, + "step": 19663 + }, + { + "epoch": 0.8899751074903824, + "grad_norm": 0.2776367386077948, + "learning_rate": 3.143671641844831e-07, + "loss": 0.464, + "step": 19664 + }, + { + "epoch": 0.890020366598778, + "grad_norm": 0.7031881152911872, + "learning_rate": 3.1411143265323684e-07, + "loss": 0.3152, + "step": 19665 + }, + { + "epoch": 0.8900656257071735, + "grad_norm": 0.7058053280755887, + "learning_rate": 3.138558018073434e-07, + "loss": 0.3258, + "step": 19666 + }, + { + "epoch": 0.8901108848155691, + "grad_norm": 0.2524281733289989, + "learning_rate": 3.1360027165229677e-07, + "loss": 0.4666, + "step": 19667 + }, + { + "epoch": 0.8901561439239647, + "grad_norm": 0.5566851001002566, + "learning_rate": 3.1334484219358754e-07, + "loss": 0.3123, + "step": 19668 + }, + { + "epoch": 0.8902014030323603, + "grad_norm": 0.6187301932164185, + "learning_rate": 3.13089513436704e-07, + "loss": 0.3133, + "step": 19669 + }, + { + "epoch": 0.8902466621407559, + "grad_norm": 0.6546110361034537, + "learning_rate": 3.1283428538713134e-07, + "loss": 0.2975, + "step": 19670 + }, + { + "epoch": 0.8902919212491514, + "grad_norm": 0.5680701004053491, + "learning_rate": 3.125791580503551e-07, + "loss": 0.2893, + "step": 19671 + }, + { + "epoch": 0.8903371803575469, + "grad_norm": 0.6341125142971917, + "learning_rate": 3.1232413143185534e-07, + "loss": 0.285, + "step": 19672 + }, + { + "epoch": 0.8903824394659425, + "grad_norm": 0.6664029205667784, + "learning_rate": 3.1206920553711385e-07, + "loss": 0.2534, + "step": 19673 + }, + { + "epoch": 0.8904276985743381, + "grad_norm": 0.660749332252751, + "learning_rate": 3.1181438037160727e-07, + "loss": 0.3067, + "step": 19674 + }, + { + "epoch": 0.8904729576827336, + "grad_norm": 0.2588901641031759, + "learning_rate": 3.1155965594081017e-07, + "loss": 0.4597, + "step": 19675 + }, + { + "epoch": 0.8905182167911292, + "grad_norm": 0.6116703932544021, + "learning_rate": 3.1130503225019705e-07, + "loss": 0.3262, + "step": 19676 + }, + { + "epoch": 0.8905634758995248, + "grad_norm": 0.28303243772908865, + "learning_rate": 3.110505093052396e-07, + "loss": 0.4598, + "step": 19677 + }, + { + "epoch": 0.8906087350079204, + "grad_norm": 0.624993176785746, + "learning_rate": 3.107960871114041e-07, + "loss": 0.2759, + "step": 19678 + }, + { + "epoch": 0.890653994116316, + "grad_norm": 0.2662360133368587, + "learning_rate": 3.1054176567415937e-07, + "loss": 0.4466, + "step": 19679 + }, + { + "epoch": 0.8906992532247114, + "grad_norm": 0.6189525480592604, + "learning_rate": 3.1028754499896895e-07, + "loss": 0.2865, + "step": 19680 + }, + { + "epoch": 0.890744512333107, + "grad_norm": 0.6925785423664801, + "learning_rate": 3.1003342509129783e-07, + "loss": 0.287, + "step": 19681 + }, + { + "epoch": 0.8907897714415026, + "grad_norm": 0.5897566594709222, + "learning_rate": 3.097794059566023e-07, + "loss": 0.2926, + "step": 19682 + }, + { + "epoch": 0.8908350305498982, + "grad_norm": 0.5965249195848944, + "learning_rate": 3.0952548760034284e-07, + "loss": 0.2999, + "step": 19683 + }, + { + "epoch": 0.8908802896582937, + "grad_norm": 0.2501494233144302, + "learning_rate": 3.0927167002797574e-07, + "loss": 0.443, + "step": 19684 + }, + { + "epoch": 0.8909255487666893, + "grad_norm": 0.6550215832039792, + "learning_rate": 3.0901795324495334e-07, + "loss": 0.2894, + "step": 19685 + }, + { + "epoch": 0.8909708078750849, + "grad_norm": 0.6083383919033705, + "learning_rate": 3.0876433725672783e-07, + "loss": 0.2451, + "step": 19686 + }, + { + "epoch": 0.8910160669834805, + "grad_norm": 0.5901909902580682, + "learning_rate": 3.085108220687494e-07, + "loss": 0.2653, + "step": 19687 + }, + { + "epoch": 0.8910613260918759, + "grad_norm": 0.6244542350942888, + "learning_rate": 3.082574076864636e-07, + "loss": 0.3328, + "step": 19688 + }, + { + "epoch": 0.8911065852002715, + "grad_norm": 0.6182059717072552, + "learning_rate": 3.0800409411531727e-07, + "loss": 0.2903, + "step": 19689 + }, + { + "epoch": 0.8911518443086671, + "grad_norm": 0.27308560925210523, + "learning_rate": 3.077508813607527e-07, + "loss": 0.4782, + "step": 19690 + }, + { + "epoch": 0.8911971034170627, + "grad_norm": 0.6191578915878152, + "learning_rate": 3.0749776942820943e-07, + "loss": 0.3073, + "step": 19691 + }, + { + "epoch": 0.8912423625254583, + "grad_norm": 0.6610736152984831, + "learning_rate": 3.072447583231275e-07, + "loss": 0.224, + "step": 19692 + }, + { + "epoch": 0.8912876216338538, + "grad_norm": 0.2789734855593772, + "learning_rate": 3.0699184805094374e-07, + "loss": 0.4748, + "step": 19693 + }, + { + "epoch": 0.8913328807422494, + "grad_norm": 0.7874295132580852, + "learning_rate": 3.067390386170915e-07, + "loss": 0.2646, + "step": 19694 + }, + { + "epoch": 0.891378139850645, + "grad_norm": 0.2728170143891171, + "learning_rate": 3.064863300270027e-07, + "loss": 0.4445, + "step": 19695 + }, + { + "epoch": 0.8914233989590405, + "grad_norm": 0.7961230497887644, + "learning_rate": 3.0623372228610725e-07, + "loss": 0.3046, + "step": 19696 + }, + { + "epoch": 0.891468658067436, + "grad_norm": 1.4269209581457398, + "learning_rate": 3.059812153998343e-07, + "loss": 0.3039, + "step": 19697 + }, + { + "epoch": 0.8915139171758316, + "grad_norm": 0.7972617116547703, + "learning_rate": 3.057288093736083e-07, + "loss": 0.2733, + "step": 19698 + }, + { + "epoch": 0.8915591762842272, + "grad_norm": 0.650142097827148, + "learning_rate": 3.0547650421285216e-07, + "loss": 0.2966, + "step": 19699 + }, + { + "epoch": 0.8916044353926228, + "grad_norm": 1.0215184862868254, + "learning_rate": 3.0522429992298873e-07, + "loss": 0.3322, + "step": 19700 + }, + { + "epoch": 0.8916496945010183, + "grad_norm": 0.5915881881019676, + "learning_rate": 3.0497219650943545e-07, + "loss": 0.2878, + "step": 19701 + }, + { + "epoch": 0.8916949536094139, + "grad_norm": 0.6089222195563149, + "learning_rate": 3.0472019397761065e-07, + "loss": 0.2904, + "step": 19702 + }, + { + "epoch": 0.8917402127178095, + "grad_norm": 0.6306950949640981, + "learning_rate": 3.044682923329284e-07, + "loss": 0.2931, + "step": 19703 + }, + { + "epoch": 0.891785471826205, + "grad_norm": 0.6377067107024558, + "learning_rate": 3.0421649158080047e-07, + "loss": 0.29, + "step": 19704 + }, + { + "epoch": 0.8918307309346006, + "grad_norm": 0.6472275255096006, + "learning_rate": 3.0396479172663806e-07, + "loss": 0.2944, + "step": 19705 + }, + { + "epoch": 0.8918759900429961, + "grad_norm": 0.6015998556020318, + "learning_rate": 3.037131927758502e-07, + "loss": 0.2888, + "step": 19706 + }, + { + "epoch": 0.8919212491513917, + "grad_norm": 0.6050816306761447, + "learning_rate": 3.0346169473384255e-07, + "loss": 0.2742, + "step": 19707 + }, + { + "epoch": 0.8919665082597873, + "grad_norm": 0.5798568141658198, + "learning_rate": 3.032102976060181e-07, + "loss": 0.2751, + "step": 19708 + }, + { + "epoch": 0.8920117673681829, + "grad_norm": 0.7827332971119896, + "learning_rate": 3.02959001397779e-07, + "loss": 0.2635, + "step": 19709 + }, + { + "epoch": 0.8920570264765784, + "grad_norm": 0.6218102387465159, + "learning_rate": 3.027078061145261e-07, + "loss": 0.3007, + "step": 19710 + }, + { + "epoch": 0.892102285584974, + "grad_norm": 0.5946378433026083, + "learning_rate": 3.024567117616556e-07, + "loss": 0.2722, + "step": 19711 + }, + { + "epoch": 0.8921475446933695, + "grad_norm": 0.6229968934552937, + "learning_rate": 3.0220571834456256e-07, + "loss": 0.2859, + "step": 19712 + }, + { + "epoch": 0.8921928038017651, + "grad_norm": 0.551512068307753, + "learning_rate": 3.0195482586864055e-07, + "loss": 0.2961, + "step": 19713 + }, + { + "epoch": 0.8922380629101607, + "grad_norm": 0.6761291147721131, + "learning_rate": 3.0170403433928077e-07, + "loss": 0.2716, + "step": 19714 + }, + { + "epoch": 0.8922833220185562, + "grad_norm": 0.5916261620907665, + "learning_rate": 3.014533437618711e-07, + "loss": 0.2877, + "step": 19715 + }, + { + "epoch": 0.8923285811269518, + "grad_norm": 0.6567528853482709, + "learning_rate": 3.012027541417989e-07, + "loss": 0.3192, + "step": 19716 + }, + { + "epoch": 0.8923738402353474, + "grad_norm": 0.6371454442751823, + "learning_rate": 3.0095226548444765e-07, + "loss": 0.3236, + "step": 19717 + }, + { + "epoch": 0.892419099343743, + "grad_norm": 0.5819460949351073, + "learning_rate": 3.007018777952009e-07, + "loss": 0.3087, + "step": 19718 + }, + { + "epoch": 0.8924643584521385, + "grad_norm": 0.6690205421170103, + "learning_rate": 3.004515910794381e-07, + "loss": 0.2836, + "step": 19719 + }, + { + "epoch": 0.892509617560534, + "grad_norm": 0.24512552320011244, + "learning_rate": 3.0020140534253617e-07, + "loss": 0.4502, + "step": 19720 + }, + { + "epoch": 0.8925548766689296, + "grad_norm": 0.2524877763429363, + "learning_rate": 2.9995132058987185e-07, + "loss": 0.4571, + "step": 19721 + }, + { + "epoch": 0.8926001357773252, + "grad_norm": 0.6603633468410276, + "learning_rate": 2.9970133682681924e-07, + "loss": 0.2898, + "step": 19722 + }, + { + "epoch": 0.8926453948857207, + "grad_norm": 0.623220132327247, + "learning_rate": 2.9945145405874955e-07, + "loss": 0.2916, + "step": 19723 + }, + { + "epoch": 0.8926906539941163, + "grad_norm": 0.694842873664057, + "learning_rate": 2.9920167229103015e-07, + "loss": 0.284, + "step": 19724 + }, + { + "epoch": 0.8927359131025119, + "grad_norm": 0.6110616350131243, + "learning_rate": 2.9895199152902955e-07, + "loss": 0.2988, + "step": 19725 + }, + { + "epoch": 0.8927811722109075, + "grad_norm": 0.6380149501154079, + "learning_rate": 2.987024117781129e-07, + "loss": 0.2796, + "step": 19726 + }, + { + "epoch": 0.8928264313193031, + "grad_norm": 0.5801201468884075, + "learning_rate": 2.984529330436431e-07, + "loss": 0.2867, + "step": 19727 + }, + { + "epoch": 0.8928716904276985, + "grad_norm": 0.5946765214793907, + "learning_rate": 2.9820355533097864e-07, + "loss": 0.2699, + "step": 19728 + }, + { + "epoch": 0.8929169495360941, + "grad_norm": 0.2925541355352873, + "learning_rate": 2.9795427864548034e-07, + "loss": 0.4857, + "step": 19729 + }, + { + "epoch": 0.8929622086444897, + "grad_norm": 0.6101996258652933, + "learning_rate": 2.9770510299250265e-07, + "loss": 0.2928, + "step": 19730 + }, + { + "epoch": 0.8930074677528853, + "grad_norm": 0.9588491138498303, + "learning_rate": 2.974560283774014e-07, + "loss": 0.3178, + "step": 19731 + }, + { + "epoch": 0.8930527268612808, + "grad_norm": 0.650951067812214, + "learning_rate": 2.972070548055267e-07, + "loss": 0.3092, + "step": 19732 + }, + { + "epoch": 0.8930979859696764, + "grad_norm": 0.6326366483138217, + "learning_rate": 2.9695818228222873e-07, + "loss": 0.2679, + "step": 19733 + }, + { + "epoch": 0.893143245078072, + "grad_norm": 0.6164318764744039, + "learning_rate": 2.967094108128549e-07, + "loss": 0.3045, + "step": 19734 + }, + { + "epoch": 0.8931885041864676, + "grad_norm": 0.563153323637875, + "learning_rate": 2.964607404027514e-07, + "loss": 0.2565, + "step": 19735 + }, + { + "epoch": 0.893233763294863, + "grad_norm": 0.607980885936067, + "learning_rate": 2.9621217105726077e-07, + "loss": 0.3056, + "step": 19736 + }, + { + "epoch": 0.8932790224032586, + "grad_norm": 0.708213998184299, + "learning_rate": 2.9596370278172305e-07, + "loss": 0.2901, + "step": 19737 + }, + { + "epoch": 0.8933242815116542, + "grad_norm": 0.6246550411649562, + "learning_rate": 2.9571533558147845e-07, + "loss": 0.2731, + "step": 19738 + }, + { + "epoch": 0.8933695406200498, + "grad_norm": 0.6935539968631732, + "learning_rate": 2.9546706946186387e-07, + "loss": 0.3315, + "step": 19739 + }, + { + "epoch": 0.8934147997284454, + "grad_norm": 0.61341847188213, + "learning_rate": 2.9521890442821276e-07, + "loss": 0.2844, + "step": 19740 + }, + { + "epoch": 0.8934600588368409, + "grad_norm": 0.6046356361903988, + "learning_rate": 2.9497084048585755e-07, + "loss": 0.2901, + "step": 19741 + }, + { + "epoch": 0.8935053179452365, + "grad_norm": 0.6001415015783245, + "learning_rate": 2.94722877640129e-07, + "loss": 0.2884, + "step": 19742 + }, + { + "epoch": 0.8935505770536321, + "grad_norm": 0.6380842124636362, + "learning_rate": 2.9447501589635387e-07, + "loss": 0.247, + "step": 19743 + }, + { + "epoch": 0.8935958361620276, + "grad_norm": 0.5885874973556446, + "learning_rate": 2.942272552598596e-07, + "loss": 0.3277, + "step": 19744 + }, + { + "epoch": 0.8936410952704231, + "grad_norm": 0.6059575773494633, + "learning_rate": 2.9397959573596867e-07, + "loss": 0.2867, + "step": 19745 + }, + { + "epoch": 0.8936863543788187, + "grad_norm": 0.25104711480411934, + "learning_rate": 2.9373203733000234e-07, + "loss": 0.4673, + "step": 19746 + }, + { + "epoch": 0.8937316134872143, + "grad_norm": 0.2764519128206775, + "learning_rate": 2.9348458004728074e-07, + "loss": 0.4565, + "step": 19747 + }, + { + "epoch": 0.8937768725956099, + "grad_norm": 0.25004411755184186, + "learning_rate": 2.9323722389312084e-07, + "loss": 0.448, + "step": 19748 + }, + { + "epoch": 0.8938221317040055, + "grad_norm": 0.2816876893548513, + "learning_rate": 2.929899688728366e-07, + "loss": 0.4502, + "step": 19749 + }, + { + "epoch": 0.893867390812401, + "grad_norm": 0.6316241558565415, + "learning_rate": 2.927428149917416e-07, + "loss": 0.2989, + "step": 19750 + }, + { + "epoch": 0.8939126499207966, + "grad_norm": 0.2710113008541533, + "learning_rate": 2.9249576225514664e-07, + "loss": 0.4817, + "step": 19751 + }, + { + "epoch": 0.8939579090291921, + "grad_norm": 0.633843670051217, + "learning_rate": 2.922488106683596e-07, + "loss": 0.3181, + "step": 19752 + }, + { + "epoch": 0.8940031681375877, + "grad_norm": 0.6530386741855132, + "learning_rate": 2.9200196023668693e-07, + "loss": 0.3278, + "step": 19753 + }, + { + "epoch": 0.8940484272459832, + "grad_norm": 0.6829899675953176, + "learning_rate": 2.91755210965432e-07, + "loss": 0.3048, + "step": 19754 + }, + { + "epoch": 0.8940936863543788, + "grad_norm": 0.5950240474932199, + "learning_rate": 2.915085628598979e-07, + "loss": 0.291, + "step": 19755 + }, + { + "epoch": 0.8941389454627744, + "grad_norm": 0.27733112695232565, + "learning_rate": 2.9126201592538427e-07, + "loss": 0.4681, + "step": 19756 + }, + { + "epoch": 0.89418420457117, + "grad_norm": 0.6544517655613107, + "learning_rate": 2.910155701671868e-07, + "loss": 0.2653, + "step": 19757 + }, + { + "epoch": 0.8942294636795655, + "grad_norm": 0.2657406569618254, + "learning_rate": 2.907692255906036e-07, + "loss": 0.4757, + "step": 19758 + }, + { + "epoch": 0.894274722787961, + "grad_norm": 0.6613698668778631, + "learning_rate": 2.905229822009253e-07, + "loss": 0.3149, + "step": 19759 + }, + { + "epoch": 0.8943199818963566, + "grad_norm": 0.5564095968570708, + "learning_rate": 2.9027684000344446e-07, + "loss": 0.2823, + "step": 19760 + }, + { + "epoch": 0.8943652410047522, + "grad_norm": 0.2668504868753882, + "learning_rate": 2.900307990034501e-07, + "loss": 0.4609, + "step": 19761 + }, + { + "epoch": 0.8944105001131478, + "grad_norm": 0.6364155599319349, + "learning_rate": 2.8978485920622747e-07, + "loss": 0.3106, + "step": 19762 + }, + { + "epoch": 0.8944557592215433, + "grad_norm": 0.550987796887358, + "learning_rate": 2.8953902061706173e-07, + "loss": 0.2661, + "step": 19763 + }, + { + "epoch": 0.8945010183299389, + "grad_norm": 0.25702933360286156, + "learning_rate": 2.8929328324123595e-07, + "loss": 0.4433, + "step": 19764 + }, + { + "epoch": 0.8945462774383345, + "grad_norm": 0.5719382824193083, + "learning_rate": 2.890476470840303e-07, + "loss": 0.3219, + "step": 19765 + }, + { + "epoch": 0.8945915365467301, + "grad_norm": 0.6305795164274872, + "learning_rate": 2.8880211215072065e-07, + "loss": 0.279, + "step": 19766 + }, + { + "epoch": 0.8946367956551255, + "grad_norm": 0.6123742014140814, + "learning_rate": 2.8855667844658484e-07, + "loss": 0.2827, + "step": 19767 + }, + { + "epoch": 0.8946820547635211, + "grad_norm": 0.6371501831330458, + "learning_rate": 2.8831134597689604e-07, + "loss": 0.2471, + "step": 19768 + }, + { + "epoch": 0.8947273138719167, + "grad_norm": 0.6011369890792166, + "learning_rate": 2.8806611474692604e-07, + "loss": 0.3209, + "step": 19769 + }, + { + "epoch": 0.8947725729803123, + "grad_norm": 0.6019283022002772, + "learning_rate": 2.878209847619429e-07, + "loss": 0.2731, + "step": 19770 + }, + { + "epoch": 0.8948178320887078, + "grad_norm": 0.6117442681689266, + "learning_rate": 2.875759560272151e-07, + "loss": 0.2717, + "step": 19771 + }, + { + "epoch": 0.8948630911971034, + "grad_norm": 0.2607889799884667, + "learning_rate": 2.873310285480063e-07, + "loss": 0.4742, + "step": 19772 + }, + { + "epoch": 0.894908350305499, + "grad_norm": 0.6182961608864804, + "learning_rate": 2.8708620232958004e-07, + "loss": 0.3086, + "step": 19773 + }, + { + "epoch": 0.8949536094138946, + "grad_norm": 0.579715758551968, + "learning_rate": 2.868414773771971e-07, + "loss": 0.294, + "step": 19774 + }, + { + "epoch": 0.8949988685222902, + "grad_norm": 0.5744817660343502, + "learning_rate": 2.8659685369611503e-07, + "loss": 0.275, + "step": 19775 + }, + { + "epoch": 0.8950441276306856, + "grad_norm": 0.5618204289540498, + "learning_rate": 2.8635233129159004e-07, + "loss": 0.2363, + "step": 19776 + }, + { + "epoch": 0.8950893867390812, + "grad_norm": 0.650461405504146, + "learning_rate": 2.8610791016887794e-07, + "loss": 0.2863, + "step": 19777 + }, + { + "epoch": 0.8951346458474768, + "grad_norm": 0.5736113775880847, + "learning_rate": 2.85863590333228e-07, + "loss": 0.2734, + "step": 19778 + }, + { + "epoch": 0.8951799049558724, + "grad_norm": 0.5879128588576477, + "learning_rate": 2.8561937178989087e-07, + "loss": 0.2767, + "step": 19779 + }, + { + "epoch": 0.8952251640642679, + "grad_norm": 0.6277117466751468, + "learning_rate": 2.853752545441146e-07, + "loss": 0.2962, + "step": 19780 + }, + { + "epoch": 0.8952704231726635, + "grad_norm": 0.666912011889595, + "learning_rate": 2.851312386011457e-07, + "loss": 0.3418, + "step": 19781 + }, + { + "epoch": 0.8953156822810591, + "grad_norm": 0.724911195777025, + "learning_rate": 2.8488732396622476e-07, + "loss": 0.2705, + "step": 19782 + }, + { + "epoch": 0.8953609413894547, + "grad_norm": 0.28456627284890906, + "learning_rate": 2.846435106445933e-07, + "loss": 0.459, + "step": 19783 + }, + { + "epoch": 0.8954062004978502, + "grad_norm": 0.6560552485337452, + "learning_rate": 2.843997986414915e-07, + "loss": 0.254, + "step": 19784 + }, + { + "epoch": 0.8954514596062457, + "grad_norm": 0.6501966318352483, + "learning_rate": 2.8415618796215516e-07, + "loss": 0.3142, + "step": 19785 + }, + { + "epoch": 0.8954967187146413, + "grad_norm": 0.25189795807932797, + "learning_rate": 2.839126786118179e-07, + "loss": 0.4688, + "step": 19786 + }, + { + "epoch": 0.8955419778230369, + "grad_norm": 0.5445596252129087, + "learning_rate": 2.8366927059571393e-07, + "loss": 0.2673, + "step": 19787 + }, + { + "epoch": 0.8955872369314325, + "grad_norm": 0.6155411118466514, + "learning_rate": 2.834259639190712e-07, + "loss": 0.2709, + "step": 19788 + }, + { + "epoch": 0.895632496039828, + "grad_norm": 0.5917130188296267, + "learning_rate": 2.8318275858711943e-07, + "loss": 0.3112, + "step": 19789 + }, + { + "epoch": 0.8956777551482236, + "grad_norm": 0.2553760666330001, + "learning_rate": 2.829396546050839e-07, + "loss": 0.4581, + "step": 19790 + }, + { + "epoch": 0.8957230142566192, + "grad_norm": 0.26227643370388387, + "learning_rate": 2.826966519781871e-07, + "loss": 0.4565, + "step": 19791 + }, + { + "epoch": 0.8957682733650147, + "grad_norm": 0.6770457020151776, + "learning_rate": 2.824537507116504e-07, + "loss": 0.2896, + "step": 19792 + }, + { + "epoch": 0.8958135324734102, + "grad_norm": 0.6022884364992735, + "learning_rate": 2.8221095081069517e-07, + "loss": 0.2608, + "step": 19793 + }, + { + "epoch": 0.8958587915818058, + "grad_norm": 0.5787444811677719, + "learning_rate": 2.819682522805367e-07, + "loss": 0.2736, + "step": 19794 + }, + { + "epoch": 0.8959040506902014, + "grad_norm": 0.599606507374959, + "learning_rate": 2.8172565512638974e-07, + "loss": 0.2927, + "step": 19795 + }, + { + "epoch": 0.895949309798597, + "grad_norm": 0.625534914275994, + "learning_rate": 2.8148315935346725e-07, + "loss": 0.2343, + "step": 19796 + }, + { + "epoch": 0.8959945689069926, + "grad_norm": 0.5826505623078216, + "learning_rate": 2.812407649669807e-07, + "loss": 0.3011, + "step": 19797 + }, + { + "epoch": 0.8960398280153881, + "grad_norm": 0.6161056662098575, + "learning_rate": 2.809984719721376e-07, + "loss": 0.2726, + "step": 19798 + }, + { + "epoch": 0.8960850871237837, + "grad_norm": 0.5970542362819024, + "learning_rate": 2.807562803741426e-07, + "loss": 0.2774, + "step": 19799 + }, + { + "epoch": 0.8961303462321792, + "grad_norm": 0.6152274129951627, + "learning_rate": 2.805141901782027e-07, + "loss": 0.2983, + "step": 19800 + }, + { + "epoch": 0.8961756053405748, + "grad_norm": 0.28077492126298775, + "learning_rate": 2.8027220138951705e-07, + "loss": 0.4626, + "step": 19801 + }, + { + "epoch": 0.8962208644489703, + "grad_norm": 0.6393216136802916, + "learning_rate": 2.8003031401328653e-07, + "loss": 0.2933, + "step": 19802 + }, + { + "epoch": 0.8962661235573659, + "grad_norm": 0.5985841723566697, + "learning_rate": 2.797885280547086e-07, + "loss": 0.2614, + "step": 19803 + }, + { + "epoch": 0.8963113826657615, + "grad_norm": 0.5298212310543782, + "learning_rate": 2.795468435189774e-07, + "loss": 0.2568, + "step": 19804 + }, + { + "epoch": 0.8963566417741571, + "grad_norm": 0.5987061216283815, + "learning_rate": 2.7930526041128727e-07, + "loss": 0.2981, + "step": 19805 + }, + { + "epoch": 0.8964019008825526, + "grad_norm": 0.6934345951631308, + "learning_rate": 2.790637787368294e-07, + "loss": 0.3146, + "step": 19806 + }, + { + "epoch": 0.8964471599909482, + "grad_norm": 0.6084117691504284, + "learning_rate": 2.788223985007904e-07, + "loss": 0.2934, + "step": 19807 + }, + { + "epoch": 0.8964924190993437, + "grad_norm": 0.5735161793850959, + "learning_rate": 2.7858111970835823e-07, + "loss": 0.3225, + "step": 19808 + }, + { + "epoch": 0.8965376782077393, + "grad_norm": 0.6054105921436966, + "learning_rate": 2.783399423647171e-07, + "loss": 0.3502, + "step": 19809 + }, + { + "epoch": 0.8965829373161349, + "grad_norm": 0.7195183412606594, + "learning_rate": 2.7809886647505e-07, + "loss": 0.3202, + "step": 19810 + }, + { + "epoch": 0.8966281964245304, + "grad_norm": 0.5922610762383163, + "learning_rate": 2.778578920445352e-07, + "loss": 0.2581, + "step": 19811 + }, + { + "epoch": 0.896673455532926, + "grad_norm": 0.8144053010320692, + "learning_rate": 2.7761701907835114e-07, + "loss": 0.3336, + "step": 19812 + }, + { + "epoch": 0.8967187146413216, + "grad_norm": 0.5985999716901659, + "learning_rate": 2.7737624758167436e-07, + "loss": 0.3069, + "step": 19813 + }, + { + "epoch": 0.8967639737497172, + "grad_norm": 0.6518395917982522, + "learning_rate": 2.771355775596779e-07, + "loss": 0.2978, + "step": 19814 + }, + { + "epoch": 0.8968092328581126, + "grad_norm": 0.6309417840869341, + "learning_rate": 2.768950090175315e-07, + "loss": 0.3159, + "step": 19815 + }, + { + "epoch": 0.8968544919665082, + "grad_norm": 0.28977489773166926, + "learning_rate": 2.7665454196040665e-07, + "loss": 0.4729, + "step": 19816 + }, + { + "epoch": 0.8968997510749038, + "grad_norm": 0.2650338371075625, + "learning_rate": 2.76414176393468e-07, + "loss": 0.4647, + "step": 19817 + }, + { + "epoch": 0.8969450101832994, + "grad_norm": 0.5966383668175729, + "learning_rate": 2.7617391232188207e-07, + "loss": 0.2825, + "step": 19818 + }, + { + "epoch": 0.896990269291695, + "grad_norm": 0.7291846411758335, + "learning_rate": 2.7593374975081075e-07, + "loss": 0.2948, + "step": 19819 + }, + { + "epoch": 0.8970355284000905, + "grad_norm": 0.29386027248409563, + "learning_rate": 2.7569368868541333e-07, + "loss": 0.4718, + "step": 19820 + }, + { + "epoch": 0.8970807875084861, + "grad_norm": 0.5366175112637515, + "learning_rate": 2.75453729130849e-07, + "loss": 0.2841, + "step": 19821 + }, + { + "epoch": 0.8971260466168817, + "grad_norm": 0.5773925409926636, + "learning_rate": 2.752138710922747e-07, + "loss": 0.2648, + "step": 19822 + }, + { + "epoch": 0.8971713057252773, + "grad_norm": 0.6528258350735204, + "learning_rate": 2.74974114574843e-07, + "loss": 0.2812, + "step": 19823 + }, + { + "epoch": 0.8972165648336727, + "grad_norm": 0.5804721946337439, + "learning_rate": 2.747344595837048e-07, + "loss": 0.2388, + "step": 19824 + }, + { + "epoch": 0.8972618239420683, + "grad_norm": 0.5968044745916934, + "learning_rate": 2.74494906124011e-07, + "loss": 0.3212, + "step": 19825 + }, + { + "epoch": 0.8973070830504639, + "grad_norm": 0.6206777396994065, + "learning_rate": 2.7425545420090906e-07, + "loss": 0.2932, + "step": 19826 + }, + { + "epoch": 0.8973523421588595, + "grad_norm": 0.5662554206299493, + "learning_rate": 2.7401610381954325e-07, + "loss": 0.2769, + "step": 19827 + }, + { + "epoch": 0.897397601267255, + "grad_norm": 0.6144031913927165, + "learning_rate": 2.7377685498505557e-07, + "loss": 0.3565, + "step": 19828 + }, + { + "epoch": 0.8974428603756506, + "grad_norm": 0.2641253441420351, + "learning_rate": 2.7353770770258915e-07, + "loss": 0.4778, + "step": 19829 + }, + { + "epoch": 0.8974881194840462, + "grad_norm": 0.5820930843681503, + "learning_rate": 2.7329866197727983e-07, + "loss": 0.3096, + "step": 19830 + }, + { + "epoch": 0.8975333785924418, + "grad_norm": 0.274671774086108, + "learning_rate": 2.7305971781426634e-07, + "loss": 0.4733, + "step": 19831 + }, + { + "epoch": 0.8975786377008373, + "grad_norm": 0.586907469338291, + "learning_rate": 2.728208752186817e-07, + "loss": 0.2492, + "step": 19832 + }, + { + "epoch": 0.8976238968092328, + "grad_norm": 0.2651039284392432, + "learning_rate": 2.725821341956575e-07, + "loss": 0.4904, + "step": 19833 + }, + { + "epoch": 0.8976691559176284, + "grad_norm": 0.558027137087256, + "learning_rate": 2.7234349475032395e-07, + "loss": 0.2695, + "step": 19834 + }, + { + "epoch": 0.897714415026024, + "grad_norm": 0.2854868461334528, + "learning_rate": 2.7210495688781037e-07, + "loss": 0.4558, + "step": 19835 + }, + { + "epoch": 0.8977596741344196, + "grad_norm": 0.2792178849027721, + "learning_rate": 2.7186652061323924e-07, + "loss": 0.4828, + "step": 19836 + }, + { + "epoch": 0.8978049332428151, + "grad_norm": 0.6265739449917149, + "learning_rate": 2.716281859317349e-07, + "loss": 0.2715, + "step": 19837 + }, + { + "epoch": 0.8978501923512107, + "grad_norm": 0.6166940853718327, + "learning_rate": 2.713899528484193e-07, + "loss": 0.3151, + "step": 19838 + }, + { + "epoch": 0.8978954514596063, + "grad_norm": 0.6109203277092121, + "learning_rate": 2.7115182136841166e-07, + "loss": 0.3109, + "step": 19839 + }, + { + "epoch": 0.8979407105680018, + "grad_norm": 0.5714322175597286, + "learning_rate": 2.7091379149682683e-07, + "loss": 0.3114, + "step": 19840 + }, + { + "epoch": 0.8979859696763973, + "grad_norm": 0.2799105941790582, + "learning_rate": 2.7067586323878014e-07, + "loss": 0.4597, + "step": 19841 + }, + { + "epoch": 0.8980312287847929, + "grad_norm": 0.6264111829057746, + "learning_rate": 2.704380365993847e-07, + "loss": 0.2858, + "step": 19842 + }, + { + "epoch": 0.8980764878931885, + "grad_norm": 0.6384133066448844, + "learning_rate": 2.7020031158375037e-07, + "loss": 0.3181, + "step": 19843 + }, + { + "epoch": 0.8981217470015841, + "grad_norm": 0.6823010909322235, + "learning_rate": 2.699626881969841e-07, + "loss": 0.3096, + "step": 19844 + }, + { + "epoch": 0.8981670061099797, + "grad_norm": 0.6476981453613727, + "learning_rate": 2.6972516644419356e-07, + "loss": 0.2549, + "step": 19845 + }, + { + "epoch": 0.8982122652183752, + "grad_norm": 0.6620153827854617, + "learning_rate": 2.6948774633048016e-07, + "loss": 0.3313, + "step": 19846 + }, + { + "epoch": 0.8982575243267708, + "grad_norm": 0.748192870105131, + "learning_rate": 2.69250427860947e-07, + "loss": 0.295, + "step": 19847 + }, + { + "epoch": 0.8983027834351663, + "grad_norm": 0.5661829688022068, + "learning_rate": 2.690132110406929e-07, + "loss": 0.2879, + "step": 19848 + }, + { + "epoch": 0.8983480425435619, + "grad_norm": 0.602374880920724, + "learning_rate": 2.687760958748137e-07, + "loss": 0.3005, + "step": 19849 + }, + { + "epoch": 0.8983933016519574, + "grad_norm": 0.7271757380328313, + "learning_rate": 2.6853908236840586e-07, + "loss": 0.3028, + "step": 19850 + }, + { + "epoch": 0.898438560760353, + "grad_norm": 0.6395083076273245, + "learning_rate": 2.68302170526562e-07, + "loss": 0.2946, + "step": 19851 + }, + { + "epoch": 0.8984838198687486, + "grad_norm": 0.5677435398394265, + "learning_rate": 2.680653603543726e-07, + "loss": 0.268, + "step": 19852 + }, + { + "epoch": 0.8985290789771442, + "grad_norm": 0.6325083354864175, + "learning_rate": 2.678286518569245e-07, + "loss": 0.3044, + "step": 19853 + }, + { + "epoch": 0.8985743380855398, + "grad_norm": 0.5950952403576475, + "learning_rate": 2.675920450393049e-07, + "loss": 0.3017, + "step": 19854 + }, + { + "epoch": 0.8986195971939352, + "grad_norm": 0.6290601694265202, + "learning_rate": 2.673555399065986e-07, + "loss": 0.2637, + "step": 19855 + }, + { + "epoch": 0.8986648563023308, + "grad_norm": 0.6594481525414645, + "learning_rate": 2.6711913646388645e-07, + "loss": 0.2924, + "step": 19856 + }, + { + "epoch": 0.8987101154107264, + "grad_norm": 0.6759843523758717, + "learning_rate": 2.6688283471624775e-07, + "loss": 0.3086, + "step": 19857 + }, + { + "epoch": 0.898755374519122, + "grad_norm": 0.6150657269648987, + "learning_rate": 2.666466346687607e-07, + "loss": 0.2711, + "step": 19858 + }, + { + "epoch": 0.8988006336275175, + "grad_norm": 0.683435761376238, + "learning_rate": 2.6641053632649907e-07, + "loss": 0.2714, + "step": 19859 + }, + { + "epoch": 0.8988458927359131, + "grad_norm": 0.6200624707524008, + "learning_rate": 2.661745396945381e-07, + "loss": 0.3114, + "step": 19860 + }, + { + "epoch": 0.8988911518443087, + "grad_norm": 0.5928210284723426, + "learning_rate": 2.6593864477794716e-07, + "loss": 0.2759, + "step": 19861 + }, + { + "epoch": 0.8989364109527043, + "grad_norm": 0.6245841851464607, + "learning_rate": 2.65702851581795e-07, + "loss": 0.2771, + "step": 19862 + }, + { + "epoch": 0.8989816700610997, + "grad_norm": 0.6491764300200128, + "learning_rate": 2.654671601111475e-07, + "loss": 0.3323, + "step": 19863 + }, + { + "epoch": 0.8990269291694953, + "grad_norm": 0.5722118069854693, + "learning_rate": 2.652315703710712e-07, + "loss": 0.3018, + "step": 19864 + }, + { + "epoch": 0.8990721882778909, + "grad_norm": 0.5964576792561528, + "learning_rate": 2.649960823666259e-07, + "loss": 0.2881, + "step": 19865 + }, + { + "epoch": 0.8991174473862865, + "grad_norm": 0.5918655001352278, + "learning_rate": 2.64760696102872e-07, + "loss": 0.316, + "step": 19866 + }, + { + "epoch": 0.8991627064946821, + "grad_norm": 0.5898085749442864, + "learning_rate": 2.6452541158486776e-07, + "loss": 0.3112, + "step": 19867 + }, + { + "epoch": 0.8992079656030776, + "grad_norm": 0.6609907398862443, + "learning_rate": 2.642902288176696e-07, + "loss": 0.254, + "step": 19868 + }, + { + "epoch": 0.8992532247114732, + "grad_norm": 0.6117096439894077, + "learning_rate": 2.640551478063286e-07, + "loss": 0.2878, + "step": 19869 + }, + { + "epoch": 0.8992984838198688, + "grad_norm": 0.6931583049749798, + "learning_rate": 2.638201685558972e-07, + "loss": 0.2783, + "step": 19870 + }, + { + "epoch": 0.8993437429282644, + "grad_norm": 0.2860786343684956, + "learning_rate": 2.6358529107142485e-07, + "loss": 0.4694, + "step": 19871 + }, + { + "epoch": 0.8993890020366598, + "grad_norm": 0.2578641755720357, + "learning_rate": 2.63350515357958e-07, + "loss": 0.4533, + "step": 19872 + }, + { + "epoch": 0.8994342611450554, + "grad_norm": 0.5993616311568444, + "learning_rate": 2.6311584142054036e-07, + "loss": 0.3173, + "step": 19873 + }, + { + "epoch": 0.899479520253451, + "grad_norm": 0.6154401783980245, + "learning_rate": 2.6288126926421576e-07, + "loss": 0.2741, + "step": 19874 + }, + { + "epoch": 0.8995247793618466, + "grad_norm": 0.6898472911804348, + "learning_rate": 2.626467988940229e-07, + "loss": 0.2823, + "step": 19875 + }, + { + "epoch": 0.8995700384702421, + "grad_norm": 0.6204420429013519, + "learning_rate": 2.624124303150011e-07, + "loss": 0.2818, + "step": 19876 + }, + { + "epoch": 0.8996152975786377, + "grad_norm": 0.6136446340161927, + "learning_rate": 2.621781635321863e-07, + "loss": 0.2975, + "step": 19877 + }, + { + "epoch": 0.8996605566870333, + "grad_norm": 0.6612494059694828, + "learning_rate": 2.6194399855061056e-07, + "loss": 0.2961, + "step": 19878 + }, + { + "epoch": 0.8997058157954289, + "grad_norm": 0.6256958878919391, + "learning_rate": 2.6170993537530665e-07, + "loss": 0.2769, + "step": 19879 + }, + { + "epoch": 0.8997510749038244, + "grad_norm": 0.6156127701428498, + "learning_rate": 2.6147597401130433e-07, + "loss": 0.2967, + "step": 19880 + }, + { + "epoch": 0.8997963340122199, + "grad_norm": 0.5999686058867921, + "learning_rate": 2.612421144636301e-07, + "loss": 0.2683, + "step": 19881 + }, + { + "epoch": 0.8998415931206155, + "grad_norm": 0.6345276232974958, + "learning_rate": 2.610083567373078e-07, + "loss": 0.2729, + "step": 19882 + }, + { + "epoch": 0.8998868522290111, + "grad_norm": 0.6370424196514467, + "learning_rate": 2.6077470083736176e-07, + "loss": 0.2669, + "step": 19883 + }, + { + "epoch": 0.8999321113374067, + "grad_norm": 0.25297348773573924, + "learning_rate": 2.6054114676881237e-07, + "loss": 0.4509, + "step": 19884 + }, + { + "epoch": 0.8999773704458022, + "grad_norm": 0.6323420343265559, + "learning_rate": 2.6030769453667783e-07, + "loss": 0.2861, + "step": 19885 + }, + { + "epoch": 0.9000226295541978, + "grad_norm": 0.6696787676826822, + "learning_rate": 2.60074344145973e-07, + "loss": 0.3002, + "step": 19886 + }, + { + "epoch": 0.9000678886625934, + "grad_norm": 0.6649350361638969, + "learning_rate": 2.5984109560171387e-07, + "loss": 0.3064, + "step": 19887 + }, + { + "epoch": 0.9001131477709889, + "grad_norm": 0.2588500924076885, + "learning_rate": 2.5960794890891093e-07, + "loss": 0.4569, + "step": 19888 + }, + { + "epoch": 0.9001584068793844, + "grad_norm": 0.6195830757026074, + "learning_rate": 2.593749040725746e-07, + "loss": 0.3292, + "step": 19889 + }, + { + "epoch": 0.90020366598778, + "grad_norm": 0.5795950758623272, + "learning_rate": 2.5914196109771197e-07, + "loss": 0.2796, + "step": 19890 + }, + { + "epoch": 0.9002489250961756, + "grad_norm": 0.2654230841344855, + "learning_rate": 2.5890911998932735e-07, + "loss": 0.4723, + "step": 19891 + }, + { + "epoch": 0.9002941842045712, + "grad_norm": 0.5663503785166497, + "learning_rate": 2.5867638075242454e-07, + "loss": 0.2968, + "step": 19892 + }, + { + "epoch": 0.9003394433129668, + "grad_norm": 0.6279957619169866, + "learning_rate": 2.5844374339200505e-07, + "loss": 0.3256, + "step": 19893 + }, + { + "epoch": 0.9003847024213623, + "grad_norm": 0.5559620151418101, + "learning_rate": 2.5821120791306665e-07, + "loss": 0.2999, + "step": 19894 + }, + { + "epoch": 0.9004299615297578, + "grad_norm": 0.7242133638005693, + "learning_rate": 2.579787743206058e-07, + "loss": 0.2995, + "step": 19895 + }, + { + "epoch": 0.9004752206381534, + "grad_norm": 0.6079929095062804, + "learning_rate": 2.5774644261961746e-07, + "loss": 0.2885, + "step": 19896 + }, + { + "epoch": 0.900520479746549, + "grad_norm": 0.25827572484656686, + "learning_rate": 2.5751421281509426e-07, + "loss": 0.4523, + "step": 19897 + }, + { + "epoch": 0.9005657388549445, + "grad_norm": 0.5761463549202924, + "learning_rate": 2.572820849120239e-07, + "loss": 0.2691, + "step": 19898 + }, + { + "epoch": 0.9006109979633401, + "grad_norm": 0.6240479186911713, + "learning_rate": 2.5705005891539516e-07, + "loss": 0.2806, + "step": 19899 + }, + { + "epoch": 0.9006562570717357, + "grad_norm": 0.6456703003520794, + "learning_rate": 2.5681813483019515e-07, + "loss": 0.2929, + "step": 19900 + }, + { + "epoch": 0.9007015161801313, + "grad_norm": 0.624521209277269, + "learning_rate": 2.565863126614049e-07, + "loss": 0.2685, + "step": 19901 + }, + { + "epoch": 0.9007467752885269, + "grad_norm": 0.6036744838044934, + "learning_rate": 2.563545924140065e-07, + "loss": 0.3145, + "step": 19902 + }, + { + "epoch": 0.9007920343969223, + "grad_norm": 0.6375461561357829, + "learning_rate": 2.5612297409297937e-07, + "loss": 0.2643, + "step": 19903 + }, + { + "epoch": 0.9008372935053179, + "grad_norm": 0.6038009033673213, + "learning_rate": 2.558914577032995e-07, + "loss": 0.3051, + "step": 19904 + }, + { + "epoch": 0.9008825526137135, + "grad_norm": 0.5593653532095186, + "learning_rate": 2.5566004324994174e-07, + "loss": 0.2905, + "step": 19905 + }, + { + "epoch": 0.9009278117221091, + "grad_norm": 0.5949014043160534, + "learning_rate": 2.554287307378794e-07, + "loss": 0.2783, + "step": 19906 + }, + { + "epoch": 0.9009730708305046, + "grad_norm": 0.5745613856583127, + "learning_rate": 2.551975201720802e-07, + "loss": 0.2942, + "step": 19907 + }, + { + "epoch": 0.9010183299389002, + "grad_norm": 0.6167366159363535, + "learning_rate": 2.5496641155751456e-07, + "loss": 0.2976, + "step": 19908 + }, + { + "epoch": 0.9010635890472958, + "grad_norm": 0.6439202150247513, + "learning_rate": 2.5473540489914794e-07, + "loss": 0.295, + "step": 19909 + }, + { + "epoch": 0.9011088481556914, + "grad_norm": 0.6657246782177099, + "learning_rate": 2.5450450020194306e-07, + "loss": 0.3307, + "step": 19910 + }, + { + "epoch": 0.9011541072640868, + "grad_norm": 0.6133423808228226, + "learning_rate": 2.542736974708615e-07, + "loss": 0.2642, + "step": 19911 + }, + { + "epoch": 0.9011993663724824, + "grad_norm": 0.686243999667497, + "learning_rate": 2.5404299671086264e-07, + "loss": 0.3237, + "step": 19912 + }, + { + "epoch": 0.901244625480878, + "grad_norm": 0.6013825159558385, + "learning_rate": 2.538123979269047e-07, + "loss": 0.2688, + "step": 19913 + }, + { + "epoch": 0.9012898845892736, + "grad_norm": 0.5847798766604101, + "learning_rate": 2.5358190112394097e-07, + "loss": 0.2772, + "step": 19914 + }, + { + "epoch": 0.9013351436976692, + "grad_norm": 0.6089930596789058, + "learning_rate": 2.5335150630692476e-07, + "loss": 0.2988, + "step": 19915 + }, + { + "epoch": 0.9013804028060647, + "grad_norm": 0.5788620985116087, + "learning_rate": 2.5312121348080643e-07, + "loss": 0.2939, + "step": 19916 + }, + { + "epoch": 0.9014256619144603, + "grad_norm": 0.6364074845395432, + "learning_rate": 2.528910226505338e-07, + "loss": 0.2769, + "step": 19917 + }, + { + "epoch": 0.9014709210228559, + "grad_norm": 0.2567038459793554, + "learning_rate": 2.5266093382105395e-07, + "loss": 0.4693, + "step": 19918 + }, + { + "epoch": 0.9015161801312515, + "grad_norm": 0.7494711890048615, + "learning_rate": 2.5243094699731076e-07, + "loss": 0.2732, + "step": 19919 + }, + { + "epoch": 0.9015614392396469, + "grad_norm": 0.5794564708348203, + "learning_rate": 2.522010621842447e-07, + "loss": 0.2735, + "step": 19920 + }, + { + "epoch": 0.9016066983480425, + "grad_norm": 0.6298020059586386, + "learning_rate": 2.5197127938679567e-07, + "loss": 0.3306, + "step": 19921 + }, + { + "epoch": 0.9016519574564381, + "grad_norm": 0.6395789983688206, + "learning_rate": 2.5174159860990256e-07, + "loss": 0.3239, + "step": 19922 + }, + { + "epoch": 0.9016972165648337, + "grad_norm": 0.6277059924919379, + "learning_rate": 2.5151201985849915e-07, + "loss": 0.2741, + "step": 19923 + }, + { + "epoch": 0.9017424756732292, + "grad_norm": 0.7003457691753658, + "learning_rate": 2.512825431375177e-07, + "loss": 0.2628, + "step": 19924 + }, + { + "epoch": 0.9017877347816248, + "grad_norm": 0.5869953938455421, + "learning_rate": 2.510531684518902e-07, + "loss": 0.2927, + "step": 19925 + }, + { + "epoch": 0.9018329938900204, + "grad_norm": 0.6669116927869857, + "learning_rate": 2.508238958065451e-07, + "loss": 0.2906, + "step": 19926 + }, + { + "epoch": 0.901878252998416, + "grad_norm": 0.6093736144281799, + "learning_rate": 2.505947252064089e-07, + "loss": 0.3189, + "step": 19927 + }, + { + "epoch": 0.9019235121068115, + "grad_norm": 0.25538566156125225, + "learning_rate": 2.5036565665640443e-07, + "loss": 0.465, + "step": 19928 + }, + { + "epoch": 0.901968771215207, + "grad_norm": 0.2582399895913366, + "learning_rate": 2.501366901614555e-07, + "loss": 0.4727, + "step": 19929 + }, + { + "epoch": 0.9020140303236026, + "grad_norm": 0.28190071012238044, + "learning_rate": 2.4990782572647977e-07, + "loss": 0.4667, + "step": 19930 + }, + { + "epoch": 0.9020592894319982, + "grad_norm": 1.11822190822124, + "learning_rate": 2.4967906335639725e-07, + "loss": 0.3162, + "step": 19931 + }, + { + "epoch": 0.9021045485403938, + "grad_norm": 0.6665944056354663, + "learning_rate": 2.494504030561223e-07, + "loss": 0.3137, + "step": 19932 + }, + { + "epoch": 0.9021498076487893, + "grad_norm": 0.5415064243105101, + "learning_rate": 2.4922184483056665e-07, + "loss": 0.2803, + "step": 19933 + }, + { + "epoch": 0.9021950667571849, + "grad_norm": 0.7574661488081733, + "learning_rate": 2.4899338868464404e-07, + "loss": 0.3501, + "step": 19934 + }, + { + "epoch": 0.9022403258655805, + "grad_norm": 0.6385815253014705, + "learning_rate": 2.487650346232606e-07, + "loss": 0.3302, + "step": 19935 + }, + { + "epoch": 0.902285584973976, + "grad_norm": 0.5893896562648631, + "learning_rate": 2.485367826513258e-07, + "loss": 0.2581, + "step": 19936 + }, + { + "epoch": 0.9023308440823716, + "grad_norm": 0.5638720757545203, + "learning_rate": 2.483086327737411e-07, + "loss": 0.3226, + "step": 19937 + }, + { + "epoch": 0.9023761031907671, + "grad_norm": 0.5858279262262214, + "learning_rate": 2.48080584995411e-07, + "loss": 0.2622, + "step": 19938 + }, + { + "epoch": 0.9024213622991627, + "grad_norm": 0.6459975315468293, + "learning_rate": 2.4785263932123495e-07, + "loss": 0.3356, + "step": 19939 + }, + { + "epoch": 0.9024666214075583, + "grad_norm": 0.5968005891204486, + "learning_rate": 2.4762479575610954e-07, + "loss": 0.288, + "step": 19940 + }, + { + "epoch": 0.9025118805159539, + "grad_norm": 0.6842359527286419, + "learning_rate": 2.47397054304932e-07, + "loss": 0.2635, + "step": 19941 + }, + { + "epoch": 0.9025571396243494, + "grad_norm": 0.6616650535720541, + "learning_rate": 2.4716941497259563e-07, + "loss": 0.293, + "step": 19942 + }, + { + "epoch": 0.902602398732745, + "grad_norm": 0.6865500924952155, + "learning_rate": 2.4694187776399094e-07, + "loss": 0.2953, + "step": 19943 + }, + { + "epoch": 0.9026476578411405, + "grad_norm": 0.24231874254102226, + "learning_rate": 2.4671444268400736e-07, + "loss": 0.465, + "step": 19944 + }, + { + "epoch": 0.9026929169495361, + "grad_norm": 0.5940515750046157, + "learning_rate": 2.464871097375321e-07, + "loss": 0.2587, + "step": 19945 + }, + { + "epoch": 0.9027381760579316, + "grad_norm": 0.6427760010776417, + "learning_rate": 2.46259878929449e-07, + "loss": 0.3054, + "step": 19946 + }, + { + "epoch": 0.9027834351663272, + "grad_norm": 0.6293624346771375, + "learning_rate": 2.460327502646415e-07, + "loss": 0.2891, + "step": 19947 + }, + { + "epoch": 0.9028286942747228, + "grad_norm": 0.6467156463352277, + "learning_rate": 2.4580572374798997e-07, + "loss": 0.2955, + "step": 19948 + }, + { + "epoch": 0.9028739533831184, + "grad_norm": 0.2585270588356642, + "learning_rate": 2.455787993843711e-07, + "loss": 0.4673, + "step": 19949 + }, + { + "epoch": 0.902919212491514, + "grad_norm": 0.5902196062976237, + "learning_rate": 2.453519771786617e-07, + "loss": 0.2961, + "step": 19950 + }, + { + "epoch": 0.9029644715999094, + "grad_norm": 0.2511985102721687, + "learning_rate": 2.451252571357365e-07, + "loss": 0.4569, + "step": 19951 + }, + { + "epoch": 0.903009730708305, + "grad_norm": 0.6232119279137442, + "learning_rate": 2.4489863926046577e-07, + "loss": 0.2901, + "step": 19952 + }, + { + "epoch": 0.9030549898167006, + "grad_norm": 1.440194667225179, + "learning_rate": 2.446721235577182e-07, + "loss": 0.2961, + "step": 19953 + }, + { + "epoch": 0.9031002489250962, + "grad_norm": 0.615639529056928, + "learning_rate": 2.4444571003236216e-07, + "loss": 0.3075, + "step": 19954 + }, + { + "epoch": 0.9031455080334917, + "grad_norm": 0.593651072497511, + "learning_rate": 2.4421939868926325e-07, + "loss": 0.2727, + "step": 19955 + }, + { + "epoch": 0.9031907671418873, + "grad_norm": 0.6528840635575128, + "learning_rate": 2.4399318953328255e-07, + "loss": 0.2899, + "step": 19956 + }, + { + "epoch": 0.9032360262502829, + "grad_norm": 0.6198831311701227, + "learning_rate": 2.437670825692812e-07, + "loss": 0.2697, + "step": 19957 + }, + { + "epoch": 0.9032812853586785, + "grad_norm": 0.6317778826705256, + "learning_rate": 2.4354107780211745e-07, + "loss": 0.3065, + "step": 19958 + }, + { + "epoch": 0.9033265444670739, + "grad_norm": 0.2854533424264698, + "learning_rate": 2.433151752366475e-07, + "loss": 0.4926, + "step": 19959 + }, + { + "epoch": 0.9033718035754695, + "grad_norm": 0.735995959989909, + "learning_rate": 2.4308937487772576e-07, + "loss": 0.2814, + "step": 19960 + }, + { + "epoch": 0.9034170626838651, + "grad_norm": 0.6361012382328806, + "learning_rate": 2.4286367673020396e-07, + "loss": 0.2507, + "step": 19961 + }, + { + "epoch": 0.9034623217922607, + "grad_norm": 0.6104055856919773, + "learning_rate": 2.4263808079893035e-07, + "loss": 0.2508, + "step": 19962 + }, + { + "epoch": 0.9035075809006563, + "grad_norm": 0.6575551325731162, + "learning_rate": 2.4241258708875336e-07, + "loss": 0.2905, + "step": 19963 + }, + { + "epoch": 0.9035528400090518, + "grad_norm": 0.5908448190267949, + "learning_rate": 2.4218719560451907e-07, + "loss": 0.3214, + "step": 19964 + }, + { + "epoch": 0.9035980991174474, + "grad_norm": 0.5794135894633232, + "learning_rate": 2.4196190635106917e-07, + "loss": 0.2715, + "step": 19965 + }, + { + "epoch": 0.903643358225843, + "grad_norm": 0.703211043777139, + "learning_rate": 2.4173671933324373e-07, + "loss": 0.2657, + "step": 19966 + }, + { + "epoch": 0.9036886173342386, + "grad_norm": 0.6009922340041405, + "learning_rate": 2.415116345558832e-07, + "loss": 0.2983, + "step": 19967 + }, + { + "epoch": 0.903733876442634, + "grad_norm": 0.2530597694122791, + "learning_rate": 2.4128665202382327e-07, + "loss": 0.464, + "step": 19968 + }, + { + "epoch": 0.9037791355510296, + "grad_norm": 0.6404494730165863, + "learning_rate": 2.4106177174189724e-07, + "loss": 0.3327, + "step": 19969 + }, + { + "epoch": 0.9038243946594252, + "grad_norm": 0.6221944918677672, + "learning_rate": 2.408369937149374e-07, + "loss": 0.2858, + "step": 19970 + }, + { + "epoch": 0.9038696537678208, + "grad_norm": 0.7356818975973848, + "learning_rate": 2.4061231794777483e-07, + "loss": 0.3332, + "step": 19971 + }, + { + "epoch": 0.9039149128762164, + "grad_norm": 0.2557326175187924, + "learning_rate": 2.4038774444523627e-07, + "loss": 0.4571, + "step": 19972 + }, + { + "epoch": 0.9039601719846119, + "grad_norm": 0.6719787234019912, + "learning_rate": 2.4016327321214614e-07, + "loss": 0.3138, + "step": 19973 + }, + { + "epoch": 0.9040054310930075, + "grad_norm": 0.5657826799708439, + "learning_rate": 2.3993890425332957e-07, + "loss": 0.3172, + "step": 19974 + }, + { + "epoch": 0.904050690201403, + "grad_norm": 0.6515336390605506, + "learning_rate": 2.3971463757360537e-07, + "loss": 0.2936, + "step": 19975 + }, + { + "epoch": 0.9040959493097986, + "grad_norm": 0.630534874722075, + "learning_rate": 2.394904731777947e-07, + "loss": 0.3024, + "step": 19976 + }, + { + "epoch": 0.9041412084181941, + "grad_norm": 0.5926030588348037, + "learning_rate": 2.392664110707116e-07, + "loss": 0.2706, + "step": 19977 + }, + { + "epoch": 0.9041864675265897, + "grad_norm": 0.6027188615891163, + "learning_rate": 2.390424512571732e-07, + "loss": 0.3356, + "step": 19978 + }, + { + "epoch": 0.9042317266349853, + "grad_norm": 0.8008295264317392, + "learning_rate": 2.388185937419896e-07, + "loss": 0.2613, + "step": 19979 + }, + { + "epoch": 0.9042769857433809, + "grad_norm": 0.5926005579561792, + "learning_rate": 2.385948385299719e-07, + "loss": 0.2557, + "step": 19980 + }, + { + "epoch": 0.9043222448517764, + "grad_norm": 0.6086515646248625, + "learning_rate": 2.3837118562592799e-07, + "loss": 0.2976, + "step": 19981 + }, + { + "epoch": 0.904367503960172, + "grad_norm": 0.7498121647059611, + "learning_rate": 2.3814763503466175e-07, + "loss": 0.2795, + "step": 19982 + }, + { + "epoch": 0.9044127630685675, + "grad_norm": 0.6778933569637745, + "learning_rate": 2.3792418676097884e-07, + "loss": 0.3199, + "step": 19983 + }, + { + "epoch": 0.9044580221769631, + "grad_norm": 0.6100026715479245, + "learning_rate": 2.3770084080967926e-07, + "loss": 0.274, + "step": 19984 + }, + { + "epoch": 0.9045032812853587, + "grad_norm": 0.26096261402513665, + "learning_rate": 2.3747759718556308e-07, + "loss": 0.4535, + "step": 19985 + }, + { + "epoch": 0.9045485403937542, + "grad_norm": 0.6154908976706324, + "learning_rate": 2.3725445589342534e-07, + "loss": 0.3258, + "step": 19986 + }, + { + "epoch": 0.9045937995021498, + "grad_norm": 0.6117443541239407, + "learning_rate": 2.3703141693806276e-07, + "loss": 0.2933, + "step": 19987 + }, + { + "epoch": 0.9046390586105454, + "grad_norm": 0.7156865784325133, + "learning_rate": 2.368084803242654e-07, + "loss": 0.3113, + "step": 19988 + }, + { + "epoch": 0.904684317718941, + "grad_norm": 0.4855626199280217, + "learning_rate": 2.3658564605682555e-07, + "loss": 0.4333, + "step": 19989 + }, + { + "epoch": 0.9047295768273365, + "grad_norm": 0.6961645975967724, + "learning_rate": 2.3636291414053104e-07, + "loss": 0.2703, + "step": 19990 + }, + { + "epoch": 0.904774835935732, + "grad_norm": 0.2763988693404931, + "learning_rate": 2.3614028458016581e-07, + "loss": 0.4774, + "step": 19991 + }, + { + "epoch": 0.9048200950441276, + "grad_norm": 0.6112826169360427, + "learning_rate": 2.3591775738051491e-07, + "loss": 0.2983, + "step": 19992 + }, + { + "epoch": 0.9048653541525232, + "grad_norm": 0.6104745671130538, + "learning_rate": 2.356953325463607e-07, + "loss": 0.2626, + "step": 19993 + }, + { + "epoch": 0.9049106132609187, + "grad_norm": 0.7505273977344898, + "learning_rate": 2.354730100824809e-07, + "loss": 0.2903, + "step": 19994 + }, + { + "epoch": 0.9049558723693143, + "grad_norm": 0.26741496817940696, + "learning_rate": 2.3525078999365236e-07, + "loss": 0.4528, + "step": 19995 + }, + { + "epoch": 0.9050011314777099, + "grad_norm": 0.28491452323675803, + "learning_rate": 2.3502867228465064e-07, + "loss": 0.4586, + "step": 19996 + }, + { + "epoch": 0.9050463905861055, + "grad_norm": 0.6851341238362276, + "learning_rate": 2.3480665696024974e-07, + "loss": 0.3186, + "step": 19997 + }, + { + "epoch": 0.9050916496945011, + "grad_norm": 0.5780368717859541, + "learning_rate": 2.3458474402521747e-07, + "loss": 0.3063, + "step": 19998 + }, + { + "epoch": 0.9051369088028965, + "grad_norm": 0.5757468936890212, + "learning_rate": 2.343629334843228e-07, + "loss": 0.2774, + "step": 19999 + }, + { + "epoch": 0.9051821679112921, + "grad_norm": 0.6206281078686119, + "learning_rate": 2.3414122534233252e-07, + "loss": 0.3095, + "step": 20000 + }, + { + "epoch": 0.9052274270196877, + "grad_norm": 0.6001697472110555, + "learning_rate": 2.3391961960401055e-07, + "loss": 0.2821, + "step": 20001 + }, + { + "epoch": 0.9052726861280833, + "grad_norm": 0.6455405388295047, + "learning_rate": 2.3369811627411755e-07, + "loss": 0.2946, + "step": 20002 + }, + { + "epoch": 0.9053179452364788, + "grad_norm": 0.5857060542849744, + "learning_rate": 2.3347671535741356e-07, + "loss": 0.2875, + "step": 20003 + }, + { + "epoch": 0.9053632043448744, + "grad_norm": 0.2701905458880112, + "learning_rate": 2.3325541685865538e-07, + "loss": 0.4448, + "step": 20004 + }, + { + "epoch": 0.90540846345327, + "grad_norm": 0.7333708834362386, + "learning_rate": 2.3303422078259918e-07, + "loss": 0.2884, + "step": 20005 + }, + { + "epoch": 0.9054537225616656, + "grad_norm": 0.6560985783085491, + "learning_rate": 2.3281312713399618e-07, + "loss": 0.2768, + "step": 20006 + }, + { + "epoch": 0.9054989816700612, + "grad_norm": 0.24549467484021845, + "learning_rate": 2.325921359175981e-07, + "loss": 0.468, + "step": 20007 + }, + { + "epoch": 0.9055442407784566, + "grad_norm": 0.35618191466750615, + "learning_rate": 2.3237124713815285e-07, + "loss": 0.4485, + "step": 20008 + }, + { + "epoch": 0.9055894998868522, + "grad_norm": 1.165867138539832, + "learning_rate": 2.3215046080040714e-07, + "loss": 0.28, + "step": 20009 + }, + { + "epoch": 0.9056347589952478, + "grad_norm": 0.5878889928573706, + "learning_rate": 2.31929776909105e-07, + "loss": 0.265, + "step": 20010 + }, + { + "epoch": 0.9056800181036434, + "grad_norm": 0.6493016711033609, + "learning_rate": 2.3170919546898707e-07, + "loss": 0.2676, + "step": 20011 + }, + { + "epoch": 0.9057252772120389, + "grad_norm": 0.6119097365861756, + "learning_rate": 2.3148871648479398e-07, + "loss": 0.3042, + "step": 20012 + }, + { + "epoch": 0.9057705363204345, + "grad_norm": 0.25708074637369255, + "learning_rate": 2.3126833996126364e-07, + "loss": 0.4837, + "step": 20013 + }, + { + "epoch": 0.9058157954288301, + "grad_norm": 0.5724588237659539, + "learning_rate": 2.3104806590313055e-07, + "loss": 0.2769, + "step": 20014 + }, + { + "epoch": 0.9058610545372257, + "grad_norm": 0.2631363793982775, + "learning_rate": 2.308278943151271e-07, + "loss": 0.4353, + "step": 20015 + }, + { + "epoch": 0.9059063136456211, + "grad_norm": 0.6307812688200601, + "learning_rate": 2.3060782520198554e-07, + "loss": 0.2949, + "step": 20016 + }, + { + "epoch": 0.9059515727540167, + "grad_norm": 0.8016890777574586, + "learning_rate": 2.3038785856843328e-07, + "loss": 0.3419, + "step": 20017 + }, + { + "epoch": 0.9059968318624123, + "grad_norm": 0.2989237934568109, + "learning_rate": 2.3016799441919756e-07, + "loss": 0.4753, + "step": 20018 + }, + { + "epoch": 0.9060420909708079, + "grad_norm": 0.6004554050861189, + "learning_rate": 2.2994823275900246e-07, + "loss": 0.2807, + "step": 20019 + }, + { + "epoch": 0.9060873500792035, + "grad_norm": 0.6495330424530107, + "learning_rate": 2.2972857359256862e-07, + "loss": 0.3212, + "step": 20020 + }, + { + "epoch": 0.906132609187599, + "grad_norm": 0.5534044314636215, + "learning_rate": 2.2950901692461725e-07, + "loss": 0.2954, + "step": 20021 + }, + { + "epoch": 0.9061778682959946, + "grad_norm": 0.5939677994361185, + "learning_rate": 2.292895627598668e-07, + "loss": 0.3441, + "step": 20022 + }, + { + "epoch": 0.9062231274043901, + "grad_norm": 0.28416916028422884, + "learning_rate": 2.2907021110303073e-07, + "loss": 0.4868, + "step": 20023 + }, + { + "epoch": 0.9062683865127857, + "grad_norm": 0.6777718658467312, + "learning_rate": 2.2885096195882306e-07, + "loss": 0.2729, + "step": 20024 + }, + { + "epoch": 0.9063136456211812, + "grad_norm": 0.6626368509700329, + "learning_rate": 2.2863181533195443e-07, + "loss": 0.3077, + "step": 20025 + }, + { + "epoch": 0.9063589047295768, + "grad_norm": 0.5582505524440544, + "learning_rate": 2.2841277122713502e-07, + "loss": 0.2678, + "step": 20026 + }, + { + "epoch": 0.9064041638379724, + "grad_norm": 0.6291111402466241, + "learning_rate": 2.2819382964906933e-07, + "loss": 0.2972, + "step": 20027 + }, + { + "epoch": 0.906449422946368, + "grad_norm": 0.6332071836580533, + "learning_rate": 2.2797499060246253e-07, + "loss": 0.339, + "step": 20028 + }, + { + "epoch": 0.9064946820547635, + "grad_norm": 0.592577141665457, + "learning_rate": 2.2775625409201807e-07, + "loss": 0.2715, + "step": 20029 + }, + { + "epoch": 0.9065399411631591, + "grad_norm": 0.5395007641614055, + "learning_rate": 2.275376201224344e-07, + "loss": 0.29, + "step": 20030 + }, + { + "epoch": 0.9065852002715546, + "grad_norm": 0.5726874305021403, + "learning_rate": 2.2731908869840945e-07, + "loss": 0.2871, + "step": 20031 + }, + { + "epoch": 0.9066304593799502, + "grad_norm": 0.5915757607695858, + "learning_rate": 2.2710065982464001e-07, + "loss": 0.2853, + "step": 20032 + }, + { + "epoch": 0.9066757184883458, + "grad_norm": 0.6336437929439116, + "learning_rate": 2.2688233350581734e-07, + "loss": 0.3084, + "step": 20033 + }, + { + "epoch": 0.9067209775967413, + "grad_norm": 0.5800979237410989, + "learning_rate": 2.266641097466349e-07, + "loss": 0.2711, + "step": 20034 + }, + { + "epoch": 0.9067662367051369, + "grad_norm": 0.6896619283347677, + "learning_rate": 2.2644598855177947e-07, + "loss": 0.2564, + "step": 20035 + }, + { + "epoch": 0.9068114958135325, + "grad_norm": 0.6594548147719869, + "learning_rate": 2.262279699259401e-07, + "loss": 0.3011, + "step": 20036 + }, + { + "epoch": 0.9068567549219281, + "grad_norm": 0.6633480941917634, + "learning_rate": 2.2601005387379914e-07, + "loss": 0.3024, + "step": 20037 + }, + { + "epoch": 0.9069020140303236, + "grad_norm": 0.6002884781872193, + "learning_rate": 2.2579224040004068e-07, + "loss": 0.2946, + "step": 20038 + }, + { + "epoch": 0.9069472731387191, + "grad_norm": 0.26351204782415494, + "learning_rate": 2.2557452950934367e-07, + "loss": 0.4717, + "step": 20039 + }, + { + "epoch": 0.9069925322471147, + "grad_norm": 0.6369228382695861, + "learning_rate": 2.2535692120638665e-07, + "loss": 0.3007, + "step": 20040 + }, + { + "epoch": 0.9070377913555103, + "grad_norm": 0.27484098339073637, + "learning_rate": 2.2513941549584473e-07, + "loss": 0.4742, + "step": 20041 + }, + { + "epoch": 0.9070830504639059, + "grad_norm": 0.5711325651584115, + "learning_rate": 2.2492201238239252e-07, + "loss": 0.2869, + "step": 20042 + }, + { + "epoch": 0.9071283095723014, + "grad_norm": 0.5805424742255618, + "learning_rate": 2.2470471187070075e-07, + "loss": 0.2799, + "step": 20043 + }, + { + "epoch": 0.907173568680697, + "grad_norm": 0.5694199887498899, + "learning_rate": 2.2448751396543788e-07, + "loss": 0.2586, + "step": 20044 + }, + { + "epoch": 0.9072188277890926, + "grad_norm": 0.5960203023496685, + "learning_rate": 2.242704186712724e-07, + "loss": 0.267, + "step": 20045 + }, + { + "epoch": 0.9072640868974882, + "grad_norm": 0.6596114041630285, + "learning_rate": 2.2405342599286672e-07, + "loss": 0.2806, + "step": 20046 + }, + { + "epoch": 0.9073093460058836, + "grad_norm": 0.27839170887263026, + "learning_rate": 2.2383653593488596e-07, + "loss": 0.479, + "step": 20047 + }, + { + "epoch": 0.9073546051142792, + "grad_norm": 0.6263478186913513, + "learning_rate": 2.2361974850198865e-07, + "loss": 0.2961, + "step": 20048 + }, + { + "epoch": 0.9073998642226748, + "grad_norm": 0.6984960638546034, + "learning_rate": 2.234030636988338e-07, + "loss": 0.2843, + "step": 20049 + }, + { + "epoch": 0.9074451233310704, + "grad_norm": 0.6520341024668019, + "learning_rate": 2.2318648153007605e-07, + "loss": 0.3258, + "step": 20050 + }, + { + "epoch": 0.9074903824394659, + "grad_norm": 0.6284751430203892, + "learning_rate": 2.229700020003711e-07, + "loss": 0.2776, + "step": 20051 + }, + { + "epoch": 0.9075356415478615, + "grad_norm": 0.5311253177293819, + "learning_rate": 2.2275362511436914e-07, + "loss": 0.2703, + "step": 20052 + }, + { + "epoch": 0.9075809006562571, + "grad_norm": 0.616319374180847, + "learning_rate": 2.2253735087671867e-07, + "loss": 0.2971, + "step": 20053 + }, + { + "epoch": 0.9076261597646527, + "grad_norm": 0.26938372789301107, + "learning_rate": 2.2232117929206764e-07, + "loss": 0.4823, + "step": 20054 + }, + { + "epoch": 0.9076714188730483, + "grad_norm": 0.6187431043930877, + "learning_rate": 2.2210511036506232e-07, + "loss": 0.3024, + "step": 20055 + }, + { + "epoch": 0.9077166779814437, + "grad_norm": 0.6003295596575504, + "learning_rate": 2.218891441003429e-07, + "loss": 0.3081, + "step": 20056 + }, + { + "epoch": 0.9077619370898393, + "grad_norm": 0.5985733209787534, + "learning_rate": 2.2167328050255122e-07, + "loss": 0.3059, + "step": 20057 + }, + { + "epoch": 0.9078071961982349, + "grad_norm": 0.6669948310068601, + "learning_rate": 2.2145751957632521e-07, + "loss": 0.3032, + "step": 20058 + }, + { + "epoch": 0.9078524553066305, + "grad_norm": 0.25838696674097744, + "learning_rate": 2.2124186132630122e-07, + "loss": 0.4851, + "step": 20059 + }, + { + "epoch": 0.907897714415026, + "grad_norm": 0.5950360146604151, + "learning_rate": 2.2102630575711215e-07, + "loss": 0.2995, + "step": 20060 + }, + { + "epoch": 0.9079429735234216, + "grad_norm": 0.6025156236670351, + "learning_rate": 2.20810852873391e-07, + "loss": 0.2496, + "step": 20061 + }, + { + "epoch": 0.9079882326318172, + "grad_norm": 0.26372452664482027, + "learning_rate": 2.2059550267976572e-07, + "loss": 0.4513, + "step": 20062 + }, + { + "epoch": 0.9080334917402128, + "grad_norm": 0.5920258230976766, + "learning_rate": 2.2038025518086482e-07, + "loss": 0.2796, + "step": 20063 + }, + { + "epoch": 0.9080787508486082, + "grad_norm": 0.615303934129582, + "learning_rate": 2.2016511038131238e-07, + "loss": 0.3333, + "step": 20064 + }, + { + "epoch": 0.9081240099570038, + "grad_norm": 0.6655320396579752, + "learning_rate": 2.1995006828573194e-07, + "loss": 0.2623, + "step": 20065 + }, + { + "epoch": 0.9081692690653994, + "grad_norm": 0.2551258815085798, + "learning_rate": 2.1973512889874316e-07, + "loss": 0.4526, + "step": 20066 + }, + { + "epoch": 0.908214528173795, + "grad_norm": 0.786400107775112, + "learning_rate": 2.1952029222496562e-07, + "loss": 0.3186, + "step": 20067 + }, + { + "epoch": 0.9082597872821906, + "grad_norm": 0.5946120226496916, + "learning_rate": 2.1930555826901513e-07, + "loss": 0.2552, + "step": 20068 + }, + { + "epoch": 0.9083050463905861, + "grad_norm": 0.6447590483502432, + "learning_rate": 2.1909092703550406e-07, + "loss": 0.3191, + "step": 20069 + }, + { + "epoch": 0.9083503054989817, + "grad_norm": 0.25579389228282384, + "learning_rate": 2.1887639852904653e-07, + "loss": 0.4662, + "step": 20070 + }, + { + "epoch": 0.9083955646073772, + "grad_norm": 0.6112753434468116, + "learning_rate": 2.1866197275425106e-07, + "loss": 0.2557, + "step": 20071 + }, + { + "epoch": 0.9084408237157728, + "grad_norm": 0.674747862300124, + "learning_rate": 2.1844764971572507e-07, + "loss": 0.3018, + "step": 20072 + }, + { + "epoch": 0.9084860828241683, + "grad_norm": 0.6150259270468362, + "learning_rate": 2.1823342941807324e-07, + "loss": 0.2986, + "step": 20073 + }, + { + "epoch": 0.9085313419325639, + "grad_norm": 0.6258412983607646, + "learning_rate": 2.1801931186589963e-07, + "loss": 0.3338, + "step": 20074 + }, + { + "epoch": 0.9085766010409595, + "grad_norm": 0.6052915432140621, + "learning_rate": 2.1780529706380337e-07, + "loss": 0.2614, + "step": 20075 + }, + { + "epoch": 0.9086218601493551, + "grad_norm": 0.6119499527385366, + "learning_rate": 2.1759138501638466e-07, + "loss": 0.2688, + "step": 20076 + }, + { + "epoch": 0.9086671192577507, + "grad_norm": 0.5978537982068777, + "learning_rate": 2.1737757572823813e-07, + "loss": 0.3054, + "step": 20077 + }, + { + "epoch": 0.9087123783661462, + "grad_norm": 0.5522834789899027, + "learning_rate": 2.1716386920396016e-07, + "loss": 0.283, + "step": 20078 + }, + { + "epoch": 0.9087576374745417, + "grad_norm": 0.5917621379636202, + "learning_rate": 2.169502654481398e-07, + "loss": 0.302, + "step": 20079 + }, + { + "epoch": 0.9088028965829373, + "grad_norm": 0.27429627501030185, + "learning_rate": 2.1673676446536952e-07, + "loss": 0.4688, + "step": 20080 + }, + { + "epoch": 0.9088481556913329, + "grad_norm": 0.6334485282257166, + "learning_rate": 2.1652336626023506e-07, + "loss": 0.3555, + "step": 20081 + }, + { + "epoch": 0.9088934147997284, + "grad_norm": 0.6271218443000107, + "learning_rate": 2.1631007083732169e-07, + "loss": 0.2904, + "step": 20082 + }, + { + "epoch": 0.908938673908124, + "grad_norm": 0.5528300230753579, + "learning_rate": 2.1609687820121295e-07, + "loss": 0.2757, + "step": 20083 + }, + { + "epoch": 0.9089839330165196, + "grad_norm": 0.5809626317071711, + "learning_rate": 2.158837883564907e-07, + "loss": 0.2674, + "step": 20084 + }, + { + "epoch": 0.9090291921249152, + "grad_norm": 0.5976098597063977, + "learning_rate": 2.1567080130773188e-07, + "loss": 0.2712, + "step": 20085 + }, + { + "epoch": 0.9090744512333107, + "grad_norm": 0.5778406755545751, + "learning_rate": 2.154579170595128e-07, + "loss": 0.2683, + "step": 20086 + }, + { + "epoch": 0.9091197103417062, + "grad_norm": 0.6191740308187985, + "learning_rate": 2.152451356164098e-07, + "loss": 0.2899, + "step": 20087 + }, + { + "epoch": 0.9091649694501018, + "grad_norm": 0.6120327299909363, + "learning_rate": 2.1503245698299312e-07, + "loss": 0.266, + "step": 20088 + }, + { + "epoch": 0.9092102285584974, + "grad_norm": 0.688995530064606, + "learning_rate": 2.1481988116383246e-07, + "loss": 0.2843, + "step": 20089 + }, + { + "epoch": 0.909255487666893, + "grad_norm": 0.5403359844763675, + "learning_rate": 2.146074081634969e-07, + "loss": 0.282, + "step": 20090 + }, + { + "epoch": 0.9093007467752885, + "grad_norm": 1.0998611164448522, + "learning_rate": 2.1439503798655003e-07, + "loss": 0.2768, + "step": 20091 + }, + { + "epoch": 0.9093460058836841, + "grad_norm": 0.5736670905305177, + "learning_rate": 2.1418277063755656e-07, + "loss": 0.2578, + "step": 20092 + }, + { + "epoch": 0.9093912649920797, + "grad_norm": 0.6303521103735157, + "learning_rate": 2.139706061210761e-07, + "loss": 0.272, + "step": 20093 + }, + { + "epoch": 0.9094365241004753, + "grad_norm": 0.5708800033364091, + "learning_rate": 2.13758544441669e-07, + "loss": 0.2708, + "step": 20094 + }, + { + "epoch": 0.9094817832088707, + "grad_norm": 0.6194234916597959, + "learning_rate": 2.1354658560389042e-07, + "loss": 0.2416, + "step": 20095 + }, + { + "epoch": 0.9095270423172663, + "grad_norm": 0.5330122281441105, + "learning_rate": 2.1333472961229563e-07, + "loss": 0.2535, + "step": 20096 + }, + { + "epoch": 0.9095723014256619, + "grad_norm": 0.2637332376720348, + "learning_rate": 2.1312297647143653e-07, + "loss": 0.4591, + "step": 20097 + }, + { + "epoch": 0.9096175605340575, + "grad_norm": 0.625594032869571, + "learning_rate": 2.129113261858623e-07, + "loss": 0.2574, + "step": 20098 + }, + { + "epoch": 0.909662819642453, + "grad_norm": 0.5724869592135517, + "learning_rate": 2.1269977876012094e-07, + "loss": 0.2765, + "step": 20099 + }, + { + "epoch": 0.9097080787508486, + "grad_norm": 0.5638714706740632, + "learning_rate": 2.1248833419875936e-07, + "loss": 0.2833, + "step": 20100 + }, + { + "epoch": 0.9097533378592442, + "grad_norm": 0.6085258497235556, + "learning_rate": 2.122769925063195e-07, + "loss": 0.3008, + "step": 20101 + }, + { + "epoch": 0.9097985969676398, + "grad_norm": 0.5601390088119588, + "learning_rate": 2.1206575368734216e-07, + "loss": 0.2708, + "step": 20102 + }, + { + "epoch": 0.9098438560760354, + "grad_norm": 0.6428953467497713, + "learning_rate": 2.1185461774636705e-07, + "loss": 0.3048, + "step": 20103 + }, + { + "epoch": 0.9098891151844308, + "grad_norm": 0.2727656342620612, + "learning_rate": 2.1164358468793055e-07, + "loss": 0.4753, + "step": 20104 + }, + { + "epoch": 0.9099343742928264, + "grad_norm": 0.6124735588652476, + "learning_rate": 2.1143265451656736e-07, + "loss": 0.3, + "step": 20105 + }, + { + "epoch": 0.909979633401222, + "grad_norm": 0.647641665379744, + "learning_rate": 2.1122182723680883e-07, + "loss": 0.2805, + "step": 20106 + }, + { + "epoch": 0.9100248925096176, + "grad_norm": 0.5980609552698322, + "learning_rate": 2.1101110285318639e-07, + "loss": 0.3268, + "step": 20107 + }, + { + "epoch": 0.9100701516180131, + "grad_norm": 0.6609734334611863, + "learning_rate": 2.108004813702258e-07, + "loss": 0.2965, + "step": 20108 + }, + { + "epoch": 0.9101154107264087, + "grad_norm": 0.5644962278927006, + "learning_rate": 2.1058996279245515e-07, + "loss": 0.2486, + "step": 20109 + }, + { + "epoch": 0.9101606698348043, + "grad_norm": 0.2563080729027022, + "learning_rate": 2.103795471243969e-07, + "loss": 0.4734, + "step": 20110 + }, + { + "epoch": 0.9102059289431998, + "grad_norm": 0.6255772447099949, + "learning_rate": 2.101692343705708e-07, + "loss": 0.2781, + "step": 20111 + }, + { + "epoch": 0.9102511880515954, + "grad_norm": 0.6850588419635695, + "learning_rate": 2.0995902453549766e-07, + "loss": 0.2852, + "step": 20112 + }, + { + "epoch": 0.9102964471599909, + "grad_norm": 0.9849127423646337, + "learning_rate": 2.0974891762369386e-07, + "loss": 0.3114, + "step": 20113 + }, + { + "epoch": 0.9103417062683865, + "grad_norm": 0.6059976563244579, + "learning_rate": 2.095389136396736e-07, + "loss": 0.3264, + "step": 20114 + }, + { + "epoch": 0.9103869653767821, + "grad_norm": 0.5625520832592269, + "learning_rate": 2.093290125879488e-07, + "loss": 0.2636, + "step": 20115 + }, + { + "epoch": 0.9104322244851777, + "grad_norm": 0.6029349016535929, + "learning_rate": 2.0911921447303086e-07, + "loss": 0.3275, + "step": 20116 + }, + { + "epoch": 0.9104774835935732, + "grad_norm": 0.6418283491471828, + "learning_rate": 2.0890951929942671e-07, + "loss": 0.2979, + "step": 20117 + }, + { + "epoch": 0.9105227427019688, + "grad_norm": 0.5858464661181148, + "learning_rate": 2.0869992707164166e-07, + "loss": 0.3058, + "step": 20118 + }, + { + "epoch": 0.9105680018103643, + "grad_norm": 0.5446063183284051, + "learning_rate": 2.0849043779417987e-07, + "loss": 0.253, + "step": 20119 + }, + { + "epoch": 0.9106132609187599, + "grad_norm": 0.6664356187095676, + "learning_rate": 2.0828105147154275e-07, + "loss": 0.2791, + "step": 20120 + }, + { + "epoch": 0.9106585200271554, + "grad_norm": 0.6085205672276123, + "learning_rate": 2.0807176810823005e-07, + "loss": 0.2497, + "step": 20121 + }, + { + "epoch": 0.910703779135551, + "grad_norm": 0.2654758316672969, + "learning_rate": 2.0786258770873647e-07, + "loss": 0.4548, + "step": 20122 + }, + { + "epoch": 0.9107490382439466, + "grad_norm": 0.6455911301462177, + "learning_rate": 2.0765351027755897e-07, + "loss": 0.2668, + "step": 20123 + }, + { + "epoch": 0.9107942973523422, + "grad_norm": 0.6219158113543389, + "learning_rate": 2.0744453581918843e-07, + "loss": 0.3166, + "step": 20124 + }, + { + "epoch": 0.9108395564607378, + "grad_norm": 0.7528225257884705, + "learning_rate": 2.0723566433811572e-07, + "loss": 0.2693, + "step": 20125 + }, + { + "epoch": 0.9108848155691333, + "grad_norm": 0.6306450779415568, + "learning_rate": 2.0702689583882883e-07, + "loss": 0.3284, + "step": 20126 + }, + { + "epoch": 0.9109300746775288, + "grad_norm": 0.5932455349455487, + "learning_rate": 2.0681823032581316e-07, + "loss": 0.3162, + "step": 20127 + }, + { + "epoch": 0.9109753337859244, + "grad_norm": 0.6433329829199189, + "learning_rate": 2.066096678035523e-07, + "loss": 0.3029, + "step": 20128 + }, + { + "epoch": 0.91102059289432, + "grad_norm": 0.6278373181227935, + "learning_rate": 2.0640120827652876e-07, + "loss": 0.3142, + "step": 20129 + }, + { + "epoch": 0.9110658520027155, + "grad_norm": 0.5950706534219158, + "learning_rate": 2.0619285174922067e-07, + "loss": 0.2493, + "step": 20130 + }, + { + "epoch": 0.9111111111111111, + "grad_norm": 0.6003694710464432, + "learning_rate": 2.0598459822610494e-07, + "loss": 0.2582, + "step": 20131 + }, + { + "epoch": 0.9111563702195067, + "grad_norm": 0.6110120573969848, + "learning_rate": 2.057764477116564e-07, + "loss": 0.3004, + "step": 20132 + }, + { + "epoch": 0.9112016293279023, + "grad_norm": 0.6446796955432598, + "learning_rate": 2.0556840021034753e-07, + "loss": 0.2936, + "step": 20133 + }, + { + "epoch": 0.9112468884362978, + "grad_norm": 0.6100270371936517, + "learning_rate": 2.053604557266492e-07, + "loss": 0.3027, + "step": 20134 + }, + { + "epoch": 0.9112921475446933, + "grad_norm": 0.6406473157774112, + "learning_rate": 2.0515261426502897e-07, + "loss": 0.2601, + "step": 20135 + }, + { + "epoch": 0.9113374066530889, + "grad_norm": 0.4227689291899131, + "learning_rate": 2.049448758299527e-07, + "loss": 0.4509, + "step": 20136 + }, + { + "epoch": 0.9113826657614845, + "grad_norm": 0.581610239211927, + "learning_rate": 2.0473724042588405e-07, + "loss": 0.254, + "step": 20137 + }, + { + "epoch": 0.9114279248698801, + "grad_norm": 0.6115934720483611, + "learning_rate": 2.0452970805728502e-07, + "loss": 0.2672, + "step": 20138 + }, + { + "epoch": 0.9114731839782756, + "grad_norm": 0.6571594489557938, + "learning_rate": 2.0432227872861422e-07, + "loss": 0.3153, + "step": 20139 + }, + { + "epoch": 0.9115184430866712, + "grad_norm": 0.5950439047019607, + "learning_rate": 2.041149524443281e-07, + "loss": 0.2624, + "step": 20140 + }, + { + "epoch": 0.9115637021950668, + "grad_norm": 1.4537733982204537, + "learning_rate": 2.0390772920888258e-07, + "loss": 0.3267, + "step": 20141 + }, + { + "epoch": 0.9116089613034624, + "grad_norm": 0.7336297650765733, + "learning_rate": 2.0370060902673074e-07, + "loss": 0.2958, + "step": 20142 + }, + { + "epoch": 0.9116542204118578, + "grad_norm": 0.6293224370546653, + "learning_rate": 2.0349359190232176e-07, + "loss": 0.3183, + "step": 20143 + }, + { + "epoch": 0.9116994795202534, + "grad_norm": 0.6208929072969963, + "learning_rate": 2.0328667784010324e-07, + "loss": 0.2704, + "step": 20144 + }, + { + "epoch": 0.911744738628649, + "grad_norm": 0.6356988807823192, + "learning_rate": 2.030798668445233e-07, + "loss": 0.2778, + "step": 20145 + }, + { + "epoch": 0.9117899977370446, + "grad_norm": 0.632110705142832, + "learning_rate": 2.0287315892002335e-07, + "loss": 0.2838, + "step": 20146 + }, + { + "epoch": 0.9118352568454401, + "grad_norm": 0.6456962160612171, + "learning_rate": 2.0266655407104652e-07, + "loss": 0.3172, + "step": 20147 + }, + { + "epoch": 0.9118805159538357, + "grad_norm": 0.6471312561425153, + "learning_rate": 2.024600523020309e-07, + "loss": 0.2721, + "step": 20148 + }, + { + "epoch": 0.9119257750622313, + "grad_norm": 0.6297922945477495, + "learning_rate": 2.0225365361741522e-07, + "loss": 0.3046, + "step": 20149 + }, + { + "epoch": 0.9119710341706269, + "grad_norm": 0.6130515227525498, + "learning_rate": 2.0204735802163254e-07, + "loss": 0.2654, + "step": 20150 + }, + { + "epoch": 0.9120162932790224, + "grad_norm": 0.5487820024424245, + "learning_rate": 2.0184116551911714e-07, + "loss": 0.282, + "step": 20151 + }, + { + "epoch": 0.9120615523874179, + "grad_norm": 0.599754947660118, + "learning_rate": 2.0163507611429823e-07, + "loss": 0.3083, + "step": 20152 + }, + { + "epoch": 0.9121068114958135, + "grad_norm": 0.6336611363559453, + "learning_rate": 2.0142908981160447e-07, + "loss": 0.3301, + "step": 20153 + }, + { + "epoch": 0.9121520706042091, + "grad_norm": 0.6186858420131777, + "learning_rate": 2.012232066154618e-07, + "loss": 0.3038, + "step": 20154 + }, + { + "epoch": 0.9121973297126047, + "grad_norm": 0.6368384635497683, + "learning_rate": 2.01017426530295e-07, + "loss": 0.2921, + "step": 20155 + }, + { + "epoch": 0.9122425888210002, + "grad_norm": 0.7317290019886159, + "learning_rate": 2.0081174956052329e-07, + "loss": 0.3185, + "step": 20156 + }, + { + "epoch": 0.9122878479293958, + "grad_norm": 0.26234659961321066, + "learning_rate": 2.0060617571056817e-07, + "loss": 0.4373, + "step": 20157 + }, + { + "epoch": 0.9123331070377914, + "grad_norm": 0.7022199549928246, + "learning_rate": 2.004007049848461e-07, + "loss": 0.3736, + "step": 20158 + }, + { + "epoch": 0.912378366146187, + "grad_norm": 0.24999613707843227, + "learning_rate": 2.001953373877724e-07, + "loss": 0.4316, + "step": 20159 + }, + { + "epoch": 0.9124236252545825, + "grad_norm": 1.258637722295874, + "learning_rate": 1.999900729237586e-07, + "loss": 0.2693, + "step": 20160 + }, + { + "epoch": 0.912468884362978, + "grad_norm": 0.5582673961022042, + "learning_rate": 1.9978491159721724e-07, + "loss": 0.287, + "step": 20161 + }, + { + "epoch": 0.9125141434713736, + "grad_norm": 0.5574906899089822, + "learning_rate": 1.9957985341255427e-07, + "loss": 0.2933, + "step": 20162 + }, + { + "epoch": 0.9125594025797692, + "grad_norm": 0.6276203254950994, + "learning_rate": 1.9937489837417723e-07, + "loss": 0.2954, + "step": 20163 + }, + { + "epoch": 0.9126046616881648, + "grad_norm": 0.25034283765087206, + "learning_rate": 1.991700464864893e-07, + "loss": 0.4516, + "step": 20164 + }, + { + "epoch": 0.9126499207965603, + "grad_norm": 0.6101342253794745, + "learning_rate": 1.9896529775389363e-07, + "loss": 0.3309, + "step": 20165 + }, + { + "epoch": 0.9126951799049559, + "grad_norm": 0.5735698779122026, + "learning_rate": 1.9876065218078722e-07, + "loss": 0.3228, + "step": 20166 + }, + { + "epoch": 0.9127404390133514, + "grad_norm": 0.9948355697347983, + "learning_rate": 1.9855610977156882e-07, + "loss": 0.298, + "step": 20167 + }, + { + "epoch": 0.912785698121747, + "grad_norm": 0.6163256323172275, + "learning_rate": 1.9835167053063376e-07, + "loss": 0.2965, + "step": 20168 + }, + { + "epoch": 0.9128309572301425, + "grad_norm": 0.29371825179107114, + "learning_rate": 1.9814733446237356e-07, + "loss": 0.4638, + "step": 20169 + }, + { + "epoch": 0.9128762163385381, + "grad_norm": 0.70807913278701, + "learning_rate": 1.9794310157117913e-07, + "loss": 0.3371, + "step": 20170 + }, + { + "epoch": 0.9129214754469337, + "grad_norm": 0.6712528365040975, + "learning_rate": 1.977389718614392e-07, + "loss": 0.3038, + "step": 20171 + }, + { + "epoch": 0.9129667345553293, + "grad_norm": 0.6521966163230466, + "learning_rate": 1.9753494533754026e-07, + "loss": 0.3169, + "step": 20172 + }, + { + "epoch": 0.9130119936637249, + "grad_norm": 0.260499083334948, + "learning_rate": 1.9733102200386544e-07, + "loss": 0.4618, + "step": 20173 + }, + { + "epoch": 0.9130572527721204, + "grad_norm": 0.2448547025157443, + "learning_rate": 1.9712720186479685e-07, + "loss": 0.4476, + "step": 20174 + }, + { + "epoch": 0.9131025118805159, + "grad_norm": 0.6350681819680561, + "learning_rate": 1.9692348492471313e-07, + "loss": 0.2788, + "step": 20175 + }, + { + "epoch": 0.9131477709889115, + "grad_norm": 0.6428461694612434, + "learning_rate": 1.9671987118799307e-07, + "loss": 0.3113, + "step": 20176 + }, + { + "epoch": 0.9131930300973071, + "grad_norm": 0.2592207557142955, + "learning_rate": 1.965163606590098e-07, + "loss": 0.4812, + "step": 20177 + }, + { + "epoch": 0.9132382892057026, + "grad_norm": 0.6141639243091234, + "learning_rate": 1.963129533421382e-07, + "loss": 0.3166, + "step": 20178 + }, + { + "epoch": 0.9132835483140982, + "grad_norm": 0.2552167500263459, + "learning_rate": 1.961096492417469e-07, + "loss": 0.4558, + "step": 20179 + }, + { + "epoch": 0.9133288074224938, + "grad_norm": 0.24585128292836528, + "learning_rate": 1.9590644836220584e-07, + "loss": 0.4447, + "step": 20180 + }, + { + "epoch": 0.9133740665308894, + "grad_norm": 0.6693985643334417, + "learning_rate": 1.9570335070788093e-07, + "loss": 0.337, + "step": 20181 + }, + { + "epoch": 0.9134193256392849, + "grad_norm": 0.566802657436706, + "learning_rate": 1.9550035628313478e-07, + "loss": 0.3002, + "step": 20182 + }, + { + "epoch": 0.9134645847476804, + "grad_norm": 0.24719745682119912, + "learning_rate": 1.9529746509233006e-07, + "loss": 0.4712, + "step": 20183 + }, + { + "epoch": 0.913509843856076, + "grad_norm": 0.5804125523677844, + "learning_rate": 1.950946771398282e-07, + "loss": 0.2691, + "step": 20184 + }, + { + "epoch": 0.9135551029644716, + "grad_norm": 0.5569565564874374, + "learning_rate": 1.9489199242998248e-07, + "loss": 0.2865, + "step": 20185 + }, + { + "epoch": 0.9136003620728672, + "grad_norm": 0.5907424873541962, + "learning_rate": 1.9468941096715043e-07, + "loss": 0.3374, + "step": 20186 + }, + { + "epoch": 0.9136456211812627, + "grad_norm": 1.7032824969349205, + "learning_rate": 1.9448693275568532e-07, + "loss": 0.2975, + "step": 20187 + }, + { + "epoch": 0.9136908802896583, + "grad_norm": 0.8489322917680525, + "learning_rate": 1.9428455779993694e-07, + "loss": 0.3074, + "step": 20188 + }, + { + "epoch": 0.9137361393980539, + "grad_norm": 0.6903877179464709, + "learning_rate": 1.9408228610425296e-07, + "loss": 0.2862, + "step": 20189 + }, + { + "epoch": 0.9137813985064495, + "grad_norm": 0.652244246341138, + "learning_rate": 1.9388011767298042e-07, + "loss": 0.292, + "step": 20190 + }, + { + "epoch": 0.9138266576148449, + "grad_norm": 0.597657405511287, + "learning_rate": 1.9367805251046422e-07, + "loss": 0.2418, + "step": 20191 + }, + { + "epoch": 0.9138719167232405, + "grad_norm": 0.6294900696669814, + "learning_rate": 1.9347609062104478e-07, + "loss": 0.3036, + "step": 20192 + }, + { + "epoch": 0.9139171758316361, + "grad_norm": 0.7343966627058698, + "learning_rate": 1.932742320090619e-07, + "loss": 0.2796, + "step": 20193 + }, + { + "epoch": 0.9139624349400317, + "grad_norm": 0.2675377517537364, + "learning_rate": 1.9307247667885331e-07, + "loss": 0.4451, + "step": 20194 + }, + { + "epoch": 0.9140076940484273, + "grad_norm": 0.6294374440983784, + "learning_rate": 1.9287082463475326e-07, + "loss": 0.2973, + "step": 20195 + }, + { + "epoch": 0.9140529531568228, + "grad_norm": 0.2735152794246382, + "learning_rate": 1.926692758810955e-07, + "loss": 0.4829, + "step": 20196 + }, + { + "epoch": 0.9140982122652184, + "grad_norm": 1.029793456721243, + "learning_rate": 1.9246783042221106e-07, + "loss": 0.2955, + "step": 20197 + }, + { + "epoch": 0.914143471373614, + "grad_norm": 0.5930683611980938, + "learning_rate": 1.9226648826242699e-07, + "loss": 0.2777, + "step": 20198 + }, + { + "epoch": 0.9141887304820095, + "grad_norm": 0.2621875181458503, + "learning_rate": 1.9206524940606984e-07, + "loss": 0.4717, + "step": 20199 + }, + { + "epoch": 0.914233989590405, + "grad_norm": 0.27766580160689125, + "learning_rate": 1.9186411385746507e-07, + "loss": 0.469, + "step": 20200 + }, + { + "epoch": 0.9142792486988006, + "grad_norm": 0.6663637678544063, + "learning_rate": 1.9166308162093306e-07, + "loss": 0.3206, + "step": 20201 + }, + { + "epoch": 0.9143245078071962, + "grad_norm": 0.5736598664573532, + "learning_rate": 1.914621527007937e-07, + "loss": 0.2368, + "step": 20202 + }, + { + "epoch": 0.9143697669155918, + "grad_norm": 0.5907927822480857, + "learning_rate": 1.912613271013647e-07, + "loss": 0.3147, + "step": 20203 + }, + { + "epoch": 0.9144150260239873, + "grad_norm": 0.6438013056377528, + "learning_rate": 1.9106060482695976e-07, + "loss": 0.296, + "step": 20204 + }, + { + "epoch": 0.9144602851323829, + "grad_norm": 0.6053099453746283, + "learning_rate": 1.9085998588189436e-07, + "loss": 0.2871, + "step": 20205 + }, + { + "epoch": 0.9145055442407785, + "grad_norm": 0.622434748725618, + "learning_rate": 1.906594702704767e-07, + "loss": 0.3364, + "step": 20206 + }, + { + "epoch": 0.914550803349174, + "grad_norm": 0.6125138418175076, + "learning_rate": 1.904590579970167e-07, + "loss": 0.2924, + "step": 20207 + }, + { + "epoch": 0.9145960624575696, + "grad_norm": 0.6837576033495794, + "learning_rate": 1.9025874906581975e-07, + "loss": 0.3192, + "step": 20208 + }, + { + "epoch": 0.9146413215659651, + "grad_norm": 0.5913973359919333, + "learning_rate": 1.900585434811908e-07, + "loss": 0.287, + "step": 20209 + }, + { + "epoch": 0.9146865806743607, + "grad_norm": 0.7310883652397158, + "learning_rate": 1.8985844124743136e-07, + "loss": 0.2943, + "step": 20210 + }, + { + "epoch": 0.9147318397827563, + "grad_norm": 0.620939923788956, + "learning_rate": 1.8965844236883968e-07, + "loss": 0.3022, + "step": 20211 + }, + { + "epoch": 0.9147770988911519, + "grad_norm": 0.5870684472252491, + "learning_rate": 1.894585468497151e-07, + "loss": 0.267, + "step": 20212 + }, + { + "epoch": 0.9148223579995474, + "grad_norm": 0.579162736305499, + "learning_rate": 1.892587546943525e-07, + "loss": 0.289, + "step": 20213 + }, + { + "epoch": 0.914867617107943, + "grad_norm": 0.61310203729002, + "learning_rate": 1.8905906590704293e-07, + "loss": 0.3629, + "step": 20214 + }, + { + "epoch": 0.9149128762163385, + "grad_norm": 0.6277058044001794, + "learning_rate": 1.8885948049207847e-07, + "loss": 0.2849, + "step": 20215 + }, + { + "epoch": 0.9149581353247341, + "grad_norm": 0.8194378360950997, + "learning_rate": 1.8865999845374794e-07, + "loss": 0.323, + "step": 20216 + }, + { + "epoch": 0.9150033944331296, + "grad_norm": 0.2803414338616721, + "learning_rate": 1.8846061979633734e-07, + "loss": 0.4627, + "step": 20217 + }, + { + "epoch": 0.9150486535415252, + "grad_norm": 0.6307224297322953, + "learning_rate": 1.8826134452412993e-07, + "loss": 0.3527, + "step": 20218 + }, + { + "epoch": 0.9150939126499208, + "grad_norm": 0.5779802801482953, + "learning_rate": 1.8806217264140836e-07, + "loss": 0.3024, + "step": 20219 + }, + { + "epoch": 0.9151391717583164, + "grad_norm": 0.5418445470374095, + "learning_rate": 1.87863104152452e-07, + "loss": 0.2627, + "step": 20220 + }, + { + "epoch": 0.915184430866712, + "grad_norm": 0.5964870628673384, + "learning_rate": 1.8766413906153856e-07, + "loss": 0.3207, + "step": 20221 + }, + { + "epoch": 0.9152296899751075, + "grad_norm": 0.6014968870783488, + "learning_rate": 1.874652773729424e-07, + "loss": 0.2746, + "step": 20222 + }, + { + "epoch": 0.915274949083503, + "grad_norm": 0.2805350431099635, + "learning_rate": 1.8726651909093675e-07, + "loss": 0.4705, + "step": 20223 + }, + { + "epoch": 0.9153202081918986, + "grad_norm": 0.6307605153708684, + "learning_rate": 1.870678642197926e-07, + "loss": 0.3217, + "step": 20224 + }, + { + "epoch": 0.9153654673002942, + "grad_norm": 0.5446748202273499, + "learning_rate": 1.868693127637783e-07, + "loss": 0.2803, + "step": 20225 + }, + { + "epoch": 0.9154107264086897, + "grad_norm": 0.5966799858674203, + "learning_rate": 1.8667086472716034e-07, + "loss": 0.3142, + "step": 20226 + }, + { + "epoch": 0.9154559855170853, + "grad_norm": 0.5956319525704, + "learning_rate": 1.8647252011420202e-07, + "loss": 0.2786, + "step": 20227 + }, + { + "epoch": 0.9155012446254809, + "grad_norm": 0.5717016703576566, + "learning_rate": 1.8627427892916493e-07, + "loss": 0.2844, + "step": 20228 + }, + { + "epoch": 0.9155465037338765, + "grad_norm": 0.670300461314522, + "learning_rate": 1.860761411763107e-07, + "loss": 0.2898, + "step": 20229 + }, + { + "epoch": 0.9155917628422721, + "grad_norm": 0.6314533304522649, + "learning_rate": 1.8587810685989528e-07, + "loss": 0.2834, + "step": 20230 + }, + { + "epoch": 0.9156370219506675, + "grad_norm": 0.6125017548125974, + "learning_rate": 1.856801759841731e-07, + "loss": 0.2744, + "step": 20231 + }, + { + "epoch": 0.9156822810590631, + "grad_norm": 0.5840046732133334, + "learning_rate": 1.8548234855339798e-07, + "loss": 0.2708, + "step": 20232 + }, + { + "epoch": 0.9157275401674587, + "grad_norm": 0.25643681040427324, + "learning_rate": 1.8528462457182095e-07, + "loss": 0.4598, + "step": 20233 + }, + { + "epoch": 0.9157727992758543, + "grad_norm": 0.6206408904388228, + "learning_rate": 1.8508700404368973e-07, + "loss": 0.2768, + "step": 20234 + }, + { + "epoch": 0.9158180583842498, + "grad_norm": 0.6378085195757364, + "learning_rate": 1.8488948697325094e-07, + "loss": 0.3203, + "step": 20235 + }, + { + "epoch": 0.9158633174926454, + "grad_norm": 0.5901667715023341, + "learning_rate": 1.8469207336474893e-07, + "loss": 0.2854, + "step": 20236 + }, + { + "epoch": 0.915908576601041, + "grad_norm": 0.6633365915851603, + "learning_rate": 1.8449476322242476e-07, + "loss": 0.3057, + "step": 20237 + }, + { + "epoch": 0.9159538357094366, + "grad_norm": 0.2721618197466199, + "learning_rate": 1.8429755655051896e-07, + "loss": 0.4787, + "step": 20238 + }, + { + "epoch": 0.915999094817832, + "grad_norm": 0.5846259701182656, + "learning_rate": 1.841004533532681e-07, + "loss": 0.2677, + "step": 20239 + }, + { + "epoch": 0.9160443539262276, + "grad_norm": 0.586431225367831, + "learning_rate": 1.8390345363490713e-07, + "loss": 0.2987, + "step": 20240 + }, + { + "epoch": 0.9160896130346232, + "grad_norm": 0.5920702643710308, + "learning_rate": 1.8370655739966937e-07, + "loss": 0.3144, + "step": 20241 + }, + { + "epoch": 0.9161348721430188, + "grad_norm": 0.5928483811429326, + "learning_rate": 1.8350976465178693e-07, + "loss": 0.2833, + "step": 20242 + }, + { + "epoch": 0.9161801312514144, + "grad_norm": 0.6568539880090193, + "learning_rate": 1.8331307539548593e-07, + "loss": 0.2931, + "step": 20243 + }, + { + "epoch": 0.9162253903598099, + "grad_norm": 0.6174884944900461, + "learning_rate": 1.831164896349935e-07, + "loss": 0.2672, + "step": 20244 + }, + { + "epoch": 0.9162706494682055, + "grad_norm": 0.6719921439413501, + "learning_rate": 1.829200073745341e-07, + "loss": 0.2718, + "step": 20245 + }, + { + "epoch": 0.9163159085766011, + "grad_norm": 0.2804039360211197, + "learning_rate": 1.8272362861832925e-07, + "loss": 0.4778, + "step": 20246 + }, + { + "epoch": 0.9163611676849966, + "grad_norm": 0.24948144674247946, + "learning_rate": 1.825273533705979e-07, + "loss": 0.4582, + "step": 20247 + }, + { + "epoch": 0.9164064267933921, + "grad_norm": 0.7611009625456361, + "learning_rate": 1.823311816355583e-07, + "loss": 0.2726, + "step": 20248 + }, + { + "epoch": 0.9164516859017877, + "grad_norm": 0.5962263712466588, + "learning_rate": 1.8213511341742596e-07, + "loss": 0.2935, + "step": 20249 + }, + { + "epoch": 0.9164969450101833, + "grad_norm": 0.2622130822164122, + "learning_rate": 1.819391487204125e-07, + "loss": 0.479, + "step": 20250 + }, + { + "epoch": 0.9165422041185789, + "grad_norm": 0.5557890604995867, + "learning_rate": 1.8174328754872906e-07, + "loss": 0.2505, + "step": 20251 + }, + { + "epoch": 0.9165874632269744, + "grad_norm": 0.5915074391315309, + "learning_rate": 1.815475299065844e-07, + "loss": 0.254, + "step": 20252 + }, + { + "epoch": 0.91663272233537, + "grad_norm": 0.2590629272394503, + "learning_rate": 1.8135187579818415e-07, + "loss": 0.4755, + "step": 20253 + }, + { + "epoch": 0.9166779814437656, + "grad_norm": 0.554163045102261, + "learning_rate": 1.8115632522773375e-07, + "loss": 0.2421, + "step": 20254 + }, + { + "epoch": 0.9167232405521611, + "grad_norm": 0.5629796866398182, + "learning_rate": 1.8096087819943376e-07, + "loss": 0.2769, + "step": 20255 + }, + { + "epoch": 0.9167684996605567, + "grad_norm": 0.6647392026898864, + "learning_rate": 1.8076553471748304e-07, + "loss": 0.2869, + "step": 20256 + }, + { + "epoch": 0.9168137587689522, + "grad_norm": 0.5923359751380025, + "learning_rate": 1.805702947860799e-07, + "loss": 0.2561, + "step": 20257 + }, + { + "epoch": 0.9168590178773478, + "grad_norm": 0.2732673759257525, + "learning_rate": 1.8037515840942043e-07, + "loss": 0.4584, + "step": 20258 + }, + { + "epoch": 0.9169042769857434, + "grad_norm": 0.5901968281956451, + "learning_rate": 1.8018012559169573e-07, + "loss": 0.2926, + "step": 20259 + }, + { + "epoch": 0.916949536094139, + "grad_norm": 0.694288842386728, + "learning_rate": 1.7998519633709688e-07, + "loss": 0.3034, + "step": 20260 + }, + { + "epoch": 0.9169947952025345, + "grad_norm": 0.581170916707714, + "learning_rate": 1.7979037064981275e-07, + "loss": 0.3262, + "step": 20261 + }, + { + "epoch": 0.91704005431093, + "grad_norm": 0.24995335589526108, + "learning_rate": 1.7959564853403e-07, + "loss": 0.4322, + "step": 20262 + }, + { + "epoch": 0.9170853134193256, + "grad_norm": 0.6144727338350052, + "learning_rate": 1.7940102999393194e-07, + "loss": 0.2856, + "step": 20263 + }, + { + "epoch": 0.9171305725277212, + "grad_norm": 0.25951800013520543, + "learning_rate": 1.7920651503370022e-07, + "loss": 0.4764, + "step": 20264 + }, + { + "epoch": 0.9171758316361168, + "grad_norm": 0.2825775327652799, + "learning_rate": 1.7901210365751488e-07, + "loss": 0.4756, + "step": 20265 + }, + { + "epoch": 0.9172210907445123, + "grad_norm": 0.5810418071924581, + "learning_rate": 1.7881779586955196e-07, + "loss": 0.2479, + "step": 20266 + }, + { + "epoch": 0.9172663498529079, + "grad_norm": 0.6689716280098308, + "learning_rate": 1.7862359167398814e-07, + "loss": 0.2806, + "step": 20267 + }, + { + "epoch": 0.9173116089613035, + "grad_norm": 0.6429654330586431, + "learning_rate": 1.784294910749962e-07, + "loss": 0.2845, + "step": 20268 + }, + { + "epoch": 0.9173568680696991, + "grad_norm": 0.6171635547788606, + "learning_rate": 1.78235494076745e-07, + "loss": 0.2493, + "step": 20269 + }, + { + "epoch": 0.9174021271780946, + "grad_norm": 0.6655164567554036, + "learning_rate": 1.7804160068340403e-07, + "loss": 0.3252, + "step": 20270 + }, + { + "epoch": 0.9174473862864901, + "grad_norm": 0.5822090955235841, + "learning_rate": 1.7784781089914106e-07, + "loss": 0.2945, + "step": 20271 + }, + { + "epoch": 0.9174926453948857, + "grad_norm": 0.24552934203250296, + "learning_rate": 1.776541247281177e-07, + "loss": 0.456, + "step": 20272 + }, + { + "epoch": 0.9175379045032813, + "grad_norm": 0.2575428016919689, + "learning_rate": 1.774605421744957e-07, + "loss": 0.4819, + "step": 20273 + }, + { + "epoch": 0.9175831636116768, + "grad_norm": 0.5968669926945249, + "learning_rate": 1.7726706324243614e-07, + "loss": 0.3044, + "step": 20274 + }, + { + "epoch": 0.9176284227200724, + "grad_norm": 0.679367312200526, + "learning_rate": 1.770736879360957e-07, + "loss": 0.3052, + "step": 20275 + }, + { + "epoch": 0.917673681828468, + "grad_norm": 0.2811066629242291, + "learning_rate": 1.7688041625962881e-07, + "loss": 0.4838, + "step": 20276 + }, + { + "epoch": 0.9177189409368636, + "grad_norm": 0.5880107542504787, + "learning_rate": 1.766872482171883e-07, + "loss": 0.2946, + "step": 20277 + }, + { + "epoch": 0.9177642000452592, + "grad_norm": 0.6804630857887852, + "learning_rate": 1.7649418381292584e-07, + "loss": 0.2766, + "step": 20278 + }, + { + "epoch": 0.9178094591536546, + "grad_norm": 0.7164473666448703, + "learning_rate": 1.7630122305098919e-07, + "loss": 0.2822, + "step": 20279 + }, + { + "epoch": 0.9178547182620502, + "grad_norm": 0.5634355600166236, + "learning_rate": 1.7610836593552394e-07, + "loss": 0.2706, + "step": 20280 + }, + { + "epoch": 0.9178999773704458, + "grad_norm": 0.6060948444999675, + "learning_rate": 1.7591561247067513e-07, + "loss": 0.2619, + "step": 20281 + }, + { + "epoch": 0.9179452364788414, + "grad_norm": 0.6906994401131011, + "learning_rate": 1.7572296266058274e-07, + "loss": 0.2884, + "step": 20282 + }, + { + "epoch": 0.9179904955872369, + "grad_norm": 0.3472640325425808, + "learning_rate": 1.7553041650938797e-07, + "loss": 0.4929, + "step": 20283 + }, + { + "epoch": 0.9180357546956325, + "grad_norm": 0.2600903816186952, + "learning_rate": 1.7533797402122743e-07, + "loss": 0.4658, + "step": 20284 + }, + { + "epoch": 0.9180810138040281, + "grad_norm": 0.28327782277311986, + "learning_rate": 1.7514563520023565e-07, + "loss": 0.4415, + "step": 20285 + }, + { + "epoch": 0.9181262729124237, + "grad_norm": 0.26357230226310807, + "learning_rate": 1.749534000505454e-07, + "loss": 0.4762, + "step": 20286 + }, + { + "epoch": 0.9181715320208191, + "grad_norm": 0.6251890113242208, + "learning_rate": 1.747612685762884e-07, + "loss": 0.2731, + "step": 20287 + }, + { + "epoch": 0.9182167911292147, + "grad_norm": 0.5775415680510588, + "learning_rate": 1.7456924078159187e-07, + "loss": 0.2898, + "step": 20288 + }, + { + "epoch": 0.9182620502376103, + "grad_norm": 0.5923941246023781, + "learning_rate": 1.7437731667058143e-07, + "loss": 0.2839, + "step": 20289 + }, + { + "epoch": 0.9183073093460059, + "grad_norm": 0.8476013350525512, + "learning_rate": 1.7418549624738213e-07, + "loss": 0.3324, + "step": 20290 + }, + { + "epoch": 0.9183525684544015, + "grad_norm": 0.6336776573796626, + "learning_rate": 1.7399377951611563e-07, + "loss": 0.2754, + "step": 20291 + }, + { + "epoch": 0.918397827562797, + "grad_norm": 0.6547617577821889, + "learning_rate": 1.7380216648090087e-07, + "loss": 0.3239, + "step": 20292 + }, + { + "epoch": 0.9184430866711926, + "grad_norm": 0.5762644864125279, + "learning_rate": 1.7361065714585458e-07, + "loss": 0.2698, + "step": 20293 + }, + { + "epoch": 0.9184883457795882, + "grad_norm": 0.26382957927102185, + "learning_rate": 1.734192515150923e-07, + "loss": 0.4401, + "step": 20294 + }, + { + "epoch": 0.9185336048879837, + "grad_norm": 0.6306528339851688, + "learning_rate": 1.732279495927264e-07, + "loss": 0.3416, + "step": 20295 + }, + { + "epoch": 0.9185788639963792, + "grad_norm": 0.587214543222049, + "learning_rate": 1.730367513828679e-07, + "loss": 0.2814, + "step": 20296 + }, + { + "epoch": 0.9186241231047748, + "grad_norm": 0.5364668170202281, + "learning_rate": 1.7284565688962474e-07, + "loss": 0.2797, + "step": 20297 + }, + { + "epoch": 0.9186693822131704, + "grad_norm": 0.5879666024093826, + "learning_rate": 1.7265466611710248e-07, + "loss": 0.2945, + "step": 20298 + }, + { + "epoch": 0.918714641321566, + "grad_norm": 0.7149839138267361, + "learning_rate": 1.7246377906940503e-07, + "loss": 0.3117, + "step": 20299 + }, + { + "epoch": 0.9187599004299616, + "grad_norm": 0.27485696126433967, + "learning_rate": 1.7227299575063528e-07, + "loss": 0.474, + "step": 20300 + }, + { + "epoch": 0.9188051595383571, + "grad_norm": 0.6842081565564186, + "learning_rate": 1.7208231616489156e-07, + "loss": 0.2535, + "step": 20301 + }, + { + "epoch": 0.9188504186467527, + "grad_norm": 0.9067952840010864, + "learning_rate": 1.7189174031627064e-07, + "loss": 0.3039, + "step": 20302 + }, + { + "epoch": 0.9188956777551482, + "grad_norm": 0.6247776678453516, + "learning_rate": 1.7170126820886755e-07, + "loss": 0.2834, + "step": 20303 + }, + { + "epoch": 0.9189409368635438, + "grad_norm": 0.6025353886341012, + "learning_rate": 1.7151089984677684e-07, + "loss": 0.265, + "step": 20304 + }, + { + "epoch": 0.9189861959719393, + "grad_norm": 0.2846579159301575, + "learning_rate": 1.713206352340857e-07, + "loss": 0.4568, + "step": 20305 + }, + { + "epoch": 0.9190314550803349, + "grad_norm": 0.6058765647167526, + "learning_rate": 1.7113047437488373e-07, + "loss": 0.267, + "step": 20306 + }, + { + "epoch": 0.9190767141887305, + "grad_norm": 0.6440807707619911, + "learning_rate": 1.7094041727325817e-07, + "loss": 0.2987, + "step": 20307 + }, + { + "epoch": 0.9191219732971261, + "grad_norm": 0.6087728665863835, + "learning_rate": 1.7075046393329132e-07, + "loss": 0.2649, + "step": 20308 + }, + { + "epoch": 0.9191672324055216, + "grad_norm": 0.6629216048938273, + "learning_rate": 1.705606143590649e-07, + "loss": 0.3285, + "step": 20309 + }, + { + "epoch": 0.9192124915139172, + "grad_norm": 0.6432855564190427, + "learning_rate": 1.7037086855465902e-07, + "loss": 0.2972, + "step": 20310 + }, + { + "epoch": 0.9192577506223127, + "grad_norm": 0.5504870130107095, + "learning_rate": 1.7018122652414926e-07, + "loss": 0.3088, + "step": 20311 + }, + { + "epoch": 0.9193030097307083, + "grad_norm": 0.6063378414760505, + "learning_rate": 1.6999168827161182e-07, + "loss": 0.2958, + "step": 20312 + }, + { + "epoch": 0.9193482688391039, + "grad_norm": 0.5759916244656855, + "learning_rate": 1.6980225380111904e-07, + "loss": 0.2869, + "step": 20313 + }, + { + "epoch": 0.9193935279474994, + "grad_norm": 0.591904167150174, + "learning_rate": 1.6961292311674037e-07, + "loss": 0.2959, + "step": 20314 + }, + { + "epoch": 0.919438787055895, + "grad_norm": 0.6214481185267032, + "learning_rate": 1.6942369622254428e-07, + "loss": 0.2788, + "step": 20315 + }, + { + "epoch": 0.9194840461642906, + "grad_norm": 0.2848607056517523, + "learning_rate": 1.692345731225975e-07, + "loss": 0.4749, + "step": 20316 + }, + { + "epoch": 0.9195293052726862, + "grad_norm": 0.25736908411097426, + "learning_rate": 1.6904555382096343e-07, + "loss": 0.4568, + "step": 20317 + }, + { + "epoch": 0.9195745643810816, + "grad_norm": 0.6279904182505865, + "learning_rate": 1.6885663832170274e-07, + "loss": 0.2423, + "step": 20318 + }, + { + "epoch": 0.9196198234894772, + "grad_norm": 0.5400609614569958, + "learning_rate": 1.686678266288755e-07, + "loss": 0.2607, + "step": 20319 + }, + { + "epoch": 0.9196650825978728, + "grad_norm": 0.5738164996555193, + "learning_rate": 1.6847911874653843e-07, + "loss": 0.2651, + "step": 20320 + }, + { + "epoch": 0.9197103417062684, + "grad_norm": 0.6456818098102596, + "learning_rate": 1.6829051467874613e-07, + "loss": 0.2998, + "step": 20321 + }, + { + "epoch": 0.9197556008146639, + "grad_norm": 0.6348852708401035, + "learning_rate": 1.6810201442955087e-07, + "loss": 0.3245, + "step": 20322 + }, + { + "epoch": 0.9198008599230595, + "grad_norm": 0.6234987326189086, + "learning_rate": 1.6791361800300386e-07, + "loss": 0.2987, + "step": 20323 + }, + { + "epoch": 0.9198461190314551, + "grad_norm": 0.46159750163370644, + "learning_rate": 1.6772532540315188e-07, + "loss": 0.4803, + "step": 20324 + }, + { + "epoch": 0.9198913781398507, + "grad_norm": 0.27836394932852804, + "learning_rate": 1.6753713663404224e-07, + "loss": 0.472, + "step": 20325 + }, + { + "epoch": 0.9199366372482463, + "grad_norm": 0.6227338739189778, + "learning_rate": 1.6734905169971782e-07, + "loss": 0.2471, + "step": 20326 + }, + { + "epoch": 0.9199818963566417, + "grad_norm": 0.6299923273196348, + "learning_rate": 1.671610706042187e-07, + "loss": 0.2589, + "step": 20327 + }, + { + "epoch": 0.9200271554650373, + "grad_norm": 0.5676574324783089, + "learning_rate": 1.6697319335158613e-07, + "loss": 0.2706, + "step": 20328 + }, + { + "epoch": 0.9200724145734329, + "grad_norm": 0.6999647644794571, + "learning_rate": 1.6678541994585629e-07, + "loss": 0.2926, + "step": 20329 + }, + { + "epoch": 0.9201176736818285, + "grad_norm": 0.643394225929882, + "learning_rate": 1.665977503910632e-07, + "loss": 0.3487, + "step": 20330 + }, + { + "epoch": 0.920162932790224, + "grad_norm": 0.5346259476521655, + "learning_rate": 1.664101846912397e-07, + "loss": 0.2489, + "step": 20331 + }, + { + "epoch": 0.9202081918986196, + "grad_norm": 0.625222026956306, + "learning_rate": 1.6622272285041652e-07, + "loss": 0.2456, + "step": 20332 + }, + { + "epoch": 0.9202534510070152, + "grad_norm": 0.7322590324640349, + "learning_rate": 1.6603536487262095e-07, + "loss": 0.2566, + "step": 20333 + }, + { + "epoch": 0.9202987101154108, + "grad_norm": 0.5569926068928689, + "learning_rate": 1.658481107618798e-07, + "loss": 0.2397, + "step": 20334 + }, + { + "epoch": 0.9203439692238063, + "grad_norm": 0.6147399641282735, + "learning_rate": 1.6566096052221482e-07, + "loss": 0.2932, + "step": 20335 + }, + { + "epoch": 0.9203892283322018, + "grad_norm": 0.639177648247498, + "learning_rate": 1.6547391415764836e-07, + "loss": 0.3123, + "step": 20336 + }, + { + "epoch": 0.9204344874405974, + "grad_norm": 0.6068624796114247, + "learning_rate": 1.652869716722e-07, + "loss": 0.3208, + "step": 20337 + }, + { + "epoch": 0.920479746548993, + "grad_norm": 0.762520313233785, + "learning_rate": 1.6510013306988538e-07, + "loss": 0.3149, + "step": 20338 + }, + { + "epoch": 0.9205250056573886, + "grad_norm": 0.29952288591201587, + "learning_rate": 1.6491339835471964e-07, + "loss": 0.474, + "step": 20339 + }, + { + "epoch": 0.9205702647657841, + "grad_norm": 0.6158717020716946, + "learning_rate": 1.6472676753071516e-07, + "loss": 0.2757, + "step": 20340 + }, + { + "epoch": 0.9206155238741797, + "grad_norm": 0.610806311783428, + "learning_rate": 1.6454024060188257e-07, + "loss": 0.3228, + "step": 20341 + }, + { + "epoch": 0.9206607829825753, + "grad_norm": 0.6269652583233704, + "learning_rate": 1.6435381757222869e-07, + "loss": 0.3003, + "step": 20342 + }, + { + "epoch": 0.9207060420909708, + "grad_norm": 0.6759177811930258, + "learning_rate": 1.6416749844575974e-07, + "loss": 0.2882, + "step": 20343 + }, + { + "epoch": 0.9207513011993663, + "grad_norm": 0.631666234340163, + "learning_rate": 1.6398128322647865e-07, + "loss": 0.2816, + "step": 20344 + }, + { + "epoch": 0.9207965603077619, + "grad_norm": 0.6173364819027063, + "learning_rate": 1.6379517191838777e-07, + "loss": 0.2593, + "step": 20345 + }, + { + "epoch": 0.9208418194161575, + "grad_norm": 0.5951260182263146, + "learning_rate": 1.636091645254856e-07, + "loss": 0.2865, + "step": 20346 + }, + { + "epoch": 0.9208870785245531, + "grad_norm": 0.5968563815932153, + "learning_rate": 1.634232610517683e-07, + "loss": 0.2656, + "step": 20347 + }, + { + "epoch": 0.9209323376329487, + "grad_norm": 0.5688768727910141, + "learning_rate": 1.6323746150123e-07, + "loss": 0.276, + "step": 20348 + }, + { + "epoch": 0.9209775967413442, + "grad_norm": 0.602614942186902, + "learning_rate": 1.6305176587786465e-07, + "loss": 0.2923, + "step": 20349 + }, + { + "epoch": 0.9210228558497398, + "grad_norm": 0.8071633035076183, + "learning_rate": 1.628661741856613e-07, + "loss": 0.2828, + "step": 20350 + }, + { + "epoch": 0.9210681149581353, + "grad_norm": 0.5866499802529012, + "learning_rate": 1.6268068642860735e-07, + "loss": 0.2556, + "step": 20351 + }, + { + "epoch": 0.9211133740665309, + "grad_norm": 0.25558126830066286, + "learning_rate": 1.6249530261068903e-07, + "loss": 0.466, + "step": 20352 + }, + { + "epoch": 0.9211586331749264, + "grad_norm": 0.25617421405222546, + "learning_rate": 1.623100227358887e-07, + "loss": 0.4592, + "step": 20353 + }, + { + "epoch": 0.921203892283322, + "grad_norm": 0.6722170018272257, + "learning_rate": 1.621248468081893e-07, + "loss": 0.2959, + "step": 20354 + }, + { + "epoch": 0.9212491513917176, + "grad_norm": 0.2676472468700252, + "learning_rate": 1.619397748315682e-07, + "loss": 0.4913, + "step": 20355 + }, + { + "epoch": 0.9212944105001132, + "grad_norm": 0.269768813951422, + "learning_rate": 1.6175480681000167e-07, + "loss": 0.4517, + "step": 20356 + }, + { + "epoch": 0.9213396696085087, + "grad_norm": 0.6520827900369393, + "learning_rate": 1.6156994274746484e-07, + "loss": 0.3, + "step": 20357 + }, + { + "epoch": 0.9213849287169043, + "grad_norm": 0.6370859639103148, + "learning_rate": 1.613851826479307e-07, + "loss": 0.3292, + "step": 20358 + }, + { + "epoch": 0.9214301878252998, + "grad_norm": 0.2639862715381424, + "learning_rate": 1.6120052651536766e-07, + "loss": 0.4813, + "step": 20359 + }, + { + "epoch": 0.9214754469336954, + "grad_norm": 0.2595331991104463, + "learning_rate": 1.6101597435374428e-07, + "loss": 0.4575, + "step": 20360 + }, + { + "epoch": 0.921520706042091, + "grad_norm": 0.6429967201358898, + "learning_rate": 1.6083152616702512e-07, + "loss": 0.3012, + "step": 20361 + }, + { + "epoch": 0.9215659651504865, + "grad_norm": 0.5585878017776091, + "learning_rate": 1.606471819591754e-07, + "loss": 0.3135, + "step": 20362 + }, + { + "epoch": 0.9216112242588821, + "grad_norm": 0.5677551745022221, + "learning_rate": 1.604629417341541e-07, + "loss": 0.2622, + "step": 20363 + }, + { + "epoch": 0.9216564833672777, + "grad_norm": 0.6363566209381748, + "learning_rate": 1.6027880549592033e-07, + "loss": 0.2885, + "step": 20364 + }, + { + "epoch": 0.9217017424756733, + "grad_norm": 0.7943373394647701, + "learning_rate": 1.6009477324843204e-07, + "loss": 0.3059, + "step": 20365 + }, + { + "epoch": 0.9217470015840687, + "grad_norm": 0.5412161990938706, + "learning_rate": 1.59910844995641e-07, + "loss": 0.2455, + "step": 20366 + }, + { + "epoch": 0.9217922606924643, + "grad_norm": 0.628707186222274, + "learning_rate": 1.5972702074150194e-07, + "loss": 0.2965, + "step": 20367 + }, + { + "epoch": 0.9218375198008599, + "grad_norm": 0.671824730233963, + "learning_rate": 1.5954330048996326e-07, + "loss": 0.2751, + "step": 20368 + }, + { + "epoch": 0.9218827789092555, + "grad_norm": 0.5892389520276957, + "learning_rate": 1.5935968424497184e-07, + "loss": 0.2822, + "step": 20369 + }, + { + "epoch": 0.921928038017651, + "grad_norm": 0.5617899868690214, + "learning_rate": 1.5917617201047508e-07, + "loss": 0.2617, + "step": 20370 + }, + { + "epoch": 0.9219732971260466, + "grad_norm": 0.27106261944272375, + "learning_rate": 1.589927637904143e-07, + "loss": 0.4458, + "step": 20371 + }, + { + "epoch": 0.9220185562344422, + "grad_norm": 0.6156014604944067, + "learning_rate": 1.5880945958873073e-07, + "loss": 0.2717, + "step": 20372 + }, + { + "epoch": 0.9220638153428378, + "grad_norm": 0.6014911690767994, + "learning_rate": 1.586262594093635e-07, + "loss": 0.322, + "step": 20373 + }, + { + "epoch": 0.9221090744512334, + "grad_norm": 0.6133283076051357, + "learning_rate": 1.5844316325624887e-07, + "loss": 0.2976, + "step": 20374 + }, + { + "epoch": 0.9221543335596288, + "grad_norm": 0.6033457098179053, + "learning_rate": 1.5826017113332148e-07, + "loss": 0.2794, + "step": 20375 + }, + { + "epoch": 0.9221995926680244, + "grad_norm": 0.5862417085140412, + "learning_rate": 1.580772830445121e-07, + "loss": 0.2638, + "step": 20376 + }, + { + "epoch": 0.92224485177642, + "grad_norm": 0.5840447890147605, + "learning_rate": 1.5789449899375086e-07, + "loss": 0.2529, + "step": 20377 + }, + { + "epoch": 0.9222901108848156, + "grad_norm": 0.5688909679197075, + "learning_rate": 1.5771181898496578e-07, + "loss": 0.2991, + "step": 20378 + }, + { + "epoch": 0.9223353699932111, + "grad_norm": 0.724734229839457, + "learning_rate": 1.5752924302208206e-07, + "loss": 0.2936, + "step": 20379 + }, + { + "epoch": 0.9223806291016067, + "grad_norm": 0.5286664324455521, + "learning_rate": 1.573467711090221e-07, + "loss": 0.3022, + "step": 20380 + }, + { + "epoch": 0.9224258882100023, + "grad_norm": 0.6311872016999195, + "learning_rate": 1.5716440324970716e-07, + "loss": 0.2745, + "step": 20381 + }, + { + "epoch": 0.9224711473183979, + "grad_norm": 0.2896336364166879, + "learning_rate": 1.5698213944805528e-07, + "loss": 0.4545, + "step": 20382 + }, + { + "epoch": 0.9225164064267934, + "grad_norm": 0.26109570845484165, + "learning_rate": 1.5679997970798333e-07, + "loss": 0.4907, + "step": 20383 + }, + { + "epoch": 0.9225616655351889, + "grad_norm": 0.5823890638539553, + "learning_rate": 1.566179240334048e-07, + "loss": 0.2804, + "step": 20384 + }, + { + "epoch": 0.9226069246435845, + "grad_norm": 0.24110901552768604, + "learning_rate": 1.564359724282316e-07, + "loss": 0.4458, + "step": 20385 + }, + { + "epoch": 0.9226521837519801, + "grad_norm": 0.6395430045112075, + "learning_rate": 1.5625412489637337e-07, + "loss": 0.268, + "step": 20386 + }, + { + "epoch": 0.9226974428603757, + "grad_norm": 0.5978845010368887, + "learning_rate": 1.5607238144173864e-07, + "loss": 0.2735, + "step": 20387 + }, + { + "epoch": 0.9227427019687712, + "grad_norm": 0.6037538312066337, + "learning_rate": 1.5589074206823096e-07, + "loss": 0.2703, + "step": 20388 + }, + { + "epoch": 0.9227879610771668, + "grad_norm": 0.27980886714460196, + "learning_rate": 1.5570920677975276e-07, + "loss": 0.4569, + "step": 20389 + }, + { + "epoch": 0.9228332201855624, + "grad_norm": 0.6337457442557619, + "learning_rate": 1.5552777558020594e-07, + "loss": 0.2486, + "step": 20390 + }, + { + "epoch": 0.9228784792939579, + "grad_norm": 0.6288532464939638, + "learning_rate": 1.5534644847348957e-07, + "loss": 0.2934, + "step": 20391 + }, + { + "epoch": 0.9229237384023534, + "grad_norm": 0.6810993647037666, + "learning_rate": 1.5516522546349833e-07, + "loss": 0.2449, + "step": 20392 + }, + { + "epoch": 0.922968997510749, + "grad_norm": 0.6479504141767006, + "learning_rate": 1.5498410655412577e-07, + "loss": 0.283, + "step": 20393 + }, + { + "epoch": 0.9230142566191446, + "grad_norm": 0.6164855382342345, + "learning_rate": 1.5480309174926544e-07, + "loss": 0.3113, + "step": 20394 + }, + { + "epoch": 0.9230595157275402, + "grad_norm": 0.8744004839033451, + "learning_rate": 1.5462218105280535e-07, + "loss": 0.2923, + "step": 20395 + }, + { + "epoch": 0.9231047748359358, + "grad_norm": 0.6521839231619401, + "learning_rate": 1.544413744686335e-07, + "loss": 0.3171, + "step": 20396 + }, + { + "epoch": 0.9231500339443313, + "grad_norm": 0.628945928780969, + "learning_rate": 1.5426067200063454e-07, + "loss": 0.3141, + "step": 20397 + }, + { + "epoch": 0.9231952930527269, + "grad_norm": 0.6002713949526557, + "learning_rate": 1.540800736526904e-07, + "loss": 0.3094, + "step": 20398 + }, + { + "epoch": 0.9232405521611224, + "grad_norm": 0.6054775864880105, + "learning_rate": 1.5389957942868295e-07, + "loss": 0.325, + "step": 20399 + }, + { + "epoch": 0.923285811269518, + "grad_norm": 0.5423819600754345, + "learning_rate": 1.5371918933249018e-07, + "loss": 0.2665, + "step": 20400 + }, + { + "epoch": 0.9233310703779135, + "grad_norm": 0.6114592856681977, + "learning_rate": 1.5353890336798738e-07, + "loss": 0.2896, + "step": 20401 + }, + { + "epoch": 0.9233763294863091, + "grad_norm": 0.6591721781153652, + "learning_rate": 1.5335872153904863e-07, + "loss": 0.3219, + "step": 20402 + }, + { + "epoch": 0.9234215885947047, + "grad_norm": 0.5609042868126081, + "learning_rate": 1.5317864384954527e-07, + "loss": 0.2773, + "step": 20403 + }, + { + "epoch": 0.9234668477031003, + "grad_norm": 0.6318151743882193, + "learning_rate": 1.5299867030334815e-07, + "loss": 0.273, + "step": 20404 + }, + { + "epoch": 0.9235121068114958, + "grad_norm": 0.6692743045696299, + "learning_rate": 1.5281880090432245e-07, + "loss": 0.3142, + "step": 20405 + }, + { + "epoch": 0.9235573659198913, + "grad_norm": 0.623888346712142, + "learning_rate": 1.5263903565633342e-07, + "loss": 0.2948, + "step": 20406 + }, + { + "epoch": 0.9236026250282869, + "grad_norm": 0.2757973992857609, + "learning_rate": 1.5245937456324468e-07, + "loss": 0.4559, + "step": 20407 + }, + { + "epoch": 0.9236478841366825, + "grad_norm": 0.6188098397848397, + "learning_rate": 1.5227981762891586e-07, + "loss": 0.2783, + "step": 20408 + }, + { + "epoch": 0.9236931432450781, + "grad_norm": 0.8300233252171786, + "learning_rate": 1.5210036485720503e-07, + "loss": 0.302, + "step": 20409 + }, + { + "epoch": 0.9237384023534736, + "grad_norm": 0.37985033345341146, + "learning_rate": 1.5192101625196798e-07, + "loss": 0.4886, + "step": 20410 + }, + { + "epoch": 0.9237836614618692, + "grad_norm": 0.5821665503233471, + "learning_rate": 1.517417718170583e-07, + "loss": 0.2816, + "step": 20411 + }, + { + "epoch": 0.9238289205702648, + "grad_norm": 0.24585264221427977, + "learning_rate": 1.5156263155632844e-07, + "loss": 0.4586, + "step": 20412 + }, + { + "epoch": 0.9238741796786604, + "grad_norm": 0.6593814272013578, + "learning_rate": 1.5138359547362645e-07, + "loss": 0.2861, + "step": 20413 + }, + { + "epoch": 0.9239194387870558, + "grad_norm": 0.2898750406110483, + "learning_rate": 1.5120466357279929e-07, + "loss": 0.4899, + "step": 20414 + }, + { + "epoch": 0.9239646978954514, + "grad_norm": 0.5845322241114378, + "learning_rate": 1.510258358576916e-07, + "loss": 0.2763, + "step": 20415 + }, + { + "epoch": 0.924009957003847, + "grad_norm": 0.6005937355250502, + "learning_rate": 1.5084711233214699e-07, + "loss": 0.2856, + "step": 20416 + }, + { + "epoch": 0.9240552161122426, + "grad_norm": 0.6675591618962443, + "learning_rate": 1.5066849300000519e-07, + "loss": 0.2622, + "step": 20417 + }, + { + "epoch": 0.9241004752206382, + "grad_norm": 0.6381729858413194, + "learning_rate": 1.5048997786510311e-07, + "loss": 0.3195, + "step": 20418 + }, + { + "epoch": 0.9241457343290337, + "grad_norm": 0.6069740562512995, + "learning_rate": 1.5031156693127714e-07, + "loss": 0.2958, + "step": 20419 + }, + { + "epoch": 0.9241909934374293, + "grad_norm": 0.6311512886772741, + "learning_rate": 1.5013326020236141e-07, + "loss": 0.3274, + "step": 20420 + }, + { + "epoch": 0.9242362525458249, + "grad_norm": 0.5988129173441522, + "learning_rate": 1.4995505768218677e-07, + "loss": 0.2664, + "step": 20421 + }, + { + "epoch": 0.9242815116542205, + "grad_norm": 0.6004426149550289, + "learning_rate": 1.497769593745818e-07, + "loss": 0.2802, + "step": 20422 + }, + { + "epoch": 0.9243267707626159, + "grad_norm": 0.6129639964883378, + "learning_rate": 1.4959896528337402e-07, + "loss": 0.3328, + "step": 20423 + }, + { + "epoch": 0.9243720298710115, + "grad_norm": 0.5832031731496322, + "learning_rate": 1.4942107541238705e-07, + "loss": 0.3161, + "step": 20424 + }, + { + "epoch": 0.9244172889794071, + "grad_norm": 0.2922199415284455, + "learning_rate": 1.4924328976544446e-07, + "loss": 0.4766, + "step": 20425 + }, + { + "epoch": 0.9244625480878027, + "grad_norm": 0.6427471456037717, + "learning_rate": 1.490656083463654e-07, + "loss": 0.3016, + "step": 20426 + }, + { + "epoch": 0.9245078071961982, + "grad_norm": 0.6452756589772367, + "learning_rate": 1.4888803115896745e-07, + "loss": 0.2993, + "step": 20427 + }, + { + "epoch": 0.9245530663045938, + "grad_norm": 0.7535574291069664, + "learning_rate": 1.4871055820706692e-07, + "loss": 0.297, + "step": 20428 + }, + { + "epoch": 0.9245983254129894, + "grad_norm": 0.619299210313287, + "learning_rate": 1.4853318949447747e-07, + "loss": 0.3203, + "step": 20429 + }, + { + "epoch": 0.924643584521385, + "grad_norm": 0.7475947457906347, + "learning_rate": 1.4835592502500883e-07, + "loss": 0.2791, + "step": 20430 + }, + { + "epoch": 0.9246888436297805, + "grad_norm": 0.6180770913988448, + "learning_rate": 1.4817876480247074e-07, + "loss": 0.2761, + "step": 20431 + }, + { + "epoch": 0.924734102738176, + "grad_norm": 0.7096132177751993, + "learning_rate": 1.4800170883066954e-07, + "loss": 0.2817, + "step": 20432 + }, + { + "epoch": 0.9247793618465716, + "grad_norm": 0.27086503197020917, + "learning_rate": 1.4782475711341115e-07, + "loss": 0.4727, + "step": 20433 + }, + { + "epoch": 0.9248246209549672, + "grad_norm": 0.5942397803198581, + "learning_rate": 1.4764790965449528e-07, + "loss": 0.2906, + "step": 20434 + }, + { + "epoch": 0.9248698800633628, + "grad_norm": 0.6921617036728822, + "learning_rate": 1.474711664577233e-07, + "loss": 0.2978, + "step": 20435 + }, + { + "epoch": 0.9249151391717583, + "grad_norm": 0.5857174790255871, + "learning_rate": 1.4729452752689277e-07, + "loss": 0.3047, + "step": 20436 + }, + { + "epoch": 0.9249603982801539, + "grad_norm": 0.7227079550701959, + "learning_rate": 1.471179928657984e-07, + "loss": 0.2796, + "step": 20437 + }, + { + "epoch": 0.9250056573885495, + "grad_norm": 0.6329769635085243, + "learning_rate": 1.4694156247823387e-07, + "loss": 0.3013, + "step": 20438 + }, + { + "epoch": 0.925050916496945, + "grad_norm": 0.301921818944306, + "learning_rate": 1.4676523636799057e-07, + "loss": 0.4498, + "step": 20439 + }, + { + "epoch": 0.9250961756053405, + "grad_norm": 0.5936333054729149, + "learning_rate": 1.4658901453885654e-07, + "loss": 0.2787, + "step": 20440 + }, + { + "epoch": 0.9251414347137361, + "grad_norm": 0.6066804280429381, + "learning_rate": 1.464128969946188e-07, + "loss": 0.3102, + "step": 20441 + }, + { + "epoch": 0.9251866938221317, + "grad_norm": 0.5606739862421105, + "learning_rate": 1.4623688373906098e-07, + "loss": 0.2788, + "step": 20442 + }, + { + "epoch": 0.9252319529305273, + "grad_norm": 0.6343599251414981, + "learning_rate": 1.4606097477596504e-07, + "loss": 0.3192, + "step": 20443 + }, + { + "epoch": 0.9252772120389229, + "grad_norm": 0.5491031501485523, + "learning_rate": 1.4588517010911073e-07, + "loss": 0.3119, + "step": 20444 + }, + { + "epoch": 0.9253224711473184, + "grad_norm": 0.6006706830936251, + "learning_rate": 1.4570946974227674e-07, + "loss": 0.2711, + "step": 20445 + }, + { + "epoch": 0.925367730255714, + "grad_norm": 0.6806183941372534, + "learning_rate": 1.455338736792372e-07, + "loss": 0.3267, + "step": 20446 + }, + { + "epoch": 0.9254129893641095, + "grad_norm": 0.6494127773785351, + "learning_rate": 1.4535838192376527e-07, + "loss": 0.3118, + "step": 20447 + }, + { + "epoch": 0.9254582484725051, + "grad_norm": 0.6462344113018397, + "learning_rate": 1.4518299447963126e-07, + "loss": 0.3411, + "step": 20448 + }, + { + "epoch": 0.9255035075809006, + "grad_norm": 0.5892924906517587, + "learning_rate": 1.4500771135060486e-07, + "loss": 0.3017, + "step": 20449 + }, + { + "epoch": 0.9255487666892962, + "grad_norm": 0.6098691150241621, + "learning_rate": 1.4483253254045205e-07, + "loss": 0.2883, + "step": 20450 + }, + { + "epoch": 0.9255940257976918, + "grad_norm": 0.6008950850538864, + "learning_rate": 1.4465745805293584e-07, + "loss": 0.3161, + "step": 20451 + }, + { + "epoch": 0.9256392849060874, + "grad_norm": 0.6600949221221687, + "learning_rate": 1.444824878918194e-07, + "loss": 0.2913, + "step": 20452 + }, + { + "epoch": 0.925684544014483, + "grad_norm": 0.5904981565608284, + "learning_rate": 1.4430762206086136e-07, + "loss": 0.2743, + "step": 20453 + }, + { + "epoch": 0.9257298031228784, + "grad_norm": 0.604631254211844, + "learning_rate": 1.441328605638198e-07, + "loss": 0.3187, + "step": 20454 + }, + { + "epoch": 0.925775062231274, + "grad_norm": 0.6057068609858999, + "learning_rate": 1.4395820340444954e-07, + "loss": 0.2989, + "step": 20455 + }, + { + "epoch": 0.9258203213396696, + "grad_norm": 0.6196797755533714, + "learning_rate": 1.4378365058650257e-07, + "loss": 0.3047, + "step": 20456 + }, + { + "epoch": 0.9258655804480652, + "grad_norm": 0.5852014846220924, + "learning_rate": 1.436092021137303e-07, + "loss": 0.2727, + "step": 20457 + }, + { + "epoch": 0.9259108395564607, + "grad_norm": 0.6442860368973576, + "learning_rate": 1.4343485798988198e-07, + "loss": 0.3025, + "step": 20458 + }, + { + "epoch": 0.9259560986648563, + "grad_norm": 0.6644307473314526, + "learning_rate": 1.4326061821870186e-07, + "loss": 0.2908, + "step": 20459 + }, + { + "epoch": 0.9260013577732519, + "grad_norm": 0.6125065176034205, + "learning_rate": 1.4308648280393466e-07, + "loss": 0.2927, + "step": 20460 + }, + { + "epoch": 0.9260466168816475, + "grad_norm": 0.7227698857705623, + "learning_rate": 1.429124517493219e-07, + "loss": 0.2954, + "step": 20461 + }, + { + "epoch": 0.926091875990043, + "grad_norm": 0.6853652240967014, + "learning_rate": 1.4273852505860443e-07, + "loss": 0.2987, + "step": 20462 + }, + { + "epoch": 0.9261371350984385, + "grad_norm": 0.2613456844771442, + "learning_rate": 1.4256470273551705e-07, + "loss": 0.474, + "step": 20463 + }, + { + "epoch": 0.9261823942068341, + "grad_norm": 1.0479860117384798, + "learning_rate": 1.4239098478379565e-07, + "loss": 0.2837, + "step": 20464 + }, + { + "epoch": 0.9262276533152297, + "grad_norm": 0.5307899678938974, + "learning_rate": 1.4221737120717338e-07, + "loss": 0.2954, + "step": 20465 + }, + { + "epoch": 0.9262729124236253, + "grad_norm": 0.6494081792983811, + "learning_rate": 1.4204386200938057e-07, + "loss": 0.3101, + "step": 20466 + }, + { + "epoch": 0.9263181715320208, + "grad_norm": 0.26612494706636547, + "learning_rate": 1.4187045719414427e-07, + "loss": 0.4481, + "step": 20467 + }, + { + "epoch": 0.9263634306404164, + "grad_norm": 0.5947286912424484, + "learning_rate": 1.4169715676519203e-07, + "loss": 0.2688, + "step": 20468 + }, + { + "epoch": 0.926408689748812, + "grad_norm": 0.5686910911062496, + "learning_rate": 1.4152396072624587e-07, + "loss": 0.2903, + "step": 20469 + }, + { + "epoch": 0.9264539488572076, + "grad_norm": 0.5883994277724498, + "learning_rate": 1.413508690810289e-07, + "loss": 0.3428, + "step": 20470 + }, + { + "epoch": 0.926499207965603, + "grad_norm": 0.6112876770382101, + "learning_rate": 1.4117788183325986e-07, + "loss": 0.2879, + "step": 20471 + }, + { + "epoch": 0.9265444670739986, + "grad_norm": 0.6009679063039658, + "learning_rate": 1.410049989866541e-07, + "loss": 0.2958, + "step": 20472 + }, + { + "epoch": 0.9265897261823942, + "grad_norm": 0.7066580354453394, + "learning_rate": 1.4083222054492862e-07, + "loss": 0.2611, + "step": 20473 + }, + { + "epoch": 0.9266349852907898, + "grad_norm": 0.6038274186738244, + "learning_rate": 1.4065954651179492e-07, + "loss": 0.3046, + "step": 20474 + }, + { + "epoch": 0.9266802443991853, + "grad_norm": 0.26245478634417746, + "learning_rate": 1.404869768909628e-07, + "loss": 0.461, + "step": 20475 + }, + { + "epoch": 0.9267255035075809, + "grad_norm": 0.5982783265922673, + "learning_rate": 1.4031451168614097e-07, + "loss": 0.2844, + "step": 20476 + }, + { + "epoch": 0.9267707626159765, + "grad_norm": 0.5835922919662517, + "learning_rate": 1.4014215090103424e-07, + "loss": 0.3082, + "step": 20477 + }, + { + "epoch": 0.926816021724372, + "grad_norm": 0.5992267862265387, + "learning_rate": 1.3996989453934795e-07, + "loss": 0.2762, + "step": 20478 + }, + { + "epoch": 0.9268612808327676, + "grad_norm": 0.2659060116019865, + "learning_rate": 1.397977426047814e-07, + "loss": 0.4519, + "step": 20479 + }, + { + "epoch": 0.9269065399411631, + "grad_norm": 0.2554853549334449, + "learning_rate": 1.396256951010344e-07, + "loss": 0.4599, + "step": 20480 + }, + { + "epoch": 0.9269517990495587, + "grad_norm": 0.6112969280647528, + "learning_rate": 1.39453752031804e-07, + "loss": 0.3366, + "step": 20481 + }, + { + "epoch": 0.9269970581579543, + "grad_norm": 0.6211876213503223, + "learning_rate": 1.3928191340078446e-07, + "loss": 0.3113, + "step": 20482 + }, + { + "epoch": 0.9270423172663499, + "grad_norm": 0.6725018512657114, + "learning_rate": 1.391101792116678e-07, + "loss": 0.3026, + "step": 20483 + }, + { + "epoch": 0.9270875763747454, + "grad_norm": 0.6567315780209119, + "learning_rate": 1.38938549468145e-07, + "loss": 0.2821, + "step": 20484 + }, + { + "epoch": 0.927132835483141, + "grad_norm": 0.6525421756495428, + "learning_rate": 1.3876702417390197e-07, + "loss": 0.3291, + "step": 20485 + }, + { + "epoch": 0.9271780945915366, + "grad_norm": 0.5911414570134386, + "learning_rate": 1.3859560333262578e-07, + "loss": 0.3101, + "step": 20486 + }, + { + "epoch": 0.9272233536999321, + "grad_norm": 0.5579965197267512, + "learning_rate": 1.384242869480007e-07, + "loss": 0.2805, + "step": 20487 + }, + { + "epoch": 0.9272686128083277, + "grad_norm": 0.665727549513722, + "learning_rate": 1.3825307502370487e-07, + "loss": 0.2891, + "step": 20488 + }, + { + "epoch": 0.9273138719167232, + "grad_norm": 0.5640027269792147, + "learning_rate": 1.3808196756341928e-07, + "loss": 0.2708, + "step": 20489 + }, + { + "epoch": 0.9273591310251188, + "grad_norm": 0.6294402244906123, + "learning_rate": 1.3791096457081987e-07, + "loss": 0.2878, + "step": 20490 + }, + { + "epoch": 0.9274043901335144, + "grad_norm": 0.8039347348919981, + "learning_rate": 1.3774006604958202e-07, + "loss": 0.3326, + "step": 20491 + }, + { + "epoch": 0.92744964924191, + "grad_norm": 0.2858853839858013, + "learning_rate": 1.3756927200337555e-07, + "loss": 0.4793, + "step": 20492 + }, + { + "epoch": 0.9274949083503055, + "grad_norm": 0.6492039128977286, + "learning_rate": 1.37398582435872e-07, + "loss": 0.2837, + "step": 20493 + }, + { + "epoch": 0.927540167458701, + "grad_norm": 0.6545488674984096, + "learning_rate": 1.3722799735073898e-07, + "loss": 0.3027, + "step": 20494 + }, + { + "epoch": 0.9275854265670966, + "grad_norm": 0.6080759408041968, + "learning_rate": 1.3705751675164137e-07, + "loss": 0.2678, + "step": 20495 + }, + { + "epoch": 0.9276306856754922, + "grad_norm": 0.6326392617289096, + "learning_rate": 1.3688714064224175e-07, + "loss": 0.2975, + "step": 20496 + }, + { + "epoch": 0.9276759447838877, + "grad_norm": 0.6756598727801661, + "learning_rate": 1.367168690262022e-07, + "loss": 0.3192, + "step": 20497 + }, + { + "epoch": 0.9277212038922833, + "grad_norm": 0.2631476713639336, + "learning_rate": 1.3654670190718035e-07, + "loss": 0.447, + "step": 20498 + }, + { + "epoch": 0.9277664630006789, + "grad_norm": 0.5927515198474593, + "learning_rate": 1.3637663928883328e-07, + "loss": 0.2915, + "step": 20499 + }, + { + "epoch": 0.9278117221090745, + "grad_norm": 0.6149204123557976, + "learning_rate": 1.3620668117481471e-07, + "loss": 0.2322, + "step": 20500 + }, + { + "epoch": 0.9278569812174701, + "grad_norm": 0.6259834762829701, + "learning_rate": 1.3603682756877624e-07, + "loss": 0.29, + "step": 20501 + }, + { + "epoch": 0.9279022403258655, + "grad_norm": 0.592115814894418, + "learning_rate": 1.3586707847436765e-07, + "loss": 0.2761, + "step": 20502 + }, + { + "epoch": 0.9279474994342611, + "grad_norm": 0.6097902401023106, + "learning_rate": 1.356974338952366e-07, + "loss": 0.3121, + "step": 20503 + }, + { + "epoch": 0.9279927585426567, + "grad_norm": 0.6350445882270588, + "learning_rate": 1.3552789383502906e-07, + "loss": 0.278, + "step": 20504 + }, + { + "epoch": 0.9280380176510523, + "grad_norm": 0.617967458484192, + "learning_rate": 1.3535845829738547e-07, + "loss": 0.3052, + "step": 20505 + }, + { + "epoch": 0.9280832767594478, + "grad_norm": 0.5859739429927824, + "learning_rate": 1.3518912728594902e-07, + "loss": 0.2497, + "step": 20506 + }, + { + "epoch": 0.9281285358678434, + "grad_norm": 0.5816997049619769, + "learning_rate": 1.350199008043568e-07, + "loss": 0.3071, + "step": 20507 + }, + { + "epoch": 0.928173794976239, + "grad_norm": 0.6387081232887973, + "learning_rate": 1.3485077885624587e-07, + "loss": 0.295, + "step": 20508 + }, + { + "epoch": 0.9282190540846346, + "grad_norm": 0.6058050849986356, + "learning_rate": 1.3468176144524837e-07, + "loss": 0.2468, + "step": 20509 + }, + { + "epoch": 0.92826431319303, + "grad_norm": 0.6973876198738655, + "learning_rate": 1.3451284857499803e-07, + "loss": 0.3326, + "step": 20510 + }, + { + "epoch": 0.9283095723014256, + "grad_norm": 0.5773441879086344, + "learning_rate": 1.3434404024912307e-07, + "loss": 0.2416, + "step": 20511 + }, + { + "epoch": 0.9283548314098212, + "grad_norm": 0.26222600729671075, + "learning_rate": 1.3417533647125114e-07, + "loss": 0.4776, + "step": 20512 + }, + { + "epoch": 0.9284000905182168, + "grad_norm": 0.5985889900254078, + "learning_rate": 1.3400673724500713e-07, + "loss": 0.313, + "step": 20513 + }, + { + "epoch": 0.9284453496266124, + "grad_norm": 1.1236766909494023, + "learning_rate": 1.3383824257401256e-07, + "loss": 0.2749, + "step": 20514 + }, + { + "epoch": 0.9284906087350079, + "grad_norm": 0.3598829426277801, + "learning_rate": 1.3366985246188958e-07, + "loss": 0.4529, + "step": 20515 + }, + { + "epoch": 0.9285358678434035, + "grad_norm": 0.6381871757977565, + "learning_rate": 1.335015669122558e-07, + "loss": 0.3305, + "step": 20516 + }, + { + "epoch": 0.9285811269517991, + "grad_norm": 0.27547840975691507, + "learning_rate": 1.3333338592872725e-07, + "loss": 0.476, + "step": 20517 + }, + { + "epoch": 0.9286263860601947, + "grad_norm": 0.654558987708579, + "learning_rate": 1.3316530951491712e-07, + "loss": 0.3096, + "step": 20518 + }, + { + "epoch": 0.9286716451685901, + "grad_norm": 0.6093967244105901, + "learning_rate": 1.3299733767443645e-07, + "loss": 0.2882, + "step": 20519 + }, + { + "epoch": 0.9287169042769857, + "grad_norm": 0.6398248693621602, + "learning_rate": 1.3282947041089678e-07, + "loss": 0.3011, + "step": 20520 + }, + { + "epoch": 0.9287621633853813, + "grad_norm": 0.6069225158778648, + "learning_rate": 1.3266170772790244e-07, + "loss": 0.2636, + "step": 20521 + }, + { + "epoch": 0.9288074224937769, + "grad_norm": 0.26498649232465793, + "learning_rate": 1.3249404962905832e-07, + "loss": 0.4825, + "step": 20522 + }, + { + "epoch": 0.9288526816021725, + "grad_norm": 0.4456432719083173, + "learning_rate": 1.3232649611796878e-07, + "loss": 0.4737, + "step": 20523 + }, + { + "epoch": 0.928897940710568, + "grad_norm": 0.6128672166428326, + "learning_rate": 1.3215904719823313e-07, + "loss": 0.2678, + "step": 20524 + }, + { + "epoch": 0.9289431998189636, + "grad_norm": 0.629973709969791, + "learning_rate": 1.3199170287344797e-07, + "loss": 0.3146, + "step": 20525 + }, + { + "epoch": 0.9289884589273592, + "grad_norm": 0.26166497076034695, + "learning_rate": 1.3182446314721154e-07, + "loss": 0.4286, + "step": 20526 + }, + { + "epoch": 0.9290337180357547, + "grad_norm": 0.7510941062492887, + "learning_rate": 1.316573280231148e-07, + "loss": 0.2927, + "step": 20527 + }, + { + "epoch": 0.9290789771441502, + "grad_norm": 0.28980127141565226, + "learning_rate": 1.3149029750475052e-07, + "loss": 0.4667, + "step": 20528 + }, + { + "epoch": 0.9291242362525458, + "grad_norm": 0.5980028104852588, + "learning_rate": 1.313233715957074e-07, + "loss": 0.3383, + "step": 20529 + }, + { + "epoch": 0.9291694953609414, + "grad_norm": 0.700142480044533, + "learning_rate": 1.3115655029957207e-07, + "loss": 0.3128, + "step": 20530 + }, + { + "epoch": 0.929214754469337, + "grad_norm": 0.6492349316199251, + "learning_rate": 1.3098983361992834e-07, + "loss": 0.31, + "step": 20531 + }, + { + "epoch": 0.9292600135777325, + "grad_norm": 0.6112670944838561, + "learning_rate": 1.3082322156035942e-07, + "loss": 0.3311, + "step": 20532 + }, + { + "epoch": 0.9293052726861281, + "grad_norm": 0.2691885698944432, + "learning_rate": 1.3065671412444526e-07, + "loss": 0.4467, + "step": 20533 + }, + { + "epoch": 0.9293505317945236, + "grad_norm": 0.640175163072835, + "learning_rate": 1.3049031131576294e-07, + "loss": 0.3091, + "step": 20534 + }, + { + "epoch": 0.9293957909029192, + "grad_norm": 0.5699163789000503, + "learning_rate": 1.30324013137888e-07, + "loss": 0.2457, + "step": 20535 + }, + { + "epoch": 0.9294410500113148, + "grad_norm": 0.6347799630181783, + "learning_rate": 1.3015781959439478e-07, + "loss": 0.2705, + "step": 20536 + }, + { + "epoch": 0.9294863091197103, + "grad_norm": 0.6121332607475962, + "learning_rate": 1.299917306888532e-07, + "loss": 0.2613, + "step": 20537 + }, + { + "epoch": 0.9295315682281059, + "grad_norm": 0.579396510362602, + "learning_rate": 1.2982574642483148e-07, + "loss": 0.302, + "step": 20538 + }, + { + "epoch": 0.9295768273365015, + "grad_norm": 0.6109700917502632, + "learning_rate": 1.2965986680589793e-07, + "loss": 0.2864, + "step": 20539 + }, + { + "epoch": 0.9296220864448971, + "grad_norm": 0.5384522792034464, + "learning_rate": 1.2949409183561467e-07, + "loss": 0.2482, + "step": 20540 + }, + { + "epoch": 0.9296673455532926, + "grad_norm": 0.27588932629748913, + "learning_rate": 1.2932842151754555e-07, + "loss": 0.4769, + "step": 20541 + }, + { + "epoch": 0.9297126046616881, + "grad_norm": 0.5868988348140859, + "learning_rate": 1.2916285585524936e-07, + "loss": 0.3171, + "step": 20542 + }, + { + "epoch": 0.9297578637700837, + "grad_norm": 0.5927639601052319, + "learning_rate": 1.2899739485228325e-07, + "loss": 0.2801, + "step": 20543 + }, + { + "epoch": 0.9298031228784793, + "grad_norm": 0.5875612986420774, + "learning_rate": 1.2883203851220326e-07, + "loss": 0.3065, + "step": 20544 + }, + { + "epoch": 0.9298483819868748, + "grad_norm": 0.5992372891431096, + "learning_rate": 1.286667868385627e-07, + "loss": 0.2955, + "step": 20545 + }, + { + "epoch": 0.9298936410952704, + "grad_norm": 0.5718712978749788, + "learning_rate": 1.285016398349115e-07, + "loss": 0.2895, + "step": 20546 + }, + { + "epoch": 0.929938900203666, + "grad_norm": 0.30508341455682847, + "learning_rate": 1.2833659750479787e-07, + "loss": 0.472, + "step": 20547 + }, + { + "epoch": 0.9299841593120616, + "grad_norm": 0.6402722698777407, + "learning_rate": 1.281716598517685e-07, + "loss": 0.2778, + "step": 20548 + }, + { + "epoch": 0.9300294184204572, + "grad_norm": 0.6912387838467804, + "learning_rate": 1.2800682687936826e-07, + "loss": 0.2441, + "step": 20549 + }, + { + "epoch": 0.9300746775288526, + "grad_norm": 0.5550573796308262, + "learning_rate": 1.2784209859113773e-07, + "loss": 0.2777, + "step": 20550 + }, + { + "epoch": 0.9301199366372482, + "grad_norm": 0.7114796618199457, + "learning_rate": 1.2767747499061677e-07, + "loss": 0.316, + "step": 20551 + }, + { + "epoch": 0.9301651957456438, + "grad_norm": 0.6176338168213179, + "learning_rate": 1.2751295608134262e-07, + "loss": 0.2525, + "step": 20552 + }, + { + "epoch": 0.9302104548540394, + "grad_norm": 0.257163146418204, + "learning_rate": 1.273485418668502e-07, + "loss": 0.4685, + "step": 20553 + }, + { + "epoch": 0.9302557139624349, + "grad_norm": 0.5724525940518886, + "learning_rate": 1.2718423235067278e-07, + "loss": 0.3013, + "step": 20554 + }, + { + "epoch": 0.9303009730708305, + "grad_norm": 0.6024815855082362, + "learning_rate": 1.2702002753634092e-07, + "loss": 0.3173, + "step": 20555 + }, + { + "epoch": 0.9303462321792261, + "grad_norm": 0.6725907048687568, + "learning_rate": 1.2685592742738173e-07, + "loss": 0.3283, + "step": 20556 + }, + { + "epoch": 0.9303914912876217, + "grad_norm": 0.2593537537342258, + "learning_rate": 1.266919320273219e-07, + "loss": 0.4496, + "step": 20557 + }, + { + "epoch": 0.9304367503960173, + "grad_norm": 0.6055413336996001, + "learning_rate": 1.2652804133968578e-07, + "loss": 0.3077, + "step": 20558 + }, + { + "epoch": 0.9304820095044127, + "grad_norm": 0.27130371236025036, + "learning_rate": 1.263642553679939e-07, + "loss": 0.4526, + "step": 20559 + }, + { + "epoch": 0.9305272686128083, + "grad_norm": 0.2867604133985603, + "learning_rate": 1.2620057411576568e-07, + "loss": 0.4429, + "step": 20560 + }, + { + "epoch": 0.9305725277212039, + "grad_norm": 0.7368223557648287, + "learning_rate": 1.2603699758651888e-07, + "loss": 0.3223, + "step": 20561 + }, + { + "epoch": 0.9306177868295995, + "grad_norm": 0.5805378186525145, + "learning_rate": 1.2587352578376787e-07, + "loss": 0.3017, + "step": 20562 + }, + { + "epoch": 0.930663045937995, + "grad_norm": 0.28028151826188114, + "learning_rate": 1.2571015871102433e-07, + "loss": 0.4467, + "step": 20563 + }, + { + "epoch": 0.9307083050463906, + "grad_norm": 0.28380587605428187, + "learning_rate": 1.2554689637179984e-07, + "loss": 0.4622, + "step": 20564 + }, + { + "epoch": 0.9307535641547862, + "grad_norm": 0.6322006217264248, + "learning_rate": 1.2538373876960162e-07, + "loss": 0.2572, + "step": 20565 + }, + { + "epoch": 0.9307988232631818, + "grad_norm": 0.28134304880944516, + "learning_rate": 1.2522068590793578e-07, + "loss": 0.4756, + "step": 20566 + }, + { + "epoch": 0.9308440823715772, + "grad_norm": 0.5847147524196179, + "learning_rate": 1.2505773779030562e-07, + "loss": 0.342, + "step": 20567 + }, + { + "epoch": 0.9308893414799728, + "grad_norm": 0.6099517197345848, + "learning_rate": 1.2489489442021275e-07, + "loss": 0.28, + "step": 20568 + }, + { + "epoch": 0.9309346005883684, + "grad_norm": 0.3126201811819335, + "learning_rate": 1.2473215580115493e-07, + "loss": 0.4718, + "step": 20569 + }, + { + "epoch": 0.930979859696764, + "grad_norm": 0.5750391423215298, + "learning_rate": 1.2456952193663052e-07, + "loss": 0.3037, + "step": 20570 + }, + { + "epoch": 0.9310251188051596, + "grad_norm": 0.613391861418245, + "learning_rate": 1.2440699283013335e-07, + "loss": 0.3163, + "step": 20571 + }, + { + "epoch": 0.9310703779135551, + "grad_norm": 0.5927581691696879, + "learning_rate": 1.2424456848515565e-07, + "loss": 0.27, + "step": 20572 + }, + { + "epoch": 0.9311156370219507, + "grad_norm": 0.6240474780698677, + "learning_rate": 1.2408224890518683e-07, + "loss": 0.2794, + "step": 20573 + }, + { + "epoch": 0.9311608961303462, + "grad_norm": 0.6195892179866072, + "learning_rate": 1.2392003409371578e-07, + "loss": 0.3358, + "step": 20574 + }, + { + "epoch": 0.9312061552387418, + "grad_norm": 0.6295648159672592, + "learning_rate": 1.2375792405422748e-07, + "loss": 0.2981, + "step": 20575 + }, + { + "epoch": 0.9312514143471373, + "grad_norm": 0.6303571979522588, + "learning_rate": 1.2359591879020528e-07, + "loss": 0.3139, + "step": 20576 + }, + { + "epoch": 0.9312966734555329, + "grad_norm": 0.2804170390755626, + "learning_rate": 1.2343401830512914e-07, + "loss": 0.4946, + "step": 20577 + }, + { + "epoch": 0.9313419325639285, + "grad_norm": 0.2491879000750708, + "learning_rate": 1.232722226024796e-07, + "loss": 0.4687, + "step": 20578 + }, + { + "epoch": 0.9313871916723241, + "grad_norm": 0.5779750601252036, + "learning_rate": 1.231105316857323e-07, + "loss": 0.2897, + "step": 20579 + }, + { + "epoch": 0.9314324507807196, + "grad_norm": 0.6781888687374773, + "learning_rate": 1.22948945558361e-07, + "loss": 0.3061, + "step": 20580 + }, + { + "epoch": 0.9314777098891152, + "grad_norm": 0.5907780247980898, + "learning_rate": 1.2278746422383858e-07, + "loss": 0.2441, + "step": 20581 + }, + { + "epoch": 0.9315229689975107, + "grad_norm": 0.5702829526712206, + "learning_rate": 1.226260876856339e-07, + "loss": 0.2912, + "step": 20582 + }, + { + "epoch": 0.9315682281059063, + "grad_norm": 0.23769228889810345, + "learning_rate": 1.2246481594721582e-07, + "loss": 0.4518, + "step": 20583 + }, + { + "epoch": 0.9316134872143019, + "grad_norm": 0.6490998239245681, + "learning_rate": 1.2230364901204773e-07, + "loss": 0.2644, + "step": 20584 + }, + { + "epoch": 0.9316587463226974, + "grad_norm": 0.5921873669323882, + "learning_rate": 1.2214258688359347e-07, + "loss": 0.2807, + "step": 20585 + }, + { + "epoch": 0.931704005431093, + "grad_norm": 0.6513329181124101, + "learning_rate": 1.2198162956531423e-07, + "loss": 0.2608, + "step": 20586 + }, + { + "epoch": 0.9317492645394886, + "grad_norm": 0.5945147095377089, + "learning_rate": 1.2182077706066776e-07, + "loss": 0.2831, + "step": 20587 + }, + { + "epoch": 0.9317945236478842, + "grad_norm": 0.27614195415263953, + "learning_rate": 1.2166002937311128e-07, + "loss": 0.4728, + "step": 20588 + }, + { + "epoch": 0.9318397827562797, + "grad_norm": 0.5984964831266933, + "learning_rate": 1.2149938650609704e-07, + "loss": 0.2974, + "step": 20589 + }, + { + "epoch": 0.9318850418646752, + "grad_norm": 0.5825886799572433, + "learning_rate": 1.2133884846307898e-07, + "loss": 0.2943, + "step": 20590 + }, + { + "epoch": 0.9319303009730708, + "grad_norm": 0.6139118481888114, + "learning_rate": 1.2117841524750485e-07, + "loss": 0.3212, + "step": 20591 + }, + { + "epoch": 0.9319755600814664, + "grad_norm": 0.6098701406625552, + "learning_rate": 1.210180868628219e-07, + "loss": 0.2863, + "step": 20592 + }, + { + "epoch": 0.9320208191898619, + "grad_norm": 0.6222401621125467, + "learning_rate": 1.2085786331247574e-07, + "loss": 0.2707, + "step": 20593 + }, + { + "epoch": 0.9320660782982575, + "grad_norm": 0.6321271350748866, + "learning_rate": 1.206977445999097e-07, + "loss": 0.2575, + "step": 20594 + }, + { + "epoch": 0.9321113374066531, + "grad_norm": 0.65663036194109, + "learning_rate": 1.2053773072856323e-07, + "loss": 0.3058, + "step": 20595 + }, + { + "epoch": 0.9321565965150487, + "grad_norm": 0.5785509596715999, + "learning_rate": 1.2037782170187472e-07, + "loss": 0.2685, + "step": 20596 + }, + { + "epoch": 0.9322018556234443, + "grad_norm": 0.6303907355875169, + "learning_rate": 1.2021801752328034e-07, + "loss": 0.289, + "step": 20597 + }, + { + "epoch": 0.9322471147318397, + "grad_norm": 0.5863599732194953, + "learning_rate": 1.2005831819621284e-07, + "loss": 0.2338, + "step": 20598 + }, + { + "epoch": 0.9322923738402353, + "grad_norm": 0.610079565117258, + "learning_rate": 1.198987237241056e-07, + "loss": 0.2871, + "step": 20599 + }, + { + "epoch": 0.9323376329486309, + "grad_norm": 0.6265622624776339, + "learning_rate": 1.1973923411038646e-07, + "loss": 0.3204, + "step": 20600 + }, + { + "epoch": 0.9323828920570265, + "grad_norm": 0.6561079202368796, + "learning_rate": 1.195798493584821e-07, + "loss": 0.2588, + "step": 20601 + }, + { + "epoch": 0.932428151165422, + "grad_norm": 0.25492769257299186, + "learning_rate": 1.1942056947181757e-07, + "loss": 0.4727, + "step": 20602 + }, + { + "epoch": 0.9324734102738176, + "grad_norm": 0.6279894540127755, + "learning_rate": 1.1926139445381624e-07, + "loss": 0.283, + "step": 20603 + }, + { + "epoch": 0.9325186693822132, + "grad_norm": 0.2566352653015581, + "learning_rate": 1.1910232430789703e-07, + "loss": 0.458, + "step": 20604 + }, + { + "epoch": 0.9325639284906088, + "grad_norm": 0.6515167565739174, + "learning_rate": 1.1894335903747834e-07, + "loss": 0.2847, + "step": 20605 + }, + { + "epoch": 0.9326091875990044, + "grad_norm": 0.6284090350465346, + "learning_rate": 1.1878449864597575e-07, + "loss": 0.3173, + "step": 20606 + }, + { + "epoch": 0.9326544467073998, + "grad_norm": 0.26375783626835564, + "learning_rate": 1.1862574313680264e-07, + "loss": 0.4519, + "step": 20607 + }, + { + "epoch": 0.9326997058157954, + "grad_norm": 0.6398174159409153, + "learning_rate": 1.1846709251337129e-07, + "loss": 0.2929, + "step": 20608 + }, + { + "epoch": 0.932744964924191, + "grad_norm": 0.2867187620695821, + "learning_rate": 1.1830854677908842e-07, + "loss": 0.4455, + "step": 20609 + }, + { + "epoch": 0.9327902240325866, + "grad_norm": 0.5787649635408306, + "learning_rate": 1.1815010593736298e-07, + "loss": 0.2664, + "step": 20610 + }, + { + "epoch": 0.9328354831409821, + "grad_norm": 0.585519096160613, + "learning_rate": 1.1799176999159722e-07, + "loss": 0.2548, + "step": 20611 + }, + { + "epoch": 0.9328807422493777, + "grad_norm": 0.26683750157452135, + "learning_rate": 1.1783353894519512e-07, + "loss": 0.4511, + "step": 20612 + }, + { + "epoch": 0.9329260013577733, + "grad_norm": 0.5801571515630817, + "learning_rate": 1.1767541280155614e-07, + "loss": 0.3128, + "step": 20613 + }, + { + "epoch": 0.9329712604661689, + "grad_norm": 0.3611265207641925, + "learning_rate": 1.1751739156407649e-07, + "loss": 0.4847, + "step": 20614 + }, + { + "epoch": 0.9330165195745643, + "grad_norm": 0.6068286561560011, + "learning_rate": 1.1735947523615344e-07, + "loss": 0.2688, + "step": 20615 + }, + { + "epoch": 0.9330617786829599, + "grad_norm": 0.5836598884521632, + "learning_rate": 1.1720166382117925e-07, + "loss": 0.2683, + "step": 20616 + }, + { + "epoch": 0.9331070377913555, + "grad_norm": 0.5905638232357641, + "learning_rate": 1.1704395732254515e-07, + "loss": 0.281, + "step": 20617 + }, + { + "epoch": 0.9331522968997511, + "grad_norm": 0.5952007858494884, + "learning_rate": 1.1688635574363894e-07, + "loss": 0.2656, + "step": 20618 + }, + { + "epoch": 0.9331975560081467, + "grad_norm": 0.6158201663415633, + "learning_rate": 1.1672885908784792e-07, + "loss": 0.286, + "step": 20619 + }, + { + "epoch": 0.9332428151165422, + "grad_norm": 0.561801502229038, + "learning_rate": 1.1657146735855662e-07, + "loss": 0.2793, + "step": 20620 + }, + { + "epoch": 0.9332880742249378, + "grad_norm": 0.6148801897922445, + "learning_rate": 1.1641418055914566e-07, + "loss": 0.3061, + "step": 20621 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 0.5864090389864688, + "learning_rate": 1.1625699869299457e-07, + "loss": 0.2931, + "step": 20622 + }, + { + "epoch": 0.9333785924417289, + "grad_norm": 0.6281392670998087, + "learning_rate": 1.1609992176348228e-07, + "loss": 0.2819, + "step": 20623 + }, + { + "epoch": 0.9334238515501244, + "grad_norm": 0.5604583332916402, + "learning_rate": 1.1594294977398224e-07, + "loss": 0.3034, + "step": 20624 + }, + { + "epoch": 0.93346911065852, + "grad_norm": 0.6333945542597627, + "learning_rate": 1.1578608272786785e-07, + "loss": 0.2666, + "step": 20625 + }, + { + "epoch": 0.9335143697669156, + "grad_norm": 0.6070115405261841, + "learning_rate": 1.1562932062851084e-07, + "loss": 0.3127, + "step": 20626 + }, + { + "epoch": 0.9335596288753112, + "grad_norm": 0.6164082922394221, + "learning_rate": 1.1547266347927743e-07, + "loss": 0.3199, + "step": 20627 + }, + { + "epoch": 0.9336048879837067, + "grad_norm": 0.5987945549595481, + "learning_rate": 1.1531611128353548e-07, + "loss": 0.3147, + "step": 20628 + }, + { + "epoch": 0.9336501470921023, + "grad_norm": 0.6618821649290977, + "learning_rate": 1.1515966404464728e-07, + "loss": 0.2713, + "step": 20629 + }, + { + "epoch": 0.9336954062004978, + "grad_norm": 0.5678115155151787, + "learning_rate": 1.1500332176597629e-07, + "loss": 0.2605, + "step": 20630 + }, + { + "epoch": 0.9337406653088934, + "grad_norm": 0.6471740693252899, + "learning_rate": 1.1484708445087978e-07, + "loss": 0.3178, + "step": 20631 + }, + { + "epoch": 0.933785924417289, + "grad_norm": 0.5827959737037237, + "learning_rate": 1.1469095210271675e-07, + "loss": 0.3047, + "step": 20632 + }, + { + "epoch": 0.9338311835256845, + "grad_norm": 0.6409107104206825, + "learning_rate": 1.1453492472484118e-07, + "loss": 0.2448, + "step": 20633 + }, + { + "epoch": 0.9338764426340801, + "grad_norm": 0.5763797274333525, + "learning_rate": 1.1437900232060483e-07, + "loss": 0.2844, + "step": 20634 + }, + { + "epoch": 0.9339217017424757, + "grad_norm": 0.6591185442707223, + "learning_rate": 1.1422318489335838e-07, + "loss": 0.3084, + "step": 20635 + }, + { + "epoch": 0.9339669608508713, + "grad_norm": 0.6101336407060521, + "learning_rate": 1.1406747244645078e-07, + "loss": 0.2699, + "step": 20636 + }, + { + "epoch": 0.9340122199592668, + "grad_norm": 0.6088189525568914, + "learning_rate": 1.1391186498322771e-07, + "loss": 0.2701, + "step": 20637 + }, + { + "epoch": 0.9340574790676623, + "grad_norm": 0.647299207439683, + "learning_rate": 1.1375636250703092e-07, + "loss": 0.2833, + "step": 20638 + }, + { + "epoch": 0.9341027381760579, + "grad_norm": 0.6330465226385166, + "learning_rate": 1.1360096502120387e-07, + "loss": 0.2807, + "step": 20639 + }, + { + "epoch": 0.9341479972844535, + "grad_norm": 0.7381215076472419, + "learning_rate": 1.1344567252908445e-07, + "loss": 0.3136, + "step": 20640 + }, + { + "epoch": 0.9341932563928491, + "grad_norm": 0.5984678510006978, + "learning_rate": 1.1329048503400996e-07, + "loss": 0.2919, + "step": 20641 + }, + { + "epoch": 0.9342385155012446, + "grad_norm": 0.2648725606387613, + "learning_rate": 1.1313540253931387e-07, + "loss": 0.452, + "step": 20642 + }, + { + "epoch": 0.9342837746096402, + "grad_norm": 0.6362303922975918, + "learning_rate": 1.1298042504832963e-07, + "loss": 0.3172, + "step": 20643 + }, + { + "epoch": 0.9343290337180358, + "grad_norm": 0.5489044672031816, + "learning_rate": 1.1282555256438622e-07, + "loss": 0.2738, + "step": 20644 + }, + { + "epoch": 0.9343742928264314, + "grad_norm": 0.6001819554223173, + "learning_rate": 1.1267078509081209e-07, + "loss": 0.2534, + "step": 20645 + }, + { + "epoch": 0.9344195519348268, + "grad_norm": 0.5818315990671367, + "learning_rate": 1.1251612263093292e-07, + "loss": 0.2834, + "step": 20646 + }, + { + "epoch": 0.9344648110432224, + "grad_norm": 0.6244052785314712, + "learning_rate": 1.1236156518807106e-07, + "loss": 0.3121, + "step": 20647 + }, + { + "epoch": 0.934510070151618, + "grad_norm": 0.2778742047993764, + "learning_rate": 1.1220711276554775e-07, + "loss": 0.4486, + "step": 20648 + }, + { + "epoch": 0.9345553292600136, + "grad_norm": 0.6124342133392783, + "learning_rate": 1.1205276536668252e-07, + "loss": 0.2993, + "step": 20649 + }, + { + "epoch": 0.9346005883684091, + "grad_norm": 0.5638354052595087, + "learning_rate": 1.118985229947911e-07, + "loss": 0.2454, + "step": 20650 + }, + { + "epoch": 0.9346458474768047, + "grad_norm": 0.6008229714251444, + "learning_rate": 1.1174438565318691e-07, + "loss": 0.2681, + "step": 20651 + }, + { + "epoch": 0.9346911065852003, + "grad_norm": 0.6160137116835261, + "learning_rate": 1.1159035334518343e-07, + "loss": 0.2764, + "step": 20652 + }, + { + "epoch": 0.9347363656935959, + "grad_norm": 0.6336472871929654, + "learning_rate": 1.1143642607409023e-07, + "loss": 0.2865, + "step": 20653 + }, + { + "epoch": 0.9347816248019915, + "grad_norm": 0.6108194372529918, + "learning_rate": 1.11282603843213e-07, + "loss": 0.2658, + "step": 20654 + }, + { + "epoch": 0.9348268839103869, + "grad_norm": 0.2761153769668537, + "learning_rate": 1.1112888665585852e-07, + "loss": 0.4549, + "step": 20655 + }, + { + "epoch": 0.9348721430187825, + "grad_norm": 0.6343527944142431, + "learning_rate": 1.109752745153292e-07, + "loss": 0.3186, + "step": 20656 + }, + { + "epoch": 0.9349174021271781, + "grad_norm": 0.5965075668169835, + "learning_rate": 1.1082176742492623e-07, + "loss": 0.2781, + "step": 20657 + }, + { + "epoch": 0.9349626612355737, + "grad_norm": 0.6259405292833713, + "learning_rate": 1.1066836538794645e-07, + "loss": 0.2521, + "step": 20658 + }, + { + "epoch": 0.9350079203439692, + "grad_norm": 0.6056507338546241, + "learning_rate": 1.1051506840768833e-07, + "loss": 0.3014, + "step": 20659 + }, + { + "epoch": 0.9350531794523648, + "grad_norm": 0.5962640258792247, + "learning_rate": 1.1036187648744311e-07, + "loss": 0.3197, + "step": 20660 + }, + { + "epoch": 0.9350984385607604, + "grad_norm": 0.28611351228558085, + "learning_rate": 1.1020878963050485e-07, + "loss": 0.4817, + "step": 20661 + }, + { + "epoch": 0.935143697669156, + "grad_norm": 0.8179938486173777, + "learning_rate": 1.10055807840162e-07, + "loss": 0.3086, + "step": 20662 + }, + { + "epoch": 0.9351889567775514, + "grad_norm": 0.6348754369470706, + "learning_rate": 1.0990293111970085e-07, + "loss": 0.2467, + "step": 20663 + }, + { + "epoch": 0.935234215885947, + "grad_norm": 0.6353518229408754, + "learning_rate": 1.0975015947240652e-07, + "loss": 0.2806, + "step": 20664 + }, + { + "epoch": 0.9352794749943426, + "grad_norm": 0.5646218500174285, + "learning_rate": 1.0959749290156307e-07, + "loss": 0.2838, + "step": 20665 + }, + { + "epoch": 0.9353247341027382, + "grad_norm": 0.6067841157078296, + "learning_rate": 1.0944493141044953e-07, + "loss": 0.3028, + "step": 20666 + }, + { + "epoch": 0.9353699932111338, + "grad_norm": 0.6727425150929137, + "learning_rate": 1.0929247500234386e-07, + "loss": 0.3187, + "step": 20667 + }, + { + "epoch": 0.9354152523195293, + "grad_norm": 0.5660911183506132, + "learning_rate": 1.0914012368052229e-07, + "loss": 0.2772, + "step": 20668 + }, + { + "epoch": 0.9354605114279249, + "grad_norm": 0.26176590366847236, + "learning_rate": 1.0898787744825833e-07, + "loss": 0.4611, + "step": 20669 + }, + { + "epoch": 0.9355057705363204, + "grad_norm": 0.6051240123558446, + "learning_rate": 1.0883573630882327e-07, + "loss": 0.3043, + "step": 20670 + }, + { + "epoch": 0.935551029644716, + "grad_norm": 0.28517697737707726, + "learning_rate": 1.086837002654867e-07, + "loss": 0.4457, + "step": 20671 + }, + { + "epoch": 0.9355962887531115, + "grad_norm": 0.6380053084611067, + "learning_rate": 1.0853176932151432e-07, + "loss": 0.283, + "step": 20672 + }, + { + "epoch": 0.9356415478615071, + "grad_norm": 0.6196496503170629, + "learning_rate": 1.0837994348017133e-07, + "loss": 0.2406, + "step": 20673 + }, + { + "epoch": 0.9356868069699027, + "grad_norm": 0.5589782837257363, + "learning_rate": 1.0822822274472011e-07, + "loss": 0.32, + "step": 20674 + }, + { + "epoch": 0.9357320660782983, + "grad_norm": 0.6107204778054238, + "learning_rate": 1.0807660711842027e-07, + "loss": 0.326, + "step": 20675 + }, + { + "epoch": 0.9357773251866939, + "grad_norm": 0.5624460182654537, + "learning_rate": 1.0792509660452921e-07, + "loss": 0.2679, + "step": 20676 + }, + { + "epoch": 0.9358225842950894, + "grad_norm": 0.6935165199403789, + "learning_rate": 1.0777369120630377e-07, + "loss": 0.2483, + "step": 20677 + }, + { + "epoch": 0.935867843403485, + "grad_norm": 0.6135992877109803, + "learning_rate": 1.0762239092699633e-07, + "loss": 0.2961, + "step": 20678 + }, + { + "epoch": 0.9359131025118805, + "grad_norm": 0.5876182716494162, + "learning_rate": 1.0747119576985765e-07, + "loss": 0.3076, + "step": 20679 + }, + { + "epoch": 0.9359583616202761, + "grad_norm": 0.6489565993731783, + "learning_rate": 1.0732010573813623e-07, + "loss": 0.2848, + "step": 20680 + }, + { + "epoch": 0.9360036207286716, + "grad_norm": 0.6246061102068522, + "learning_rate": 1.0716912083508003e-07, + "loss": 0.3443, + "step": 20681 + }, + { + "epoch": 0.9360488798370672, + "grad_norm": 0.2884324669723961, + "learning_rate": 1.07018241063932e-07, + "loss": 0.4986, + "step": 20682 + }, + { + "epoch": 0.9360941389454628, + "grad_norm": 0.29179013837324413, + "learning_rate": 1.06867466427934e-07, + "loss": 0.4963, + "step": 20683 + }, + { + "epoch": 0.9361393980538584, + "grad_norm": 0.6290104692273423, + "learning_rate": 1.0671679693032621e-07, + "loss": 0.3042, + "step": 20684 + }, + { + "epoch": 0.9361846571622539, + "grad_norm": 0.5833963754232364, + "learning_rate": 1.0656623257434551e-07, + "loss": 0.3001, + "step": 20685 + }, + { + "epoch": 0.9362299162706494, + "grad_norm": 0.2653414681907125, + "learning_rate": 1.0641577336322761e-07, + "loss": 0.4641, + "step": 20686 + }, + { + "epoch": 0.936275175379045, + "grad_norm": 0.271146481973984, + "learning_rate": 1.0626541930020551e-07, + "loss": 0.4616, + "step": 20687 + }, + { + "epoch": 0.9363204344874406, + "grad_norm": 0.6061218869882479, + "learning_rate": 1.0611517038850938e-07, + "loss": 0.2942, + "step": 20688 + }, + { + "epoch": 0.9363656935958362, + "grad_norm": 0.6484075906920178, + "learning_rate": 1.0596502663136776e-07, + "loss": 0.305, + "step": 20689 + }, + { + "epoch": 0.9364109527042317, + "grad_norm": 0.6201439884226438, + "learning_rate": 1.0581498803200696e-07, + "loss": 0.2868, + "step": 20690 + }, + { + "epoch": 0.9364562118126273, + "grad_norm": 0.26360757034570326, + "learning_rate": 1.0566505459365106e-07, + "loss": 0.4628, + "step": 20691 + }, + { + "epoch": 0.9365014709210229, + "grad_norm": 0.2568003036889309, + "learning_rate": 1.0551522631952083e-07, + "loss": 0.4424, + "step": 20692 + }, + { + "epoch": 0.9365467300294185, + "grad_norm": 0.670806761610968, + "learning_rate": 1.0536550321283589e-07, + "loss": 0.2877, + "step": 20693 + }, + { + "epoch": 0.9365919891378139, + "grad_norm": 0.26251251174654255, + "learning_rate": 1.0521588527681426e-07, + "loss": 0.461, + "step": 20694 + }, + { + "epoch": 0.9366372482462095, + "grad_norm": 0.62146968516843, + "learning_rate": 1.0506637251467e-07, + "loss": 0.3156, + "step": 20695 + }, + { + "epoch": 0.9366825073546051, + "grad_norm": 0.6042091551748556, + "learning_rate": 1.0491696492961501e-07, + "loss": 0.2895, + "step": 20696 + }, + { + "epoch": 0.9367277664630007, + "grad_norm": 0.6251271004901205, + "learning_rate": 1.0476766252486114e-07, + "loss": 0.3159, + "step": 20697 + }, + { + "epoch": 0.9367730255713962, + "grad_norm": 0.7420180169744252, + "learning_rate": 1.046184653036153e-07, + "loss": 0.2795, + "step": 20698 + }, + { + "epoch": 0.9368182846797918, + "grad_norm": 0.5907238212284458, + "learning_rate": 1.044693732690838e-07, + "loss": 0.2615, + "step": 20699 + }, + { + "epoch": 0.9368635437881874, + "grad_norm": 0.5952101244183566, + "learning_rate": 1.0432038642446962e-07, + "loss": 0.3017, + "step": 20700 + }, + { + "epoch": 0.936908802896583, + "grad_norm": 0.6378778314486471, + "learning_rate": 1.0417150477297466e-07, + "loss": 0.2927, + "step": 20701 + }, + { + "epoch": 0.9369540620049785, + "grad_norm": 0.6421025040459815, + "learning_rate": 1.0402272831779747e-07, + "loss": 0.2539, + "step": 20702 + }, + { + "epoch": 0.936999321113374, + "grad_norm": 0.6106293661526145, + "learning_rate": 1.038740570621355e-07, + "loss": 0.2801, + "step": 20703 + }, + { + "epoch": 0.9370445802217696, + "grad_norm": 0.5933251946238154, + "learning_rate": 1.0372549100918283e-07, + "loss": 0.2888, + "step": 20704 + }, + { + "epoch": 0.9370898393301652, + "grad_norm": 0.6368085204757286, + "learning_rate": 1.0357703016213083e-07, + "loss": 0.3189, + "step": 20705 + }, + { + "epoch": 0.9371350984385608, + "grad_norm": 0.8117745628825518, + "learning_rate": 1.0342867452417027e-07, + "loss": 0.2458, + "step": 20706 + }, + { + "epoch": 0.9371803575469563, + "grad_norm": 0.593644986221223, + "learning_rate": 1.0328042409849026e-07, + "loss": 0.3155, + "step": 20707 + }, + { + "epoch": 0.9372256166553519, + "grad_norm": 0.6291442440715559, + "learning_rate": 1.0313227888827326e-07, + "loss": 0.314, + "step": 20708 + }, + { + "epoch": 0.9372708757637475, + "grad_norm": 0.5833471343156812, + "learning_rate": 1.0298423889670395e-07, + "loss": 0.3022, + "step": 20709 + }, + { + "epoch": 0.937316134872143, + "grad_norm": 0.6802737846945168, + "learning_rate": 1.0283630412696422e-07, + "loss": 0.3423, + "step": 20710 + }, + { + "epoch": 0.9373613939805386, + "grad_norm": 0.6108634600364936, + "learning_rate": 1.0268847458223152e-07, + "loss": 0.2871, + "step": 20711 + }, + { + "epoch": 0.9374066530889341, + "grad_norm": 0.6103729859109673, + "learning_rate": 1.0254075026568222e-07, + "loss": 0.2992, + "step": 20712 + }, + { + "epoch": 0.9374519121973297, + "grad_norm": 0.68933196891107, + "learning_rate": 1.0239313118049155e-07, + "loss": 0.3295, + "step": 20713 + }, + { + "epoch": 0.9374971713057253, + "grad_norm": 0.6171769175551642, + "learning_rate": 1.0224561732982973e-07, + "loss": 0.3062, + "step": 20714 + }, + { + "epoch": 0.9375424304141209, + "grad_norm": 0.6477669112440486, + "learning_rate": 1.0209820871686816e-07, + "loss": 0.2867, + "step": 20715 + }, + { + "epoch": 0.9375876895225164, + "grad_norm": 0.5893855899352083, + "learning_rate": 1.0195090534477258e-07, + "loss": 0.2634, + "step": 20716 + }, + { + "epoch": 0.937632948630912, + "grad_norm": 0.5858031388266357, + "learning_rate": 1.0180370721670941e-07, + "loss": 0.29, + "step": 20717 + }, + { + "epoch": 0.9376782077393075, + "grad_norm": 0.6179295544108785, + "learning_rate": 1.0165661433583996e-07, + "loss": 0.2734, + "step": 20718 + }, + { + "epoch": 0.9377234668477031, + "grad_norm": 0.588071945852166, + "learning_rate": 1.0150962670532671e-07, + "loss": 0.2639, + "step": 20719 + }, + { + "epoch": 0.9377687259560986, + "grad_norm": 0.654440332520569, + "learning_rate": 1.0136274432832715e-07, + "loss": 0.3177, + "step": 20720 + }, + { + "epoch": 0.9378139850644942, + "grad_norm": 0.6446000731328926, + "learning_rate": 1.0121596720799653e-07, + "loss": 0.3317, + "step": 20721 + }, + { + "epoch": 0.9378592441728898, + "grad_norm": 0.6274830605070559, + "learning_rate": 1.01069295347489e-07, + "loss": 0.2716, + "step": 20722 + }, + { + "epoch": 0.9379045032812854, + "grad_norm": 0.571998916266495, + "learning_rate": 1.00922728749957e-07, + "loss": 0.2628, + "step": 20723 + }, + { + "epoch": 0.937949762389681, + "grad_norm": 0.2695101803328294, + "learning_rate": 1.0077626741854973e-07, + "loss": 0.4834, + "step": 20724 + }, + { + "epoch": 0.9379950214980765, + "grad_norm": 0.605191147919929, + "learning_rate": 1.0062991135641242e-07, + "loss": 0.2976, + "step": 20725 + }, + { + "epoch": 0.938040280606472, + "grad_norm": 0.6077927199877912, + "learning_rate": 1.0048366056669201e-07, + "loss": 0.253, + "step": 20726 + }, + { + "epoch": 0.9380855397148676, + "grad_norm": 0.2903158097699309, + "learning_rate": 1.0033751505252987e-07, + "loss": 0.4636, + "step": 20727 + }, + { + "epoch": 0.9381307988232632, + "grad_norm": 0.5420678267629183, + "learning_rate": 1.0019147481706626e-07, + "loss": 0.2904, + "step": 20728 + }, + { + "epoch": 0.9381760579316587, + "grad_norm": 0.566633524220132, + "learning_rate": 1.0004553986343868e-07, + "loss": 0.3297, + "step": 20729 + }, + { + "epoch": 0.9382213170400543, + "grad_norm": 0.6372132574695685, + "learning_rate": 9.989971019478406e-08, + "loss": 0.2887, + "step": 20730 + }, + { + "epoch": 0.9382665761484499, + "grad_norm": 0.5991609239206822, + "learning_rate": 9.97539858142349e-08, + "loss": 0.2905, + "step": 20731 + }, + { + "epoch": 0.9383118352568455, + "grad_norm": 0.6303969117656907, + "learning_rate": 9.960836672492313e-08, + "loss": 0.294, + "step": 20732 + }, + { + "epoch": 0.938357094365241, + "grad_norm": 0.6249848427120213, + "learning_rate": 9.946285292997681e-08, + "loss": 0.2889, + "step": 20733 + }, + { + "epoch": 0.9384023534736365, + "grad_norm": 0.2906943402790951, + "learning_rate": 9.931744443252234e-08, + "loss": 0.4659, + "step": 20734 + }, + { + "epoch": 0.9384476125820321, + "grad_norm": 0.601588500629499, + "learning_rate": 9.917214123568498e-08, + "loss": 0.2762, + "step": 20735 + }, + { + "epoch": 0.9384928716904277, + "grad_norm": 0.5907351514132653, + "learning_rate": 9.902694334258722e-08, + "loss": 0.2967, + "step": 20736 + }, + { + "epoch": 0.9385381307988233, + "grad_norm": 0.6572000147313092, + "learning_rate": 9.88818507563477e-08, + "loss": 0.2877, + "step": 20737 + }, + { + "epoch": 0.9385833899072188, + "grad_norm": 0.5992227687883745, + "learning_rate": 9.873686348008448e-08, + "loss": 0.29, + "step": 20738 + }, + { + "epoch": 0.9386286490156144, + "grad_norm": 0.578189315166507, + "learning_rate": 9.859198151691341e-08, + "loss": 0.2909, + "step": 20739 + }, + { + "epoch": 0.93867390812401, + "grad_norm": 0.6107110327904092, + "learning_rate": 9.844720486994752e-08, + "loss": 0.2614, + "step": 20740 + }, + { + "epoch": 0.9387191672324056, + "grad_norm": 0.6555056399941228, + "learning_rate": 9.830253354229601e-08, + "loss": 0.2667, + "step": 20741 + }, + { + "epoch": 0.938764426340801, + "grad_norm": 0.6031419210418268, + "learning_rate": 9.815796753706975e-08, + "loss": 0.2976, + "step": 20742 + }, + { + "epoch": 0.9388096854491966, + "grad_norm": 0.6368408296601574, + "learning_rate": 9.801350685737288e-08, + "loss": 0.2762, + "step": 20743 + }, + { + "epoch": 0.9388549445575922, + "grad_norm": 0.2690041210892171, + "learning_rate": 9.786915150631126e-08, + "loss": 0.4666, + "step": 20744 + }, + { + "epoch": 0.9389002036659878, + "grad_norm": 0.2698433370702812, + "learning_rate": 9.772490148698522e-08, + "loss": 0.4607, + "step": 20745 + }, + { + "epoch": 0.9389454627743834, + "grad_norm": 0.690187986117633, + "learning_rate": 9.758075680249556e-08, + "loss": 0.3069, + "step": 20746 + }, + { + "epoch": 0.9389907218827789, + "grad_norm": 0.5945569657567784, + "learning_rate": 9.743671745593819e-08, + "loss": 0.2957, + "step": 20747 + }, + { + "epoch": 0.9390359809911745, + "grad_norm": 0.25335591426758786, + "learning_rate": 9.729278345040894e-08, + "loss": 0.4828, + "step": 20748 + }, + { + "epoch": 0.9390812400995701, + "grad_norm": 0.27672363140824363, + "learning_rate": 9.714895478900088e-08, + "loss": 0.4656, + "step": 20749 + }, + { + "epoch": 0.9391264992079656, + "grad_norm": 0.6777892512541681, + "learning_rate": 9.700523147480267e-08, + "loss": 0.3492, + "step": 20750 + }, + { + "epoch": 0.9391717583163611, + "grad_norm": 0.6734370034072383, + "learning_rate": 9.686161351090407e-08, + "loss": 0.2834, + "step": 20751 + }, + { + "epoch": 0.9392170174247567, + "grad_norm": 0.6072316263959454, + "learning_rate": 9.671810090039091e-08, + "loss": 0.2997, + "step": 20752 + }, + { + "epoch": 0.9392622765331523, + "grad_norm": 0.7106171343448984, + "learning_rate": 9.65746936463463e-08, + "loss": 0.2921, + "step": 20753 + }, + { + "epoch": 0.9393075356415479, + "grad_norm": 0.6342138174196997, + "learning_rate": 9.643139175185168e-08, + "loss": 0.3073, + "step": 20754 + }, + { + "epoch": 0.9393527947499434, + "grad_norm": 0.6157424495113403, + "learning_rate": 9.628819521998622e-08, + "loss": 0.3091, + "step": 20755 + }, + { + "epoch": 0.939398053858339, + "grad_norm": 0.6602136680439613, + "learning_rate": 9.614510405382693e-08, + "loss": 0.2859, + "step": 20756 + }, + { + "epoch": 0.9394433129667346, + "grad_norm": 0.5572487286447867, + "learning_rate": 9.600211825644856e-08, + "loss": 0.2603, + "step": 20757 + }, + { + "epoch": 0.9394885720751301, + "grad_norm": 0.261804490907496, + "learning_rate": 9.585923783092255e-08, + "loss": 0.4781, + "step": 20758 + }, + { + "epoch": 0.9395338311835257, + "grad_norm": 0.2599046113400412, + "learning_rate": 9.571646278032032e-08, + "loss": 0.4741, + "step": 20759 + }, + { + "epoch": 0.9395790902919212, + "grad_norm": 0.6160630272152418, + "learning_rate": 9.557379310770831e-08, + "loss": 0.3133, + "step": 20760 + }, + { + "epoch": 0.9396243494003168, + "grad_norm": 0.26545070340134724, + "learning_rate": 9.543122881615297e-08, + "loss": 0.4767, + "step": 20761 + }, + { + "epoch": 0.9396696085087124, + "grad_norm": 0.27299182989107457, + "learning_rate": 9.528876990871793e-08, + "loss": 0.4777, + "step": 20762 + }, + { + "epoch": 0.939714867617108, + "grad_norm": 0.6274370211652937, + "learning_rate": 9.514641638846245e-08, + "loss": 0.2666, + "step": 20763 + }, + { + "epoch": 0.9397601267255035, + "grad_norm": 0.26111906070225804, + "learning_rate": 9.500416825844682e-08, + "loss": 0.4652, + "step": 20764 + }, + { + "epoch": 0.939805385833899, + "grad_norm": 0.6329097033253354, + "learning_rate": 9.486202552172697e-08, + "loss": 0.288, + "step": 20765 + }, + { + "epoch": 0.9398506449422946, + "grad_norm": 0.25605798885576536, + "learning_rate": 9.471998818135764e-08, + "loss": 0.451, + "step": 20766 + }, + { + "epoch": 0.9398959040506902, + "grad_norm": 0.5778371870857809, + "learning_rate": 9.457805624038974e-08, + "loss": 0.2861, + "step": 20767 + }, + { + "epoch": 0.9399411631590857, + "grad_norm": 0.6041845447583947, + "learning_rate": 9.443622970187415e-08, + "loss": 0.2864, + "step": 20768 + }, + { + "epoch": 0.9399864222674813, + "grad_norm": 0.6005423832736728, + "learning_rate": 9.429450856885736e-08, + "loss": 0.2925, + "step": 20769 + }, + { + "epoch": 0.9400316813758769, + "grad_norm": 0.6072185585444965, + "learning_rate": 9.415289284438523e-08, + "loss": 0.2868, + "step": 20770 + }, + { + "epoch": 0.9400769404842725, + "grad_norm": 0.5844322503894608, + "learning_rate": 9.401138253149977e-08, + "loss": 0.2903, + "step": 20771 + }, + { + "epoch": 0.9401221995926681, + "grad_norm": 0.5930622865313223, + "learning_rate": 9.386997763324246e-08, + "loss": 0.3265, + "step": 20772 + }, + { + "epoch": 0.9401674587010636, + "grad_norm": 0.5847618141175167, + "learning_rate": 9.372867815265085e-08, + "loss": 0.2775, + "step": 20773 + }, + { + "epoch": 0.9402127178094591, + "grad_norm": 0.2686276723017246, + "learning_rate": 9.358748409276196e-08, + "loss": 0.4778, + "step": 20774 + }, + { + "epoch": 0.9402579769178547, + "grad_norm": 0.2631650236530227, + "learning_rate": 9.34463954566095e-08, + "loss": 0.4637, + "step": 20775 + }, + { + "epoch": 0.9403032360262503, + "grad_norm": 0.2683016572122838, + "learning_rate": 9.330541224722378e-08, + "loss": 0.4509, + "step": 20776 + }, + { + "epoch": 0.9403484951346458, + "grad_norm": 0.7865247979900946, + "learning_rate": 9.316453446763518e-08, + "loss": 0.2846, + "step": 20777 + }, + { + "epoch": 0.9403937542430414, + "grad_norm": 0.7200645826034856, + "learning_rate": 9.302376212087128e-08, + "loss": 0.2904, + "step": 20778 + }, + { + "epoch": 0.940439013351437, + "grad_norm": 0.24627763004895964, + "learning_rate": 9.28830952099552e-08, + "loss": 0.4622, + "step": 20779 + }, + { + "epoch": 0.9404842724598326, + "grad_norm": 0.8043361758815954, + "learning_rate": 9.274253373791064e-08, + "loss": 0.289, + "step": 20780 + }, + { + "epoch": 0.9405295315682282, + "grad_norm": 0.6579035360584158, + "learning_rate": 9.260207770775742e-08, + "loss": 0.2704, + "step": 20781 + }, + { + "epoch": 0.9405747906766236, + "grad_norm": 0.6009245769666856, + "learning_rate": 9.246172712251422e-08, + "loss": 0.2917, + "step": 20782 + }, + { + "epoch": 0.9406200497850192, + "grad_norm": 0.6449447415026867, + "learning_rate": 9.23214819851953e-08, + "loss": 0.2859, + "step": 20783 + }, + { + "epoch": 0.9406653088934148, + "grad_norm": 0.7220785813494247, + "learning_rate": 9.218134229881548e-08, + "loss": 0.2819, + "step": 20784 + }, + { + "epoch": 0.9407105680018104, + "grad_norm": 0.6103242402975791, + "learning_rate": 9.204130806638511e-08, + "loss": 0.2776, + "step": 20785 + }, + { + "epoch": 0.9407558271102059, + "grad_norm": 0.6693346951778233, + "learning_rate": 9.190137929091403e-08, + "loss": 0.3184, + "step": 20786 + }, + { + "epoch": 0.9408010862186015, + "grad_norm": 0.2612038012801092, + "learning_rate": 9.176155597540759e-08, + "loss": 0.4541, + "step": 20787 + }, + { + "epoch": 0.9408463453269971, + "grad_norm": 0.5896828998572881, + "learning_rate": 9.162183812287117e-08, + "loss": 0.2876, + "step": 20788 + }, + { + "epoch": 0.9408916044353927, + "grad_norm": 0.6221679556915928, + "learning_rate": 9.148222573630572e-08, + "loss": 0.2627, + "step": 20789 + }, + { + "epoch": 0.9409368635437881, + "grad_norm": 0.6494938453249083, + "learning_rate": 9.13427188187127e-08, + "loss": 0.3288, + "step": 20790 + }, + { + "epoch": 0.9409821226521837, + "grad_norm": 0.2865355009159929, + "learning_rate": 9.120331737308919e-08, + "loss": 0.453, + "step": 20791 + }, + { + "epoch": 0.9410273817605793, + "grad_norm": 0.6116692277411361, + "learning_rate": 9.106402140242943e-08, + "loss": 0.3156, + "step": 20792 + }, + { + "epoch": 0.9410726408689749, + "grad_norm": 0.5744247951556596, + "learning_rate": 9.092483090972714e-08, + "loss": 0.2678, + "step": 20793 + }, + { + "epoch": 0.9411178999773705, + "grad_norm": 0.5521819457940438, + "learning_rate": 9.078574589797329e-08, + "loss": 0.2615, + "step": 20794 + }, + { + "epoch": 0.941163159085766, + "grad_norm": 0.9947077639779605, + "learning_rate": 9.064676637015656e-08, + "loss": 0.2676, + "step": 20795 + }, + { + "epoch": 0.9412084181941616, + "grad_norm": 0.28656657329167173, + "learning_rate": 9.050789232926293e-08, + "loss": 0.4613, + "step": 20796 + }, + { + "epoch": 0.9412536773025572, + "grad_norm": 0.6196670981555568, + "learning_rate": 9.036912377827611e-08, + "loss": 0.2707, + "step": 20797 + }, + { + "epoch": 0.9412989364109527, + "grad_norm": 0.6390512291005944, + "learning_rate": 9.023046072017761e-08, + "loss": 0.2722, + "step": 20798 + }, + { + "epoch": 0.9413441955193482, + "grad_norm": 0.6302381460021429, + "learning_rate": 9.009190315794835e-08, + "loss": 0.308, + "step": 20799 + }, + { + "epoch": 0.9413894546277438, + "grad_norm": 0.6638656744628239, + "learning_rate": 8.995345109456377e-08, + "loss": 0.2708, + "step": 20800 + }, + { + "epoch": 0.9414347137361394, + "grad_norm": 0.6004928538328558, + "learning_rate": 8.981510453299925e-08, + "loss": 0.3135, + "step": 20801 + }, + { + "epoch": 0.941479972844535, + "grad_norm": 0.6447108416431894, + "learning_rate": 8.967686347622795e-08, + "loss": 0.3093, + "step": 20802 + }, + { + "epoch": 0.9415252319529305, + "grad_norm": 0.5841072682650577, + "learning_rate": 8.953872792722029e-08, + "loss": 0.2828, + "step": 20803 + }, + { + "epoch": 0.9415704910613261, + "grad_norm": 0.26468154786651854, + "learning_rate": 8.940069788894389e-08, + "loss": 0.4916, + "step": 20804 + }, + { + "epoch": 0.9416157501697217, + "grad_norm": 0.6191241165888246, + "learning_rate": 8.926277336436417e-08, + "loss": 0.2887, + "step": 20805 + }, + { + "epoch": 0.9416610092781172, + "grad_norm": 0.5628097473321694, + "learning_rate": 8.912495435644542e-08, + "loss": 0.2721, + "step": 20806 + }, + { + "epoch": 0.9417062683865128, + "grad_norm": 0.6086306229708609, + "learning_rate": 8.898724086814969e-08, + "loss": 0.2938, + "step": 20807 + }, + { + "epoch": 0.9417515274949083, + "grad_norm": 0.6401280558876904, + "learning_rate": 8.88496329024341e-08, + "loss": 0.3095, + "step": 20808 + }, + { + "epoch": 0.9417967866033039, + "grad_norm": 0.616183908277212, + "learning_rate": 8.87121304622568e-08, + "loss": 0.3041, + "step": 20809 + }, + { + "epoch": 0.9418420457116995, + "grad_norm": 0.5798350520600436, + "learning_rate": 8.857473355057211e-08, + "loss": 0.3106, + "step": 20810 + }, + { + "epoch": 0.9418873048200951, + "grad_norm": 0.2668041636397088, + "learning_rate": 8.843744217033212e-08, + "loss": 0.4618, + "step": 20811 + }, + { + "epoch": 0.9419325639284906, + "grad_norm": 0.5873337668301859, + "learning_rate": 8.83002563244867e-08, + "loss": 0.3185, + "step": 20812 + }, + { + "epoch": 0.9419778230368862, + "grad_norm": 0.5872641247341023, + "learning_rate": 8.816317601598346e-08, + "loss": 0.2823, + "step": 20813 + }, + { + "epoch": 0.9420230821452817, + "grad_norm": 0.6006640705752363, + "learning_rate": 8.802620124776784e-08, + "loss": 0.3097, + "step": 20814 + }, + { + "epoch": 0.9420683412536773, + "grad_norm": 0.5830047965432855, + "learning_rate": 8.78893320227836e-08, + "loss": 0.2669, + "step": 20815 + }, + { + "epoch": 0.9421136003620728, + "grad_norm": 0.6608002252496344, + "learning_rate": 8.775256834397117e-08, + "loss": 0.2908, + "step": 20816 + }, + { + "epoch": 0.9421588594704684, + "grad_norm": 0.6022366659295527, + "learning_rate": 8.761591021426929e-08, + "loss": 0.2896, + "step": 20817 + }, + { + "epoch": 0.942204118578864, + "grad_norm": 0.5838450301006088, + "learning_rate": 8.747935763661397e-08, + "loss": 0.2679, + "step": 20818 + }, + { + "epoch": 0.9422493776872596, + "grad_norm": 0.5875361382398744, + "learning_rate": 8.734291061394006e-08, + "loss": 0.279, + "step": 20819 + }, + { + "epoch": 0.9422946367956552, + "grad_norm": 0.5922955937040799, + "learning_rate": 8.720656914917858e-08, + "loss": 0.2504, + "step": 20820 + }, + { + "epoch": 0.9423398959040507, + "grad_norm": 0.5978814440316839, + "learning_rate": 8.707033324525937e-08, + "loss": 0.2884, + "step": 20821 + }, + { + "epoch": 0.9423851550124462, + "grad_norm": 0.5789905148749843, + "learning_rate": 8.693420290510957e-08, + "loss": 0.2639, + "step": 20822 + }, + { + "epoch": 0.9424304141208418, + "grad_norm": 0.6163399307257849, + "learning_rate": 8.679817813165514e-08, + "loss": 0.2994, + "step": 20823 + }, + { + "epoch": 0.9424756732292374, + "grad_norm": 0.8125102862013436, + "learning_rate": 8.666225892781765e-08, + "loss": 0.3021, + "step": 20824 + }, + { + "epoch": 0.9425209323376329, + "grad_norm": 0.5710581587433058, + "learning_rate": 8.65264452965181e-08, + "loss": 0.2927, + "step": 20825 + }, + { + "epoch": 0.9425661914460285, + "grad_norm": 0.6127986569841006, + "learning_rate": 8.63907372406747e-08, + "loss": 0.2423, + "step": 20826 + }, + { + "epoch": 0.9426114505544241, + "grad_norm": 1.494464415261979, + "learning_rate": 8.625513476320291e-08, + "loss": 0.2586, + "step": 20827 + }, + { + "epoch": 0.9426567096628197, + "grad_norm": 0.6249746398755778, + "learning_rate": 8.61196378670176e-08, + "loss": 0.2899, + "step": 20828 + }, + { + "epoch": 0.9427019687712153, + "grad_norm": 0.6167952244077558, + "learning_rate": 8.598424655502868e-08, + "loss": 0.2926, + "step": 20829 + }, + { + "epoch": 0.9427472278796107, + "grad_norm": 0.2539420689861876, + "learning_rate": 8.584896083014715e-08, + "loss": 0.4542, + "step": 20830 + }, + { + "epoch": 0.9427924869880063, + "grad_norm": 0.6555364937207958, + "learning_rate": 8.571378069527792e-08, + "loss": 0.2907, + "step": 20831 + }, + { + "epoch": 0.9428377460964019, + "grad_norm": 0.6597898779039076, + "learning_rate": 8.557870615332642e-08, + "loss": 0.3222, + "step": 20832 + }, + { + "epoch": 0.9428830052047975, + "grad_norm": 0.5962301735572672, + "learning_rate": 8.54437372071959e-08, + "loss": 0.2518, + "step": 20833 + }, + { + "epoch": 0.942928264313193, + "grad_norm": 0.6429082872294095, + "learning_rate": 8.53088738597846e-08, + "loss": 0.3197, + "step": 20834 + }, + { + "epoch": 0.9429735234215886, + "grad_norm": 0.5912302009849205, + "learning_rate": 8.517411611399129e-08, + "loss": 0.3089, + "step": 20835 + }, + { + "epoch": 0.9430187825299842, + "grad_norm": 0.25971197451149486, + "learning_rate": 8.503946397271257e-08, + "loss": 0.4697, + "step": 20836 + }, + { + "epoch": 0.9430640416383798, + "grad_norm": 0.6035761663858425, + "learning_rate": 8.490491743883944e-08, + "loss": 0.2927, + "step": 20837 + }, + { + "epoch": 0.9431093007467752, + "grad_norm": 0.6500550482103429, + "learning_rate": 8.47704765152646e-08, + "loss": 0.3457, + "step": 20838 + }, + { + "epoch": 0.9431545598551708, + "grad_norm": 0.6167534463037921, + "learning_rate": 8.463614120487629e-08, + "loss": 0.3086, + "step": 20839 + }, + { + "epoch": 0.9431998189635664, + "grad_norm": 0.6201984186786818, + "learning_rate": 8.450191151056054e-08, + "loss": 0.324, + "step": 20840 + }, + { + "epoch": 0.943245078071962, + "grad_norm": 0.6488985759841122, + "learning_rate": 8.436778743520225e-08, + "loss": 0.2899, + "step": 20841 + }, + { + "epoch": 0.9432903371803576, + "grad_norm": 0.6156545222452543, + "learning_rate": 8.423376898168246e-08, + "loss": 0.2632, + "step": 20842 + }, + { + "epoch": 0.9433355962887531, + "grad_norm": 0.6573285195409124, + "learning_rate": 8.409985615288218e-08, + "loss": 0.3041, + "step": 20843 + }, + { + "epoch": 0.9433808553971487, + "grad_norm": 0.6037230252645684, + "learning_rate": 8.396604895167748e-08, + "loss": 0.2964, + "step": 20844 + }, + { + "epoch": 0.9434261145055443, + "grad_norm": 0.5870445554468589, + "learning_rate": 8.383234738094381e-08, + "loss": 0.3154, + "step": 20845 + }, + { + "epoch": 0.9434713736139398, + "grad_norm": 0.5362564404723827, + "learning_rate": 8.3698751443555e-08, + "loss": 0.2493, + "step": 20846 + }, + { + "epoch": 0.9435166327223353, + "grad_norm": 0.6489866126328017, + "learning_rate": 8.356526114237983e-08, + "loss": 0.3024, + "step": 20847 + }, + { + "epoch": 0.9435618918307309, + "grad_norm": 0.2631653817827076, + "learning_rate": 8.343187648028772e-08, + "loss": 0.4851, + "step": 20848 + }, + { + "epoch": 0.9436071509391265, + "grad_norm": 0.6463116506629104, + "learning_rate": 8.329859746014468e-08, + "loss": 0.2714, + "step": 20849 + }, + { + "epoch": 0.9436524100475221, + "grad_norm": 0.5690519206908605, + "learning_rate": 8.316542408481398e-08, + "loss": 0.2543, + "step": 20850 + }, + { + "epoch": 0.9436976691559176, + "grad_norm": 0.5890163713323255, + "learning_rate": 8.303235635715723e-08, + "loss": 0.2665, + "step": 20851 + }, + { + "epoch": 0.9437429282643132, + "grad_norm": 0.6416623026573095, + "learning_rate": 8.289939428003491e-08, + "loss": 0.3173, + "step": 20852 + }, + { + "epoch": 0.9437881873727088, + "grad_norm": 0.5765374229975274, + "learning_rate": 8.276653785630195e-08, + "loss": 0.2747, + "step": 20853 + }, + { + "epoch": 0.9438334464811043, + "grad_norm": 0.25657991387142726, + "learning_rate": 8.263378708881443e-08, + "loss": 0.4645, + "step": 20854 + }, + { + "epoch": 0.9438787055894999, + "grad_norm": 0.5873869400501082, + "learning_rate": 8.250114198042392e-08, + "loss": 0.2769, + "step": 20855 + }, + { + "epoch": 0.9439239646978954, + "grad_norm": 0.5907343998929974, + "learning_rate": 8.236860253398094e-08, + "loss": 0.3104, + "step": 20856 + }, + { + "epoch": 0.943969223806291, + "grad_norm": 0.6388619579014826, + "learning_rate": 8.223616875233376e-08, + "loss": 0.3117, + "step": 20857 + }, + { + "epoch": 0.9440144829146866, + "grad_norm": 0.6244062370435659, + "learning_rate": 8.210384063832678e-08, + "loss": 0.2738, + "step": 20858 + }, + { + "epoch": 0.9440597420230822, + "grad_norm": 0.6299226406317449, + "learning_rate": 8.197161819480493e-08, + "loss": 0.2964, + "step": 20859 + }, + { + "epoch": 0.9441050011314777, + "grad_norm": 0.7781736528219134, + "learning_rate": 8.183950142460761e-08, + "loss": 0.2986, + "step": 20860 + }, + { + "epoch": 0.9441502602398733, + "grad_norm": 0.5846529738886026, + "learning_rate": 8.170749033057534e-08, + "loss": 0.2877, + "step": 20861 + }, + { + "epoch": 0.9441955193482688, + "grad_norm": 0.5571171188715623, + "learning_rate": 8.157558491554306e-08, + "loss": 0.2746, + "step": 20862 + }, + { + "epoch": 0.9442407784566644, + "grad_norm": 0.6518076238522412, + "learning_rate": 8.144378518234574e-08, + "loss": 0.2819, + "step": 20863 + }, + { + "epoch": 0.94428603756506, + "grad_norm": 0.6662587604637679, + "learning_rate": 8.131209113381556e-08, + "loss": 0.2865, + "step": 20864 + }, + { + "epoch": 0.9443312966734555, + "grad_norm": 0.28615234380710475, + "learning_rate": 8.118050277278245e-08, + "loss": 0.4741, + "step": 20865 + }, + { + "epoch": 0.9443765557818511, + "grad_norm": 0.6165350758593174, + "learning_rate": 8.104902010207249e-08, + "loss": 0.2808, + "step": 20866 + }, + { + "epoch": 0.9444218148902467, + "grad_norm": 0.945412991945462, + "learning_rate": 8.091764312451122e-08, + "loss": 0.2474, + "step": 20867 + }, + { + "epoch": 0.9444670739986423, + "grad_norm": 0.6484266098431832, + "learning_rate": 8.078637184292304e-08, + "loss": 0.3092, + "step": 20868 + }, + { + "epoch": 0.9445123331070377, + "grad_norm": 0.5862338401789485, + "learning_rate": 8.065520626012735e-08, + "loss": 0.3084, + "step": 20869 + }, + { + "epoch": 0.9445575922154333, + "grad_norm": 0.6182423665804847, + "learning_rate": 8.052414637894246e-08, + "loss": 0.2862, + "step": 20870 + }, + { + "epoch": 0.9446028513238289, + "grad_norm": 0.6010714458374049, + "learning_rate": 8.039319220218444e-08, + "loss": 0.3038, + "step": 20871 + }, + { + "epoch": 0.9446481104322245, + "grad_norm": 0.6317935772444221, + "learning_rate": 8.026234373266773e-08, + "loss": 0.2724, + "step": 20872 + }, + { + "epoch": 0.94469336954062, + "grad_norm": 0.5788671859175848, + "learning_rate": 8.013160097320339e-08, + "loss": 0.2702, + "step": 20873 + }, + { + "epoch": 0.9447386286490156, + "grad_norm": 0.24808045688663874, + "learning_rate": 8.000096392660029e-08, + "loss": 0.4617, + "step": 20874 + }, + { + "epoch": 0.9447838877574112, + "grad_norm": 0.5853744341393615, + "learning_rate": 7.987043259566618e-08, + "loss": 0.3128, + "step": 20875 + }, + { + "epoch": 0.9448291468658068, + "grad_norm": 0.5850339197503641, + "learning_rate": 7.974000698320495e-08, + "loss": 0.2834, + "step": 20876 + }, + { + "epoch": 0.9448744059742024, + "grad_norm": 0.23458010581167918, + "learning_rate": 7.960968709202044e-08, + "loss": 0.4502, + "step": 20877 + }, + { + "epoch": 0.9449196650825978, + "grad_norm": 0.6946033096154024, + "learning_rate": 7.947947292491154e-08, + "loss": 0.3207, + "step": 20878 + }, + { + "epoch": 0.9449649241909934, + "grad_norm": 0.5948525066627732, + "learning_rate": 7.9349364484676e-08, + "loss": 0.2688, + "step": 20879 + }, + { + "epoch": 0.945010183299389, + "grad_norm": 0.585106418093436, + "learning_rate": 7.921936177411049e-08, + "loss": 0.3158, + "step": 20880 + }, + { + "epoch": 0.9450554424077846, + "grad_norm": 0.7232573710512356, + "learning_rate": 7.908946479600777e-08, + "loss": 0.2977, + "step": 20881 + }, + { + "epoch": 0.9451007015161801, + "grad_norm": 0.6729210253022881, + "learning_rate": 7.895967355315948e-08, + "loss": 0.3173, + "step": 20882 + }, + { + "epoch": 0.9451459606245757, + "grad_norm": 0.5913024246309978, + "learning_rate": 7.88299880483534e-08, + "loss": 0.2871, + "step": 20883 + }, + { + "epoch": 0.9451912197329713, + "grad_norm": 0.6504311777673383, + "learning_rate": 7.870040828437675e-08, + "loss": 0.2656, + "step": 20884 + }, + { + "epoch": 0.9452364788413669, + "grad_norm": 0.6857428537796664, + "learning_rate": 7.857093426401397e-08, + "loss": 0.2983, + "step": 20885 + }, + { + "epoch": 0.9452817379497623, + "grad_norm": 0.6297503614601261, + "learning_rate": 7.844156599004671e-08, + "loss": 0.3062, + "step": 20886 + }, + { + "epoch": 0.9453269970581579, + "grad_norm": 0.5954023304747791, + "learning_rate": 7.831230346525443e-08, + "loss": 0.2848, + "step": 20887 + }, + { + "epoch": 0.9453722561665535, + "grad_norm": 0.26616954929128794, + "learning_rate": 7.818314669241544e-08, + "loss": 0.4977, + "step": 20888 + }, + { + "epoch": 0.9454175152749491, + "grad_norm": 0.6184736597521032, + "learning_rate": 7.805409567430367e-08, + "loss": 0.2996, + "step": 20889 + }, + { + "epoch": 0.9454627743833447, + "grad_norm": 0.25006736595217566, + "learning_rate": 7.792515041369353e-08, + "loss": 0.4563, + "step": 20890 + }, + { + "epoch": 0.9455080334917402, + "grad_norm": 0.5676641219555973, + "learning_rate": 7.779631091335505e-08, + "loss": 0.3086, + "step": 20891 + }, + { + "epoch": 0.9455532926001358, + "grad_norm": 0.2454819629724571, + "learning_rate": 7.7667577176056e-08, + "loss": 0.4475, + "step": 20892 + }, + { + "epoch": 0.9455985517085314, + "grad_norm": 0.7133383522561514, + "learning_rate": 7.753894920456251e-08, + "loss": 0.265, + "step": 20893 + }, + { + "epoch": 0.9456438108169269, + "grad_norm": 0.7529118268346086, + "learning_rate": 7.741042700164014e-08, + "loss": 0.3141, + "step": 20894 + }, + { + "epoch": 0.9456890699253224, + "grad_norm": 0.6052921223708356, + "learning_rate": 7.72820105700478e-08, + "loss": 0.2876, + "step": 20895 + }, + { + "epoch": 0.945734329033718, + "grad_norm": 0.7209021765111919, + "learning_rate": 7.715369991254662e-08, + "loss": 0.2644, + "step": 20896 + }, + { + "epoch": 0.9457795881421136, + "grad_norm": 0.5757050535425359, + "learning_rate": 7.702549503189272e-08, + "loss": 0.3039, + "step": 20897 + }, + { + "epoch": 0.9458248472505092, + "grad_norm": 0.2770258305772582, + "learning_rate": 7.689739593084166e-08, + "loss": 0.4918, + "step": 20898 + }, + { + "epoch": 0.9458701063589048, + "grad_norm": 0.22921896598491626, + "learning_rate": 7.676940261214516e-08, + "loss": 0.4559, + "step": 20899 + }, + { + "epoch": 0.9459153654673003, + "grad_norm": 0.276942567838856, + "learning_rate": 7.664151507855378e-08, + "loss": 0.4843, + "step": 20900 + }, + { + "epoch": 0.9459606245756959, + "grad_norm": 0.605310768362177, + "learning_rate": 7.651373333281532e-08, + "loss": 0.314, + "step": 20901 + }, + { + "epoch": 0.9460058836840914, + "grad_norm": 0.6351944506005454, + "learning_rate": 7.638605737767534e-08, + "loss": 0.2826, + "step": 20902 + }, + { + "epoch": 0.946051142792487, + "grad_norm": 0.6064184675760638, + "learning_rate": 7.625848721587725e-08, + "loss": 0.2872, + "step": 20903 + }, + { + "epoch": 0.9460964019008825, + "grad_norm": 0.6023337318092542, + "learning_rate": 7.613102285016216e-08, + "loss": 0.2878, + "step": 20904 + }, + { + "epoch": 0.9461416610092781, + "grad_norm": 0.5897002049598271, + "learning_rate": 7.600366428326845e-08, + "loss": 0.3028, + "step": 20905 + }, + { + "epoch": 0.9461869201176737, + "grad_norm": 0.5920218219632091, + "learning_rate": 7.58764115179339e-08, + "loss": 0.2779, + "step": 20906 + }, + { + "epoch": 0.9462321792260693, + "grad_norm": 0.5523652834187022, + "learning_rate": 7.574926455689136e-08, + "loss": 0.2515, + "step": 20907 + }, + { + "epoch": 0.9462774383344648, + "grad_norm": 0.5483838828103694, + "learning_rate": 7.562222340287362e-08, + "loss": 0.2723, + "step": 20908 + }, + { + "epoch": 0.9463226974428603, + "grad_norm": 0.6156417525577635, + "learning_rate": 7.549528805861017e-08, + "loss": 0.2727, + "step": 20909 + }, + { + "epoch": 0.9463679565512559, + "grad_norm": 0.6154673128491037, + "learning_rate": 7.536845852682884e-08, + "loss": 0.3334, + "step": 20910 + }, + { + "epoch": 0.9464132156596515, + "grad_norm": 0.5543715790612631, + "learning_rate": 7.52417348102541e-08, + "loss": 0.2951, + "step": 20911 + }, + { + "epoch": 0.9464584747680471, + "grad_norm": 0.5990124247903211, + "learning_rate": 7.511511691160933e-08, + "loss": 0.2926, + "step": 20912 + }, + { + "epoch": 0.9465037338764426, + "grad_norm": 0.6182521099000854, + "learning_rate": 7.498860483361459e-08, + "loss": 0.2876, + "step": 20913 + }, + { + "epoch": 0.9465489929848382, + "grad_norm": 0.6492851657534132, + "learning_rate": 7.486219857898935e-08, + "loss": 0.307, + "step": 20914 + }, + { + "epoch": 0.9465942520932338, + "grad_norm": 1.1963892970319248, + "learning_rate": 7.473589815044924e-08, + "loss": 0.2528, + "step": 20915 + }, + { + "epoch": 0.9466395112016294, + "grad_norm": 0.6083346740598554, + "learning_rate": 7.460970355070763e-08, + "loss": 0.2931, + "step": 20916 + }, + { + "epoch": 0.9466847703100248, + "grad_norm": 0.7154186931695922, + "learning_rate": 7.448361478247624e-08, + "loss": 0.2864, + "step": 20917 + }, + { + "epoch": 0.9467300294184204, + "grad_norm": 0.28426325171555816, + "learning_rate": 7.4357631848464e-08, + "loss": 0.4749, + "step": 20918 + }, + { + "epoch": 0.946775288526816, + "grad_norm": 0.6014997826042021, + "learning_rate": 7.423175475137934e-08, + "loss": 0.3012, + "step": 20919 + }, + { + "epoch": 0.9468205476352116, + "grad_norm": 0.28871653746278625, + "learning_rate": 7.410598349392506e-08, + "loss": 0.4796, + "step": 20920 + }, + { + "epoch": 0.9468658067436071, + "grad_norm": 0.2493791500104858, + "learning_rate": 7.398031807880456e-08, + "loss": 0.4565, + "step": 20921 + }, + { + "epoch": 0.9469110658520027, + "grad_norm": 0.6420186837671348, + "learning_rate": 7.385475850871793e-08, + "loss": 0.2726, + "step": 20922 + }, + { + "epoch": 0.9469563249603983, + "grad_norm": 0.7493605256465465, + "learning_rate": 7.372930478636353e-08, + "loss": 0.3089, + "step": 20923 + }, + { + "epoch": 0.9470015840687939, + "grad_norm": 0.6441946120542986, + "learning_rate": 7.360395691443644e-08, + "loss": 0.2867, + "step": 20924 + }, + { + "epoch": 0.9470468431771895, + "grad_norm": 0.6416847443775258, + "learning_rate": 7.347871489562952e-08, + "loss": 0.2897, + "step": 20925 + }, + { + "epoch": 0.9470921022855849, + "grad_norm": 0.6957522478918147, + "learning_rate": 7.335357873263449e-08, + "loss": 0.3384, + "step": 20926 + }, + { + "epoch": 0.9471373613939805, + "grad_norm": 0.6612940508036357, + "learning_rate": 7.322854842814031e-08, + "loss": 0.3174, + "step": 20927 + }, + { + "epoch": 0.9471826205023761, + "grad_norm": 0.2789703973879626, + "learning_rate": 7.310362398483262e-08, + "loss": 0.4583, + "step": 20928 + }, + { + "epoch": 0.9472278796107717, + "grad_norm": 0.6310776014752988, + "learning_rate": 7.297880540539648e-08, + "loss": 0.31, + "step": 20929 + }, + { + "epoch": 0.9472731387191672, + "grad_norm": 0.6304965415205801, + "learning_rate": 7.28540926925142e-08, + "loss": 0.2681, + "step": 20930 + }, + { + "epoch": 0.9473183978275628, + "grad_norm": 0.2765236675299349, + "learning_rate": 7.27294858488642e-08, + "loss": 0.4656, + "step": 20931 + }, + { + "epoch": 0.9473636569359584, + "grad_norm": 0.6720546270612172, + "learning_rate": 7.260498487712487e-08, + "loss": 0.3207, + "step": 20932 + }, + { + "epoch": 0.947408916044354, + "grad_norm": 0.627060354808679, + "learning_rate": 7.24805897799713e-08, + "loss": 0.2713, + "step": 20933 + }, + { + "epoch": 0.9474541751527495, + "grad_norm": 0.5966061175146216, + "learning_rate": 7.23563005600758e-08, + "loss": 0.2702, + "step": 20934 + }, + { + "epoch": 0.947499434261145, + "grad_norm": 0.6318013396150959, + "learning_rate": 7.223211722010959e-08, + "loss": 0.311, + "step": 20935 + }, + { + "epoch": 0.9475446933695406, + "grad_norm": 0.5954183406323375, + "learning_rate": 7.21080397627405e-08, + "loss": 0.2959, + "step": 20936 + }, + { + "epoch": 0.9475899524779362, + "grad_norm": 0.6338239230437382, + "learning_rate": 7.198406819063419e-08, + "loss": 0.3436, + "step": 20937 + }, + { + "epoch": 0.9476352115863318, + "grad_norm": 0.2799655198032767, + "learning_rate": 7.186020250645576e-08, + "loss": 0.4425, + "step": 20938 + }, + { + "epoch": 0.9476804706947273, + "grad_norm": 0.5993272085024067, + "learning_rate": 7.173644271286584e-08, + "loss": 0.2909, + "step": 20939 + }, + { + "epoch": 0.9477257298031229, + "grad_norm": 0.6036991084042239, + "learning_rate": 7.161278881252398e-08, + "loss": 0.2581, + "step": 20940 + }, + { + "epoch": 0.9477709889115185, + "grad_norm": 0.5657049971902813, + "learning_rate": 7.14892408080864e-08, + "loss": 0.292, + "step": 20941 + }, + { + "epoch": 0.947816248019914, + "grad_norm": 0.619094047430233, + "learning_rate": 7.136579870220817e-08, + "loss": 0.2978, + "step": 20942 + }, + { + "epoch": 0.9478615071283095, + "grad_norm": 0.6030486256087684, + "learning_rate": 7.124246249754218e-08, + "loss": 0.2904, + "step": 20943 + }, + { + "epoch": 0.9479067662367051, + "grad_norm": 0.275658799458521, + "learning_rate": 7.1119232196738e-08, + "loss": 0.4689, + "step": 20944 + }, + { + "epoch": 0.9479520253451007, + "grad_norm": 0.3242225239131589, + "learning_rate": 7.099610780244348e-08, + "loss": 0.4701, + "step": 20945 + }, + { + "epoch": 0.9479972844534963, + "grad_norm": 0.6053260765775155, + "learning_rate": 7.087308931730485e-08, + "loss": 0.2765, + "step": 20946 + }, + { + "epoch": 0.9480425435618919, + "grad_norm": 0.5822627920801777, + "learning_rate": 7.075017674396445e-08, + "loss": 0.283, + "step": 20947 + }, + { + "epoch": 0.9480878026702874, + "grad_norm": 0.6450773095173824, + "learning_rate": 7.062737008506404e-08, + "loss": 0.283, + "step": 20948 + }, + { + "epoch": 0.948133061778683, + "grad_norm": 0.6037121474357465, + "learning_rate": 7.050466934324207e-08, + "loss": 0.3242, + "step": 20949 + }, + { + "epoch": 0.9481783208870785, + "grad_norm": 0.5702545627996378, + "learning_rate": 7.038207452113422e-08, + "loss": 0.2438, + "step": 20950 + }, + { + "epoch": 0.9482235799954741, + "grad_norm": 0.5819989483938457, + "learning_rate": 7.025958562137559e-08, + "loss": 0.291, + "step": 20951 + }, + { + "epoch": 0.9482688391038696, + "grad_norm": 0.6063900047506062, + "learning_rate": 7.013720264659851e-08, + "loss": 0.2897, + "step": 20952 + }, + { + "epoch": 0.9483140982122652, + "grad_norm": 0.25345238884624277, + "learning_rate": 7.001492559943201e-08, + "loss": 0.4894, + "step": 20953 + }, + { + "epoch": 0.9483593573206608, + "grad_norm": 0.607414809250055, + "learning_rate": 6.989275448250288e-08, + "loss": 0.3302, + "step": 20954 + }, + { + "epoch": 0.9484046164290564, + "grad_norm": 0.6563703110364023, + "learning_rate": 6.977068929843678e-08, + "loss": 0.2831, + "step": 20955 + }, + { + "epoch": 0.9484498755374519, + "grad_norm": 0.6503675742457773, + "learning_rate": 6.964873004985717e-08, + "loss": 0.2811, + "step": 20956 + }, + { + "epoch": 0.9484951346458474, + "grad_norm": 0.6344037115955137, + "learning_rate": 6.952687673938363e-08, + "loss": 0.3489, + "step": 20957 + }, + { + "epoch": 0.948540393754243, + "grad_norm": 0.5878882777015434, + "learning_rate": 6.940512936963461e-08, + "loss": 0.2568, + "step": 20958 + }, + { + "epoch": 0.9485856528626386, + "grad_norm": 0.5402159930154613, + "learning_rate": 6.928348794322637e-08, + "loss": 0.2659, + "step": 20959 + }, + { + "epoch": 0.9486309119710342, + "grad_norm": 0.5676910831484602, + "learning_rate": 6.916195246277291e-08, + "loss": 0.2994, + "step": 20960 + }, + { + "epoch": 0.9486761710794297, + "grad_norm": 0.6005688409644764, + "learning_rate": 6.904052293088437e-08, + "loss": 0.2892, + "step": 20961 + }, + { + "epoch": 0.9487214301878253, + "grad_norm": 0.5844076062105606, + "learning_rate": 6.891919935017089e-08, + "loss": 0.2766, + "step": 20962 + }, + { + "epoch": 0.9487666892962209, + "grad_norm": 0.2680542891543253, + "learning_rate": 6.879798172323926e-08, + "loss": 0.4712, + "step": 20963 + }, + { + "epoch": 0.9488119484046165, + "grad_norm": 1.233302978572291, + "learning_rate": 6.867687005269408e-08, + "loss": 0.274, + "step": 20964 + }, + { + "epoch": 0.948857207513012, + "grad_norm": 0.5678739452683957, + "learning_rate": 6.855586434113771e-08, + "loss": 0.2768, + "step": 20965 + }, + { + "epoch": 0.9489024666214075, + "grad_norm": 0.6447409783343343, + "learning_rate": 6.843496459116917e-08, + "loss": 0.2816, + "step": 20966 + }, + { + "epoch": 0.9489477257298031, + "grad_norm": 0.6330282871837848, + "learning_rate": 6.83141708053875e-08, + "loss": 0.2733, + "step": 20967 + }, + { + "epoch": 0.9489929848381987, + "grad_norm": 0.2887291296955998, + "learning_rate": 6.819348298638839e-08, + "loss": 0.481, + "step": 20968 + }, + { + "epoch": 0.9490382439465943, + "grad_norm": 0.6455807936183818, + "learning_rate": 6.807290113676423e-08, + "loss": 0.3026, + "step": 20969 + }, + { + "epoch": 0.9490835030549898, + "grad_norm": 0.5974785045800158, + "learning_rate": 6.795242525910573e-08, + "loss": 0.2907, + "step": 20970 + }, + { + "epoch": 0.9491287621633854, + "grad_norm": 0.5544173397992318, + "learning_rate": 6.783205535600191e-08, + "loss": 0.3221, + "step": 20971 + }, + { + "epoch": 0.949174021271781, + "grad_norm": 0.6038550671982219, + "learning_rate": 6.771179143003958e-08, + "loss": 0.3031, + "step": 20972 + }, + { + "epoch": 0.9492192803801766, + "grad_norm": 0.6218445340632932, + "learning_rate": 6.759163348380282e-08, + "loss": 0.2946, + "step": 20973 + }, + { + "epoch": 0.949264539488572, + "grad_norm": 0.2522104366288927, + "learning_rate": 6.747158151987232e-08, + "loss": 0.4569, + "step": 20974 + }, + { + "epoch": 0.9493097985969676, + "grad_norm": 0.2725387958153841, + "learning_rate": 6.73516355408288e-08, + "loss": 0.4977, + "step": 20975 + }, + { + "epoch": 0.9493550577053632, + "grad_norm": 0.5699970378852044, + "learning_rate": 6.723179554924908e-08, + "loss": 0.2763, + "step": 20976 + }, + { + "epoch": 0.9494003168137588, + "grad_norm": 0.5946194671942663, + "learning_rate": 6.711206154770833e-08, + "loss": 0.2901, + "step": 20977 + }, + { + "epoch": 0.9494455759221543, + "grad_norm": 0.5964805198685067, + "learning_rate": 6.699243353877949e-08, + "loss": 0.2875, + "step": 20978 + }, + { + "epoch": 0.9494908350305499, + "grad_norm": 1.2702132941453168, + "learning_rate": 6.687291152503217e-08, + "loss": 0.3049, + "step": 20979 + }, + { + "epoch": 0.9495360941389455, + "grad_norm": 0.35882497371212974, + "learning_rate": 6.675349550903488e-08, + "loss": 0.4688, + "step": 20980 + }, + { + "epoch": 0.949581353247341, + "grad_norm": 0.7600225740849603, + "learning_rate": 6.663418549335443e-08, + "loss": 0.2948, + "step": 20981 + }, + { + "epoch": 0.9496266123557366, + "grad_norm": 0.6181063297081756, + "learning_rate": 6.651498148055324e-08, + "loss": 0.3708, + "step": 20982 + }, + { + "epoch": 0.9496718714641321, + "grad_norm": 1.4409835173105592, + "learning_rate": 6.639588347319315e-08, + "loss": 0.2737, + "step": 20983 + }, + { + "epoch": 0.9497171305725277, + "grad_norm": 0.5926133598300354, + "learning_rate": 6.627689147383265e-08, + "loss": 0.275, + "step": 20984 + }, + { + "epoch": 0.9497623896809233, + "grad_norm": 0.621328656285908, + "learning_rate": 6.615800548502971e-08, + "loss": 0.2557, + "step": 20985 + }, + { + "epoch": 0.9498076487893189, + "grad_norm": 0.6553306928571592, + "learning_rate": 6.603922550933783e-08, + "loss": 0.2822, + "step": 20986 + }, + { + "epoch": 0.9498529078977144, + "grad_norm": 0.5898052926929013, + "learning_rate": 6.592055154930887e-08, + "loss": 0.29, + "step": 20987 + }, + { + "epoch": 0.94989816700611, + "grad_norm": 0.60423853194223, + "learning_rate": 6.580198360749412e-08, + "loss": 0.2807, + "step": 20988 + }, + { + "epoch": 0.9499434261145056, + "grad_norm": 0.6083911386276145, + "learning_rate": 6.568352168644043e-08, + "loss": 0.2714, + "step": 20989 + }, + { + "epoch": 0.9499886852229011, + "grad_norm": 0.8479645873483991, + "learning_rate": 6.556516578869299e-08, + "loss": 0.3046, + "step": 20990 + }, + { + "epoch": 0.9500339443312966, + "grad_norm": 0.5760836337681511, + "learning_rate": 6.544691591679531e-08, + "loss": 0.274, + "step": 20991 + }, + { + "epoch": 0.9500792034396922, + "grad_norm": 0.6151864967390867, + "learning_rate": 6.532877207328813e-08, + "loss": 0.271, + "step": 20992 + }, + { + "epoch": 0.9501244625480878, + "grad_norm": 0.6503544528267623, + "learning_rate": 6.521073426070945e-08, + "loss": 0.2904, + "step": 20993 + }, + { + "epoch": 0.9501697216564834, + "grad_norm": 0.26711729261399775, + "learning_rate": 6.509280248159721e-08, + "loss": 0.4739, + "step": 20994 + }, + { + "epoch": 0.950214980764879, + "grad_norm": 0.571338219335804, + "learning_rate": 6.49749767384833e-08, + "loss": 0.2758, + "step": 20995 + }, + { + "epoch": 0.9502602398732745, + "grad_norm": 0.6613673798392251, + "learning_rate": 6.485725703390067e-08, + "loss": 0.3139, + "step": 20996 + }, + { + "epoch": 0.95030549898167, + "grad_norm": 0.6584892264971919, + "learning_rate": 6.473964337037842e-08, + "loss": 0.2549, + "step": 20997 + }, + { + "epoch": 0.9503507580900656, + "grad_norm": 0.5943344110402399, + "learning_rate": 6.462213575044396e-08, + "loss": 0.2781, + "step": 20998 + }, + { + "epoch": 0.9503960171984612, + "grad_norm": 0.6127678986376591, + "learning_rate": 6.45047341766214e-08, + "loss": 0.2842, + "step": 20999 + }, + { + "epoch": 0.9504412763068567, + "grad_norm": 0.6000744206177664, + "learning_rate": 6.438743865143371e-08, + "loss": 0.3379, + "step": 21000 + }, + { + "epoch": 0.9504865354152523, + "grad_norm": 0.5657602497024272, + "learning_rate": 6.42702491774022e-08, + "loss": 0.2908, + "step": 21001 + }, + { + "epoch": 0.9505317945236479, + "grad_norm": 0.2609860411322821, + "learning_rate": 6.415316575704378e-08, + "loss": 0.4503, + "step": 21002 + }, + { + "epoch": 0.9505770536320435, + "grad_norm": 0.604504207006534, + "learning_rate": 6.403618839287418e-08, + "loss": 0.2892, + "step": 21003 + }, + { + "epoch": 0.9506223127404391, + "grad_norm": 0.6030571400391765, + "learning_rate": 6.391931708740806e-08, + "loss": 0.2736, + "step": 21004 + }, + { + "epoch": 0.9506675718488345, + "grad_norm": 0.2807062113118566, + "learning_rate": 6.380255184315509e-08, + "loss": 0.4747, + "step": 21005 + }, + { + "epoch": 0.9507128309572301, + "grad_norm": 0.5946543907442272, + "learning_rate": 6.368589266262493e-08, + "loss": 0.2965, + "step": 21006 + }, + { + "epoch": 0.9507580900656257, + "grad_norm": 0.590482601249089, + "learning_rate": 6.356933954832501e-08, + "loss": 0.3242, + "step": 21007 + }, + { + "epoch": 0.9508033491740213, + "grad_norm": 0.5794334337238063, + "learning_rate": 6.345289250275777e-08, + "loss": 0.2708, + "step": 21008 + }, + { + "epoch": 0.9508486082824168, + "grad_norm": 0.5793012980790783, + "learning_rate": 6.333655152842676e-08, + "loss": 0.3122, + "step": 21009 + }, + { + "epoch": 0.9508938673908124, + "grad_norm": 0.5797144339972501, + "learning_rate": 6.322031662783167e-08, + "loss": 0.303, + "step": 21010 + }, + { + "epoch": 0.950939126499208, + "grad_norm": 0.5331724832690391, + "learning_rate": 6.310418780346993e-08, + "loss": 0.283, + "step": 21011 + }, + { + "epoch": 0.9509843856076036, + "grad_norm": 0.6539748251622324, + "learning_rate": 6.298816505783623e-08, + "loss": 0.3098, + "step": 21012 + }, + { + "epoch": 0.951029644715999, + "grad_norm": 0.5968445176581573, + "learning_rate": 6.28722483934241e-08, + "loss": 0.2896, + "step": 21013 + }, + { + "epoch": 0.9510749038243946, + "grad_norm": 0.7165491692322432, + "learning_rate": 6.275643781272489e-08, + "loss": 0.3001, + "step": 21014 + }, + { + "epoch": 0.9511201629327902, + "grad_norm": 0.5754934026250936, + "learning_rate": 6.264073331822551e-08, + "loss": 0.3247, + "step": 21015 + }, + { + "epoch": 0.9511654220411858, + "grad_norm": 0.6371439596459173, + "learning_rate": 6.252513491241285e-08, + "loss": 0.3054, + "step": 21016 + }, + { + "epoch": 0.9512106811495814, + "grad_norm": 0.6393908936424378, + "learning_rate": 6.240964259777104e-08, + "loss": 0.304, + "step": 21017 + }, + { + "epoch": 0.9512559402579769, + "grad_norm": 0.6371497263366444, + "learning_rate": 6.229425637678088e-08, + "loss": 0.2724, + "step": 21018 + }, + { + "epoch": 0.9513011993663725, + "grad_norm": 0.26785550578434497, + "learning_rate": 6.217897625192259e-08, + "loss": 0.4779, + "step": 21019 + }, + { + "epoch": 0.9513464584747681, + "grad_norm": 0.5961553134529505, + "learning_rate": 6.206380222567254e-08, + "loss": 0.301, + "step": 21020 + }, + { + "epoch": 0.9513917175831637, + "grad_norm": 0.5852208105910898, + "learning_rate": 6.194873430050596e-08, + "loss": 0.2966, + "step": 21021 + }, + { + "epoch": 0.9514369766915591, + "grad_norm": 0.6041103603260393, + "learning_rate": 6.183377247889422e-08, + "loss": 0.2742, + "step": 21022 + }, + { + "epoch": 0.9514822357999547, + "grad_norm": 0.6030801014720969, + "learning_rate": 6.171891676330922e-08, + "loss": 0.2631, + "step": 21023 + }, + { + "epoch": 0.9515274949083503, + "grad_norm": 0.6407847667172143, + "learning_rate": 6.160416715621786e-08, + "loss": 0.268, + "step": 21024 + }, + { + "epoch": 0.9515727540167459, + "grad_norm": 0.2788988931928052, + "learning_rate": 6.148952366008487e-08, + "loss": 0.4682, + "step": 21025 + }, + { + "epoch": 0.9516180131251414, + "grad_norm": 0.5766844844108057, + "learning_rate": 6.137498627737492e-08, + "loss": 0.2491, + "step": 21026 + }, + { + "epoch": 0.951663272233537, + "grad_norm": 0.6204674046222174, + "learning_rate": 6.126055501054995e-08, + "loss": 0.2971, + "step": 21027 + }, + { + "epoch": 0.9517085313419326, + "grad_norm": 0.566262868538345, + "learning_rate": 6.114622986206575e-08, + "loss": 0.2732, + "step": 21028 + }, + { + "epoch": 0.9517537904503282, + "grad_norm": 0.6266490854478113, + "learning_rate": 6.103201083438149e-08, + "loss": 0.3184, + "step": 21029 + }, + { + "epoch": 0.9517990495587237, + "grad_norm": 0.6233291681881801, + "learning_rate": 6.091789792995018e-08, + "loss": 0.3184, + "step": 21030 + }, + { + "epoch": 0.9518443086671192, + "grad_norm": 0.6900403839984441, + "learning_rate": 6.080389115122432e-08, + "loss": 0.3531, + "step": 21031 + }, + { + "epoch": 0.9518895677755148, + "grad_norm": 0.6418354601151535, + "learning_rate": 6.06899905006525e-08, + "loss": 0.3536, + "step": 21032 + }, + { + "epoch": 0.9519348268839104, + "grad_norm": 0.7346705264008148, + "learning_rate": 6.057619598068332e-08, + "loss": 0.3051, + "step": 21033 + }, + { + "epoch": 0.951980085992306, + "grad_norm": 0.6679759636947534, + "learning_rate": 6.046250759376148e-08, + "loss": 0.2993, + "step": 21034 + }, + { + "epoch": 0.9520253451007015, + "grad_norm": 0.27520480360869226, + "learning_rate": 6.034892534233006e-08, + "loss": 0.4927, + "step": 21035 + }, + { + "epoch": 0.9520706042090971, + "grad_norm": 0.6746084316484376, + "learning_rate": 6.023544922882874e-08, + "loss": 0.3048, + "step": 21036 + }, + { + "epoch": 0.9521158633174926, + "grad_norm": 0.6528323441208711, + "learning_rate": 6.012207925569613e-08, + "loss": 0.3036, + "step": 21037 + }, + { + "epoch": 0.9521611224258882, + "grad_norm": 0.6145097092357207, + "learning_rate": 6.000881542536863e-08, + "loss": 0.2905, + "step": 21038 + }, + { + "epoch": 0.9522063815342838, + "grad_norm": 0.24463072379857484, + "learning_rate": 5.989565774027983e-08, + "loss": 0.4491, + "step": 21039 + }, + { + "epoch": 0.9522516406426793, + "grad_norm": 0.2627344849839754, + "learning_rate": 5.978260620286058e-08, + "loss": 0.4778, + "step": 21040 + }, + { + "epoch": 0.9522968997510749, + "grad_norm": 0.596168684386448, + "learning_rate": 5.96696608155406e-08, + "loss": 0.242, + "step": 21041 + }, + { + "epoch": 0.9523421588594705, + "grad_norm": 0.2646225649141157, + "learning_rate": 5.955682158074627e-08, + "loss": 0.4841, + "step": 21042 + }, + { + "epoch": 0.9523874179678661, + "grad_norm": 0.7122779503387644, + "learning_rate": 5.944408850090289e-08, + "loss": 0.3003, + "step": 21043 + }, + { + "epoch": 0.9524326770762616, + "grad_norm": 0.6594758890068033, + "learning_rate": 5.933146157843239e-08, + "loss": 0.2806, + "step": 21044 + }, + { + "epoch": 0.9524779361846571, + "grad_norm": 0.5710099357492796, + "learning_rate": 5.921894081575397e-08, + "loss": 0.2961, + "step": 21045 + }, + { + "epoch": 0.9525231952930527, + "grad_norm": 0.26264952959296717, + "learning_rate": 5.9106526215286786e-08, + "loss": 0.477, + "step": 21046 + }, + { + "epoch": 0.9525684544014483, + "grad_norm": 0.667966157173857, + "learning_rate": 5.899421777944503e-08, + "loss": 0.25, + "step": 21047 + }, + { + "epoch": 0.9526137135098438, + "grad_norm": 0.6473696352491063, + "learning_rate": 5.888201551064288e-08, + "loss": 0.2955, + "step": 21048 + }, + { + "epoch": 0.9526589726182394, + "grad_norm": 0.6820985118600682, + "learning_rate": 5.876991941129062e-08, + "loss": 0.2818, + "step": 21049 + }, + { + "epoch": 0.952704231726635, + "grad_norm": 0.6109836936427275, + "learning_rate": 5.8657929483796336e-08, + "loss": 0.2987, + "step": 21050 + }, + { + "epoch": 0.9527494908350306, + "grad_norm": 0.651082778888153, + "learning_rate": 5.854604573056755e-08, + "loss": 0.287, + "step": 21051 + }, + { + "epoch": 0.9527947499434262, + "grad_norm": 0.30875576273308114, + "learning_rate": 5.843426815400788e-08, + "loss": 0.4622, + "step": 21052 + }, + { + "epoch": 0.9528400090518216, + "grad_norm": 0.655355098365421, + "learning_rate": 5.8322596756518744e-08, + "loss": 0.2646, + "step": 21053 + }, + { + "epoch": 0.9528852681602172, + "grad_norm": 0.6104545493531314, + "learning_rate": 5.821103154049934e-08, + "loss": 0.306, + "step": 21054 + }, + { + "epoch": 0.9529305272686128, + "grad_norm": 0.5416426331637725, + "learning_rate": 5.809957250834774e-08, + "loss": 0.2829, + "step": 21055 + }, + { + "epoch": 0.9529757863770084, + "grad_norm": 0.6268521076725558, + "learning_rate": 5.7988219662458714e-08, + "loss": 0.2843, + "step": 21056 + }, + { + "epoch": 0.9530210454854039, + "grad_norm": 0.2669820642962163, + "learning_rate": 5.787697300522421e-08, + "loss": 0.4533, + "step": 21057 + }, + { + "epoch": 0.9530663045937995, + "grad_norm": 0.2477758080254133, + "learning_rate": 5.7765832539035113e-08, + "loss": 0.4546, + "step": 21058 + }, + { + "epoch": 0.9531115637021951, + "grad_norm": 0.648880992568745, + "learning_rate": 5.765479826627951e-08, + "loss": 0.2887, + "step": 21059 + }, + { + "epoch": 0.9531568228105907, + "grad_norm": 0.6113640942856314, + "learning_rate": 5.754387018934271e-08, + "loss": 0.3575, + "step": 21060 + }, + { + "epoch": 0.9532020819189861, + "grad_norm": 0.2506811649761924, + "learning_rate": 5.743304831060836e-08, + "loss": 0.4683, + "step": 21061 + }, + { + "epoch": 0.9532473410273817, + "grad_norm": 0.6305241344926028, + "learning_rate": 5.7322332632458454e-08, + "loss": 0.3598, + "step": 21062 + }, + { + "epoch": 0.9532926001357773, + "grad_norm": 0.9082959487210251, + "learning_rate": 5.721172315727108e-08, + "loss": 0.3155, + "step": 21063 + }, + { + "epoch": 0.9533378592441729, + "grad_norm": 0.30722117733165927, + "learning_rate": 5.7101219887423233e-08, + "loss": 0.4894, + "step": 21064 + }, + { + "epoch": 0.9533831183525685, + "grad_norm": 0.30885965328323056, + "learning_rate": 5.6990822825289115e-08, + "loss": 0.4934, + "step": 21065 + }, + { + "epoch": 0.953428377460964, + "grad_norm": 0.6392135432575204, + "learning_rate": 5.688053197324073e-08, + "loss": 0.2562, + "step": 21066 + }, + { + "epoch": 0.9534736365693596, + "grad_norm": 0.5728801787248158, + "learning_rate": 5.677034733364839e-08, + "loss": 0.288, + "step": 21067 + }, + { + "epoch": 0.9535188956777552, + "grad_norm": 0.2522632360951553, + "learning_rate": 5.66602689088791e-08, + "loss": 0.4584, + "step": 21068 + }, + { + "epoch": 0.9535641547861508, + "grad_norm": 0.7143571198515337, + "learning_rate": 5.655029670129875e-08, + "loss": 0.293, + "step": 21069 + }, + { + "epoch": 0.9536094138945462, + "grad_norm": 0.5981811006835039, + "learning_rate": 5.6440430713269325e-08, + "loss": 0.2969, + "step": 21070 + }, + { + "epoch": 0.9536546730029418, + "grad_norm": 0.2668000224402627, + "learning_rate": 5.633067094715228e-08, + "loss": 0.4681, + "step": 21071 + }, + { + "epoch": 0.9536999321113374, + "grad_norm": 0.6051485800178262, + "learning_rate": 5.622101740530572e-08, + "loss": 0.2771, + "step": 21072 + }, + { + "epoch": 0.953745191219733, + "grad_norm": 0.6327385317687241, + "learning_rate": 5.6111470090086106e-08, + "loss": 0.303, + "step": 21073 + }, + { + "epoch": 0.9537904503281285, + "grad_norm": 0.7189651235790038, + "learning_rate": 5.6002029003847105e-08, + "loss": 0.3054, + "step": 21074 + }, + { + "epoch": 0.9538357094365241, + "grad_norm": 0.6291207355588576, + "learning_rate": 5.589269414893961e-08, + "loss": 0.3036, + "step": 21075 + }, + { + "epoch": 0.9538809685449197, + "grad_norm": 0.6193220306777247, + "learning_rate": 5.5783465527713964e-08, + "loss": 0.3429, + "step": 21076 + }, + { + "epoch": 0.9539262276533153, + "grad_norm": 0.5910164438973713, + "learning_rate": 5.567434314251663e-08, + "loss": 0.2816, + "step": 21077 + }, + { + "epoch": 0.9539714867617108, + "grad_norm": 0.2552213221032225, + "learning_rate": 5.5565326995691835e-08, + "loss": 0.4609, + "step": 21078 + }, + { + "epoch": 0.9540167458701063, + "grad_norm": 0.5937592714720844, + "learning_rate": 5.5456417089582715e-08, + "loss": 0.2692, + "step": 21079 + }, + { + "epoch": 0.9540620049785019, + "grad_norm": 0.5623907371733852, + "learning_rate": 5.534761342652906e-08, + "loss": 0.2704, + "step": 21080 + }, + { + "epoch": 0.9541072640868975, + "grad_norm": 0.6982056335613128, + "learning_rate": 5.523891600886955e-08, + "loss": 0.2716, + "step": 21081 + }, + { + "epoch": 0.9541525231952931, + "grad_norm": 0.5974350115498644, + "learning_rate": 5.513032483893843e-08, + "loss": 0.2886, + "step": 21082 + }, + { + "epoch": 0.9541977823036886, + "grad_norm": 0.5876815070067601, + "learning_rate": 5.50218399190694e-08, + "loss": 0.2659, + "step": 21083 + }, + { + "epoch": 0.9542430414120842, + "grad_norm": 0.5721221155130534, + "learning_rate": 5.491346125159391e-08, + "loss": 0.2632, + "step": 21084 + }, + { + "epoch": 0.9542883005204797, + "grad_norm": 0.5523906313642887, + "learning_rate": 5.4805188838841226e-08, + "loss": 0.2783, + "step": 21085 + }, + { + "epoch": 0.9543335596288753, + "grad_norm": 0.6276116118037559, + "learning_rate": 5.4697022683136145e-08, + "loss": 0.2775, + "step": 21086 + }, + { + "epoch": 0.9543788187372709, + "grad_norm": 0.5721432827948391, + "learning_rate": 5.4588962786804035e-08, + "loss": 0.2863, + "step": 21087 + }, + { + "epoch": 0.9544240778456664, + "grad_norm": 0.5807363563910248, + "learning_rate": 5.448100915216636e-08, + "loss": 0.2458, + "step": 21088 + }, + { + "epoch": 0.954469336954062, + "grad_norm": 0.238644150188603, + "learning_rate": 5.437316178154295e-08, + "loss": 0.4643, + "step": 21089 + }, + { + "epoch": 0.9545145960624576, + "grad_norm": 0.7048671117012055, + "learning_rate": 5.4265420677250267e-08, + "loss": 0.3157, + "step": 21090 + }, + { + "epoch": 0.9545598551708532, + "grad_norm": 0.7289959440252665, + "learning_rate": 5.4157785841604805e-08, + "loss": 0.3047, + "step": 21091 + }, + { + "epoch": 0.9546051142792487, + "grad_norm": 0.28604388476548864, + "learning_rate": 5.4050257276918036e-08, + "loss": 0.4595, + "step": 21092 + }, + { + "epoch": 0.9546503733876442, + "grad_norm": 0.6084096838677485, + "learning_rate": 5.3942834985501455e-08, + "loss": 0.271, + "step": 21093 + }, + { + "epoch": 0.9546956324960398, + "grad_norm": 0.5654045262842151, + "learning_rate": 5.383551896966266e-08, + "loss": 0.2735, + "step": 21094 + }, + { + "epoch": 0.9547408916044354, + "grad_norm": 0.6056723134167218, + "learning_rate": 5.372830923170702e-08, + "loss": 0.3032, + "step": 21095 + }, + { + "epoch": 0.9547861507128309, + "grad_norm": 0.6271978895462672, + "learning_rate": 5.362120577393881e-08, + "loss": 0.2952, + "step": 21096 + }, + { + "epoch": 0.9548314098212265, + "grad_norm": 0.578230439300833, + "learning_rate": 5.351420859865952e-08, + "loss": 0.2753, + "step": 21097 + }, + { + "epoch": 0.9548766689296221, + "grad_norm": 0.6369091384458982, + "learning_rate": 5.340731770816843e-08, + "loss": 0.2957, + "step": 21098 + }, + { + "epoch": 0.9549219280380177, + "grad_norm": 0.2938941194995703, + "learning_rate": 5.330053310476091e-08, + "loss": 0.4708, + "step": 21099 + }, + { + "epoch": 0.9549671871464133, + "grad_norm": 0.2623577246616529, + "learning_rate": 5.319385479073236e-08, + "loss": 0.4724, + "step": 21100 + }, + { + "epoch": 0.9550124462548087, + "grad_norm": 1.2555377118351114, + "learning_rate": 5.308728276837538e-08, + "loss": 0.2841, + "step": 21101 + }, + { + "epoch": 0.9550577053632043, + "grad_norm": 0.6404644636725458, + "learning_rate": 5.298081703997926e-08, + "loss": 0.2868, + "step": 21102 + }, + { + "epoch": 0.9551029644715999, + "grad_norm": 0.5611261911554767, + "learning_rate": 5.287445760783161e-08, + "loss": 0.3046, + "step": 21103 + }, + { + "epoch": 0.9551482235799955, + "grad_norm": 0.26375488095717653, + "learning_rate": 5.276820447421782e-08, + "loss": 0.4611, + "step": 21104 + }, + { + "epoch": 0.955193482688391, + "grad_norm": 0.7815139458922309, + "learning_rate": 5.266205764142107e-08, + "loss": 0.2924, + "step": 21105 + }, + { + "epoch": 0.9552387417967866, + "grad_norm": 0.6860109324350689, + "learning_rate": 5.2556017111722315e-08, + "loss": 0.3019, + "step": 21106 + }, + { + "epoch": 0.9552840009051822, + "grad_norm": 0.5940709035253605, + "learning_rate": 5.245008288740028e-08, + "loss": 0.2745, + "step": 21107 + }, + { + "epoch": 0.9553292600135778, + "grad_norm": 0.6224783076128134, + "learning_rate": 5.234425497072981e-08, + "loss": 0.2929, + "step": 21108 + }, + { + "epoch": 0.9553745191219732, + "grad_norm": 0.25188962605758014, + "learning_rate": 5.223853336398632e-08, + "loss": 0.4686, + "step": 21109 + }, + { + "epoch": 0.9554197782303688, + "grad_norm": 0.5638666587277356, + "learning_rate": 5.213291806944076e-08, + "loss": 0.2819, + "step": 21110 + }, + { + "epoch": 0.9554650373387644, + "grad_norm": 0.6578418934123136, + "learning_rate": 5.2027409089362434e-08, + "loss": 0.2873, + "step": 21111 + }, + { + "epoch": 0.95551029644716, + "grad_norm": 0.6370928784488743, + "learning_rate": 5.192200642601841e-08, + "loss": 0.2969, + "step": 21112 + }, + { + "epoch": 0.9555555555555556, + "grad_norm": 0.25498088535690616, + "learning_rate": 5.181671008167355e-08, + "loss": 0.4389, + "step": 21113 + }, + { + "epoch": 0.9556008146639511, + "grad_norm": 0.2758628803552411, + "learning_rate": 5.171152005859159e-08, + "loss": 0.473, + "step": 21114 + }, + { + "epoch": 0.9556460737723467, + "grad_norm": 0.5835708463549837, + "learning_rate": 5.1606436359030174e-08, + "loss": 0.3202, + "step": 21115 + }, + { + "epoch": 0.9556913328807423, + "grad_norm": 0.616351688744552, + "learning_rate": 5.150145898524916e-08, + "loss": 0.3031, + "step": 21116 + }, + { + "epoch": 0.9557365919891379, + "grad_norm": 0.25333214668607923, + "learning_rate": 5.139658793950342e-08, + "loss": 0.454, + "step": 21117 + }, + { + "epoch": 0.9557818510975333, + "grad_norm": 0.2571882919873599, + "learning_rate": 5.1291823224046687e-08, + "loss": 0.4685, + "step": 21118 + }, + { + "epoch": 0.9558271102059289, + "grad_norm": 0.7099556179776506, + "learning_rate": 5.1187164841129954e-08, + "loss": 0.3117, + "step": 21119 + }, + { + "epoch": 0.9558723693143245, + "grad_norm": 0.5463801828022469, + "learning_rate": 5.1082612793001976e-08, + "loss": 0.2631, + "step": 21120 + }, + { + "epoch": 0.9559176284227201, + "grad_norm": 0.5868453745955741, + "learning_rate": 5.0978167081908726e-08, + "loss": 0.2613, + "step": 21121 + }, + { + "epoch": 0.9559628875311157, + "grad_norm": 0.2854284040132491, + "learning_rate": 5.0873827710095636e-08, + "loss": 0.468, + "step": 21122 + }, + { + "epoch": 0.9560081466395112, + "grad_norm": 1.618474871427813, + "learning_rate": 5.076959467980369e-08, + "loss": 0.2847, + "step": 21123 + }, + { + "epoch": 0.9560534057479068, + "grad_norm": 0.5758606216633175, + "learning_rate": 5.066546799327221e-08, + "loss": 0.2828, + "step": 21124 + }, + { + "epoch": 0.9560986648563023, + "grad_norm": 0.5602948965142438, + "learning_rate": 5.0561447652739404e-08, + "loss": 0.2697, + "step": 21125 + }, + { + "epoch": 0.9561439239646979, + "grad_norm": 0.6075309864313594, + "learning_rate": 5.045753366044015e-08, + "loss": 0.236, + "step": 21126 + }, + { + "epoch": 0.9561891830730934, + "grad_norm": 0.6679248281529808, + "learning_rate": 5.035372601860766e-08, + "loss": 0.2855, + "step": 21127 + }, + { + "epoch": 0.956234442181489, + "grad_norm": 0.6291431479098318, + "learning_rate": 5.0250024729470714e-08, + "loss": 0.2807, + "step": 21128 + }, + { + "epoch": 0.9562797012898846, + "grad_norm": 0.564615306221422, + "learning_rate": 5.0146429795259745e-08, + "loss": 0.3114, + "step": 21129 + }, + { + "epoch": 0.9563249603982802, + "grad_norm": 0.26899426000202586, + "learning_rate": 5.004294121819908e-08, + "loss": 0.4783, + "step": 21130 + }, + { + "epoch": 0.9563702195066757, + "grad_norm": 0.29653148382525596, + "learning_rate": 4.993955900051362e-08, + "loss": 0.4898, + "step": 21131 + }, + { + "epoch": 0.9564154786150713, + "grad_norm": 0.6173135466935873, + "learning_rate": 4.983628314442324e-08, + "loss": 0.293, + "step": 21132 + }, + { + "epoch": 0.9564607377234668, + "grad_norm": 0.5586290414287138, + "learning_rate": 4.973311365214894e-08, + "loss": 0.318, + "step": 21133 + }, + { + "epoch": 0.9565059968318624, + "grad_norm": 0.5546851153757404, + "learning_rate": 4.9630050525905635e-08, + "loss": 0.3139, + "step": 21134 + }, + { + "epoch": 0.956551255940258, + "grad_norm": 0.5424416970865831, + "learning_rate": 4.9527093767908765e-08, + "loss": 0.2762, + "step": 21135 + }, + { + "epoch": 0.9565965150486535, + "grad_norm": 0.6311911428226128, + "learning_rate": 4.942424338037044e-08, + "loss": 0.2852, + "step": 21136 + }, + { + "epoch": 0.9566417741570491, + "grad_norm": 0.5741090634293206, + "learning_rate": 4.932149936550057e-08, + "loss": 0.2514, + "step": 21137 + }, + { + "epoch": 0.9566870332654447, + "grad_norm": 0.6936989040590055, + "learning_rate": 4.9218861725506825e-08, + "loss": 0.2994, + "step": 21138 + }, + { + "epoch": 0.9567322923738403, + "grad_norm": 0.6044609637642747, + "learning_rate": 4.9116330462594677e-08, + "loss": 0.2769, + "step": 21139 + }, + { + "epoch": 0.9567775514822358, + "grad_norm": 0.25341767036578816, + "learning_rate": 4.9013905578967346e-08, + "loss": 0.446, + "step": 21140 + }, + { + "epoch": 0.9568228105906313, + "grad_norm": 0.2809826063683718, + "learning_rate": 4.8911587076825305e-08, + "loss": 0.4946, + "step": 21141 + }, + { + "epoch": 0.9568680696990269, + "grad_norm": 0.663001023558929, + "learning_rate": 4.8809374958366796e-08, + "loss": 0.3133, + "step": 21142 + }, + { + "epoch": 0.9569133288074225, + "grad_norm": 0.61431465932955, + "learning_rate": 4.870726922578839e-08, + "loss": 0.2811, + "step": 21143 + }, + { + "epoch": 0.956958587915818, + "grad_norm": 0.5896603758861003, + "learning_rate": 4.8605269881284446e-08, + "loss": 0.298, + "step": 21144 + }, + { + "epoch": 0.9570038470242136, + "grad_norm": 0.6509342502713384, + "learning_rate": 4.8503376927045984e-08, + "loss": 0.2603, + "step": 21145 + }, + { + "epoch": 0.9570491061326092, + "grad_norm": 0.5676415541024242, + "learning_rate": 4.840159036526237e-08, + "loss": 0.2688, + "step": 21146 + }, + { + "epoch": 0.9570943652410048, + "grad_norm": 0.2541454717914221, + "learning_rate": 4.8299910198121304e-08, + "loss": 0.4955, + "step": 21147 + }, + { + "epoch": 0.9571396243494004, + "grad_norm": 0.6064039473492522, + "learning_rate": 4.819833642780713e-08, + "loss": 0.2653, + "step": 21148 + }, + { + "epoch": 0.9571848834577958, + "grad_norm": 0.6175290510783422, + "learning_rate": 4.809686905650257e-08, + "loss": 0.265, + "step": 21149 + }, + { + "epoch": 0.9572301425661914, + "grad_norm": 0.6565275491143019, + "learning_rate": 4.7995508086386975e-08, + "loss": 0.288, + "step": 21150 + }, + { + "epoch": 0.957275401674587, + "grad_norm": 0.762414490219462, + "learning_rate": 4.789425351963972e-08, + "loss": 0.2669, + "step": 21151 + }, + { + "epoch": 0.9573206607829826, + "grad_norm": 0.6262336664562198, + "learning_rate": 4.779310535843573e-08, + "loss": 0.26, + "step": 21152 + }, + { + "epoch": 0.9573659198913781, + "grad_norm": 0.31724872112389707, + "learning_rate": 4.769206360494771e-08, + "loss": 0.4672, + "step": 21153 + }, + { + "epoch": 0.9574111789997737, + "grad_norm": 0.5928255327048539, + "learning_rate": 4.759112826134782e-08, + "loss": 0.2793, + "step": 21154 + }, + { + "epoch": 0.9574564381081693, + "grad_norm": 0.6557021358975665, + "learning_rate": 4.749029932980431e-08, + "loss": 0.2363, + "step": 21155 + }, + { + "epoch": 0.9575016972165649, + "grad_norm": 0.6553815062030756, + "learning_rate": 4.73895768124838e-08, + "loss": 0.2506, + "step": 21156 + }, + { + "epoch": 0.9575469563249605, + "grad_norm": 2.1409793774596806, + "learning_rate": 4.7288960711550644e-08, + "loss": 0.3, + "step": 21157 + }, + { + "epoch": 0.9575922154333559, + "grad_norm": 0.5651822107390357, + "learning_rate": 4.718845102916592e-08, + "loss": 0.2757, + "step": 21158 + }, + { + "epoch": 0.9576374745417515, + "grad_norm": 0.625507174834113, + "learning_rate": 4.708804776749121e-08, + "loss": 0.2822, + "step": 21159 + }, + { + "epoch": 0.9576827336501471, + "grad_norm": 0.6486296978176692, + "learning_rate": 4.6987750928682017e-08, + "loss": 0.2983, + "step": 21160 + }, + { + "epoch": 0.9577279927585427, + "grad_norm": 0.5840867435621814, + "learning_rate": 4.688756051489385e-08, + "loss": 0.297, + "step": 21161 + }, + { + "epoch": 0.9577732518669382, + "grad_norm": 0.6474200844795487, + "learning_rate": 4.678747652827997e-08, + "loss": 0.2637, + "step": 21162 + }, + { + "epoch": 0.9578185109753338, + "grad_norm": 0.2523292538945871, + "learning_rate": 4.668749897099034e-08, + "loss": 0.4583, + "step": 21163 + }, + { + "epoch": 0.9578637700837294, + "grad_norm": 0.26201240745249477, + "learning_rate": 4.6587627845173786e-08, + "loss": 0.4774, + "step": 21164 + }, + { + "epoch": 0.957909029192125, + "grad_norm": 0.2650016651303161, + "learning_rate": 4.648786315297582e-08, + "loss": 0.474, + "step": 21165 + }, + { + "epoch": 0.9579542883005204, + "grad_norm": 0.6206100835637757, + "learning_rate": 4.6388204896539724e-08, + "loss": 0.3042, + "step": 21166 + }, + { + "epoch": 0.957999547408916, + "grad_norm": 0.61239911731407, + "learning_rate": 4.628865307800712e-08, + "loss": 0.2747, + "step": 21167 + }, + { + "epoch": 0.9580448065173116, + "grad_norm": 0.6238800569552224, + "learning_rate": 4.618920769951796e-08, + "loss": 0.3023, + "step": 21168 + }, + { + "epoch": 0.9580900656257072, + "grad_norm": 0.24533446552736418, + "learning_rate": 4.6089868763207756e-08, + "loss": 0.4675, + "step": 21169 + }, + { + "epoch": 0.9581353247341028, + "grad_norm": 0.927201016908863, + "learning_rate": 4.5990636271211474e-08, + "loss": 0.3292, + "step": 21170 + }, + { + "epoch": 0.9581805838424983, + "grad_norm": 0.5815065294128933, + "learning_rate": 4.58915102256613e-08, + "loss": 0.2917, + "step": 21171 + }, + { + "epoch": 0.9582258429508939, + "grad_norm": 0.28944106861197766, + "learning_rate": 4.5792490628687734e-08, + "loss": 0.4839, + "step": 21172 + }, + { + "epoch": 0.9582711020592894, + "grad_norm": 0.5599205054206638, + "learning_rate": 4.569357748241743e-08, + "loss": 0.2879, + "step": 21173 + }, + { + "epoch": 0.958316361167685, + "grad_norm": 0.7083504678979596, + "learning_rate": 4.55947707889759e-08, + "loss": 0.2428, + "step": 21174 + }, + { + "epoch": 0.9583616202760805, + "grad_norm": 0.5732163183389878, + "learning_rate": 4.549607055048699e-08, + "loss": 0.3069, + "step": 21175 + }, + { + "epoch": 0.9584068793844761, + "grad_norm": 0.5684435449877221, + "learning_rate": 4.539747676907069e-08, + "loss": 0.2736, + "step": 21176 + }, + { + "epoch": 0.9584521384928717, + "grad_norm": 0.7016450015645664, + "learning_rate": 4.529898944684585e-08, + "loss": 0.2662, + "step": 21177 + }, + { + "epoch": 0.9584973976012673, + "grad_norm": 0.6133835173395702, + "learning_rate": 4.5200608585928566e-08, + "loss": 0.2779, + "step": 21178 + }, + { + "epoch": 0.9585426567096628, + "grad_norm": 0.8378032151828722, + "learning_rate": 4.510233418843213e-08, + "loss": 0.2647, + "step": 21179 + }, + { + "epoch": 0.9585879158180584, + "grad_norm": 0.3407268560380774, + "learning_rate": 4.5004166256469305e-08, + "loss": 0.4614, + "step": 21180 + }, + { + "epoch": 0.958633174926454, + "grad_norm": 0.6134658495980012, + "learning_rate": 4.490610479214841e-08, + "loss": 0.2638, + "step": 21181 + }, + { + "epoch": 0.9586784340348495, + "grad_norm": 0.6981064429014657, + "learning_rate": 4.480814979757719e-08, + "loss": 0.3505, + "step": 21182 + }, + { + "epoch": 0.9587236931432451, + "grad_norm": 0.5817051446699949, + "learning_rate": 4.471030127486009e-08, + "loss": 0.2621, + "step": 21183 + }, + { + "epoch": 0.9587689522516406, + "grad_norm": 0.6006523021407582, + "learning_rate": 4.461255922609986e-08, + "loss": 0.2656, + "step": 21184 + }, + { + "epoch": 0.9588142113600362, + "grad_norm": 0.6019792523072536, + "learning_rate": 4.451492365339594e-08, + "loss": 0.2884, + "step": 21185 + }, + { + "epoch": 0.9588594704684318, + "grad_norm": 0.6145690423990191, + "learning_rate": 4.4417394558846636e-08, + "loss": 0.3031, + "step": 21186 + }, + { + "epoch": 0.9589047295768274, + "grad_norm": 0.6242685469563993, + "learning_rate": 4.431997194454807e-08, + "loss": 0.355, + "step": 21187 + }, + { + "epoch": 0.9589499886852229, + "grad_norm": 0.6240215002808371, + "learning_rate": 4.4222655812592995e-08, + "loss": 0.2737, + "step": 21188 + }, + { + "epoch": 0.9589952477936184, + "grad_norm": 0.6140724268164072, + "learning_rate": 4.412544616507253e-08, + "loss": 0.3129, + "step": 21189 + }, + { + "epoch": 0.959040506902014, + "grad_norm": 0.8783380764141445, + "learning_rate": 4.402834300407499e-08, + "loss": 0.2695, + "step": 21190 + }, + { + "epoch": 0.9590857660104096, + "grad_norm": 0.5972526903824302, + "learning_rate": 4.3931346331688165e-08, + "loss": 0.3249, + "step": 21191 + }, + { + "epoch": 0.9591310251188052, + "grad_norm": 0.25982751541373955, + "learning_rate": 4.383445614999426e-08, + "loss": 0.4751, + "step": 21192 + }, + { + "epoch": 0.9591762842272007, + "grad_norm": 0.6209074527492965, + "learning_rate": 4.373767246107718e-08, + "loss": 0.2567, + "step": 21193 + }, + { + "epoch": 0.9592215433355963, + "grad_norm": 0.6748164420339022, + "learning_rate": 4.3640995267014704e-08, + "loss": 0.2274, + "step": 21194 + }, + { + "epoch": 0.9592668024439919, + "grad_norm": 0.6207451339187819, + "learning_rate": 4.354442456988517e-08, + "loss": 0.2833, + "step": 21195 + }, + { + "epoch": 0.9593120615523875, + "grad_norm": 0.2598314089164235, + "learning_rate": 4.3447960371763575e-08, + "loss": 0.4636, + "step": 21196 + }, + { + "epoch": 0.9593573206607829, + "grad_norm": 0.6280994414834518, + "learning_rate": 4.335160267472216e-08, + "loss": 0.3045, + "step": 21197 + }, + { + "epoch": 0.9594025797691785, + "grad_norm": 0.6668469020947756, + "learning_rate": 4.325535148083204e-08, + "loss": 0.3126, + "step": 21198 + }, + { + "epoch": 0.9594478388775741, + "grad_norm": 0.7184303504115127, + "learning_rate": 4.3159206792160455e-08, + "loss": 0.2935, + "step": 21199 + }, + { + "epoch": 0.9594930979859697, + "grad_norm": 0.6537703551533632, + "learning_rate": 4.3063168610774084e-08, + "loss": 0.2885, + "step": 21200 + }, + { + "epoch": 0.9595383570943652, + "grad_norm": 0.6323361763200717, + "learning_rate": 4.2967236938735725e-08, + "loss": 0.2797, + "step": 21201 + }, + { + "epoch": 0.9595836162027608, + "grad_norm": 0.2735815182206019, + "learning_rate": 4.287141177810761e-08, + "loss": 0.4609, + "step": 21202 + }, + { + "epoch": 0.9596288753111564, + "grad_norm": 0.6295577981996854, + "learning_rate": 4.2775693130948094e-08, + "loss": 0.3092, + "step": 21203 + }, + { + "epoch": 0.959674134419552, + "grad_norm": 0.5876943016991591, + "learning_rate": 4.268008099931387e-08, + "loss": 0.2894, + "step": 21204 + }, + { + "epoch": 0.9597193935279476, + "grad_norm": 0.5478724164381471, + "learning_rate": 4.25845753852594e-08, + "loss": 0.2857, + "step": 21205 + }, + { + "epoch": 0.959764652636343, + "grad_norm": 0.7570418593038322, + "learning_rate": 4.248917629083693e-08, + "loss": 0.2725, + "step": 21206 + }, + { + "epoch": 0.9598099117447386, + "grad_norm": 0.2699582044210566, + "learning_rate": 4.2393883718096495e-08, + "loss": 0.4692, + "step": 21207 + }, + { + "epoch": 0.9598551708531342, + "grad_norm": 0.6258507229904574, + "learning_rate": 4.2298697669084785e-08, + "loss": 0.2818, + "step": 21208 + }, + { + "epoch": 0.9599004299615298, + "grad_norm": 0.6338652523526765, + "learning_rate": 4.2203618145847946e-08, + "loss": 0.3102, + "step": 21209 + }, + { + "epoch": 0.9599456890699253, + "grad_norm": 0.25974894190440184, + "learning_rate": 4.210864515042878e-08, + "loss": 0.4548, + "step": 21210 + }, + { + "epoch": 0.9599909481783209, + "grad_norm": 0.6166636330929279, + "learning_rate": 4.2013778684867335e-08, + "loss": 0.297, + "step": 21211 + }, + { + "epoch": 0.9600362072867165, + "grad_norm": 0.5978516768881983, + "learning_rate": 4.191901875120308e-08, + "loss": 0.3011, + "step": 21212 + }, + { + "epoch": 0.960081466395112, + "grad_norm": 0.6019325817783381, + "learning_rate": 4.182436535147105e-08, + "loss": 0.3409, + "step": 21213 + }, + { + "epoch": 0.9601267255035075, + "grad_norm": 0.6240142157887236, + "learning_rate": 4.1729818487706297e-08, + "loss": 0.2794, + "step": 21214 + }, + { + "epoch": 0.9601719846119031, + "grad_norm": 0.6209004645487891, + "learning_rate": 4.163537816193885e-08, + "loss": 0.3074, + "step": 21215 + }, + { + "epoch": 0.9602172437202987, + "grad_norm": 0.5959644019366294, + "learning_rate": 4.154104437619877e-08, + "loss": 0.2902, + "step": 21216 + }, + { + "epoch": 0.9602625028286943, + "grad_norm": 0.6120197629494267, + "learning_rate": 4.144681713251275e-08, + "loss": 0.2721, + "step": 21217 + }, + { + "epoch": 0.9603077619370899, + "grad_norm": 0.6320329580407882, + "learning_rate": 4.1352696432906405e-08, + "loss": 0.2833, + "step": 21218 + }, + { + "epoch": 0.9603530210454854, + "grad_norm": 0.5909436023906957, + "learning_rate": 4.125868227940033e-08, + "loss": 0.3063, + "step": 21219 + }, + { + "epoch": 0.960398280153881, + "grad_norm": 0.5730815741653685, + "learning_rate": 4.116477467401625e-08, + "loss": 0.3018, + "step": 21220 + }, + { + "epoch": 0.9604435392622765, + "grad_norm": 0.26835348997717295, + "learning_rate": 4.107097361877088e-08, + "loss": 0.4632, + "step": 21221 + }, + { + "epoch": 0.9604887983706721, + "grad_norm": 0.63553753421761, + "learning_rate": 4.097727911568039e-08, + "loss": 0.3173, + "step": 21222 + }, + { + "epoch": 0.9605340574790676, + "grad_norm": 0.6397865086894752, + "learning_rate": 4.088369116675761e-08, + "loss": 0.2952, + "step": 21223 + }, + { + "epoch": 0.9605793165874632, + "grad_norm": 0.599380152021612, + "learning_rate": 4.0790209774013156e-08, + "loss": 0.2655, + "step": 21224 + }, + { + "epoch": 0.9606245756958588, + "grad_norm": 0.5637309960267389, + "learning_rate": 4.069683493945598e-08, + "loss": 0.2557, + "step": 21225 + }, + { + "epoch": 0.9606698348042544, + "grad_norm": 0.6125425675800832, + "learning_rate": 4.060356666509335e-08, + "loss": 0.298, + "step": 21226 + }, + { + "epoch": 0.96071509391265, + "grad_norm": 0.6476639084705905, + "learning_rate": 4.051040495292757e-08, + "loss": 0.3023, + "step": 21227 + }, + { + "epoch": 0.9607603530210455, + "grad_norm": 0.5677392671931741, + "learning_rate": 4.041734980496148e-08, + "loss": 0.3043, + "step": 21228 + }, + { + "epoch": 0.960805612129441, + "grad_norm": 0.6092973251190281, + "learning_rate": 4.032440122319459e-08, + "loss": 0.2896, + "step": 21229 + }, + { + "epoch": 0.9608508712378366, + "grad_norm": 0.26849380700297054, + "learning_rate": 4.0231559209624185e-08, + "loss": 0.4789, + "step": 21230 + }, + { + "epoch": 0.9608961303462322, + "grad_norm": 1.0232158053660931, + "learning_rate": 4.013882376624423e-08, + "loss": 0.3167, + "step": 21231 + }, + { + "epoch": 0.9609413894546277, + "grad_norm": 0.30389324861319167, + "learning_rate": 4.004619489504813e-08, + "loss": 0.456, + "step": 21232 + }, + { + "epoch": 0.9609866485630233, + "grad_norm": 0.6564086381621276, + "learning_rate": 3.995367259802596e-08, + "loss": 0.3061, + "step": 21233 + }, + { + "epoch": 0.9610319076714189, + "grad_norm": 0.6095148878709185, + "learning_rate": 3.986125687716558e-08, + "loss": 0.2643, + "step": 21234 + }, + { + "epoch": 0.9610771667798145, + "grad_norm": 0.6100252027921181, + "learning_rate": 3.976894773445261e-08, + "loss": 0.3019, + "step": 21235 + }, + { + "epoch": 0.96112242588821, + "grad_norm": 0.5839556175104145, + "learning_rate": 3.967674517187159e-08, + "loss": 0.2719, + "step": 21236 + }, + { + "epoch": 0.9611676849966055, + "grad_norm": 0.5543483765055761, + "learning_rate": 3.9584649191402034e-08, + "loss": 0.3032, + "step": 21237 + }, + { + "epoch": 0.9612129441050011, + "grad_norm": 0.2859477139223187, + "learning_rate": 3.9492659795024035e-08, + "loss": 0.4663, + "step": 21238 + }, + { + "epoch": 0.9612582032133967, + "grad_norm": 0.2531076016653137, + "learning_rate": 3.940077698471378e-08, + "loss": 0.4635, + "step": 21239 + }, + { + "epoch": 0.9613034623217923, + "grad_norm": 0.6157171437043197, + "learning_rate": 3.930900076244526e-08, + "loss": 0.3218, + "step": 21240 + }, + { + "epoch": 0.9613487214301878, + "grad_norm": 0.6477974935783631, + "learning_rate": 3.921733113019077e-08, + "loss": 0.2452, + "step": 21241 + }, + { + "epoch": 0.9613939805385834, + "grad_norm": 0.6188385285914921, + "learning_rate": 3.912576808991986e-08, + "loss": 0.3111, + "step": 21242 + }, + { + "epoch": 0.961439239646979, + "grad_norm": 0.6520923746735432, + "learning_rate": 3.903431164360094e-08, + "loss": 0.3082, + "step": 21243 + }, + { + "epoch": 0.9614844987553746, + "grad_norm": 0.6093413924671904, + "learning_rate": 3.8942961793197456e-08, + "loss": 0.2757, + "step": 21244 + }, + { + "epoch": 0.96152975786377, + "grad_norm": 0.6180322542059126, + "learning_rate": 3.885171854067282e-08, + "loss": 0.2801, + "step": 21245 + }, + { + "epoch": 0.9615750169721656, + "grad_norm": 0.6173825785972774, + "learning_rate": 3.8760581887987706e-08, + "loss": 0.2981, + "step": 21246 + }, + { + "epoch": 0.9616202760805612, + "grad_norm": 0.6535693212898909, + "learning_rate": 3.866955183710108e-08, + "loss": 0.281, + "step": 21247 + }, + { + "epoch": 0.9616655351889568, + "grad_norm": 0.6009252696082468, + "learning_rate": 3.857862838996751e-08, + "loss": 0.2282, + "step": 21248 + }, + { + "epoch": 0.9617107942973523, + "grad_norm": 0.7029568060532387, + "learning_rate": 3.8487811548542086e-08, + "loss": 0.3151, + "step": 21249 + }, + { + "epoch": 0.9617560534057479, + "grad_norm": 0.27285242924989755, + "learning_rate": 3.839710131477492e-08, + "loss": 0.4705, + "step": 21250 + }, + { + "epoch": 0.9618013125141435, + "grad_norm": 0.6684912899665435, + "learning_rate": 3.8306497690615564e-08, + "loss": 0.3379, + "step": 21251 + }, + { + "epoch": 0.9618465716225391, + "grad_norm": 0.5495280270916768, + "learning_rate": 3.8216000678011344e-08, + "loss": 0.2976, + "step": 21252 + }, + { + "epoch": 0.9618918307309346, + "grad_norm": 0.2520299691807092, + "learning_rate": 3.812561027890571e-08, + "loss": 0.4836, + "step": 21253 + }, + { + "epoch": 0.9619370898393301, + "grad_norm": 0.559546923586854, + "learning_rate": 3.8035326495242106e-08, + "loss": 0.2578, + "step": 21254 + }, + { + "epoch": 0.9619823489477257, + "grad_norm": 0.6358503324588595, + "learning_rate": 3.794514932895954e-08, + "loss": 0.3164, + "step": 21255 + }, + { + "epoch": 0.9620276080561213, + "grad_norm": 0.5869712565273543, + "learning_rate": 3.78550787819959e-08, + "loss": 0.2585, + "step": 21256 + }, + { + "epoch": 0.9620728671645169, + "grad_norm": 0.25232073452894843, + "learning_rate": 3.7765114856286866e-08, + "loss": 0.4614, + "step": 21257 + }, + { + "epoch": 0.9621181262729124, + "grad_norm": 0.6596492126163388, + "learning_rate": 3.7675257553764224e-08, + "loss": 0.2838, + "step": 21258 + }, + { + "epoch": 0.962163385381308, + "grad_norm": 0.625749386386828, + "learning_rate": 3.7585506876360865e-08, + "loss": 0.312, + "step": 21259 + }, + { + "epoch": 0.9622086444897036, + "grad_norm": 0.2912275382564058, + "learning_rate": 3.749586282600359e-08, + "loss": 0.4784, + "step": 21260 + }, + { + "epoch": 0.9622539035980991, + "grad_norm": 0.7980256338173004, + "learning_rate": 3.740632540461864e-08, + "loss": 0.2934, + "step": 21261 + }, + { + "epoch": 0.9622991627064947, + "grad_norm": 0.5957946792402637, + "learning_rate": 3.731689461413113e-08, + "loss": 0.2908, + "step": 21262 + }, + { + "epoch": 0.9623444218148902, + "grad_norm": 0.6516521315572986, + "learning_rate": 3.7227570456461194e-08, + "loss": 0.2563, + "step": 21263 + }, + { + "epoch": 0.9623896809232858, + "grad_norm": 0.6167601726247165, + "learning_rate": 3.7138352933528965e-08, + "loss": 0.3169, + "step": 21264 + }, + { + "epoch": 0.9624349400316814, + "grad_norm": 0.6262162319203259, + "learning_rate": 3.70492420472518e-08, + "loss": 0.2906, + "step": 21265 + }, + { + "epoch": 0.962480199140077, + "grad_norm": 0.6135124490731227, + "learning_rate": 3.6960237799543166e-08, + "loss": 0.2909, + "step": 21266 + }, + { + "epoch": 0.9625254582484725, + "grad_norm": 0.7785797765189498, + "learning_rate": 3.6871340192315974e-08, + "loss": 0.2873, + "step": 21267 + }, + { + "epoch": 0.962570717356868, + "grad_norm": 0.6334405969382095, + "learning_rate": 3.6782549227481476e-08, + "loss": 0.2909, + "step": 21268 + }, + { + "epoch": 0.9626159764652636, + "grad_norm": 0.5805036307580624, + "learning_rate": 3.669386490694593e-08, + "loss": 0.268, + "step": 21269 + }, + { + "epoch": 0.9626612355736592, + "grad_norm": 0.2630489032207125, + "learning_rate": 3.6605287232616137e-08, + "loss": 0.4674, + "step": 21270 + }, + { + "epoch": 0.9627064946820547, + "grad_norm": 0.6354005481006761, + "learning_rate": 3.651681620639447e-08, + "loss": 0.2704, + "step": 21271 + }, + { + "epoch": 0.9627517537904503, + "grad_norm": 0.6971951264532625, + "learning_rate": 3.642845183018273e-08, + "loss": 0.2801, + "step": 21272 + }, + { + "epoch": 0.9627970128988459, + "grad_norm": 0.618233409724753, + "learning_rate": 3.63401941058783e-08, + "loss": 0.2253, + "step": 21273 + }, + { + "epoch": 0.9628422720072415, + "grad_norm": 0.2852131682171573, + "learning_rate": 3.625204303537855e-08, + "loss": 0.4863, + "step": 21274 + }, + { + "epoch": 0.9628875311156371, + "grad_norm": 0.6274027089335161, + "learning_rate": 3.6163998620578065e-08, + "loss": 0.3192, + "step": 21275 + }, + { + "epoch": 0.9629327902240326, + "grad_norm": 0.639063695040162, + "learning_rate": 3.6076060863367565e-08, + "loss": 0.2792, + "step": 21276 + }, + { + "epoch": 0.9629780493324281, + "grad_norm": 0.657216145416747, + "learning_rate": 3.598822976563665e-08, + "loss": 0.3416, + "step": 21277 + }, + { + "epoch": 0.9630233084408237, + "grad_norm": 0.314988580497501, + "learning_rate": 3.5900505329273804e-08, + "loss": 0.4811, + "step": 21278 + }, + { + "epoch": 0.9630685675492193, + "grad_norm": 0.6421338261470828, + "learning_rate": 3.581288755616197e-08, + "loss": 0.2978, + "step": 21279 + }, + { + "epoch": 0.9631138266576148, + "grad_norm": 0.6104777277721297, + "learning_rate": 3.5725376448185744e-08, + "loss": 0.3207, + "step": 21280 + }, + { + "epoch": 0.9631590857660104, + "grad_norm": 0.25787119378948375, + "learning_rate": 3.563797200722363e-08, + "loss": 0.4712, + "step": 21281 + }, + { + "epoch": 0.963204344874406, + "grad_norm": 0.6496668162613294, + "learning_rate": 3.555067423515523e-08, + "loss": 0.2714, + "step": 21282 + }, + { + "epoch": 0.9632496039828016, + "grad_norm": 0.5899650319063284, + "learning_rate": 3.5463483133855726e-08, + "loss": 0.3019, + "step": 21283 + }, + { + "epoch": 0.963294863091197, + "grad_norm": 0.6227285001305403, + "learning_rate": 3.5376398705198603e-08, + "loss": 0.2778, + "step": 21284 + }, + { + "epoch": 0.9633401221995926, + "grad_norm": 0.5848115284332623, + "learning_rate": 3.5289420951055145e-08, + "loss": 0.2984, + "step": 21285 + }, + { + "epoch": 0.9633853813079882, + "grad_norm": 0.613686346774837, + "learning_rate": 3.5202549873293304e-08, + "loss": 0.3182, + "step": 21286 + }, + { + "epoch": 0.9634306404163838, + "grad_norm": 0.5834137635129636, + "learning_rate": 3.5115785473781026e-08, + "loss": 0.3004, + "step": 21287 + }, + { + "epoch": 0.9634758995247794, + "grad_norm": 0.6097829559220829, + "learning_rate": 3.502912775438183e-08, + "loss": 0.2664, + "step": 21288 + }, + { + "epoch": 0.9635211586331749, + "grad_norm": 0.25201403422770474, + "learning_rate": 3.494257671695811e-08, + "loss": 0.4597, + "step": 21289 + }, + { + "epoch": 0.9635664177415705, + "grad_norm": 0.6905157574890572, + "learning_rate": 3.4856132363369485e-08, + "loss": 0.3064, + "step": 21290 + }, + { + "epoch": 0.9636116768499661, + "grad_norm": 1.02299639303271, + "learning_rate": 3.476979469547337e-08, + "loss": 0.269, + "step": 21291 + }, + { + "epoch": 0.9636569359583617, + "grad_norm": 0.6560738269000224, + "learning_rate": 3.468356371512438e-08, + "loss": 0.3031, + "step": 21292 + }, + { + "epoch": 0.9637021950667571, + "grad_norm": 0.26279930128979606, + "learning_rate": 3.459743942417604e-08, + "loss": 0.4655, + "step": 21293 + }, + { + "epoch": 0.9637474541751527, + "grad_norm": 0.6623621867401419, + "learning_rate": 3.451142182447908e-08, + "loss": 0.2765, + "step": 21294 + }, + { + "epoch": 0.9637927132835483, + "grad_norm": 0.5857663907388148, + "learning_rate": 3.442551091788038e-08, + "loss": 0.266, + "step": 21295 + }, + { + "epoch": 0.9638379723919439, + "grad_norm": 0.6146602373853328, + "learning_rate": 3.4339706706227326e-08, + "loss": 0.3012, + "step": 21296 + }, + { + "epoch": 0.9638832315003394, + "grad_norm": 0.5996634569620617, + "learning_rate": 3.425400919136346e-08, + "loss": 0.3118, + "step": 21297 + }, + { + "epoch": 0.963928490608735, + "grad_norm": 0.5527183377984016, + "learning_rate": 3.416841837512952e-08, + "loss": 0.2982, + "step": 21298 + }, + { + "epoch": 0.9639737497171306, + "grad_norm": 0.6135812886345209, + "learning_rate": 3.40829342593646e-08, + "loss": 0.2939, + "step": 21299 + }, + { + "epoch": 0.9640190088255262, + "grad_norm": 0.6100060335767012, + "learning_rate": 3.399755684590611e-08, + "loss": 0.2768, + "step": 21300 + }, + { + "epoch": 0.9640642679339217, + "grad_norm": 0.6862427360829833, + "learning_rate": 3.39122861365887e-08, + "loss": 0.3029, + "step": 21301 + }, + { + "epoch": 0.9641095270423172, + "grad_norm": 1.103074870883008, + "learning_rate": 3.382712213324313e-08, + "loss": 0.2559, + "step": 21302 + }, + { + "epoch": 0.9641547861507128, + "grad_norm": 0.27218569974804424, + "learning_rate": 3.374206483770071e-08, + "loss": 0.4603, + "step": 21303 + }, + { + "epoch": 0.9642000452591084, + "grad_norm": 0.2551901000735597, + "learning_rate": 3.365711425178886e-08, + "loss": 0.4548, + "step": 21304 + }, + { + "epoch": 0.964245304367504, + "grad_norm": 0.6019272256332595, + "learning_rate": 3.357227037733224e-08, + "loss": 0.2514, + "step": 21305 + }, + { + "epoch": 0.9642905634758995, + "grad_norm": 0.5836409045668817, + "learning_rate": 3.3487533216154386e-08, + "loss": 0.2902, + "step": 21306 + }, + { + "epoch": 0.9643358225842951, + "grad_norm": 0.3000098505611248, + "learning_rate": 3.340290277007607e-08, + "loss": 0.4798, + "step": 21307 + }, + { + "epoch": 0.9643810816926907, + "grad_norm": 0.6196779072403756, + "learning_rate": 3.3318379040915284e-08, + "loss": 0.3096, + "step": 21308 + }, + { + "epoch": 0.9644263408010862, + "grad_norm": 0.6049054192400112, + "learning_rate": 3.3233962030489453e-08, + "loss": 0.2708, + "step": 21309 + }, + { + "epoch": 0.9644715999094818, + "grad_norm": 0.6443006566661341, + "learning_rate": 3.3149651740610464e-08, + "loss": 0.299, + "step": 21310 + }, + { + "epoch": 0.9645168590178773, + "grad_norm": 0.5848403475285998, + "learning_rate": 3.3065448173091873e-08, + "loss": 0.2754, + "step": 21311 + }, + { + "epoch": 0.9645621181262729, + "grad_norm": 0.6277945205866138, + "learning_rate": 3.298135132974112e-08, + "loss": 0.2824, + "step": 21312 + }, + { + "epoch": 0.9646073772346685, + "grad_norm": 0.6249229890940834, + "learning_rate": 3.289736121236675e-08, + "loss": 0.3334, + "step": 21313 + }, + { + "epoch": 0.9646526363430641, + "grad_norm": 0.23627308508275272, + "learning_rate": 3.2813477822772885e-08, + "loss": 0.4655, + "step": 21314 + }, + { + "epoch": 0.9646978954514596, + "grad_norm": 0.6215681145266133, + "learning_rate": 3.2729701162760865e-08, + "loss": 0.293, + "step": 21315 + }, + { + "epoch": 0.9647431545598552, + "grad_norm": 0.6019029106587596, + "learning_rate": 3.264603123413257e-08, + "loss": 0.2539, + "step": 21316 + }, + { + "epoch": 0.9647884136682507, + "grad_norm": 0.7327028825493089, + "learning_rate": 3.25624680386849e-08, + "loss": 0.298, + "step": 21317 + }, + { + "epoch": 0.9648336727766463, + "grad_norm": 0.5796198943379359, + "learning_rate": 3.247901157821365e-08, + "loss": 0.2691, + "step": 21318 + }, + { + "epoch": 0.9648789318850418, + "grad_norm": 0.628252502714485, + "learning_rate": 3.2395661854511264e-08, + "loss": 0.3111, + "step": 21319 + }, + { + "epoch": 0.9649241909934374, + "grad_norm": 0.6548347401051914, + "learning_rate": 3.23124188693702e-08, + "loss": 0.2912, + "step": 21320 + }, + { + "epoch": 0.964969450101833, + "grad_norm": 0.6173020789342651, + "learning_rate": 3.222928262457736e-08, + "loss": 0.2759, + "step": 21321 + }, + { + "epoch": 0.9650147092102286, + "grad_norm": 0.6195357469478212, + "learning_rate": 3.2146253121920215e-08, + "loss": 0.3075, + "step": 21322 + }, + { + "epoch": 0.9650599683186242, + "grad_norm": 0.6253934878655681, + "learning_rate": 3.2063330363182323e-08, + "loss": 0.2806, + "step": 21323 + }, + { + "epoch": 0.9651052274270197, + "grad_norm": 0.24519266579212162, + "learning_rate": 3.19805143501456e-08, + "loss": 0.4556, + "step": 21324 + }, + { + "epoch": 0.9651504865354152, + "grad_norm": 0.2714398967046984, + "learning_rate": 3.1897805084589726e-08, + "loss": 0.4621, + "step": 21325 + }, + { + "epoch": 0.9651957456438108, + "grad_norm": 0.7212628197749601, + "learning_rate": 3.1815202568291625e-08, + "loss": 0.2697, + "step": 21326 + }, + { + "epoch": 0.9652410047522064, + "grad_norm": 0.5578222247668211, + "learning_rate": 3.173270680302598e-08, + "loss": 0.2547, + "step": 21327 + }, + { + "epoch": 0.9652862638606019, + "grad_norm": 0.6141571513729598, + "learning_rate": 3.165031779056582e-08, + "loss": 0.321, + "step": 21328 + }, + { + "epoch": 0.9653315229689975, + "grad_norm": 0.5825078984311327, + "learning_rate": 3.156803553268084e-08, + "loss": 0.2925, + "step": 21329 + }, + { + "epoch": 0.9653767820773931, + "grad_norm": 0.5824074331733144, + "learning_rate": 3.1485860031140183e-08, + "loss": 0.2951, + "step": 21330 + }, + { + "epoch": 0.9654220411857887, + "grad_norm": 0.26065467366216083, + "learning_rate": 3.1403791287707986e-08, + "loss": 0.4912, + "step": 21331 + }, + { + "epoch": 0.9654673002941841, + "grad_norm": 0.6281474671279208, + "learning_rate": 3.1321829304148954e-08, + "loss": 0.3223, + "step": 21332 + }, + { + "epoch": 0.9655125594025797, + "grad_norm": 0.6394881447406083, + "learning_rate": 3.1239974082223347e-08, + "loss": 0.3109, + "step": 21333 + }, + { + "epoch": 0.9655578185109753, + "grad_norm": 0.6804236092716724, + "learning_rate": 3.115822562369086e-08, + "loss": 0.2848, + "step": 21334 + }, + { + "epoch": 0.9656030776193709, + "grad_norm": 0.2707283479344151, + "learning_rate": 3.107658393030677e-08, + "loss": 0.451, + "step": 21335 + }, + { + "epoch": 0.9656483367277665, + "grad_norm": 0.6753311154891868, + "learning_rate": 3.0995049003826325e-08, + "loss": 0.2892, + "step": 21336 + }, + { + "epoch": 0.965693595836162, + "grad_norm": 0.6298728408666605, + "learning_rate": 3.0913620846000916e-08, + "loss": 0.3038, + "step": 21337 + }, + { + "epoch": 0.9657388549445576, + "grad_norm": 0.5729926603889947, + "learning_rate": 3.083229945858079e-08, + "loss": 0.3137, + "step": 21338 + }, + { + "epoch": 0.9657841140529532, + "grad_norm": 0.7725364840099211, + "learning_rate": 3.075108484331235e-08, + "loss": 0.3039, + "step": 21339 + }, + { + "epoch": 0.9658293731613488, + "grad_norm": 0.6693582053497801, + "learning_rate": 3.066997700194197e-08, + "loss": 0.2881, + "step": 21340 + }, + { + "epoch": 0.9658746322697442, + "grad_norm": 0.6188826859538752, + "learning_rate": 3.0588975936211017e-08, + "loss": 0.2904, + "step": 21341 + }, + { + "epoch": 0.9659198913781398, + "grad_norm": 0.6369808490204127, + "learning_rate": 3.05080816478609e-08, + "loss": 0.3559, + "step": 21342 + }, + { + "epoch": 0.9659651504865354, + "grad_norm": 0.6384628270851854, + "learning_rate": 3.042729413862966e-08, + "loss": 0.3077, + "step": 21343 + }, + { + "epoch": 0.966010409594931, + "grad_norm": 0.6037050679353614, + "learning_rate": 3.034661341025258e-08, + "loss": 0.275, + "step": 21344 + }, + { + "epoch": 0.9660556687033266, + "grad_norm": 0.576146704475577, + "learning_rate": 3.0266039464463823e-08, + "loss": 0.2826, + "step": 21345 + }, + { + "epoch": 0.9661009278117221, + "grad_norm": 0.5736812009031691, + "learning_rate": 3.0185572302994795e-08, + "loss": 0.3294, + "step": 21346 + }, + { + "epoch": 0.9661461869201177, + "grad_norm": 0.6388020999905242, + "learning_rate": 3.0105211927574096e-08, + "loss": 0.2729, + "step": 21347 + }, + { + "epoch": 0.9661914460285133, + "grad_norm": 0.5807264304566786, + "learning_rate": 3.002495833992813e-08, + "loss": 0.3027, + "step": 21348 + }, + { + "epoch": 0.9662367051369088, + "grad_norm": 0.6342121065316381, + "learning_rate": 2.994481154178164e-08, + "loss": 0.3396, + "step": 21349 + }, + { + "epoch": 0.9662819642453043, + "grad_norm": 0.6107434989636781, + "learning_rate": 2.9864771534857114e-08, + "loss": 0.2826, + "step": 21350 + }, + { + "epoch": 0.9663272233536999, + "grad_norm": 0.6483472713840857, + "learning_rate": 2.978483832087431e-08, + "loss": 0.3127, + "step": 21351 + }, + { + "epoch": 0.9663724824620955, + "grad_norm": 0.5739299116426992, + "learning_rate": 2.970501190154962e-08, + "loss": 0.2856, + "step": 21352 + }, + { + "epoch": 0.9664177415704911, + "grad_norm": 0.25963244238987054, + "learning_rate": 2.9625292278600005e-08, + "loss": 0.4672, + "step": 21353 + }, + { + "epoch": 0.9664630006788866, + "grad_norm": 0.5650708348270717, + "learning_rate": 2.9545679453736874e-08, + "loss": 0.2673, + "step": 21354 + }, + { + "epoch": 0.9665082597872822, + "grad_norm": 0.5911926544516285, + "learning_rate": 2.9466173428672197e-08, + "loss": 0.2856, + "step": 21355 + }, + { + "epoch": 0.9665535188956778, + "grad_norm": 0.6348876111669358, + "learning_rate": 2.9386774205112934e-08, + "loss": 0.3381, + "step": 21356 + }, + { + "epoch": 0.9665987780040733, + "grad_norm": 0.5792450882193889, + "learning_rate": 2.9307481784766057e-08, + "loss": 0.2879, + "step": 21357 + }, + { + "epoch": 0.9666440371124689, + "grad_norm": 0.6526299208429099, + "learning_rate": 2.92282961693352e-08, + "loss": 0.3035, + "step": 21358 + }, + { + "epoch": 0.9666892962208644, + "grad_norm": 0.5981744444686902, + "learning_rate": 2.9149217360521788e-08, + "loss": 0.3048, + "step": 21359 + }, + { + "epoch": 0.96673455532926, + "grad_norm": 0.6631230697732848, + "learning_rate": 2.907024536002501e-08, + "loss": 0.2464, + "step": 21360 + }, + { + "epoch": 0.9667798144376556, + "grad_norm": 0.6415576068812007, + "learning_rate": 2.8991380169541284e-08, + "loss": 0.3114, + "step": 21361 + }, + { + "epoch": 0.9668250735460512, + "grad_norm": 0.6141966263098978, + "learning_rate": 2.8912621790765373e-08, + "loss": 0.2719, + "step": 21362 + }, + { + "epoch": 0.9668703326544467, + "grad_norm": 0.6159770387398168, + "learning_rate": 2.883397022538981e-08, + "loss": 0.3383, + "step": 21363 + }, + { + "epoch": 0.9669155917628423, + "grad_norm": 0.6686155973457726, + "learning_rate": 2.8755425475104904e-08, + "loss": 0.2737, + "step": 21364 + }, + { + "epoch": 0.9669608508712378, + "grad_norm": 0.3101889005652721, + "learning_rate": 2.8676987541597646e-08, + "loss": 0.4554, + "step": 21365 + }, + { + "epoch": 0.9670061099796334, + "grad_norm": 0.2625387624468325, + "learning_rate": 2.859865642655335e-08, + "loss": 0.4551, + "step": 21366 + }, + { + "epoch": 0.9670513690880289, + "grad_norm": 0.6784526673158491, + "learning_rate": 2.8520432131655673e-08, + "loss": 0.2948, + "step": 21367 + }, + { + "epoch": 0.9670966281964245, + "grad_norm": 0.7018253409325259, + "learning_rate": 2.8442314658584936e-08, + "loss": 0.2791, + "step": 21368 + }, + { + "epoch": 0.9671418873048201, + "grad_norm": 0.6030150366424402, + "learning_rate": 2.8364304009020348e-08, + "loss": 0.2803, + "step": 21369 + }, + { + "epoch": 0.9671871464132157, + "grad_norm": 0.5687094575670066, + "learning_rate": 2.8286400184637242e-08, + "loss": 0.3103, + "step": 21370 + }, + { + "epoch": 0.9672324055216113, + "grad_norm": 0.604944121680553, + "learning_rate": 2.820860318710983e-08, + "loss": 0.269, + "step": 21371 + }, + { + "epoch": 0.9672776646300068, + "grad_norm": 0.6127066407259337, + "learning_rate": 2.813091301811066e-08, + "loss": 0.3115, + "step": 21372 + }, + { + "epoch": 0.9673229237384023, + "grad_norm": 0.5783694521108265, + "learning_rate": 2.8053329679307293e-08, + "loss": 0.2924, + "step": 21373 + }, + { + "epoch": 0.9673681828467979, + "grad_norm": 0.28195681477137985, + "learning_rate": 2.797585317236784e-08, + "loss": 0.4593, + "step": 21374 + }, + { + "epoch": 0.9674134419551935, + "grad_norm": 0.6422170672348851, + "learning_rate": 2.789848349895763e-08, + "loss": 0.27, + "step": 21375 + }, + { + "epoch": 0.967458701063589, + "grad_norm": 0.5907711761183103, + "learning_rate": 2.782122066073756e-08, + "loss": 0.2965, + "step": 21376 + }, + { + "epoch": 0.9675039601719846, + "grad_norm": 0.2995926429078034, + "learning_rate": 2.7744064659369073e-08, + "loss": 0.4785, + "step": 21377 + }, + { + "epoch": 0.9675492192803802, + "grad_norm": 0.8353705596359702, + "learning_rate": 2.7667015496509187e-08, + "loss": 0.3488, + "step": 21378 + }, + { + "epoch": 0.9675944783887758, + "grad_norm": 0.6500850117419013, + "learning_rate": 2.7590073173813792e-08, + "loss": 0.3093, + "step": 21379 + }, + { + "epoch": 0.9676397374971714, + "grad_norm": 0.2531423524179933, + "learning_rate": 2.7513237692936567e-08, + "loss": 0.4659, + "step": 21380 + }, + { + "epoch": 0.9676849966055668, + "grad_norm": 0.649250415015185, + "learning_rate": 2.743650905552786e-08, + "loss": 0.2858, + "step": 21381 + }, + { + "epoch": 0.9677302557139624, + "grad_norm": 0.24597576493641854, + "learning_rate": 2.7359887263236352e-08, + "loss": 0.436, + "step": 21382 + }, + { + "epoch": 0.967775514822358, + "grad_norm": 0.23698909783832273, + "learning_rate": 2.7283372317708502e-08, + "loss": 0.4512, + "step": 21383 + }, + { + "epoch": 0.9678207739307536, + "grad_norm": 0.5635119405187751, + "learning_rate": 2.720696422058855e-08, + "loss": 0.3041, + "step": 21384 + }, + { + "epoch": 0.9678660330391491, + "grad_norm": 0.6416710534046843, + "learning_rate": 2.713066297351852e-08, + "loss": 0.3106, + "step": 21385 + }, + { + "epoch": 0.9679112921475447, + "grad_norm": 0.6454105343058075, + "learning_rate": 2.7054468578137093e-08, + "loss": 0.2724, + "step": 21386 + }, + { + "epoch": 0.9679565512559403, + "grad_norm": 0.8120809368879789, + "learning_rate": 2.6978381036081857e-08, + "loss": 0.2513, + "step": 21387 + }, + { + "epoch": 0.9680018103643359, + "grad_norm": 0.5913980149953101, + "learning_rate": 2.6902400348987613e-08, + "loss": 0.284, + "step": 21388 + }, + { + "epoch": 0.9680470694727313, + "grad_norm": 0.2593861442090093, + "learning_rate": 2.6826526518487496e-08, + "loss": 0.4646, + "step": 21389 + }, + { + "epoch": 0.9680923285811269, + "grad_norm": 0.5799799435681116, + "learning_rate": 2.6750759546211312e-08, + "loss": 0.2709, + "step": 21390 + }, + { + "epoch": 0.9681375876895225, + "grad_norm": 0.8052886730121234, + "learning_rate": 2.6675099433787212e-08, + "loss": 0.2577, + "step": 21391 + }, + { + "epoch": 0.9681828467979181, + "grad_norm": 0.6162388408613758, + "learning_rate": 2.6599546182840553e-08, + "loss": 0.2765, + "step": 21392 + }, + { + "epoch": 0.9682281059063137, + "grad_norm": 0.6170886181395424, + "learning_rate": 2.652409979499504e-08, + "loss": 0.3015, + "step": 21393 + }, + { + "epoch": 0.9682733650147092, + "grad_norm": 0.25442359485009247, + "learning_rate": 2.6448760271872152e-08, + "loss": 0.4623, + "step": 21394 + }, + { + "epoch": 0.9683186241231048, + "grad_norm": 0.2603122844717185, + "learning_rate": 2.6373527615090044e-08, + "loss": 0.4517, + "step": 21395 + }, + { + "epoch": 0.9683638832315004, + "grad_norm": 0.603507615130539, + "learning_rate": 2.6298401826265195e-08, + "loss": 0.2465, + "step": 21396 + }, + { + "epoch": 0.968409142339896, + "grad_norm": 0.5992182971411182, + "learning_rate": 2.6223382907012428e-08, + "loss": 0.3009, + "step": 21397 + }, + { + "epoch": 0.9684544014482914, + "grad_norm": 0.6311970985757361, + "learning_rate": 2.6148470858943787e-08, + "loss": 0.2966, + "step": 21398 + }, + { + "epoch": 0.968499660556687, + "grad_norm": 0.6006286490801301, + "learning_rate": 2.607366568366798e-08, + "loss": 0.2755, + "step": 21399 + }, + { + "epoch": 0.9685449196650826, + "grad_norm": 0.6708687272743549, + "learning_rate": 2.5998967382792618e-08, + "loss": 0.3063, + "step": 21400 + }, + { + "epoch": 0.9685901787734782, + "grad_norm": 0.5393526089892805, + "learning_rate": 2.592437595792363e-08, + "loss": 0.2948, + "step": 21401 + }, + { + "epoch": 0.9686354378818737, + "grad_norm": 0.5912557610649447, + "learning_rate": 2.584989141066252e-08, + "loss": 0.2808, + "step": 21402 + }, + { + "epoch": 0.9686806969902693, + "grad_norm": 0.6030725729127032, + "learning_rate": 2.577551374261078e-08, + "loss": 0.2584, + "step": 21403 + }, + { + "epoch": 0.9687259560986649, + "grad_norm": 0.6478863741545718, + "learning_rate": 2.5701242955365468e-08, + "loss": 0.2946, + "step": 21404 + }, + { + "epoch": 0.9687712152070604, + "grad_norm": 0.6117868458022317, + "learning_rate": 2.562707905052364e-08, + "loss": 0.2576, + "step": 21405 + }, + { + "epoch": 0.968816474315456, + "grad_norm": 0.2594879696804052, + "learning_rate": 2.555302202967791e-08, + "loss": 0.4557, + "step": 21406 + }, + { + "epoch": 0.9688617334238515, + "grad_norm": 0.2998048691048826, + "learning_rate": 2.5479071894420337e-08, + "loss": 0.4701, + "step": 21407 + }, + { + "epoch": 0.9689069925322471, + "grad_norm": 0.6665315299044354, + "learning_rate": 2.5405228646339096e-08, + "loss": 0.3045, + "step": 21408 + }, + { + "epoch": 0.9689522516406427, + "grad_norm": 0.6795854446055811, + "learning_rate": 2.5331492287021252e-08, + "loss": 0.3092, + "step": 21409 + }, + { + "epoch": 0.9689975107490383, + "grad_norm": 0.6512646157878369, + "learning_rate": 2.5257862818051092e-08, + "loss": 0.2733, + "step": 21410 + }, + { + "epoch": 0.9690427698574338, + "grad_norm": 0.2645611035802779, + "learning_rate": 2.5184340241010687e-08, + "loss": 0.4437, + "step": 21411 + }, + { + "epoch": 0.9690880289658294, + "grad_norm": 0.24962282761171117, + "learning_rate": 2.511092455747932e-08, + "loss": 0.4545, + "step": 21412 + }, + { + "epoch": 0.9691332880742249, + "grad_norm": 0.5633033273133744, + "learning_rate": 2.503761576903574e-08, + "loss": 0.2791, + "step": 21413 + }, + { + "epoch": 0.9691785471826205, + "grad_norm": 0.24964261861415946, + "learning_rate": 2.4964413877254233e-08, + "loss": 0.4514, + "step": 21414 + }, + { + "epoch": 0.9692238062910161, + "grad_norm": 0.8924893770088967, + "learning_rate": 2.489131888370744e-08, + "loss": 0.2838, + "step": 21415 + }, + { + "epoch": 0.9692690653994116, + "grad_norm": 0.6368048568730261, + "learning_rate": 2.4818330789966872e-08, + "loss": 0.2717, + "step": 21416 + }, + { + "epoch": 0.9693143245078072, + "grad_norm": 0.25813157938113807, + "learning_rate": 2.474544959760017e-08, + "loss": 0.4867, + "step": 21417 + }, + { + "epoch": 0.9693595836162028, + "grad_norm": 0.6155721713424308, + "learning_rate": 2.4672675308173298e-08, + "loss": 0.3078, + "step": 21418 + }, + { + "epoch": 0.9694048427245984, + "grad_norm": 0.5923410351217169, + "learning_rate": 2.460000792324946e-08, + "loss": 0.2826, + "step": 21419 + }, + { + "epoch": 0.9694501018329938, + "grad_norm": 0.6407965605028224, + "learning_rate": 2.4527447444391838e-08, + "loss": 0.3003, + "step": 21420 + }, + { + "epoch": 0.9694953609413894, + "grad_norm": 0.6538379741907768, + "learning_rate": 2.445499387315753e-08, + "loss": 0.3277, + "step": 21421 + }, + { + "epoch": 0.969540620049785, + "grad_norm": 0.6457363395930453, + "learning_rate": 2.4382647211104173e-08, + "loss": 0.3262, + "step": 21422 + }, + { + "epoch": 0.9695858791581806, + "grad_norm": 0.6266991708334412, + "learning_rate": 2.4310407459786634e-08, + "loss": 0.2973, + "step": 21423 + }, + { + "epoch": 0.9696311382665761, + "grad_norm": 0.26378314561444793, + "learning_rate": 2.423827462075701e-08, + "loss": 0.471, + "step": 21424 + }, + { + "epoch": 0.9696763973749717, + "grad_norm": 0.5846522553477222, + "learning_rate": 2.416624869556461e-08, + "loss": 0.3047, + "step": 21425 + }, + { + "epoch": 0.9697216564833673, + "grad_norm": 0.5748228073096529, + "learning_rate": 2.409432968575709e-08, + "loss": 0.2762, + "step": 21426 + }, + { + "epoch": 0.9697669155917629, + "grad_norm": 0.5776693574561639, + "learning_rate": 2.402251759288099e-08, + "loss": 0.3131, + "step": 21427 + }, + { + "epoch": 0.9698121747001585, + "grad_norm": 0.6249029756593694, + "learning_rate": 2.3950812418477852e-08, + "loss": 0.2992, + "step": 21428 + }, + { + "epoch": 0.9698574338085539, + "grad_norm": 1.7975295639136513, + "learning_rate": 2.3879214164088672e-08, + "loss": 0.2795, + "step": 21429 + }, + { + "epoch": 0.9699026929169495, + "grad_norm": 0.34538818894279005, + "learning_rate": 2.3807722831252768e-08, + "loss": 0.4724, + "step": 21430 + }, + { + "epoch": 0.9699479520253451, + "grad_norm": 0.6697070486805683, + "learning_rate": 2.3736338421505578e-08, + "loss": 0.3004, + "step": 21431 + }, + { + "epoch": 0.9699932111337407, + "grad_norm": 0.26052467045218836, + "learning_rate": 2.366506093638088e-08, + "loss": 0.4629, + "step": 21432 + }, + { + "epoch": 0.9700384702421362, + "grad_norm": 0.6098721901226468, + "learning_rate": 2.359389037741022e-08, + "loss": 0.3041, + "step": 21433 + }, + { + "epoch": 0.9700837293505318, + "grad_norm": 0.6389878370521055, + "learning_rate": 2.3522826746123496e-08, + "loss": 0.2828, + "step": 21434 + }, + { + "epoch": 0.9701289884589274, + "grad_norm": 0.6152278199424831, + "learning_rate": 2.3451870044046698e-08, + "loss": 0.2912, + "step": 21435 + }, + { + "epoch": 0.970174247567323, + "grad_norm": 0.25780271796413584, + "learning_rate": 2.338102027270528e-08, + "loss": 0.4857, + "step": 21436 + }, + { + "epoch": 0.9702195066757184, + "grad_norm": 0.6092924189798018, + "learning_rate": 2.33102774336208e-08, + "loss": 0.2852, + "step": 21437 + }, + { + "epoch": 0.970264765784114, + "grad_norm": 0.5870916846508486, + "learning_rate": 2.323964152831426e-08, + "loss": 0.3193, + "step": 21438 + }, + { + "epoch": 0.9703100248925096, + "grad_norm": 0.5496351854261411, + "learning_rate": 2.3169112558302232e-08, + "loss": 0.3011, + "step": 21439 + }, + { + "epoch": 0.9703552840009052, + "grad_norm": 0.5707888549972117, + "learning_rate": 2.3098690525101275e-08, + "loss": 0.2424, + "step": 21440 + }, + { + "epoch": 0.9704005431093008, + "grad_norm": 0.5956520405613818, + "learning_rate": 2.302837543022407e-08, + "loss": 0.2863, + "step": 21441 + }, + { + "epoch": 0.9704458022176963, + "grad_norm": 0.6552895738865977, + "learning_rate": 2.2958167275181076e-08, + "loss": 0.2945, + "step": 21442 + }, + { + "epoch": 0.9704910613260919, + "grad_norm": 0.5961285069005348, + "learning_rate": 2.288806606148164e-08, + "loss": 0.2832, + "step": 21443 + }, + { + "epoch": 0.9705363204344875, + "grad_norm": 0.6151196913769361, + "learning_rate": 2.281807179063178e-08, + "loss": 0.3379, + "step": 21444 + }, + { + "epoch": 0.970581579542883, + "grad_norm": 0.7587253293887296, + "learning_rate": 2.2748184464134736e-08, + "loss": 0.2987, + "step": 21445 + }, + { + "epoch": 0.9706268386512785, + "grad_norm": 0.6081690588146057, + "learning_rate": 2.26784040834932e-08, + "loss": 0.3182, + "step": 21446 + }, + { + "epoch": 0.9706720977596741, + "grad_norm": 0.6881531310815816, + "learning_rate": 2.2608730650205966e-08, + "loss": 0.278, + "step": 21447 + }, + { + "epoch": 0.9707173568680697, + "grad_norm": 0.642456803386314, + "learning_rate": 2.2539164165770178e-08, + "loss": 0.2851, + "step": 21448 + }, + { + "epoch": 0.9707626159764653, + "grad_norm": 0.6942711063770738, + "learning_rate": 2.2469704631680743e-08, + "loss": 0.2962, + "step": 21449 + }, + { + "epoch": 0.9708078750848609, + "grad_norm": 0.24719462371086962, + "learning_rate": 2.2400352049429807e-08, + "loss": 0.4807, + "step": 21450 + }, + { + "epoch": 0.9708531341932564, + "grad_norm": 0.7658959323580632, + "learning_rate": 2.2331106420507843e-08, + "loss": 0.2831, + "step": 21451 + }, + { + "epoch": 0.970898393301652, + "grad_norm": 0.6173714523200496, + "learning_rate": 2.2261967746402545e-08, + "loss": 0.3444, + "step": 21452 + }, + { + "epoch": 0.9709436524100475, + "grad_norm": 0.638420072583342, + "learning_rate": 2.2192936028599953e-08, + "loss": 0.2827, + "step": 21453 + }, + { + "epoch": 0.9709889115184431, + "grad_norm": 0.5560272357588397, + "learning_rate": 2.212401126858277e-08, + "loss": 0.2824, + "step": 21454 + }, + { + "epoch": 0.9710341706268386, + "grad_norm": 0.5624028101250275, + "learning_rate": 2.2055193467832582e-08, + "loss": 0.2749, + "step": 21455 + }, + { + "epoch": 0.9710794297352342, + "grad_norm": 0.6437701220694999, + "learning_rate": 2.1986482627827098e-08, + "loss": 0.3075, + "step": 21456 + }, + { + "epoch": 0.9711246888436298, + "grad_norm": 0.6032683639248221, + "learning_rate": 2.1917878750043475e-08, + "loss": 0.2736, + "step": 21457 + }, + { + "epoch": 0.9711699479520254, + "grad_norm": 0.6148573920082251, + "learning_rate": 2.1849381835956084e-08, + "loss": 0.3054, + "step": 21458 + }, + { + "epoch": 0.9712152070604209, + "grad_norm": 0.5825952684250644, + "learning_rate": 2.1780991887035973e-08, + "loss": 0.3534, + "step": 21459 + }, + { + "epoch": 0.9712604661688164, + "grad_norm": 0.87701726721472, + "learning_rate": 2.1712708904752522e-08, + "loss": 0.3276, + "step": 21460 + }, + { + "epoch": 0.971305725277212, + "grad_norm": 0.6016188627438112, + "learning_rate": 2.1644532890573444e-08, + "loss": 0.2737, + "step": 21461 + }, + { + "epoch": 0.9713509843856076, + "grad_norm": 0.6370604104920156, + "learning_rate": 2.1576463845964236e-08, + "loss": 0.2692, + "step": 21462 + }, + { + "epoch": 0.9713962434940032, + "grad_norm": 0.56852633621876, + "learning_rate": 2.150850177238595e-08, + "loss": 0.3225, + "step": 21463 + }, + { + "epoch": 0.9714415026023987, + "grad_norm": 0.6540170279504566, + "learning_rate": 2.1440646671300193e-08, + "loss": 0.3534, + "step": 21464 + }, + { + "epoch": 0.9714867617107943, + "grad_norm": 0.6270797679263911, + "learning_rate": 2.1372898544164134e-08, + "loss": 0.2592, + "step": 21465 + }, + { + "epoch": 0.9715320208191899, + "grad_norm": 0.6053132155608527, + "learning_rate": 2.1305257392433832e-08, + "loss": 0.3286, + "step": 21466 + }, + { + "epoch": 0.9715772799275855, + "grad_norm": 0.5849943563276676, + "learning_rate": 2.1237723217562566e-08, + "loss": 0.2471, + "step": 21467 + }, + { + "epoch": 0.971622539035981, + "grad_norm": 0.9313910065181935, + "learning_rate": 2.1170296021001956e-08, + "loss": 0.3247, + "step": 21468 + }, + { + "epoch": 0.9716677981443765, + "grad_norm": 0.6476378321677396, + "learning_rate": 2.1102975804200287e-08, + "loss": 0.2969, + "step": 21469 + }, + { + "epoch": 0.9717130572527721, + "grad_norm": 0.6802598270516329, + "learning_rate": 2.1035762568603623e-08, + "loss": 0.3016, + "step": 21470 + }, + { + "epoch": 0.9717583163611677, + "grad_norm": 0.25566338750406215, + "learning_rate": 2.096865631565692e-08, + "loss": 0.437, + "step": 21471 + }, + { + "epoch": 0.9718035754695632, + "grad_norm": 0.6144524105325259, + "learning_rate": 2.090165704680236e-08, + "loss": 0.2753, + "step": 21472 + }, + { + "epoch": 0.9718488345779588, + "grad_norm": 0.5939623725491535, + "learning_rate": 2.083476476347823e-08, + "loss": 0.3103, + "step": 21473 + }, + { + "epoch": 0.9718940936863544, + "grad_norm": 0.6245506587877645, + "learning_rate": 2.076797946712339e-08, + "loss": 0.299, + "step": 21474 + }, + { + "epoch": 0.97193935279475, + "grad_norm": 0.6063673889028894, + "learning_rate": 2.0701301159171683e-08, + "loss": 0.2893, + "step": 21475 + }, + { + "epoch": 0.9719846119031456, + "grad_norm": 0.6002761281196382, + "learning_rate": 2.0634729841056966e-08, + "loss": 0.2745, + "step": 21476 + }, + { + "epoch": 0.972029871011541, + "grad_norm": 0.6330634611823746, + "learning_rate": 2.0568265514208097e-08, + "loss": 0.2948, + "step": 21477 + }, + { + "epoch": 0.9720751301199366, + "grad_norm": 0.5653621329054418, + "learning_rate": 2.0501908180054486e-08, + "loss": 0.2946, + "step": 21478 + }, + { + "epoch": 0.9721203892283322, + "grad_norm": 0.6638168852132872, + "learning_rate": 2.0435657840021104e-08, + "loss": 0.2776, + "step": 21479 + }, + { + "epoch": 0.9721656483367278, + "grad_norm": 0.5561328122632877, + "learning_rate": 2.0369514495532373e-08, + "loss": 0.2782, + "step": 21480 + }, + { + "epoch": 0.9722109074451233, + "grad_norm": 0.24582704186734805, + "learning_rate": 2.0303478148008813e-08, + "loss": 0.4444, + "step": 21481 + }, + { + "epoch": 0.9722561665535189, + "grad_norm": 0.6173504157082066, + "learning_rate": 2.02375487988693e-08, + "loss": 0.3019, + "step": 21482 + }, + { + "epoch": 0.9723014256619145, + "grad_norm": 0.6212625353881241, + "learning_rate": 2.0171726449531025e-08, + "loss": 0.3275, + "step": 21483 + }, + { + "epoch": 0.97234668477031, + "grad_norm": 0.6496282193749826, + "learning_rate": 2.010601110140786e-08, + "loss": 0.3348, + "step": 21484 + }, + { + "epoch": 0.9723919438787056, + "grad_norm": 0.5653231467934803, + "learning_rate": 2.0040402755912013e-08, + "loss": 0.461, + "step": 21485 + }, + { + "epoch": 0.9724372029871011, + "grad_norm": 0.605088017779233, + "learning_rate": 1.9974901414452907e-08, + "loss": 0.2806, + "step": 21486 + }, + { + "epoch": 0.9724824620954967, + "grad_norm": 0.6410627323825772, + "learning_rate": 1.9909507078438307e-08, + "loss": 0.2612, + "step": 21487 + }, + { + "epoch": 0.9725277212038923, + "grad_norm": 0.2572270975628381, + "learning_rate": 1.984421974927375e-08, + "loss": 0.481, + "step": 21488 + }, + { + "epoch": 0.9725729803122879, + "grad_norm": 0.5717832368892245, + "learning_rate": 1.9779039428360904e-08, + "loss": 0.277, + "step": 21489 + }, + { + "epoch": 0.9726182394206834, + "grad_norm": 0.5836692238315675, + "learning_rate": 1.971396611710086e-08, + "loss": 0.2534, + "step": 21490 + }, + { + "epoch": 0.972663498529079, + "grad_norm": 0.6329943429107763, + "learning_rate": 1.9648999816891944e-08, + "loss": 0.2539, + "step": 21491 + }, + { + "epoch": 0.9727087576374746, + "grad_norm": 0.6262710039347652, + "learning_rate": 1.958414052913027e-08, + "loss": 0.2916, + "step": 21492 + }, + { + "epoch": 0.9727540167458701, + "grad_norm": 0.6340901437998767, + "learning_rate": 1.951938825520916e-08, + "loss": 0.3064, + "step": 21493 + }, + { + "epoch": 0.9727992758542656, + "grad_norm": 0.613620906601682, + "learning_rate": 1.9454742996519726e-08, + "loss": 0.3294, + "step": 21494 + }, + { + "epoch": 0.9728445349626612, + "grad_norm": 0.5792752864447169, + "learning_rate": 1.9390204754451967e-08, + "loss": 0.2749, + "step": 21495 + }, + { + "epoch": 0.9728897940710568, + "grad_norm": 0.26855686320942884, + "learning_rate": 1.9325773530391446e-08, + "loss": 0.4725, + "step": 21496 + }, + { + "epoch": 0.9729350531794524, + "grad_norm": 0.5983834047394934, + "learning_rate": 1.926144932572316e-08, + "loss": 0.277, + "step": 21497 + }, + { + "epoch": 0.972980312287848, + "grad_norm": 0.7360701247702012, + "learning_rate": 1.9197232141829335e-08, + "loss": 0.2616, + "step": 21498 + }, + { + "epoch": 0.9730255713962435, + "grad_norm": 0.6197883921720705, + "learning_rate": 1.913312198008943e-08, + "loss": 0.3041, + "step": 21499 + }, + { + "epoch": 0.973070830504639, + "grad_norm": 0.5844609404817493, + "learning_rate": 1.9069118841881228e-08, + "loss": 0.2714, + "step": 21500 + }, + { + "epoch": 0.9731160896130346, + "grad_norm": 0.5603732149038518, + "learning_rate": 1.9005222728579742e-08, + "loss": 0.3048, + "step": 21501 + }, + { + "epoch": 0.9731613487214302, + "grad_norm": 0.6533007099497831, + "learning_rate": 1.8941433641558315e-08, + "loss": 0.2807, + "step": 21502 + }, + { + "epoch": 0.9732066078298257, + "grad_norm": 0.6925200042670877, + "learning_rate": 1.8877751582186966e-08, + "loss": 0.2856, + "step": 21503 + }, + { + "epoch": 0.9732518669382213, + "grad_norm": 0.6110740454862127, + "learning_rate": 1.8814176551834595e-08, + "loss": 0.3288, + "step": 21504 + }, + { + "epoch": 0.9732971260466169, + "grad_norm": 0.6500400906628822, + "learning_rate": 1.8750708551867336e-08, + "loss": 0.3062, + "step": 21505 + }, + { + "epoch": 0.9733423851550125, + "grad_norm": 0.38635779551762117, + "learning_rate": 1.8687347583647985e-08, + "loss": 0.4977, + "step": 21506 + }, + { + "epoch": 0.973387644263408, + "grad_norm": 0.6038331706183906, + "learning_rate": 1.8624093648539344e-08, + "loss": 0.2942, + "step": 21507 + }, + { + "epoch": 0.9734329033718035, + "grad_norm": 0.6262687242978424, + "learning_rate": 1.856094674789921e-08, + "loss": 0.2343, + "step": 21508 + }, + { + "epoch": 0.9734781624801991, + "grad_norm": 0.6102937548707332, + "learning_rate": 1.8497906883085394e-08, + "loss": 0.3216, + "step": 21509 + }, + { + "epoch": 0.9735234215885947, + "grad_norm": 0.5662823014312907, + "learning_rate": 1.8434974055451248e-08, + "loss": 0.4984, + "step": 21510 + }, + { + "epoch": 0.9735686806969903, + "grad_norm": 0.6200777181397742, + "learning_rate": 1.8372148266350696e-08, + "loss": 0.2854, + "step": 21511 + }, + { + "epoch": 0.9736139398053858, + "grad_norm": 0.6630690064524812, + "learning_rate": 1.830942951713266e-08, + "loss": 0.2692, + "step": 21512 + }, + { + "epoch": 0.9736591989137814, + "grad_norm": 0.6760977205002116, + "learning_rate": 1.8246817809144392e-08, + "loss": 0.3083, + "step": 21513 + }, + { + "epoch": 0.973704458022177, + "grad_norm": 0.6257431106132486, + "learning_rate": 1.8184313143732035e-08, + "loss": 0.3112, + "step": 21514 + }, + { + "epoch": 0.9737497171305726, + "grad_norm": 0.5883702422264766, + "learning_rate": 1.812191552223841e-08, + "loss": 0.3552, + "step": 21515 + }, + { + "epoch": 0.973794976238968, + "grad_norm": 0.610938178761219, + "learning_rate": 1.8059624946004105e-08, + "loss": 0.2976, + "step": 21516 + }, + { + "epoch": 0.9738402353473636, + "grad_norm": 0.6088542630214259, + "learning_rate": 1.79974414163675e-08, + "loss": 0.3352, + "step": 21517 + }, + { + "epoch": 0.9738854944557592, + "grad_norm": 0.23932636750915598, + "learning_rate": 1.7935364934664744e-08, + "loss": 0.4551, + "step": 21518 + }, + { + "epoch": 0.9739307535641548, + "grad_norm": 0.6661497356134606, + "learning_rate": 1.7873395502229774e-08, + "loss": 0.3013, + "step": 21519 + }, + { + "epoch": 0.9739760126725503, + "grad_norm": 0.5806129832544668, + "learning_rate": 1.7811533120394296e-08, + "loss": 0.256, + "step": 21520 + }, + { + "epoch": 0.9740212717809459, + "grad_norm": 0.28544530450591754, + "learning_rate": 1.7749777790487256e-08, + "loss": 0.4692, + "step": 21521 + }, + { + "epoch": 0.9740665308893415, + "grad_norm": 0.5703201834714856, + "learning_rate": 1.7688129513835915e-08, + "loss": 0.3194, + "step": 21522 + }, + { + "epoch": 0.9741117899977371, + "grad_norm": 0.5872381541669788, + "learning_rate": 1.7626588291764225e-08, + "loss": 0.2476, + "step": 21523 + }, + { + "epoch": 0.9741570491061327, + "grad_norm": 0.5319838055635173, + "learning_rate": 1.7565154125595006e-08, + "loss": 0.2815, + "step": 21524 + }, + { + "epoch": 0.9742023082145281, + "grad_norm": 0.28202626818847565, + "learning_rate": 1.7503827016648876e-08, + "loss": 0.4754, + "step": 21525 + }, + { + "epoch": 0.9742475673229237, + "grad_norm": 0.6586370358676067, + "learning_rate": 1.7442606966242005e-08, + "loss": 0.2987, + "step": 21526 + }, + { + "epoch": 0.9742928264313193, + "grad_norm": 0.2766401223790382, + "learning_rate": 1.7381493975691667e-08, + "loss": 0.4714, + "step": 21527 + }, + { + "epoch": 0.9743380855397149, + "grad_norm": 0.6413995570584645, + "learning_rate": 1.7320488046309593e-08, + "loss": 0.2988, + "step": 21528 + }, + { + "epoch": 0.9743833446481104, + "grad_norm": 0.6030670799447602, + "learning_rate": 1.7259589179406953e-08, + "loss": 0.3021, + "step": 21529 + }, + { + "epoch": 0.974428603756506, + "grad_norm": 0.8385567228907049, + "learning_rate": 1.7198797376292708e-08, + "loss": 0.2757, + "step": 21530 + }, + { + "epoch": 0.9744738628649016, + "grad_norm": 0.6998410794379304, + "learning_rate": 1.7138112638272476e-08, + "loss": 0.3029, + "step": 21531 + }, + { + "epoch": 0.9745191219732972, + "grad_norm": 0.668868767753376, + "learning_rate": 1.7077534966650767e-08, + "loss": 0.3045, + "step": 21532 + }, + { + "epoch": 0.9745643810816927, + "grad_norm": 0.6094772471471711, + "learning_rate": 1.7017064362728764e-08, + "loss": 0.2379, + "step": 21533 + }, + { + "epoch": 0.9746096401900882, + "grad_norm": 0.6392838917586049, + "learning_rate": 1.6956700827806538e-08, + "loss": 0.2848, + "step": 21534 + }, + { + "epoch": 0.9746548992984838, + "grad_norm": 0.7005772112789435, + "learning_rate": 1.689644436317972e-08, + "loss": 0.3148, + "step": 21535 + }, + { + "epoch": 0.9747001584068794, + "grad_norm": 0.6039779318860282, + "learning_rate": 1.6836294970144495e-08, + "loss": 0.2571, + "step": 21536 + }, + { + "epoch": 0.974745417515275, + "grad_norm": 0.6087022627637411, + "learning_rate": 1.6776252649992608e-08, + "loss": 0.2869, + "step": 21537 + }, + { + "epoch": 0.9747906766236705, + "grad_norm": 0.6536183340738748, + "learning_rate": 1.6716317404014136e-08, + "loss": 0.276, + "step": 21538 + }, + { + "epoch": 0.9748359357320661, + "grad_norm": 0.5890982188008039, + "learning_rate": 1.665648923349694e-08, + "loss": 0.2726, + "step": 21539 + }, + { + "epoch": 0.9748811948404617, + "grad_norm": 0.6485504916729523, + "learning_rate": 1.659676813972666e-08, + "loss": 0.3057, + "step": 21540 + }, + { + "epoch": 0.9749264539488572, + "grad_norm": 0.6579237092730789, + "learning_rate": 1.6537154123986156e-08, + "loss": 0.3385, + "step": 21541 + }, + { + "epoch": 0.9749717130572527, + "grad_norm": 0.69823033168968, + "learning_rate": 1.647764718755718e-08, + "loss": 0.2696, + "step": 21542 + }, + { + "epoch": 0.9750169721656483, + "grad_norm": 0.5525465895809587, + "learning_rate": 1.641824733171815e-08, + "loss": 0.3085, + "step": 21543 + }, + { + "epoch": 0.9750622312740439, + "grad_norm": 0.5679357769200283, + "learning_rate": 1.6358954557744166e-08, + "loss": 0.3365, + "step": 21544 + }, + { + "epoch": 0.9751074903824395, + "grad_norm": 0.250665352604849, + "learning_rate": 1.629976886691087e-08, + "loss": 0.4753, + "step": 21545 + }, + { + "epoch": 0.9751527494908351, + "grad_norm": 0.5955064802272411, + "learning_rate": 1.6240690260488913e-08, + "loss": 0.3078, + "step": 21546 + }, + { + "epoch": 0.9751980085992306, + "grad_norm": 0.608417079637003, + "learning_rate": 1.6181718739748388e-08, + "loss": 0.3097, + "step": 21547 + }, + { + "epoch": 0.9752432677076261, + "grad_norm": 0.6143087542971571, + "learning_rate": 1.6122854305955505e-08, + "loss": 0.2917, + "step": 21548 + }, + { + "epoch": 0.9752885268160217, + "grad_norm": 0.2602212956379094, + "learning_rate": 1.6064096960376476e-08, + "loss": 0.4708, + "step": 21549 + }, + { + "epoch": 0.9753337859244173, + "grad_norm": 0.6126645269940824, + "learning_rate": 1.600544670427251e-08, + "loss": 0.331, + "step": 21550 + }, + { + "epoch": 0.9753790450328128, + "grad_norm": 0.6449883399574192, + "learning_rate": 1.5946903538904267e-08, + "loss": 0.2803, + "step": 21551 + }, + { + "epoch": 0.9754243041412084, + "grad_norm": 0.6410152600174044, + "learning_rate": 1.5888467465529632e-08, + "loss": 0.2897, + "step": 21552 + }, + { + "epoch": 0.975469563249604, + "grad_norm": 1.1902445501114625, + "learning_rate": 1.583013848540482e-08, + "loss": 0.3022, + "step": 21553 + }, + { + "epoch": 0.9755148223579996, + "grad_norm": 0.562064160439384, + "learning_rate": 1.5771916599782167e-08, + "loss": 0.2639, + "step": 21554 + }, + { + "epoch": 0.9755600814663951, + "grad_norm": 0.5666439513306696, + "learning_rate": 1.5713801809913443e-08, + "loss": 0.2595, + "step": 21555 + }, + { + "epoch": 0.9756053405747906, + "grad_norm": 0.6185233159292626, + "learning_rate": 1.5655794117047097e-08, + "loss": 0.253, + "step": 21556 + }, + { + "epoch": 0.9756505996831862, + "grad_norm": 0.6169772789722926, + "learning_rate": 1.5597893522428796e-08, + "loss": 0.304, + "step": 21557 + }, + { + "epoch": 0.9756958587915818, + "grad_norm": 0.27782104133037505, + "learning_rate": 1.5540100027304217e-08, + "loss": 0.4642, + "step": 21558 + }, + { + "epoch": 0.9757411178999774, + "grad_norm": 0.2715187409490002, + "learning_rate": 1.5482413632914028e-08, + "loss": 0.4763, + "step": 21559 + }, + { + "epoch": 0.9757863770083729, + "grad_norm": 0.5957365122967457, + "learning_rate": 1.5424834340497796e-08, + "loss": 0.3033, + "step": 21560 + }, + { + "epoch": 0.9758316361167685, + "grad_norm": 0.6438850183819776, + "learning_rate": 1.5367362151292863e-08, + "loss": 0.3173, + "step": 21561 + }, + { + "epoch": 0.9758768952251641, + "grad_norm": 0.8646528602373577, + "learning_rate": 1.5309997066534354e-08, + "loss": 0.2965, + "step": 21562 + }, + { + "epoch": 0.9759221543335597, + "grad_norm": 0.6283703457696358, + "learning_rate": 1.5252739087454617e-08, + "loss": 0.3036, + "step": 21563 + }, + { + "epoch": 0.9759674134419551, + "grad_norm": 0.5739317342776467, + "learning_rate": 1.5195588215283773e-08, + "loss": 0.2437, + "step": 21564 + }, + { + "epoch": 0.9760126725503507, + "grad_norm": 0.5952634209800168, + "learning_rate": 1.5138544451250292e-08, + "loss": 0.3071, + "step": 21565 + }, + { + "epoch": 0.9760579316587463, + "grad_norm": 0.6403235870149089, + "learning_rate": 1.5081607796579856e-08, + "loss": 0.3059, + "step": 21566 + }, + { + "epoch": 0.9761031907671419, + "grad_norm": 0.27436692115992706, + "learning_rate": 1.502477825249593e-08, + "loss": 0.4823, + "step": 21567 + }, + { + "epoch": 0.9761484498755375, + "grad_norm": 0.6302113621127572, + "learning_rate": 1.4968055820218653e-08, + "loss": 0.2929, + "step": 21568 + }, + { + "epoch": 0.976193708983933, + "grad_norm": 0.6228658000514758, + "learning_rate": 1.4911440500968155e-08, + "loss": 0.2988, + "step": 21569 + }, + { + "epoch": 0.9762389680923286, + "grad_norm": 0.2591940642929855, + "learning_rate": 1.4854932295959578e-08, + "loss": 0.4579, + "step": 21570 + }, + { + "epoch": 0.9762842272007242, + "grad_norm": 0.2359238616287538, + "learning_rate": 1.4798531206408617e-08, + "loss": 0.4573, + "step": 21571 + }, + { + "epoch": 0.9763294863091198, + "grad_norm": 0.5557793876162257, + "learning_rate": 1.474223723352597e-08, + "loss": 0.2585, + "step": 21572 + }, + { + "epoch": 0.9763747454175152, + "grad_norm": 0.6008817741426201, + "learning_rate": 1.4686050378521221e-08, + "loss": 0.2887, + "step": 21573 + }, + { + "epoch": 0.9764200045259108, + "grad_norm": 0.25315653321249215, + "learning_rate": 1.4629970642602298e-08, + "loss": 0.4633, + "step": 21574 + }, + { + "epoch": 0.9764652636343064, + "grad_norm": 0.5625693320994961, + "learning_rate": 1.457399802697379e-08, + "loss": 0.2273, + "step": 21575 + }, + { + "epoch": 0.976510522742702, + "grad_norm": 0.5776294944714752, + "learning_rate": 1.4518132532838624e-08, + "loss": 0.2596, + "step": 21576 + }, + { + "epoch": 0.9765557818510975, + "grad_norm": 0.6204162617245031, + "learning_rate": 1.4462374161396952e-08, + "loss": 0.273, + "step": 21577 + }, + { + "epoch": 0.9766010409594931, + "grad_norm": 0.5925661056508587, + "learning_rate": 1.440672291384726e-08, + "loss": 0.2819, + "step": 21578 + }, + { + "epoch": 0.9766463000678887, + "grad_norm": 0.5902542261005069, + "learning_rate": 1.4351178791384702e-08, + "loss": 0.3058, + "step": 21579 + }, + { + "epoch": 0.9766915591762843, + "grad_norm": 0.6734892936433218, + "learning_rate": 1.4295741795203322e-08, + "loss": 0.3097, + "step": 21580 + }, + { + "epoch": 0.9767368182846798, + "grad_norm": 0.25765264371655133, + "learning_rate": 1.4240411926493835e-08, + "loss": 0.4596, + "step": 21581 + }, + { + "epoch": 0.9767820773930753, + "grad_norm": 0.569592922648196, + "learning_rate": 1.4185189186445292e-08, + "loss": 0.2836, + "step": 21582 + }, + { + "epoch": 0.9768273365014709, + "grad_norm": 0.6310649915158688, + "learning_rate": 1.4130073576244518e-08, + "loss": 0.2876, + "step": 21583 + }, + { + "epoch": 0.9768725956098665, + "grad_norm": 0.5954175314647394, + "learning_rate": 1.4075065097075013e-08, + "loss": 0.289, + "step": 21584 + }, + { + "epoch": 0.9769178547182621, + "grad_norm": 0.584969965100779, + "learning_rate": 1.402016375011972e-08, + "loss": 0.3152, + "step": 21585 + }, + { + "epoch": 0.9769631138266576, + "grad_norm": 0.6513597095588576, + "learning_rate": 1.3965369536557694e-08, + "loss": 0.2726, + "step": 21586 + }, + { + "epoch": 0.9770083729350532, + "grad_norm": 0.6068463477849974, + "learning_rate": 1.3910682457566327e-08, + "loss": 0.3172, + "step": 21587 + }, + { + "epoch": 0.9770536320434487, + "grad_norm": 0.6569562461124709, + "learning_rate": 1.3856102514321345e-08, + "loss": 0.2749, + "step": 21588 + }, + { + "epoch": 0.9770988911518443, + "grad_norm": 0.6050937931355871, + "learning_rate": 1.3801629707994035e-08, + "loss": 0.2997, + "step": 21589 + }, + { + "epoch": 0.9771441502602398, + "grad_norm": 0.26659203847765517, + "learning_rate": 1.3747264039756236e-08, + "loss": 0.4529, + "step": 21590 + }, + { + "epoch": 0.9771894093686354, + "grad_norm": 0.6891775542701496, + "learning_rate": 1.3693005510775903e-08, + "loss": 0.2708, + "step": 21591 + }, + { + "epoch": 0.977234668477031, + "grad_norm": 0.6208702276175495, + "learning_rate": 1.3638854122218214e-08, + "loss": 0.2726, + "step": 21592 + }, + { + "epoch": 0.9772799275854266, + "grad_norm": 0.5831698339732241, + "learning_rate": 1.358480987524724e-08, + "loss": 0.2751, + "step": 21593 + }, + { + "epoch": 0.9773251866938222, + "grad_norm": 0.6343592251514779, + "learning_rate": 1.3530872771024273e-08, + "loss": 0.321, + "step": 21594 + }, + { + "epoch": 0.9773704458022177, + "grad_norm": 0.5875276035704957, + "learning_rate": 1.3477042810707829e-08, + "loss": 0.2605, + "step": 21595 + }, + { + "epoch": 0.9774157049106132, + "grad_norm": 0.24808703230881213, + "learning_rate": 1.3423319995454765e-08, + "loss": 0.4601, + "step": 21596 + }, + { + "epoch": 0.9774609640190088, + "grad_norm": 0.26599602174500386, + "learning_rate": 1.3369704326419709e-08, + "loss": 0.4717, + "step": 21597 + }, + { + "epoch": 0.9775062231274044, + "grad_norm": 0.6308260174148227, + "learning_rate": 1.3316195804753962e-08, + "loss": 0.2957, + "step": 21598 + }, + { + "epoch": 0.9775514822357999, + "grad_norm": 0.6417862465844587, + "learning_rate": 1.3262794431608272e-08, + "loss": 0.3095, + "step": 21599 + }, + { + "epoch": 0.9775967413441955, + "grad_norm": 0.5779867840850115, + "learning_rate": 1.32095002081295e-08, + "loss": 0.3006, + "step": 21600 + }, + { + "epoch": 0.9776420004525911, + "grad_norm": 0.6374846073793348, + "learning_rate": 1.3156313135462284e-08, + "loss": 0.2623, + "step": 21601 + }, + { + "epoch": 0.9776872595609867, + "grad_norm": 0.6026661398778114, + "learning_rate": 1.310323321475071e-08, + "loss": 0.3378, + "step": 21602 + }, + { + "epoch": 0.9777325186693823, + "grad_norm": 0.24092280344486997, + "learning_rate": 1.3050260447133866e-08, + "loss": 0.4632, + "step": 21603 + }, + { + "epoch": 0.9777777777777777, + "grad_norm": 0.6303012480618946, + "learning_rate": 1.2997394833750842e-08, + "loss": 0.295, + "step": 21604 + }, + { + "epoch": 0.9778230368861733, + "grad_norm": 0.5629119592200705, + "learning_rate": 1.2944636375737952e-08, + "loss": 0.2873, + "step": 21605 + }, + { + "epoch": 0.9778682959945689, + "grad_norm": 0.587805715190683, + "learning_rate": 1.289198507422762e-08, + "loss": 0.248, + "step": 21606 + }, + { + "epoch": 0.9779135551029645, + "grad_norm": 0.7009930509067366, + "learning_rate": 1.2839440930352276e-08, + "loss": 0.3042, + "step": 21607 + }, + { + "epoch": 0.97795881421136, + "grad_norm": 0.624810524584758, + "learning_rate": 1.2787003945239906e-08, + "loss": 0.2643, + "step": 21608 + }, + { + "epoch": 0.9780040733197556, + "grad_norm": 0.62834319904867, + "learning_rate": 1.2734674120018497e-08, + "loss": 0.2777, + "step": 21609 + }, + { + "epoch": 0.9780493324281512, + "grad_norm": 0.2711195268524511, + "learning_rate": 1.268245145581104e-08, + "loss": 0.4483, + "step": 21610 + }, + { + "epoch": 0.9780945915365468, + "grad_norm": 0.6405341538941641, + "learning_rate": 1.2630335953740524e-08, + "loss": 0.2703, + "step": 21611 + }, + { + "epoch": 0.9781398506449422, + "grad_norm": 0.7713359262031876, + "learning_rate": 1.257832761492661e-08, + "loss": 0.3029, + "step": 21612 + }, + { + "epoch": 0.9781851097533378, + "grad_norm": 0.6050305225094347, + "learning_rate": 1.2526426440486738e-08, + "loss": 0.297, + "step": 21613 + }, + { + "epoch": 0.9782303688617334, + "grad_norm": 0.2565092506379215, + "learning_rate": 1.2474632431536126e-08, + "loss": 0.4589, + "step": 21614 + }, + { + "epoch": 0.978275627970129, + "grad_norm": 0.6426891785778904, + "learning_rate": 1.2422945589187774e-08, + "loss": 0.3016, + "step": 21615 + }, + { + "epoch": 0.9783208870785246, + "grad_norm": 0.7105239256678185, + "learning_rate": 1.2371365914551903e-08, + "loss": 0.3074, + "step": 21616 + }, + { + "epoch": 0.9783661461869201, + "grad_norm": 0.6318290634122832, + "learning_rate": 1.2319893408737072e-08, + "loss": 0.3187, + "step": 21617 + }, + { + "epoch": 0.9784114052953157, + "grad_norm": 0.7091202626299058, + "learning_rate": 1.2268528072849063e-08, + "loss": 0.2676, + "step": 21618 + }, + { + "epoch": 0.9784566644037113, + "grad_norm": 0.5991788178591732, + "learning_rate": 1.221726990799199e-08, + "loss": 0.2756, + "step": 21619 + }, + { + "epoch": 0.9785019235121069, + "grad_norm": 0.2644385392660403, + "learning_rate": 1.21661189152672e-08, + "loss": 0.4842, + "step": 21620 + }, + { + "epoch": 0.9785471826205023, + "grad_norm": 0.6044750980358486, + "learning_rate": 1.2115075095773255e-08, + "loss": 0.3265, + "step": 21621 + }, + { + "epoch": 0.9785924417288979, + "grad_norm": 0.6782799426016777, + "learning_rate": 1.206413845060761e-08, + "loss": 0.3153, + "step": 21622 + }, + { + "epoch": 0.9786377008372935, + "grad_norm": 0.6519410449382191, + "learning_rate": 1.2013308980863836e-08, + "loss": 0.2665, + "step": 21623 + }, + { + "epoch": 0.9786829599456891, + "grad_norm": 0.5774124278120938, + "learning_rate": 1.1962586687634947e-08, + "loss": 0.2875, + "step": 21624 + }, + { + "epoch": 0.9787282190540846, + "grad_norm": 0.6004423970128941, + "learning_rate": 1.1911971572010073e-08, + "loss": 0.2441, + "step": 21625 + }, + { + "epoch": 0.9787734781624802, + "grad_norm": 0.5843913260285941, + "learning_rate": 1.1861463635077785e-08, + "loss": 0.3083, + "step": 21626 + }, + { + "epoch": 0.9788187372708758, + "grad_norm": 0.6598961792562245, + "learning_rate": 1.181106287792222e-08, + "loss": 0.3188, + "step": 21627 + }, + { + "epoch": 0.9788639963792714, + "grad_norm": 0.6118189962772933, + "learning_rate": 1.1760769301626951e-08, + "loss": 0.2678, + "step": 21628 + }, + { + "epoch": 0.9789092554876669, + "grad_norm": 0.7576996983657155, + "learning_rate": 1.1710582907272783e-08, + "loss": 0.2965, + "step": 21629 + }, + { + "epoch": 0.9789545145960624, + "grad_norm": 0.28542584526267356, + "learning_rate": 1.166050369593774e-08, + "loss": 0.4581, + "step": 21630 + }, + { + "epoch": 0.978999773704458, + "grad_norm": 0.5788657187354852, + "learning_rate": 1.1610531668697633e-08, + "loss": 0.2755, + "step": 21631 + }, + { + "epoch": 0.9790450328128536, + "grad_norm": 0.6078411782319739, + "learning_rate": 1.1560666826627154e-08, + "loss": 0.2844, + "step": 21632 + }, + { + "epoch": 0.9790902919212492, + "grad_norm": 0.6201364270028205, + "learning_rate": 1.1510909170796558e-08, + "loss": 0.3157, + "step": 21633 + }, + { + "epoch": 0.9791355510296447, + "grad_norm": 0.5508890145642606, + "learning_rate": 1.14612587022761e-08, + "loss": 0.2549, + "step": 21634 + }, + { + "epoch": 0.9791808101380403, + "grad_norm": 0.6795842409627164, + "learning_rate": 1.1411715422131598e-08, + "loss": 0.2701, + "step": 21635 + }, + { + "epoch": 0.9792260692464358, + "grad_norm": 0.5777709409082231, + "learning_rate": 1.1362279331428305e-08, + "loss": 0.2996, + "step": 21636 + }, + { + "epoch": 0.9792713283548314, + "grad_norm": 0.2564448952346507, + "learning_rate": 1.1312950431228153e-08, + "loss": 0.4574, + "step": 21637 + }, + { + "epoch": 0.979316587463227, + "grad_norm": 0.7122237193809913, + "learning_rate": 1.126372872259085e-08, + "loss": 0.2868, + "step": 21638 + }, + { + "epoch": 0.9793618465716225, + "grad_norm": 0.5895747232643146, + "learning_rate": 1.1214614206574437e-08, + "loss": 0.2437, + "step": 21639 + }, + { + "epoch": 0.9794071056800181, + "grad_norm": 0.630322761241128, + "learning_rate": 1.1165606884234182e-08, + "loss": 0.3135, + "step": 21640 + }, + { + "epoch": 0.9794523647884137, + "grad_norm": 0.6556277953449368, + "learning_rate": 1.111670675662313e-08, + "loss": 0.2899, + "step": 21641 + }, + { + "epoch": 0.9794976238968093, + "grad_norm": 0.5784803966331807, + "learning_rate": 1.1067913824791553e-08, + "loss": 0.2835, + "step": 21642 + }, + { + "epoch": 0.9795428830052048, + "grad_norm": 0.547317704540307, + "learning_rate": 1.1019228089788613e-08, + "loss": 0.2543, + "step": 21643 + }, + { + "epoch": 0.9795881421136003, + "grad_norm": 0.5978956990884308, + "learning_rate": 1.0970649552659585e-08, + "loss": 0.2773, + "step": 21644 + }, + { + "epoch": 0.9796334012219959, + "grad_norm": 0.5614664206206553, + "learning_rate": 1.092217821444863e-08, + "loss": 0.2687, + "step": 21645 + }, + { + "epoch": 0.9796786603303915, + "grad_norm": 0.672027978277878, + "learning_rate": 1.0873814076197142e-08, + "loss": 0.2588, + "step": 21646 + }, + { + "epoch": 0.979723919438787, + "grad_norm": 0.6207091688396222, + "learning_rate": 1.0825557138944843e-08, + "loss": 0.2885, + "step": 21647 + }, + { + "epoch": 0.9797691785471826, + "grad_norm": 0.6139079486988628, + "learning_rate": 1.0777407403728123e-08, + "loss": 0.2752, + "step": 21648 + }, + { + "epoch": 0.9798144376555782, + "grad_norm": 0.5793301468223664, + "learning_rate": 1.0729364871581716e-08, + "loss": 0.2969, + "step": 21649 + }, + { + "epoch": 0.9798596967639738, + "grad_norm": 0.7081059442224155, + "learning_rate": 1.0681429543538125e-08, + "loss": 0.309, + "step": 21650 + }, + { + "epoch": 0.9799049558723694, + "grad_norm": 0.6522737384702686, + "learning_rate": 1.0633601420626528e-08, + "loss": 0.31, + "step": 21651 + }, + { + "epoch": 0.9799502149807648, + "grad_norm": 0.6194217668405, + "learning_rate": 1.0585880503875546e-08, + "loss": 0.3273, + "step": 21652 + }, + { + "epoch": 0.9799954740891604, + "grad_norm": 0.7235806562053905, + "learning_rate": 1.0538266794309914e-08, + "loss": 0.2502, + "step": 21653 + }, + { + "epoch": 0.980040733197556, + "grad_norm": 0.6185869570994706, + "learning_rate": 1.049076029295326e-08, + "loss": 0.3025, + "step": 21654 + }, + { + "epoch": 0.9800859923059516, + "grad_norm": 0.628706820093385, + "learning_rate": 1.044336100082588e-08, + "loss": 0.3451, + "step": 21655 + }, + { + "epoch": 0.9801312514143471, + "grad_norm": 0.6164354143700633, + "learning_rate": 1.03960689189464e-08, + "loss": 0.3198, + "step": 21656 + }, + { + "epoch": 0.9801765105227427, + "grad_norm": 0.6485613291924816, + "learning_rate": 1.0348884048331232e-08, + "loss": 0.2957, + "step": 21657 + }, + { + "epoch": 0.9802217696311383, + "grad_norm": 0.6598439358952244, + "learning_rate": 1.0301806389994006e-08, + "loss": 0.3375, + "step": 21658 + }, + { + "epoch": 0.9802670287395339, + "grad_norm": 0.5931629157219409, + "learning_rate": 1.025483594494614e-08, + "loss": 0.3017, + "step": 21659 + }, + { + "epoch": 0.9803122878479293, + "grad_norm": 0.5585563100963906, + "learning_rate": 1.0207972714196824e-08, + "loss": 0.2703, + "step": 21660 + }, + { + "epoch": 0.9803575469563249, + "grad_norm": 0.5768770066117372, + "learning_rate": 1.0161216698753029e-08, + "loss": 0.329, + "step": 21661 + }, + { + "epoch": 0.9804028060647205, + "grad_norm": 0.266834695010642, + "learning_rate": 1.0114567899620066e-08, + "loss": 0.4897, + "step": 21662 + }, + { + "epoch": 0.9804480651731161, + "grad_norm": 0.6164671838543433, + "learning_rate": 1.0068026317799906e-08, + "loss": 0.2643, + "step": 21663 + }, + { + "epoch": 0.9804933242815117, + "grad_norm": 0.6455551846103544, + "learning_rate": 1.0021591954291754e-08, + "loss": 0.2698, + "step": 21664 + }, + { + "epoch": 0.9805385833899072, + "grad_norm": 0.5680472792211821, + "learning_rate": 9.975264810094254e-09, + "loss": 0.2947, + "step": 21665 + }, + { + "epoch": 0.9805838424983028, + "grad_norm": 0.6534256833339015, + "learning_rate": 9.929044886203276e-09, + "loss": 0.2885, + "step": 21666 + }, + { + "epoch": 0.9806291016066984, + "grad_norm": 0.6006793231315621, + "learning_rate": 9.882932183610806e-09, + "loss": 0.2808, + "step": 21667 + }, + { + "epoch": 0.980674360715094, + "grad_norm": 0.25155329493710854, + "learning_rate": 9.836926703307714e-09, + "loss": 0.4742, + "step": 21668 + }, + { + "epoch": 0.9807196198234894, + "grad_norm": 0.5974539516861112, + "learning_rate": 9.791028446283768e-09, + "loss": 0.3059, + "step": 21669 + }, + { + "epoch": 0.980764878931885, + "grad_norm": 0.6617095694002861, + "learning_rate": 9.745237413523733e-09, + "loss": 0.248, + "step": 21670 + }, + { + "epoch": 0.9808101380402806, + "grad_norm": 0.6060716678997371, + "learning_rate": 9.69955360601238e-09, + "loss": 0.2827, + "step": 21671 + }, + { + "epoch": 0.9808553971486762, + "grad_norm": 0.5792760913170358, + "learning_rate": 9.653977024731143e-09, + "loss": 0.3029, + "step": 21672 + }, + { + "epoch": 0.9809006562570718, + "grad_norm": 0.2910242789051668, + "learning_rate": 9.608507670659239e-09, + "loss": 0.4947, + "step": 21673 + }, + { + "epoch": 0.9809459153654673, + "grad_norm": 0.6148235384331048, + "learning_rate": 9.563145544773666e-09, + "loss": 0.2854, + "step": 21674 + }, + { + "epoch": 0.9809911744738629, + "grad_norm": 0.615447357686388, + "learning_rate": 9.517890648049199e-09, + "loss": 0.2991, + "step": 21675 + }, + { + "epoch": 0.9810364335822584, + "grad_norm": 0.6092808326297857, + "learning_rate": 9.472742981458393e-09, + "loss": 0.2366, + "step": 21676 + }, + { + "epoch": 0.981081692690654, + "grad_norm": 0.6672915344250399, + "learning_rate": 9.427702545970474e-09, + "loss": 0.3255, + "step": 21677 + }, + { + "epoch": 0.9811269517990495, + "grad_norm": 0.6059301178060165, + "learning_rate": 9.38276934255411e-09, + "loss": 0.2415, + "step": 21678 + }, + { + "epoch": 0.9811722109074451, + "grad_norm": 0.5872986394992463, + "learning_rate": 9.337943372175195e-09, + "loss": 0.272, + "step": 21679 + }, + { + "epoch": 0.9812174700158407, + "grad_norm": 0.5859666654621924, + "learning_rate": 9.293224635795184e-09, + "loss": 0.2612, + "step": 21680 + }, + { + "epoch": 0.9812627291242363, + "grad_norm": 0.6206243334069153, + "learning_rate": 9.248613134376638e-09, + "loss": 0.278, + "step": 21681 + }, + { + "epoch": 0.9813079882326318, + "grad_norm": 0.5897081586642346, + "learning_rate": 9.204108868877127e-09, + "loss": 0.3476, + "step": 21682 + }, + { + "epoch": 0.9813532473410274, + "grad_norm": 0.626181183441953, + "learning_rate": 9.15971184025366e-09, + "loss": 0.3461, + "step": 21683 + }, + { + "epoch": 0.981398506449423, + "grad_norm": 0.5570704921342378, + "learning_rate": 9.115422049459365e-09, + "loss": 0.3023, + "step": 21684 + }, + { + "epoch": 0.9814437655578185, + "grad_norm": 0.6182692913792313, + "learning_rate": 9.071239497446815e-09, + "loss": 0.3333, + "step": 21685 + }, + { + "epoch": 0.9814890246662141, + "grad_norm": 0.6284031326139313, + "learning_rate": 9.027164185164694e-09, + "loss": 0.3385, + "step": 21686 + }, + { + "epoch": 0.9815342837746096, + "grad_norm": 0.7153580755772212, + "learning_rate": 8.983196113560023e-09, + "loss": 0.2968, + "step": 21687 + }, + { + "epoch": 0.9815795428830052, + "grad_norm": 0.27580635853133384, + "learning_rate": 8.939335283577599e-09, + "loss": 0.4477, + "step": 21688 + }, + { + "epoch": 0.9816248019914008, + "grad_norm": 0.615050617869175, + "learning_rate": 8.895581696160006e-09, + "loss": 0.2952, + "step": 21689 + }, + { + "epoch": 0.9816700610997964, + "grad_norm": 0.5893081313297105, + "learning_rate": 8.851935352247597e-09, + "loss": 0.3283, + "step": 21690 + }, + { + "epoch": 0.9817153202081919, + "grad_norm": 0.6061131730742584, + "learning_rate": 8.808396252777962e-09, + "loss": 0.2646, + "step": 21691 + }, + { + "epoch": 0.9817605793165874, + "grad_norm": 0.602182475757348, + "learning_rate": 8.76496439868646e-09, + "loss": 0.2645, + "step": 21692 + }, + { + "epoch": 0.981805838424983, + "grad_norm": 0.5596327104464522, + "learning_rate": 8.721639790906788e-09, + "loss": 0.3046, + "step": 21693 + }, + { + "epoch": 0.9818510975333786, + "grad_norm": 0.24469286039476712, + "learning_rate": 8.67842243036876e-09, + "loss": 0.4679, + "step": 21694 + }, + { + "epoch": 0.9818963566417741, + "grad_norm": 0.7909436392895103, + "learning_rate": 8.635312318002742e-09, + "loss": 0.2419, + "step": 21695 + }, + { + "epoch": 0.9819416157501697, + "grad_norm": 0.28832400803941166, + "learning_rate": 8.59230945473355e-09, + "loss": 0.4627, + "step": 21696 + }, + { + "epoch": 0.9819868748585653, + "grad_norm": 0.6002097689708532, + "learning_rate": 8.549413841485443e-09, + "loss": 0.2726, + "step": 21697 + }, + { + "epoch": 0.9820321339669609, + "grad_norm": 0.5583495432364033, + "learning_rate": 8.506625479181018e-09, + "loss": 0.3065, + "step": 21698 + }, + { + "epoch": 0.9820773930753565, + "grad_norm": 0.26688574661396586, + "learning_rate": 8.46394436873843e-09, + "loss": 0.4479, + "step": 21699 + }, + { + "epoch": 0.9821226521837519, + "grad_norm": 0.6449065606928776, + "learning_rate": 8.421370511075833e-09, + "loss": 0.2828, + "step": 21700 + }, + { + "epoch": 0.9821679112921475, + "grad_norm": 0.6317061710680665, + "learning_rate": 8.378903907106938e-09, + "loss": 0.2696, + "step": 21701 + }, + { + "epoch": 0.9822131704005431, + "grad_norm": 0.577668120306397, + "learning_rate": 8.336544557745463e-09, + "loss": 0.2752, + "step": 21702 + }, + { + "epoch": 0.9822584295089387, + "grad_norm": 0.6065787159864158, + "learning_rate": 8.294292463900123e-09, + "loss": 0.2829, + "step": 21703 + }, + { + "epoch": 0.9823036886173342, + "grad_norm": 0.600593619384963, + "learning_rate": 8.25214762648019e-09, + "loss": 0.2909, + "step": 21704 + }, + { + "epoch": 0.9823489477257298, + "grad_norm": 0.6630398083335145, + "learning_rate": 8.210110046390496e-09, + "loss": 0.2916, + "step": 21705 + }, + { + "epoch": 0.9823942068341254, + "grad_norm": 0.7609258393204628, + "learning_rate": 8.168179724534209e-09, + "loss": 0.3217, + "step": 21706 + }, + { + "epoch": 0.982439465942521, + "grad_norm": 0.6582641164460659, + "learning_rate": 8.126356661812829e-09, + "loss": 0.2742, + "step": 21707 + }, + { + "epoch": 0.9824847250509166, + "grad_norm": 0.6759465229322568, + "learning_rate": 8.084640859124527e-09, + "loss": 0.3159, + "step": 21708 + }, + { + "epoch": 0.982529984159312, + "grad_norm": 0.6703636419723167, + "learning_rate": 8.043032317365807e-09, + "loss": 0.3072, + "step": 21709 + }, + { + "epoch": 0.9825752432677076, + "grad_norm": 1.1165883505399121, + "learning_rate": 8.001531037430954e-09, + "loss": 0.2814, + "step": 21710 + }, + { + "epoch": 0.9826205023761032, + "grad_norm": 0.6061301568076047, + "learning_rate": 7.960137020210923e-09, + "loss": 0.2428, + "step": 21711 + }, + { + "epoch": 0.9826657614844988, + "grad_norm": 0.6607322205290386, + "learning_rate": 7.918850266596112e-09, + "loss": 0.2852, + "step": 21712 + }, + { + "epoch": 0.9827110205928943, + "grad_norm": 1.1731956802419958, + "learning_rate": 7.877670777473035e-09, + "loss": 0.3121, + "step": 21713 + }, + { + "epoch": 0.9827562797012899, + "grad_norm": 0.4363254521210518, + "learning_rate": 7.836598553726538e-09, + "loss": 0.4782, + "step": 21714 + }, + { + "epoch": 0.9828015388096855, + "grad_norm": 0.4375526802347568, + "learning_rate": 7.79563359623925e-09, + "loss": 0.4794, + "step": 21715 + }, + { + "epoch": 0.982846797918081, + "grad_norm": 0.6432974621505368, + "learning_rate": 7.754775905891576e-09, + "loss": 0.2834, + "step": 21716 + }, + { + "epoch": 0.9828920570264765, + "grad_norm": 0.2588387571579923, + "learning_rate": 7.714025483561149e-09, + "loss": 0.4849, + "step": 21717 + }, + { + "epoch": 0.9829373161348721, + "grad_norm": 0.68694740461852, + "learning_rate": 7.673382330123936e-09, + "loss": 0.2615, + "step": 21718 + }, + { + "epoch": 0.9829825752432677, + "grad_norm": 1.042378564690906, + "learning_rate": 7.63284644645257e-09, + "loss": 0.2903, + "step": 21719 + }, + { + "epoch": 0.9830278343516633, + "grad_norm": 0.6446807855150728, + "learning_rate": 7.59241783341913e-09, + "loss": 0.3058, + "step": 21720 + }, + { + "epoch": 0.9830730934600589, + "grad_norm": 0.2694289263740075, + "learning_rate": 7.552096491891259e-09, + "loss": 0.4596, + "step": 21721 + }, + { + "epoch": 0.9831183525684544, + "grad_norm": 0.559283975254006, + "learning_rate": 7.511882422735483e-09, + "loss": 0.2648, + "step": 21722 + }, + { + "epoch": 0.98316361167685, + "grad_norm": 0.26536821524762305, + "learning_rate": 7.471775626816114e-09, + "loss": 0.4668, + "step": 21723 + }, + { + "epoch": 0.9832088707852455, + "grad_norm": 0.7440514698602366, + "learning_rate": 7.431776104994681e-09, + "loss": 0.3224, + "step": 21724 + }, + { + "epoch": 0.9832541298936411, + "grad_norm": 0.6962424596967072, + "learning_rate": 7.39188385813161e-09, + "loss": 0.2766, + "step": 21725 + }, + { + "epoch": 0.9832993890020366, + "grad_norm": 0.8940563277181036, + "learning_rate": 7.352098887082881e-09, + "loss": 0.2606, + "step": 21726 + }, + { + "epoch": 0.9833446481104322, + "grad_norm": 0.7214958076035248, + "learning_rate": 7.312421192703923e-09, + "loss": 0.3176, + "step": 21727 + }, + { + "epoch": 0.9833899072188278, + "grad_norm": 0.6343731980873564, + "learning_rate": 7.2728507758468295e-09, + "loss": 0.2644, + "step": 21728 + }, + { + "epoch": 0.9834351663272234, + "grad_norm": 0.5961209533114099, + "learning_rate": 7.233387637362587e-09, + "loss": 0.3388, + "step": 21729 + }, + { + "epoch": 0.9834804254356189, + "grad_norm": 0.6028494217289609, + "learning_rate": 7.194031778098853e-09, + "loss": 0.2604, + "step": 21730 + }, + { + "epoch": 0.9835256845440145, + "grad_norm": 0.6506180980165432, + "learning_rate": 7.1547831989005055e-09, + "loss": 0.3093, + "step": 21731 + }, + { + "epoch": 0.98357094365241, + "grad_norm": 0.6274426767704868, + "learning_rate": 7.1156419006118695e-09, + "loss": 0.2921, + "step": 21732 + }, + { + "epoch": 0.9836162027608056, + "grad_norm": 0.6356227393707362, + "learning_rate": 7.076607884073939e-09, + "loss": 0.3085, + "step": 21733 + }, + { + "epoch": 0.9836614618692012, + "grad_norm": 0.6440067946101039, + "learning_rate": 7.037681150124931e-09, + "loss": 0.3198, + "step": 21734 + }, + { + "epoch": 0.9837067209775967, + "grad_norm": 0.622467940468477, + "learning_rate": 6.998861699600845e-09, + "loss": 0.301, + "step": 21735 + }, + { + "epoch": 0.9837519800859923, + "grad_norm": 0.5591829293615435, + "learning_rate": 6.960149533337124e-09, + "loss": 0.2924, + "step": 21736 + }, + { + "epoch": 0.9837972391943879, + "grad_norm": 0.568720885745079, + "learning_rate": 6.921544652164769e-09, + "loss": 0.328, + "step": 21737 + }, + { + "epoch": 0.9838424983027835, + "grad_norm": 0.34489008410081096, + "learning_rate": 6.883047056913117e-09, + "loss": 0.4822, + "step": 21738 + }, + { + "epoch": 0.983887757411179, + "grad_norm": 0.6254423314914499, + "learning_rate": 6.844656748409284e-09, + "loss": 0.265, + "step": 21739 + }, + { + "epoch": 0.9839330165195745, + "grad_norm": 0.5603541966537482, + "learning_rate": 6.8063737274787214e-09, + "loss": 0.2559, + "step": 21740 + }, + { + "epoch": 0.9839782756279701, + "grad_norm": 0.5814920279218694, + "learning_rate": 6.768197994944103e-09, + "loss": 0.2705, + "step": 21741 + }, + { + "epoch": 0.9840235347363657, + "grad_norm": 0.6190468275501919, + "learning_rate": 6.730129551625331e-09, + "loss": 0.2855, + "step": 21742 + }, + { + "epoch": 0.9840687938447613, + "grad_norm": 0.5794138544317892, + "learning_rate": 6.692168398340082e-09, + "loss": 0.3286, + "step": 21743 + }, + { + "epoch": 0.9841140529531568, + "grad_norm": 0.6187931500635636, + "learning_rate": 6.6543145359043714e-09, + "loss": 0.2499, + "step": 21744 + }, + { + "epoch": 0.9841593120615524, + "grad_norm": 0.7168440597895542, + "learning_rate": 6.616567965131992e-09, + "loss": 0.3007, + "step": 21745 + }, + { + "epoch": 0.984204571169948, + "grad_norm": 0.5912575346461676, + "learning_rate": 6.578928686832853e-09, + "loss": 0.2904, + "step": 21746 + }, + { + "epoch": 0.9842498302783436, + "grad_norm": 0.5860976780626126, + "learning_rate": 6.54139670181686e-09, + "loss": 0.2431, + "step": 21747 + }, + { + "epoch": 0.984295089386739, + "grad_norm": 0.26903828166599186, + "learning_rate": 6.503972010890036e-09, + "loss": 0.457, + "step": 21748 + }, + { + "epoch": 0.9843403484951346, + "grad_norm": 0.6051058553500327, + "learning_rate": 6.466654614856183e-09, + "loss": 0.2648, + "step": 21749 + }, + { + "epoch": 0.9843856076035302, + "grad_norm": 0.25852584920291466, + "learning_rate": 6.42944451451799e-09, + "loss": 0.4579, + "step": 21750 + }, + { + "epoch": 0.9844308667119258, + "grad_norm": 0.2643033016293149, + "learning_rate": 6.392341710674266e-09, + "loss": 0.4677, + "step": 21751 + }, + { + "epoch": 0.9844761258203213, + "grad_norm": 0.25894655658059895, + "learning_rate": 6.355346204122148e-09, + "loss": 0.4427, + "step": 21752 + }, + { + "epoch": 0.9845213849287169, + "grad_norm": 0.6426058287872317, + "learning_rate": 6.318457995657113e-09, + "loss": 0.2816, + "step": 21753 + }, + { + "epoch": 0.9845666440371125, + "grad_norm": 0.6257485278487301, + "learning_rate": 6.281677086071303e-09, + "loss": 0.2634, + "step": 21754 + }, + { + "epoch": 0.9846119031455081, + "grad_norm": 0.6100901183104055, + "learning_rate": 6.245003476155198e-09, + "loss": 0.2717, + "step": 21755 + }, + { + "epoch": 0.9846571622539037, + "grad_norm": 0.6110668534928377, + "learning_rate": 6.208437166697056e-09, + "loss": 0.2676, + "step": 21756 + }, + { + "epoch": 0.9847024213622991, + "grad_norm": 0.584749554284075, + "learning_rate": 6.171978158482361e-09, + "loss": 0.2739, + "step": 21757 + }, + { + "epoch": 0.9847476804706947, + "grad_norm": 0.2766780699015175, + "learning_rate": 6.135626452294374e-09, + "loss": 0.4525, + "step": 21758 + }, + { + "epoch": 0.9847929395790903, + "grad_norm": 0.6570081472633232, + "learning_rate": 6.099382048914138e-09, + "loss": 0.3081, + "step": 21759 + }, + { + "epoch": 0.9848381986874859, + "grad_norm": 0.6301607344700081, + "learning_rate": 6.063244949120473e-09, + "loss": 0.2725, + "step": 21760 + }, + { + "epoch": 0.9848834577958814, + "grad_norm": 0.8074035664818978, + "learning_rate": 6.027215153689981e-09, + "loss": 0.2909, + "step": 21761 + }, + { + "epoch": 0.984928716904277, + "grad_norm": 0.5661608368630222, + "learning_rate": 5.9912926633970415e-09, + "loss": 0.2785, + "step": 21762 + }, + { + "epoch": 0.9849739760126726, + "grad_norm": 0.699512720197519, + "learning_rate": 5.955477479013816e-09, + "loss": 0.3317, + "step": 21763 + }, + { + "epoch": 0.9850192351210681, + "grad_norm": 0.5609269535348155, + "learning_rate": 5.919769601308578e-09, + "loss": 0.3022, + "step": 21764 + }, + { + "epoch": 0.9850644942294636, + "grad_norm": 0.5993231895671898, + "learning_rate": 5.8841690310496024e-09, + "loss": 0.3011, + "step": 21765 + }, + { + "epoch": 0.9851097533378592, + "grad_norm": 0.5689363005629489, + "learning_rate": 5.8486757690012775e-09, + "loss": 0.2157, + "step": 21766 + }, + { + "epoch": 0.9851550124462548, + "grad_norm": 0.6758401330216106, + "learning_rate": 5.8132898159268815e-09, + "loss": 0.3054, + "step": 21767 + }, + { + "epoch": 0.9852002715546504, + "grad_norm": 0.567882054273113, + "learning_rate": 5.778011172586362e-09, + "loss": 0.2651, + "step": 21768 + }, + { + "epoch": 0.985245530663046, + "grad_norm": 0.5951791861094041, + "learning_rate": 5.742839839738001e-09, + "loss": 0.2943, + "step": 21769 + }, + { + "epoch": 0.9852907897714415, + "grad_norm": 0.6362072194919404, + "learning_rate": 5.7077758181367516e-09, + "loss": 0.2947, + "step": 21770 + }, + { + "epoch": 0.9853360488798371, + "grad_norm": 0.5708793133016763, + "learning_rate": 5.6728191085370085e-09, + "loss": 0.3013, + "step": 21771 + }, + { + "epoch": 0.9853813079882326, + "grad_norm": 0.5973514269004946, + "learning_rate": 5.637969711689839e-09, + "loss": 0.2648, + "step": 21772 + }, + { + "epoch": 0.9854265670966282, + "grad_norm": 0.5688949733432468, + "learning_rate": 5.603227628342978e-09, + "loss": 0.2827, + "step": 21773 + }, + { + "epoch": 0.9854718262050237, + "grad_norm": 0.6476887178964443, + "learning_rate": 5.56859285924416e-09, + "loss": 0.3101, + "step": 21774 + }, + { + "epoch": 0.9855170853134193, + "grad_norm": 0.6396008335007618, + "learning_rate": 5.534065405136679e-09, + "loss": 0.2817, + "step": 21775 + }, + { + "epoch": 0.9855623444218149, + "grad_norm": 0.2376931055703006, + "learning_rate": 5.499645266762721e-09, + "loss": 0.448, + "step": 21776 + }, + { + "epoch": 0.9856076035302105, + "grad_norm": 0.6520128967399444, + "learning_rate": 5.465332444862248e-09, + "loss": 0.3093, + "step": 21777 + }, + { + "epoch": 0.985652862638606, + "grad_norm": 0.2865227420837884, + "learning_rate": 5.431126940172449e-09, + "loss": 0.4658, + "step": 21778 + }, + { + "epoch": 0.9856981217470016, + "grad_norm": 0.5659488627340931, + "learning_rate": 5.397028753427735e-09, + "loss": 0.2714, + "step": 21779 + }, + { + "epoch": 0.9857433808553971, + "grad_norm": 0.6335095816920012, + "learning_rate": 5.363037885360856e-09, + "loss": 0.3034, + "step": 21780 + }, + { + "epoch": 0.9857886399637927, + "grad_norm": 0.7757831393322988, + "learning_rate": 5.329154336702891e-09, + "loss": 0.2621, + "step": 21781 + }, + { + "epoch": 0.9858338990721883, + "grad_norm": 0.5468882390931025, + "learning_rate": 5.295378108181592e-09, + "loss": 0.261, + "step": 21782 + }, + { + "epoch": 0.9858791581805838, + "grad_norm": 0.5784789337992116, + "learning_rate": 5.261709200521936e-09, + "loss": 0.2598, + "step": 21783 + }, + { + "epoch": 0.9859244172889794, + "grad_norm": 0.6304084729105638, + "learning_rate": 5.228147614448342e-09, + "loss": 0.3111, + "step": 21784 + }, + { + "epoch": 0.985969676397375, + "grad_norm": 0.6380056859811842, + "learning_rate": 5.194693350681901e-09, + "loss": 0.2934, + "step": 21785 + }, + { + "epoch": 0.9860149355057706, + "grad_norm": 0.5925074282805367, + "learning_rate": 5.161346409940371e-09, + "loss": 0.2863, + "step": 21786 + }, + { + "epoch": 0.986060194614166, + "grad_norm": 0.28274373509663, + "learning_rate": 5.128106792941512e-09, + "loss": 0.494, + "step": 21787 + }, + { + "epoch": 0.9861054537225616, + "grad_norm": 0.9279097453101085, + "learning_rate": 5.094974500399197e-09, + "loss": 0.2801, + "step": 21788 + }, + { + "epoch": 0.9861507128309572, + "grad_norm": 0.6266657228728079, + "learning_rate": 5.061949533025079e-09, + "loss": 0.2814, + "step": 21789 + }, + { + "epoch": 0.9861959719393528, + "grad_norm": 0.3617893690237543, + "learning_rate": 5.02903189152859e-09, + "loss": 0.4443, + "step": 21790 + }, + { + "epoch": 0.9862412310477484, + "grad_norm": 0.603404324430023, + "learning_rate": 4.996221576617499e-09, + "loss": 0.3358, + "step": 21791 + }, + { + "epoch": 0.9862864901561439, + "grad_norm": 0.5743585459695362, + "learning_rate": 4.9635185889967966e-09, + "loss": 0.3327, + "step": 21792 + }, + { + "epoch": 0.9863317492645395, + "grad_norm": 0.25070337964915884, + "learning_rate": 4.930922929368698e-09, + "loss": 0.4703, + "step": 21793 + }, + { + "epoch": 0.9863770083729351, + "grad_norm": 0.29054184356976154, + "learning_rate": 4.89843459843431e-09, + "loss": 0.4523, + "step": 21794 + }, + { + "epoch": 0.9864222674813307, + "grad_norm": 0.5935151628381033, + "learning_rate": 4.8660535968908515e-09, + "loss": 0.3137, + "step": 21795 + }, + { + "epoch": 0.9864675265897261, + "grad_norm": 0.6235034034439103, + "learning_rate": 4.833779925434434e-09, + "loss": 0.3458, + "step": 21796 + }, + { + "epoch": 0.9865127856981217, + "grad_norm": 0.6738464231815146, + "learning_rate": 4.801613584758946e-09, + "loss": 0.29, + "step": 21797 + }, + { + "epoch": 0.9865580448065173, + "grad_norm": 0.2779606632713415, + "learning_rate": 4.769554575554947e-09, + "loss": 0.4779, + "step": 21798 + }, + { + "epoch": 0.9866033039149129, + "grad_norm": 0.5716203526929273, + "learning_rate": 4.737602898511884e-09, + "loss": 0.2765, + "step": 21799 + }, + { + "epoch": 0.9866485630233084, + "grad_norm": 0.24347567952338942, + "learning_rate": 4.705758554315876e-09, + "loss": 0.4396, + "step": 21800 + }, + { + "epoch": 0.986693822131704, + "grad_norm": 0.5874975033790542, + "learning_rate": 4.674021543651374e-09, + "loss": 0.2783, + "step": 21801 + }, + { + "epoch": 0.9867390812400996, + "grad_norm": 0.7032677349580255, + "learning_rate": 4.642391867199503e-09, + "loss": 0.2863, + "step": 21802 + }, + { + "epoch": 0.9867843403484952, + "grad_norm": 0.6305097485833882, + "learning_rate": 4.610869525641382e-09, + "loss": 0.3122, + "step": 21803 + }, + { + "epoch": 0.9868295994568907, + "grad_norm": 0.5912198400207207, + "learning_rate": 4.579454519653137e-09, + "loss": 0.2941, + "step": 21804 + }, + { + "epoch": 0.9868748585652862, + "grad_norm": 0.2503455997627201, + "learning_rate": 4.5481468499097845e-09, + "loss": 0.466, + "step": 21805 + }, + { + "epoch": 0.9869201176736818, + "grad_norm": 0.6940940885386304, + "learning_rate": 4.516946517084675e-09, + "loss": 0.2868, + "step": 21806 + }, + { + "epoch": 0.9869653767820774, + "grad_norm": 0.644379368900686, + "learning_rate": 4.485853521848382e-09, + "loss": 0.3202, + "step": 21807 + }, + { + "epoch": 0.987010635890473, + "grad_norm": 0.6286080941019534, + "learning_rate": 4.4548678648681506e-09, + "loss": 0.3505, + "step": 21808 + }, + { + "epoch": 0.9870558949988685, + "grad_norm": 0.6199985918169026, + "learning_rate": 4.423989546810115e-09, + "loss": 0.2872, + "step": 21809 + }, + { + "epoch": 0.9871011541072641, + "grad_norm": 0.5828107840187536, + "learning_rate": 4.3932185683376316e-09, + "loss": 0.3019, + "step": 21810 + }, + { + "epoch": 0.9871464132156597, + "grad_norm": 0.591062507801819, + "learning_rate": 4.362554930112395e-09, + "loss": 0.2626, + "step": 21811 + }, + { + "epoch": 0.9871916723240552, + "grad_norm": 0.6613664730270813, + "learning_rate": 4.331998632792766e-09, + "loss": 0.3347, + "step": 21812 + }, + { + "epoch": 0.9872369314324507, + "grad_norm": 0.591931367567967, + "learning_rate": 4.3015496770354435e-09, + "loss": 0.2522, + "step": 21813 + }, + { + "epoch": 0.9872821905408463, + "grad_norm": 0.6118863093822567, + "learning_rate": 4.2712080634949024e-09, + "loss": 0.3038, + "step": 21814 + }, + { + "epoch": 0.9873274496492419, + "grad_norm": 0.9204855378714794, + "learning_rate": 4.240973792822845e-09, + "loss": 0.3698, + "step": 21815 + }, + { + "epoch": 0.9873727087576375, + "grad_norm": 0.5471052873028251, + "learning_rate": 4.210846865668749e-09, + "loss": 0.2452, + "step": 21816 + }, + { + "epoch": 0.9874179678660331, + "grad_norm": 0.648162350342267, + "learning_rate": 4.180827282680433e-09, + "loss": 0.2873, + "step": 21817 + }, + { + "epoch": 0.9874632269744286, + "grad_norm": 0.6168060659402789, + "learning_rate": 4.1509150445023794e-09, + "loss": 0.283, + "step": 21818 + }, + { + "epoch": 0.9875084860828242, + "grad_norm": 0.6184546215910344, + "learning_rate": 4.121110151777407e-09, + "loss": 0.2466, + "step": 21819 + }, + { + "epoch": 0.9875537451912197, + "grad_norm": 0.7674514700369797, + "learning_rate": 4.0914126051466715e-09, + "loss": 0.2808, + "step": 21820 + }, + { + "epoch": 0.9875990042996153, + "grad_norm": 0.597187270312596, + "learning_rate": 4.06182240524744e-09, + "loss": 0.3179, + "step": 21821 + }, + { + "epoch": 0.9876442634080108, + "grad_norm": 0.668955246942683, + "learning_rate": 4.032339552715869e-09, + "loss": 0.3038, + "step": 21822 + }, + { + "epoch": 0.9876895225164064, + "grad_norm": 0.6083238151005289, + "learning_rate": 4.002964048185342e-09, + "loss": 0.2824, + "step": 21823 + }, + { + "epoch": 0.987734781624802, + "grad_norm": 0.6671406711905182, + "learning_rate": 3.973695892287022e-09, + "loss": 0.3158, + "step": 21824 + }, + { + "epoch": 0.9877800407331976, + "grad_norm": 0.7626532643269714, + "learning_rate": 3.944535085649848e-09, + "loss": 0.2742, + "step": 21825 + }, + { + "epoch": 0.9878252998415932, + "grad_norm": 0.6286400094111584, + "learning_rate": 3.915481628900541e-09, + "loss": 0.2999, + "step": 21826 + }, + { + "epoch": 0.9878705589499887, + "grad_norm": 0.5968127011830571, + "learning_rate": 3.8865355226630484e-09, + "loss": 0.2548, + "step": 21827 + }, + { + "epoch": 0.9879158180583842, + "grad_norm": 0.6086665535501046, + "learning_rate": 3.857696767559649e-09, + "loss": 0.3023, + "step": 21828 + }, + { + "epoch": 0.9879610771667798, + "grad_norm": 0.6130992682575301, + "learning_rate": 3.828965364209847e-09, + "loss": 0.278, + "step": 21829 + }, + { + "epoch": 0.9880063362751754, + "grad_norm": 0.6310926783173733, + "learning_rate": 3.8003413132309265e-09, + "loss": 0.2965, + "step": 21830 + }, + { + "epoch": 0.9880515953835709, + "grad_norm": 0.6549428048986663, + "learning_rate": 3.771824615237951e-09, + "loss": 0.3078, + "step": 21831 + }, + { + "epoch": 0.9880968544919665, + "grad_norm": 0.6044611572338786, + "learning_rate": 3.7434152708437645e-09, + "loss": 0.2942, + "step": 21832 + }, + { + "epoch": 0.9881421136003621, + "grad_norm": 0.6306510752902703, + "learning_rate": 3.7151132806589885e-09, + "loss": 0.2848, + "step": 21833 + }, + { + "epoch": 0.9881873727087577, + "grad_norm": 0.5962400120103846, + "learning_rate": 3.6869186452909157e-09, + "loss": 0.2606, + "step": 21834 + }, + { + "epoch": 0.9882326318171532, + "grad_norm": 0.6985939720579019, + "learning_rate": 3.6588313653468377e-09, + "loss": 0.3017, + "step": 21835 + }, + { + "epoch": 0.9882778909255487, + "grad_norm": 0.6139462080468838, + "learning_rate": 3.6308514414284958e-09, + "loss": 0.2337, + "step": 21836 + }, + { + "epoch": 0.9883231500339443, + "grad_norm": 0.6032328510366322, + "learning_rate": 3.6029788741387407e-09, + "loss": 0.3009, + "step": 21837 + }, + { + "epoch": 0.9883684091423399, + "grad_norm": 0.5821709047204009, + "learning_rate": 3.5752136640754278e-09, + "loss": 0.2985, + "step": 21838 + }, + { + "epoch": 0.9884136682507355, + "grad_norm": 0.6916801065461516, + "learning_rate": 3.5475558118353015e-09, + "loss": 0.2756, + "step": 21839 + }, + { + "epoch": 0.988458927359131, + "grad_norm": 0.5836411571093898, + "learning_rate": 3.5200053180128867e-09, + "loss": 0.2696, + "step": 21840 + }, + { + "epoch": 0.9885041864675266, + "grad_norm": 0.5775515617423247, + "learning_rate": 3.4925621831999325e-09, + "loss": 0.2749, + "step": 21841 + }, + { + "epoch": 0.9885494455759222, + "grad_norm": 0.6257537502517646, + "learning_rate": 3.4652264079859666e-09, + "loss": 0.274, + "step": 21842 + }, + { + "epoch": 0.9885947046843178, + "grad_norm": 0.5803599968862889, + "learning_rate": 3.4379979929588526e-09, + "loss": 0.2858, + "step": 21843 + }, + { + "epoch": 0.9886399637927132, + "grad_norm": 0.6363360456382771, + "learning_rate": 3.410876938703678e-09, + "loss": 0.3185, + "step": 21844 + }, + { + "epoch": 0.9886852229011088, + "grad_norm": 0.5635976745712896, + "learning_rate": 3.383863245802754e-09, + "loss": 0.2686, + "step": 21845 + }, + { + "epoch": 0.9887304820095044, + "grad_norm": 0.5524016813596654, + "learning_rate": 3.3569569148367286e-09, + "loss": 0.2938, + "step": 21846 + }, + { + "epoch": 0.9887757411179, + "grad_norm": 0.5676520951342483, + "learning_rate": 3.3301579463834722e-09, + "loss": 0.2986, + "step": 21847 + }, + { + "epoch": 0.9888210002262955, + "grad_norm": 0.5711577922083861, + "learning_rate": 3.30346634101919e-09, + "loss": 0.3036, + "step": 21848 + }, + { + "epoch": 0.9888662593346911, + "grad_norm": 0.6275128453665675, + "learning_rate": 3.276882099316758e-09, + "loss": 0.2543, + "step": 21849 + }, + { + "epoch": 0.9889115184430867, + "grad_norm": 0.6967194406864456, + "learning_rate": 3.250405221848496e-09, + "loss": 0.2959, + "step": 21850 + }, + { + "epoch": 0.9889567775514823, + "grad_norm": 0.5869615549138006, + "learning_rate": 3.224035709182283e-09, + "loss": 0.269, + "step": 21851 + }, + { + "epoch": 0.9890020366598778, + "grad_norm": 0.6351290356709387, + "learning_rate": 3.1977735618854423e-09, + "loss": 0.319, + "step": 21852 + }, + { + "epoch": 0.9890472957682733, + "grad_norm": 0.6284376408209436, + "learning_rate": 3.1716187805214127e-09, + "loss": 0.2715, + "step": 21853 + }, + { + "epoch": 0.9890925548766689, + "grad_norm": 0.624262732120063, + "learning_rate": 3.1455713656530774e-09, + "loss": 0.2756, + "step": 21854 + }, + { + "epoch": 0.9891378139850645, + "grad_norm": 0.5863698484875334, + "learning_rate": 3.1196313178399882e-09, + "loss": 0.278, + "step": 21855 + }, + { + "epoch": 0.9891830730934601, + "grad_norm": 0.5864017516835073, + "learning_rate": 3.093798637638923e-09, + "loss": 0.2609, + "step": 21856 + }, + { + "epoch": 0.9892283322018556, + "grad_norm": 0.6361268027097685, + "learning_rate": 3.0680733256055475e-09, + "loss": 0.313, + "step": 21857 + }, + { + "epoch": 0.9892735913102512, + "grad_norm": 0.5593585491784253, + "learning_rate": 3.0424553822921977e-09, + "loss": 0.2762, + "step": 21858 + }, + { + "epoch": 0.9893188504186468, + "grad_norm": 0.5934333708764978, + "learning_rate": 3.0169448082495446e-09, + "loss": 0.323, + "step": 21859 + }, + { + "epoch": 0.9893641095270423, + "grad_norm": 0.5554011995679686, + "learning_rate": 2.991541604025483e-09, + "loss": 0.2079, + "step": 21860 + }, + { + "epoch": 0.9894093686354379, + "grad_norm": 0.6551671338868413, + "learning_rate": 2.9662457701662428e-09, + "loss": 0.3135, + "step": 21861 + }, + { + "epoch": 0.9894546277438334, + "grad_norm": 0.5727764315689057, + "learning_rate": 2.9410573072152783e-09, + "loss": 0.2909, + "step": 21862 + }, + { + "epoch": 0.989499886852229, + "grad_norm": 0.5694765003150496, + "learning_rate": 2.915976215713268e-09, + "loss": 0.2915, + "step": 21863 + }, + { + "epoch": 0.9895451459606246, + "grad_norm": 0.6835921802362995, + "learning_rate": 2.8910024962003347e-09, + "loss": 0.3184, + "step": 21864 + }, + { + "epoch": 0.9895904050690202, + "grad_norm": 0.597071520318116, + "learning_rate": 2.866136149211607e-09, + "loss": 0.3253, + "step": 21865 + }, + { + "epoch": 0.9896356641774157, + "grad_norm": 0.6053926412628252, + "learning_rate": 2.8413771752822116e-09, + "loss": 0.2917, + "step": 21866 + }, + { + "epoch": 0.9896809232858113, + "grad_norm": 0.6335645278894662, + "learning_rate": 2.816725574943946e-09, + "loss": 0.2666, + "step": 21867 + }, + { + "epoch": 0.9897261823942068, + "grad_norm": 0.6513409620336323, + "learning_rate": 2.792181348726941e-09, + "loss": 0.2856, + "step": 21868 + }, + { + "epoch": 0.9897714415026024, + "grad_norm": 0.6472487900335032, + "learning_rate": 2.767744497157998e-09, + "loss": 0.3292, + "step": 21869 + }, + { + "epoch": 0.9898167006109979, + "grad_norm": 0.6328915428497753, + "learning_rate": 2.7434150207622525e-09, + "loss": 0.2938, + "step": 21870 + }, + { + "epoch": 0.9898619597193935, + "grad_norm": 0.676358863988407, + "learning_rate": 2.719192920063174e-09, + "loss": 0.3124, + "step": 21871 + }, + { + "epoch": 0.9899072188277891, + "grad_norm": 0.7129888996080095, + "learning_rate": 2.6950781955803475e-09, + "loss": 0.2925, + "step": 21872 + }, + { + "epoch": 0.9899524779361847, + "grad_norm": 0.5912825030245769, + "learning_rate": 2.6710708478316914e-09, + "loss": 0.2965, + "step": 21873 + }, + { + "epoch": 0.9899977370445803, + "grad_norm": 0.6327579557572981, + "learning_rate": 2.6471708773340154e-09, + "loss": 0.2893, + "step": 21874 + }, + { + "epoch": 0.9900429961529758, + "grad_norm": 0.6187239759164697, + "learning_rate": 2.623378284600797e-09, + "loss": 0.2831, + "step": 21875 + }, + { + "epoch": 0.9900882552613713, + "grad_norm": 0.5763850127898063, + "learning_rate": 2.599693070142739e-09, + "loss": 0.274, + "step": 21876 + }, + { + "epoch": 0.9901335143697669, + "grad_norm": 0.6044053598183847, + "learning_rate": 2.576115234468324e-09, + "loss": 0.3054, + "step": 21877 + }, + { + "epoch": 0.9901787734781625, + "grad_norm": 0.6342602639735581, + "learning_rate": 2.552644778085478e-09, + "loss": 0.3095, + "step": 21878 + }, + { + "epoch": 0.990224032586558, + "grad_norm": 0.5974125313774147, + "learning_rate": 2.5292817014976877e-09, + "loss": 0.2937, + "step": 21879 + }, + { + "epoch": 0.9902692916949536, + "grad_norm": 0.5630186640044591, + "learning_rate": 2.5060260052067742e-09, + "loss": 0.2344, + "step": 21880 + }, + { + "epoch": 0.9903145508033492, + "grad_norm": 0.6598742022611298, + "learning_rate": 2.4828776897128925e-09, + "loss": 0.3481, + "step": 21881 + }, + { + "epoch": 0.9903598099117448, + "grad_norm": 0.679798663117042, + "learning_rate": 2.459836755513423e-09, + "loss": 0.31, + "step": 21882 + }, + { + "epoch": 0.9904050690201402, + "grad_norm": 0.6326383323678978, + "learning_rate": 2.4369032031029695e-09, + "loss": 0.2829, + "step": 21883 + }, + { + "epoch": 0.9904503281285358, + "grad_norm": 0.6012953654016868, + "learning_rate": 2.4140770329750264e-09, + "loss": 0.2817, + "step": 21884 + }, + { + "epoch": 0.9904955872369314, + "grad_norm": 0.6390496404163677, + "learning_rate": 2.391358245619202e-09, + "loss": 0.2729, + "step": 21885 + }, + { + "epoch": 0.990540846345327, + "grad_norm": 0.6142964865838836, + "learning_rate": 2.3687468415245494e-09, + "loss": 0.2952, + "step": 21886 + }, + { + "epoch": 0.9905861054537226, + "grad_norm": 0.7176865699863244, + "learning_rate": 2.346242821176237e-09, + "loss": 0.3201, + "step": 21887 + }, + { + "epoch": 0.9906313645621181, + "grad_norm": 0.6598530408570948, + "learning_rate": 2.3238461850583206e-09, + "loss": 0.3243, + "step": 21888 + }, + { + "epoch": 0.9906766236705137, + "grad_norm": 0.6331095745795835, + "learning_rate": 2.3015569336509724e-09, + "loss": 0.3047, + "step": 21889 + }, + { + "epoch": 0.9907218827789093, + "grad_norm": 0.6414719345668272, + "learning_rate": 2.279375067434919e-09, + "loss": 0.2782, + "step": 21890 + }, + { + "epoch": 0.9907671418873049, + "grad_norm": 0.638176475097762, + "learning_rate": 2.2573005868853358e-09, + "loss": 0.3231, + "step": 21891 + }, + { + "epoch": 0.9908124009957003, + "grad_norm": 0.6025064831274091, + "learning_rate": 2.2353334924768435e-09, + "loss": 0.2795, + "step": 21892 + }, + { + "epoch": 0.9908576601040959, + "grad_norm": 0.5871043366477097, + "learning_rate": 2.213473784681286e-09, + "loss": 0.2546, + "step": 21893 + }, + { + "epoch": 0.9909029192124915, + "grad_norm": 0.6708388062940376, + "learning_rate": 2.1917214639693985e-09, + "loss": 0.2715, + "step": 21894 + }, + { + "epoch": 0.9909481783208871, + "grad_norm": 0.6180303987275302, + "learning_rate": 2.1700765308074743e-09, + "loss": 0.2948, + "step": 21895 + }, + { + "epoch": 0.9909934374292827, + "grad_norm": 0.5761702880401627, + "learning_rate": 2.1485389856606973e-09, + "loss": 0.3112, + "step": 21896 + }, + { + "epoch": 0.9910386965376782, + "grad_norm": 0.5283381025407042, + "learning_rate": 2.1271088289920304e-09, + "loss": 0.292, + "step": 21897 + }, + { + "epoch": 0.9910839556460738, + "grad_norm": 0.6000773893981395, + "learning_rate": 2.1057860612627713e-09, + "loss": 0.2763, + "step": 21898 + }, + { + "epoch": 0.9911292147544694, + "grad_norm": 0.5964113949286953, + "learning_rate": 2.0845706829297762e-09, + "loss": 0.3268, + "step": 21899 + }, + { + "epoch": 0.991174473862865, + "grad_norm": 0.5875991897924737, + "learning_rate": 2.0634626944493475e-09, + "loss": 0.251, + "step": 21900 + }, + { + "epoch": 0.9912197329712604, + "grad_norm": 0.6623000008080091, + "learning_rate": 2.0424620962750107e-09, + "loss": 0.3334, + "step": 21901 + }, + { + "epoch": 0.991264992079656, + "grad_norm": 0.5850960457521891, + "learning_rate": 2.021568888858627e-09, + "loss": 0.2595, + "step": 21902 + }, + { + "epoch": 0.9913102511880516, + "grad_norm": 0.5840999288698242, + "learning_rate": 2.0007830726481716e-09, + "loss": 0.2888, + "step": 21903 + }, + { + "epoch": 0.9913555102964472, + "grad_norm": 0.7335081157665665, + "learning_rate": 1.980104648090508e-09, + "loss": 0.2938, + "step": 21904 + }, + { + "epoch": 0.9914007694048427, + "grad_norm": 0.6805641085989351, + "learning_rate": 1.9595336156308375e-09, + "loss": 0.321, + "step": 21905 + }, + { + "epoch": 0.9914460285132383, + "grad_norm": 0.6457916470088789, + "learning_rate": 1.9390699757099174e-09, + "loss": 0.2899, + "step": 21906 + }, + { + "epoch": 0.9914912876216339, + "grad_norm": 0.6183658680749494, + "learning_rate": 1.9187137287685065e-09, + "loss": 0.2824, + "step": 21907 + }, + { + "epoch": 0.9915365467300294, + "grad_norm": 0.6144721012585188, + "learning_rate": 1.8984648752429222e-09, + "loss": 0.287, + "step": 21908 + }, + { + "epoch": 0.991581805838425, + "grad_norm": 0.6641942731452835, + "learning_rate": 1.878323415568928e-09, + "loss": 0.2704, + "step": 21909 + }, + { + "epoch": 0.9916270649468205, + "grad_norm": 1.0685510439896335, + "learning_rate": 1.8582893501795096e-09, + "loss": 0.2873, + "step": 21910 + }, + { + "epoch": 0.9916723240552161, + "grad_norm": 0.5948347391105585, + "learning_rate": 1.8383626795048793e-09, + "loss": 0.2665, + "step": 21911 + }, + { + "epoch": 0.9917175831636117, + "grad_norm": 0.625767466906673, + "learning_rate": 1.8185434039730277e-09, + "loss": 0.2922, + "step": 21912 + }, + { + "epoch": 0.9917628422720073, + "grad_norm": 0.5766022669904595, + "learning_rate": 1.7988315240097254e-09, + "loss": 0.2751, + "step": 21913 + }, + { + "epoch": 0.9918081013804028, + "grad_norm": 0.6275741354154193, + "learning_rate": 1.7792270400390776e-09, + "loss": 0.2615, + "step": 21914 + }, + { + "epoch": 0.9918533604887984, + "grad_norm": 0.5845001298448244, + "learning_rate": 1.759729952481859e-09, + "loss": 0.3006, + "step": 21915 + }, + { + "epoch": 0.9918986195971939, + "grad_norm": 1.86394089138806, + "learning_rate": 1.7403402617571785e-09, + "loss": 0.2822, + "step": 21916 + }, + { + "epoch": 0.9919438787055895, + "grad_norm": 0.6429230000792644, + "learning_rate": 1.72105796828137e-09, + "loss": 0.3051, + "step": 21917 + }, + { + "epoch": 0.991989137813985, + "grad_norm": 0.6895858610389437, + "learning_rate": 1.7018830724691016e-09, + "loss": 0.2702, + "step": 21918 + }, + { + "epoch": 0.9920343969223806, + "grad_norm": 0.592948558171695, + "learning_rate": 1.682815574732266e-09, + "loss": 0.2767, + "step": 21919 + }, + { + "epoch": 0.9920796560307762, + "grad_norm": 0.5687485020465194, + "learning_rate": 1.6638554754805358e-09, + "loss": 0.2975, + "step": 21920 + }, + { + "epoch": 0.9921249151391718, + "grad_norm": 0.5920284006547106, + "learning_rate": 1.6450027751213626e-09, + "loss": 0.2997, + "step": 21921 + }, + { + "epoch": 0.9921701742475674, + "grad_norm": 0.6121210158543754, + "learning_rate": 1.6262574740599778e-09, + "loss": 0.3029, + "step": 21922 + }, + { + "epoch": 0.9922154333559629, + "grad_norm": 0.5701848941441401, + "learning_rate": 1.6076195726982824e-09, + "loss": 0.2618, + "step": 21923 + }, + { + "epoch": 0.9922606924643584, + "grad_norm": 0.6239870659742223, + "learning_rate": 1.5890890714381769e-09, + "loss": 0.3071, + "step": 21924 + }, + { + "epoch": 0.992305951572754, + "grad_norm": 0.5982485102238448, + "learning_rate": 1.5706659706771211e-09, + "loss": 0.2781, + "step": 21925 + }, + { + "epoch": 0.9923512106811496, + "grad_norm": 0.5887485429538946, + "learning_rate": 1.5523502708103544e-09, + "loss": 0.304, + "step": 21926 + }, + { + "epoch": 0.9923964697895451, + "grad_norm": 0.6232348804393142, + "learning_rate": 1.5341419722325612e-09, + "loss": 0.2928, + "step": 21927 + }, + { + "epoch": 0.9924417288979407, + "grad_norm": 0.6679441376503535, + "learning_rate": 1.51604107533454e-09, + "loss": 0.2651, + "step": 21928 + }, + { + "epoch": 0.9924869880063363, + "grad_norm": 0.6308610655766753, + "learning_rate": 1.4980475805048688e-09, + "loss": 0.3113, + "step": 21929 + }, + { + "epoch": 0.9925322471147319, + "grad_norm": 0.6038910862193314, + "learning_rate": 1.4801614881304604e-09, + "loss": 0.2558, + "step": 21930 + }, + { + "epoch": 0.9925775062231275, + "grad_norm": 0.566272493740601, + "learning_rate": 1.462382798595452e-09, + "loss": 0.2528, + "step": 21931 + }, + { + "epoch": 0.9926227653315229, + "grad_norm": 0.6423144924625247, + "learning_rate": 1.4447115122817601e-09, + "loss": 0.2585, + "step": 21932 + }, + { + "epoch": 0.9926680244399185, + "grad_norm": 0.6201093438260887, + "learning_rate": 1.4271476295696363e-09, + "loss": 0.2633, + "step": 21933 + }, + { + "epoch": 0.9927132835483141, + "grad_norm": 0.6085470147719076, + "learning_rate": 1.4096911508365564e-09, + "loss": 0.28, + "step": 21934 + }, + { + "epoch": 0.9927585426567097, + "grad_norm": 0.637300112023428, + "learning_rate": 1.3923420764566653e-09, + "loss": 0.2697, + "step": 21935 + }, + { + "epoch": 0.9928038017651052, + "grad_norm": 0.5883667578079982, + "learning_rate": 1.3751004068035534e-09, + "loss": 0.2711, + "step": 21936 + }, + { + "epoch": 0.9928490608735008, + "grad_norm": 0.5961564073876877, + "learning_rate": 1.35796614224748e-09, + "loss": 0.2861, + "step": 21937 + }, + { + "epoch": 0.9928943199818964, + "grad_norm": 0.6375560477188555, + "learning_rate": 1.3409392831564838e-09, + "loss": 0.2655, + "step": 21938 + }, + { + "epoch": 0.992939579090292, + "grad_norm": 0.6792700890001889, + "learning_rate": 1.3240198298963836e-09, + "loss": 0.2948, + "step": 21939 + }, + { + "epoch": 0.9929848381986874, + "grad_norm": 0.5840208606838262, + "learning_rate": 1.3072077828307772e-09, + "loss": 0.3265, + "step": 21940 + }, + { + "epoch": 0.993030097307083, + "grad_norm": 0.6286625929006481, + "learning_rate": 1.2905031423210423e-09, + "loss": 0.2734, + "step": 21941 + }, + { + "epoch": 0.9930753564154786, + "grad_norm": 0.7285734220275707, + "learning_rate": 1.2739059087263362e-09, + "loss": 0.3262, + "step": 21942 + }, + { + "epoch": 0.9931206155238742, + "grad_norm": 0.5779318308145721, + "learning_rate": 1.257416082402485e-09, + "loss": 0.2495, + "step": 21943 + }, + { + "epoch": 0.9931658746322698, + "grad_norm": 0.6083077717806407, + "learning_rate": 1.2410336637047604e-09, + "loss": 0.3073, + "step": 21944 + }, + { + "epoch": 0.9932111337406653, + "grad_norm": 0.6214203937496386, + "learning_rate": 1.2247586529845479e-09, + "loss": 0.2834, + "step": 21945 + }, + { + "epoch": 0.9932563928490609, + "grad_norm": 0.5481423624767995, + "learning_rate": 1.2085910505915677e-09, + "loss": 0.3056, + "step": 21946 + }, + { + "epoch": 0.9933016519574565, + "grad_norm": 0.572538572374642, + "learning_rate": 1.1925308568733197e-09, + "loss": 0.2804, + "step": 21947 + }, + { + "epoch": 0.993346911065852, + "grad_norm": 0.6241378536217829, + "learning_rate": 1.176578072175083e-09, + "loss": 0.273, + "step": 21948 + }, + { + "epoch": 0.9933921701742475, + "grad_norm": 0.5799838848065667, + "learning_rate": 1.1607326968393617e-09, + "loss": 0.2911, + "step": 21949 + }, + { + "epoch": 0.9934374292826431, + "grad_norm": 0.6480647784604268, + "learning_rate": 1.1449947312064392e-09, + "loss": 0.3535, + "step": 21950 + }, + { + "epoch": 0.9934826883910387, + "grad_norm": 0.6990225541403731, + "learning_rate": 1.1293641756154883e-09, + "loss": 0.3251, + "step": 21951 + }, + { + "epoch": 0.9935279474994343, + "grad_norm": 0.5795906833178194, + "learning_rate": 1.1138410304012415e-09, + "loss": 0.3409, + "step": 21952 + }, + { + "epoch": 0.9935732066078298, + "grad_norm": 0.605951227260699, + "learning_rate": 1.0984252958973207e-09, + "loss": 0.3098, + "step": 21953 + }, + { + "epoch": 0.9936184657162254, + "grad_norm": 0.6328547372958854, + "learning_rate": 1.0831169724356828e-09, + "loss": 0.321, + "step": 21954 + }, + { + "epoch": 0.993663724824621, + "grad_norm": 0.6120233663098031, + "learning_rate": 1.0679160603449533e-09, + "loss": 0.3066, + "step": 21955 + }, + { + "epoch": 0.9937089839330165, + "grad_norm": 0.5841427755427862, + "learning_rate": 1.0528225599515385e-09, + "loss": 0.3099, + "step": 21956 + }, + { + "epoch": 0.9937542430414121, + "grad_norm": 0.6222734322557101, + "learning_rate": 1.037836471579623e-09, + "loss": 0.3139, + "step": 21957 + }, + { + "epoch": 0.9937995021498076, + "grad_norm": 0.6193352639862391, + "learning_rate": 1.0229577955517267e-09, + "loss": 0.3219, + "step": 21958 + }, + { + "epoch": 0.9938447612582032, + "grad_norm": 0.6356822729983301, + "learning_rate": 1.008186532187594e-09, + "loss": 0.2576, + "step": 21959 + }, + { + "epoch": 0.9938900203665988, + "grad_norm": 0.5978070819650005, + "learning_rate": 9.93522681803638e-10, + "loss": 0.3192, + "step": 21960 + }, + { + "epoch": 0.9939352794749944, + "grad_norm": 0.6261767044610226, + "learning_rate": 9.789662447157178e-10, + "loss": 0.3025, + "step": 21961 + }, + { + "epoch": 0.9939805385833899, + "grad_norm": 0.593104996327592, + "learning_rate": 9.645172212369158e-10, + "loss": 0.2947, + "step": 21962 + }, + { + "epoch": 0.9940257976917855, + "grad_norm": 0.6462433332947585, + "learning_rate": 9.501756116769844e-10, + "loss": 0.2828, + "step": 21963 + }, + { + "epoch": 0.994071056800181, + "grad_norm": 0.5524732648542988, + "learning_rate": 9.359414163445657e-10, + "loss": 0.3159, + "step": 21964 + }, + { + "epoch": 0.9941163159085766, + "grad_norm": 0.583561381683474, + "learning_rate": 9.218146355449709e-10, + "loss": 0.2795, + "step": 21965 + }, + { + "epoch": 0.9941615750169722, + "grad_norm": 0.6135715357641147, + "learning_rate": 9.07795269582401e-10, + "loss": 0.3043, + "step": 21966 + }, + { + "epoch": 0.9942068341253677, + "grad_norm": 0.6229441688619807, + "learning_rate": 8.938833187577267e-10, + "loss": 0.2594, + "step": 21967 + }, + { + "epoch": 0.9942520932337633, + "grad_norm": 0.6177528880302975, + "learning_rate": 8.800787833695978e-10, + "loss": 0.28, + "step": 21968 + }, + { + "epoch": 0.9942973523421589, + "grad_norm": 1.8804680331595953, + "learning_rate": 8.663816637149991e-10, + "loss": 0.2535, + "step": 21969 + }, + { + "epoch": 0.9943426114505545, + "grad_norm": 0.7889549714801867, + "learning_rate": 8.527919600886947e-10, + "loss": 0.3012, + "step": 21970 + }, + { + "epoch": 0.99438787055895, + "grad_norm": 0.6087356486241385, + "learning_rate": 8.393096727815631e-10, + "loss": 0.3013, + "step": 21971 + }, + { + "epoch": 0.9944331296673455, + "grad_norm": 0.6173904701348589, + "learning_rate": 8.259348020844827e-10, + "loss": 0.3094, + "step": 21972 + }, + { + "epoch": 0.9944783887757411, + "grad_norm": 0.6153369456157168, + "learning_rate": 8.126673482838909e-10, + "loss": 0.2814, + "step": 21973 + }, + { + "epoch": 0.9945236478841367, + "grad_norm": 0.6292016348416848, + "learning_rate": 7.995073116656704e-10, + "loss": 0.3302, + "step": 21974 + }, + { + "epoch": 0.9945689069925322, + "grad_norm": 0.5631017655648378, + "learning_rate": 7.864546925118177e-10, + "loss": 0.2664, + "step": 21975 + }, + { + "epoch": 0.9946141661009278, + "grad_norm": 0.6069259072107689, + "learning_rate": 7.735094911032193e-10, + "loss": 0.2962, + "step": 21976 + }, + { + "epoch": 0.9946594252093234, + "grad_norm": 0.531390973141288, + "learning_rate": 7.606717077179859e-10, + "loss": 0.2682, + "step": 21977 + }, + { + "epoch": 0.994704684317719, + "grad_norm": 0.7526144498408558, + "learning_rate": 7.47941342631453e-10, + "loss": 0.3383, + "step": 21978 + }, + { + "epoch": 0.9947499434261146, + "grad_norm": 0.6084599902690812, + "learning_rate": 7.353183961184007e-10, + "loss": 0.323, + "step": 21979 + }, + { + "epoch": 0.99479520253451, + "grad_norm": 0.5413245411564832, + "learning_rate": 7.228028684486132e-10, + "loss": 0.2905, + "step": 21980 + }, + { + "epoch": 0.9948404616429056, + "grad_norm": 0.598947853073034, + "learning_rate": 7.103947598918747e-10, + "loss": 0.3424, + "step": 21981 + }, + { + "epoch": 0.9948857207513012, + "grad_norm": 0.6844650599597334, + "learning_rate": 6.980940707146388e-10, + "loss": 0.2437, + "step": 21982 + }, + { + "epoch": 0.9949309798596968, + "grad_norm": 0.6625091436369556, + "learning_rate": 6.859008011816937e-10, + "loss": 0.2969, + "step": 21983 + }, + { + "epoch": 0.9949762389680923, + "grad_norm": 0.6723159430338779, + "learning_rate": 6.738149515539416e-10, + "loss": 0.2529, + "step": 21984 + }, + { + "epoch": 0.9950214980764879, + "grad_norm": 1.4999966827879183, + "learning_rate": 6.618365220917299e-10, + "loss": 0.3146, + "step": 21985 + }, + { + "epoch": 0.9950667571848835, + "grad_norm": 0.6497521224251643, + "learning_rate": 6.499655130526306e-10, + "loss": 0.3073, + "step": 21986 + }, + { + "epoch": 0.9951120162932791, + "grad_norm": 0.6168352312353976, + "learning_rate": 6.382019246908844e-10, + "loss": 0.2842, + "step": 21987 + }, + { + "epoch": 0.9951572754016745, + "grad_norm": 0.5962748762799298, + "learning_rate": 6.265457572601774e-10, + "loss": 0.2974, + "step": 21988 + }, + { + "epoch": 0.9952025345100701, + "grad_norm": 0.6219469205840408, + "learning_rate": 6.149970110108649e-10, + "loss": 0.2968, + "step": 21989 + }, + { + "epoch": 0.9952477936184657, + "grad_norm": 0.5677401620285023, + "learning_rate": 6.035556861905268e-10, + "loss": 0.256, + "step": 21990 + }, + { + "epoch": 0.9952930527268613, + "grad_norm": 0.6820100081540836, + "learning_rate": 5.922217830450772e-10, + "loss": 0.2936, + "step": 21991 + }, + { + "epoch": 0.9953383118352569, + "grad_norm": 0.5594585909652834, + "learning_rate": 5.809953018187652e-10, + "loss": 0.2635, + "step": 21992 + }, + { + "epoch": 0.9953835709436524, + "grad_norm": 0.6206856525103511, + "learning_rate": 5.698762427519544e-10, + "loss": 0.32, + "step": 21993 + }, + { + "epoch": 0.995428830052048, + "grad_norm": 0.5969689557141281, + "learning_rate": 5.588646060838976e-10, + "loss": 0.258, + "step": 21994 + }, + { + "epoch": 0.9954740891604436, + "grad_norm": 0.6377851737737551, + "learning_rate": 5.479603920516275e-10, + "loss": 0.328, + "step": 21995 + }, + { + "epoch": 0.9955193482688391, + "grad_norm": 0.5864974798279645, + "learning_rate": 5.371636008888459e-10, + "loss": 0.2919, + "step": 21996 + }, + { + "epoch": 0.9955646073772346, + "grad_norm": 0.599331196829423, + "learning_rate": 5.264742328275896e-10, + "loss": 0.3034, + "step": 21997 + }, + { + "epoch": 0.9956098664856302, + "grad_norm": 0.5859840998410031, + "learning_rate": 5.158922880976747e-10, + "loss": 0.3054, + "step": 21998 + }, + { + "epoch": 0.9956551255940258, + "grad_norm": 0.6133896988379982, + "learning_rate": 5.054177669266969e-10, + "loss": 0.2732, + "step": 21999 + }, + { + "epoch": 0.9957003847024214, + "grad_norm": 0.6355733324276068, + "learning_rate": 4.950506695394763e-10, + "loss": 0.2989, + "step": 22000 + }, + { + "epoch": 0.9957456438108169, + "grad_norm": 0.6405292750738774, + "learning_rate": 4.847909961586128e-10, + "loss": 0.2672, + "step": 22001 + }, + { + "epoch": 0.9957909029192125, + "grad_norm": 0.6117498728482897, + "learning_rate": 4.746387470044855e-10, + "loss": 0.2925, + "step": 22002 + }, + { + "epoch": 0.995836162027608, + "grad_norm": 0.5603673735108489, + "learning_rate": 4.645939222963636e-10, + "loss": 0.293, + "step": 22003 + }, + { + "epoch": 0.9958814211360036, + "grad_norm": 0.6255173214844361, + "learning_rate": 4.5465652224851996e-10, + "loss": 0.3283, + "step": 22004 + }, + { + "epoch": 0.9959266802443992, + "grad_norm": 0.6383297301865899, + "learning_rate": 4.4482654707522774e-10, + "loss": 0.2651, + "step": 22005 + }, + { + "epoch": 0.9959719393527947, + "grad_norm": 0.6596672113163915, + "learning_rate": 4.3510399698798445e-10, + "loss": 0.3096, + "step": 22006 + }, + { + "epoch": 0.9960171984611903, + "grad_norm": 0.5702939246958649, + "learning_rate": 4.2548887219551196e-10, + "loss": 0.2759, + "step": 22007 + }, + { + "epoch": 0.9960624575695859, + "grad_norm": 0.6001358151182172, + "learning_rate": 4.159811729037566e-10, + "loss": 0.3007, + "step": 22008 + }, + { + "epoch": 0.9961077166779815, + "grad_norm": 0.5805687401563262, + "learning_rate": 4.0658089931755463e-10, + "loss": 0.2896, + "step": 22009 + }, + { + "epoch": 0.996152975786377, + "grad_norm": 0.5658873242067638, + "learning_rate": 3.9728805163896654e-10, + "loss": 0.2929, + "step": 22010 + }, + { + "epoch": 0.9961982348947725, + "grad_norm": 0.6399124156608923, + "learning_rate": 3.8810263006783255e-10, + "loss": 0.2973, + "step": 22011 + }, + { + "epoch": 0.9962434940031681, + "grad_norm": 0.5762794400323175, + "learning_rate": 3.790246348012172e-10, + "loss": 0.3147, + "step": 22012 + }, + { + "epoch": 0.9962887531115637, + "grad_norm": 0.5395651917899117, + "learning_rate": 3.7005406603396464e-10, + "loss": 0.2454, + "step": 22013 + }, + { + "epoch": 0.9963340122199593, + "grad_norm": 0.5848163901483809, + "learning_rate": 3.6119092395869857e-10, + "loss": 0.3129, + "step": 22014 + }, + { + "epoch": 0.9963792713283548, + "grad_norm": 0.6193390910504796, + "learning_rate": 3.524352087669325e-10, + "loss": 0.3131, + "step": 22015 + }, + { + "epoch": 0.9964245304367504, + "grad_norm": 0.6067972758265497, + "learning_rate": 3.4378692064573895e-10, + "loss": 0.3218, + "step": 22016 + }, + { + "epoch": 0.996469789545146, + "grad_norm": 0.5943380199518459, + "learning_rate": 3.3524605978108027e-10, + "loss": 0.3294, + "step": 22017 + }, + { + "epoch": 0.9965150486535416, + "grad_norm": 0.6979205732955975, + "learning_rate": 3.268126263572535e-10, + "loss": 0.3097, + "step": 22018 + }, + { + "epoch": 0.996560307761937, + "grad_norm": 0.6026104421965898, + "learning_rate": 3.1848662055411484e-10, + "loss": 0.3155, + "step": 22019 + }, + { + "epoch": 0.9966055668703326, + "grad_norm": 0.5833935911580108, + "learning_rate": 3.1026804255207544e-10, + "loss": 0.348, + "step": 22020 + }, + { + "epoch": 0.9966508259787282, + "grad_norm": 0.5544309223963749, + "learning_rate": 3.0215689252655056e-10, + "loss": 0.2619, + "step": 22021 + }, + { + "epoch": 0.9966960850871238, + "grad_norm": 0.62487696640231, + "learning_rate": 2.9415317065240037e-10, + "loss": 0.2839, + "step": 22022 + }, + { + "epoch": 0.9967413441955193, + "grad_norm": 0.5779353778908592, + "learning_rate": 2.8625687710170933e-10, + "loss": 0.282, + "step": 22023 + }, + { + "epoch": 0.9967866033039149, + "grad_norm": 0.6887365734828341, + "learning_rate": 2.784680120437866e-10, + "loss": 0.28, + "step": 22024 + }, + { + "epoch": 0.9968318624123105, + "grad_norm": 0.6284514613591872, + "learning_rate": 2.7078657564572065e-10, + "loss": 0.2795, + "step": 22025 + }, + { + "epoch": 0.9968771215207061, + "grad_norm": 0.6018631891275054, + "learning_rate": 2.632125680734898e-10, + "loss": 0.3028, + "step": 22026 + }, + { + "epoch": 0.9969223806291017, + "grad_norm": 0.6014152852040981, + "learning_rate": 2.557459894891867e-10, + "loss": 0.3303, + "step": 22027 + }, + { + "epoch": 0.9969676397374971, + "grad_norm": 0.6160925266368528, + "learning_rate": 2.4838684005323853e-10, + "loss": 0.2885, + "step": 22028 + }, + { + "epoch": 0.9970128988458927, + "grad_norm": 0.7129821777469944, + "learning_rate": 2.4113511992385206e-10, + "loss": 0.2893, + "step": 22029 + }, + { + "epoch": 0.9970581579542883, + "grad_norm": 0.735108791638317, + "learning_rate": 2.3399082925701367e-10, + "loss": 0.2745, + "step": 22030 + }, + { + "epoch": 0.9971034170626839, + "grad_norm": 0.6263294300073722, + "learning_rate": 2.2695396820593408e-10, + "loss": 0.3017, + "step": 22031 + }, + { + "epoch": 0.9971486761710794, + "grad_norm": 0.650352346210055, + "learning_rate": 2.2002453692215875e-10, + "loss": 0.3046, + "step": 22032 + }, + { + "epoch": 0.997193935279475, + "grad_norm": 0.6100886433596671, + "learning_rate": 2.1320253555445758e-10, + "loss": 0.297, + "step": 22033 + }, + { + "epoch": 0.9972391943878706, + "grad_norm": 0.5483647830903922, + "learning_rate": 2.064879642488249e-10, + "loss": 0.2162, + "step": 22034 + }, + { + "epoch": 0.9972844534962662, + "grad_norm": 0.6325002031474769, + "learning_rate": 1.998808231506999e-10, + "loss": 0.2947, + "step": 22035 + }, + { + "epoch": 0.9973297126046616, + "grad_norm": 0.6281452891500079, + "learning_rate": 1.9338111240108094e-10, + "loss": 0.2726, + "step": 22036 + }, + { + "epoch": 0.9973749717130572, + "grad_norm": 0.6656382032845043, + "learning_rate": 1.8698883214041118e-10, + "loss": 0.2957, + "step": 22037 + }, + { + "epoch": 0.9974202308214528, + "grad_norm": 0.5760080145872826, + "learning_rate": 1.8070398250524811e-10, + "loss": 0.2731, + "step": 22038 + }, + { + "epoch": 0.9974654899298484, + "grad_norm": 0.5702775651101916, + "learning_rate": 1.7452656363103893e-10, + "loss": 0.2941, + "step": 22039 + }, + { + "epoch": 0.997510749038244, + "grad_norm": 0.6276471887693469, + "learning_rate": 1.6845657565045526e-10, + "loss": 0.266, + "step": 22040 + }, + { + "epoch": 0.9975560081466395, + "grad_norm": 0.576372293276278, + "learning_rate": 1.6249401869394832e-10, + "loss": 0.2733, + "step": 22041 + }, + { + "epoch": 0.9976012672550351, + "grad_norm": 0.6028893622022222, + "learning_rate": 1.5663889288919377e-10, + "loss": 0.3227, + "step": 22042 + }, + { + "epoch": 0.9976465263634307, + "grad_norm": 0.5989598041118115, + "learning_rate": 1.50891198362757e-10, + "loss": 0.2877, + "step": 22043 + }, + { + "epoch": 0.9976917854718262, + "grad_norm": 0.7686153505540809, + "learning_rate": 1.452509352378728e-10, + "loss": 0.3048, + "step": 22044 + }, + { + "epoch": 0.9977370445802217, + "grad_norm": 0.6289101541046046, + "learning_rate": 1.397181036361106e-10, + "loss": 0.2678, + "step": 22045 + }, + { + "epoch": 0.9977823036886173, + "grad_norm": 0.7850580188083028, + "learning_rate": 1.3429270367515402e-10, + "loss": 0.2787, + "step": 22046 + }, + { + "epoch": 0.9978275627970129, + "grad_norm": 0.6581661810845428, + "learning_rate": 1.289747354726867e-10, + "loss": 0.2581, + "step": 22047 + }, + { + "epoch": 0.9978728219054085, + "grad_norm": 0.6217159388539761, + "learning_rate": 1.237641991425065e-10, + "loss": 0.2898, + "step": 22048 + }, + { + "epoch": 0.9979180810138041, + "grad_norm": 0.6195719530982097, + "learning_rate": 1.1866109479674593e-10, + "loss": 0.2847, + "step": 22049 + }, + { + "epoch": 0.9979633401221996, + "grad_norm": 0.6210557170903431, + "learning_rate": 1.1366542254476198e-10, + "loss": 0.296, + "step": 22050 + }, + { + "epoch": 0.9980085992305952, + "grad_norm": 0.634926737070786, + "learning_rate": 1.087771824948014e-10, + "loss": 0.298, + "step": 22051 + }, + { + "epoch": 0.9980538583389907, + "grad_norm": 0.5940878592865888, + "learning_rate": 1.0399637475067004e-10, + "loss": 0.275, + "step": 22052 + }, + { + "epoch": 0.9980991174473863, + "grad_norm": 0.593622440995024, + "learning_rate": 9.932299941561862e-11, + "loss": 0.2628, + "step": 22053 + }, + { + "epoch": 0.9981443765557818, + "grad_norm": 0.5324880581632664, + "learning_rate": 9.475705659012236e-11, + "loss": 0.2769, + "step": 22054 + }, + { + "epoch": 0.9981896356641774, + "grad_norm": 0.6244130460469303, + "learning_rate": 9.029854637243595e-11, + "loss": 0.294, + "step": 22055 + }, + { + "epoch": 0.998234894772573, + "grad_norm": 0.5707182384816525, + "learning_rate": 8.594746885803862e-11, + "loss": 0.2953, + "step": 22056 + }, + { + "epoch": 0.9982801538809686, + "grad_norm": 0.602646670813106, + "learning_rate": 8.170382414074418e-11, + "loss": 0.2922, + "step": 22057 + }, + { + "epoch": 0.9983254129893641, + "grad_norm": 0.7079394099653381, + "learning_rate": 7.756761231159094e-11, + "loss": 0.2928, + "step": 22058 + }, + { + "epoch": 0.9983706720977596, + "grad_norm": 0.6180266400910659, + "learning_rate": 7.353883345939672e-11, + "loss": 0.309, + "step": 22059 + }, + { + "epoch": 0.9984159312061552, + "grad_norm": 0.6097297959534627, + "learning_rate": 6.961748767020382e-11, + "loss": 0.3378, + "step": 22060 + }, + { + "epoch": 0.9984611903145508, + "grad_norm": 0.6597148209260325, + "learning_rate": 6.580357502949942e-11, + "loss": 0.2849, + "step": 22061 + }, + { + "epoch": 0.9985064494229464, + "grad_norm": 0.5774377184498107, + "learning_rate": 6.209709561832977e-11, + "loss": 0.27, + "step": 22062 + }, + { + "epoch": 0.9985517085313419, + "grad_norm": 0.6361187588078144, + "learning_rate": 5.849804951663096e-11, + "loss": 0.3152, + "step": 22063 + }, + { + "epoch": 0.9985969676397375, + "grad_norm": 0.6457484657525558, + "learning_rate": 5.500643680156348e-11, + "loss": 0.2703, + "step": 22064 + }, + { + "epoch": 0.9986422267481331, + "grad_norm": 0.6391349815071075, + "learning_rate": 5.162225754806738e-11, + "loss": 0.2947, + "step": 22065 + }, + { + "epoch": 0.9986874858565287, + "grad_norm": 0.6407162809395204, + "learning_rate": 4.834551182941738e-11, + "loss": 0.2897, + "step": 22066 + }, + { + "epoch": 0.9987327449649241, + "grad_norm": 0.7632623349347577, + "learning_rate": 4.517619971500242e-11, + "loss": 0.2673, + "step": 22067 + }, + { + "epoch": 0.9987780040733197, + "grad_norm": 0.6325446996026688, + "learning_rate": 4.211432127421144e-11, + "loss": 0.2909, + "step": 22068 + }, + { + "epoch": 0.9988232631817153, + "grad_norm": 0.6078505153502691, + "learning_rate": 3.9159876571992495e-11, + "loss": 0.2539, + "step": 22069 + }, + { + "epoch": 0.9988685222901109, + "grad_norm": 0.5766445623028778, + "learning_rate": 3.6312865672183394e-11, + "loss": 0.2862, + "step": 22070 + }, + { + "epoch": 0.9989137813985064, + "grad_norm": 0.5453404898877032, + "learning_rate": 3.3573288635291304e-11, + "loss": 0.2688, + "step": 22071 + }, + { + "epoch": 0.998959040506902, + "grad_norm": 0.6131718694107106, + "learning_rate": 3.094114552126826e-11, + "loss": 0.2529, + "step": 22072 + }, + { + "epoch": 0.9990042996152976, + "grad_norm": 0.5919616950332834, + "learning_rate": 2.8416436385625412e-11, + "loss": 0.2526, + "step": 22073 + }, + { + "epoch": 0.9990495587236932, + "grad_norm": 0.673900937746956, + "learning_rate": 2.599916128331881e-11, + "loss": 0.2754, + "step": 22074 + }, + { + "epoch": 0.9990948178320888, + "grad_norm": 0.601033366388982, + "learning_rate": 2.3689320265973815e-11, + "loss": 0.2914, + "step": 22075 + }, + { + "epoch": 0.9991400769404842, + "grad_norm": 0.5964276240291718, + "learning_rate": 2.1486913383550467e-11, + "loss": 0.3096, + "step": 22076 + }, + { + "epoch": 0.9991853360488798, + "grad_norm": 0.7357281689405786, + "learning_rate": 1.9391940682678133e-11, + "loss": 0.259, + "step": 22077 + }, + { + "epoch": 0.9992305951572754, + "grad_norm": 0.6228373665170601, + "learning_rate": 1.740440220887596e-11, + "loss": 0.3087, + "step": 22078 + }, + { + "epoch": 0.999275854265671, + "grad_norm": 0.6323616538871186, + "learning_rate": 1.5524298004887527e-11, + "loss": 0.2665, + "step": 22079 + }, + { + "epoch": 0.9993211133740665, + "grad_norm": 0.6233482317348159, + "learning_rate": 1.3751628111235981e-11, + "loss": 0.2945, + "step": 22080 + }, + { + "epoch": 0.9993663724824621, + "grad_norm": 0.5677360160707733, + "learning_rate": 1.2086392565113792e-11, + "loss": 0.2714, + "step": 22081 + }, + { + "epoch": 0.9994116315908577, + "grad_norm": 0.5755998827043224, + "learning_rate": 1.0528591403713428e-11, + "loss": 0.2969, + "step": 22082 + }, + { + "epoch": 0.9994568906992533, + "grad_norm": 0.6115343210945876, + "learning_rate": 9.07822465923136e-12, + "loss": 0.3026, + "step": 22083 + }, + { + "epoch": 0.9995021498076488, + "grad_norm": 0.551669168000992, + "learning_rate": 7.735292363864055e-12, + "loss": 0.2999, + "step": 22084 + }, + { + "epoch": 0.9995474089160443, + "grad_norm": 0.630577749173783, + "learning_rate": 6.4997945453670885e-12, + "loss": 0.296, + "step": 22085 + }, + { + "epoch": 0.9995926680244399, + "grad_norm": 0.6528100887496123, + "learning_rate": 5.371731231496036e-12, + "loss": 0.2953, + "step": 22086 + }, + { + "epoch": 0.9996379271328355, + "grad_norm": 0.5826005800793707, + "learning_rate": 4.3511024455655806e-12, + "loss": 0.2665, + "step": 22087 + }, + { + "epoch": 0.9996831862412311, + "grad_norm": 0.5722049007305591, + "learning_rate": 3.437908209780183e-12, + "loss": 0.2648, + "step": 22088 + }, + { + "epoch": 0.9997284453496266, + "grad_norm": 0.615232940231692, + "learning_rate": 2.6321485435687465e-12, + "loss": 0.2973, + "step": 22089 + }, + { + "epoch": 0.9997737044580222, + "grad_norm": 0.5882069213196555, + "learning_rate": 1.9338234646948397e-12, + "loss": 0.2987, + "step": 22090 + }, + { + "epoch": 0.9998189635664178, + "grad_norm": 0.5699427946030503, + "learning_rate": 1.3429329881464725e-12, + "loss": 0.2605, + "step": 22091 + }, + { + "epoch": 0.9998642226748133, + "grad_norm": 0.6242708571432227, + "learning_rate": 8.59477126136099e-13, + "loss": 0.2567, + "step": 22092 + }, + { + "epoch": 0.9999094817832088, + "grad_norm": 0.6206500807977732, + "learning_rate": 4.834558897659492e-13, + "loss": 0.2771, + "step": 22093 + }, + { + "epoch": 0.9999547408916044, + "grad_norm": 0.6006375397432488, + "learning_rate": 2.148692862524726e-13, + "loss": 0.2852, + "step": 22094 + }, + { + "epoch": 1.0, + "grad_norm": 0.5607683363863506, + "learning_rate": 5.3717321701896033e-14, + "loss": 0.2744, + "step": 22095 + }, + { + "epoch": 1.0, + "step": 22095, + "total_flos": 1.469106584682496e+17, + "train_loss": 0.3743057166681648, + "train_runtime": 137166.71, + "train_samples_per_second": 82.478, + "train_steps_per_second": 0.161 + } + ], + "logging_steps": 1.0, + "max_steps": 22095, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 2000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.469106584682496e+17, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}