| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 30.0, | |
| "eval_steps": 500, | |
| "global_step": 23130, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0311284046692607, | |
| "grad_norm": 11.856830596923828, | |
| "learning_rate": 3.314121037463977e-06, | |
| "loss": 2.0132, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0622568093385214, | |
| "grad_norm": 1.1055241823196411, | |
| "learning_rate": 6.7723342939481265e-06, | |
| "loss": 1.0751, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0933852140077821, | |
| "grad_norm": 1.4284316301345825, | |
| "learning_rate": 1.0230547550432277e-05, | |
| "loss": 1.038, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.1245136186770428, | |
| "grad_norm": 0.6491210460662842, | |
| "learning_rate": 1.3688760806916426e-05, | |
| "loss": 0.9988, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.1556420233463035, | |
| "grad_norm": 0.39960888028144836, | |
| "learning_rate": 1.7146974063400578e-05, | |
| "loss": 0.9849, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1867704280155642, | |
| "grad_norm": 0.678686797618866, | |
| "learning_rate": 2.060518731988473e-05, | |
| "loss": 0.9767, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.2178988326848249, | |
| "grad_norm": 0.5720846056938171, | |
| "learning_rate": 2.406340057636888e-05, | |
| "loss": 0.9684, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.2490272373540856, | |
| "grad_norm": 0.6077919602394104, | |
| "learning_rate": 2.7521613832853026e-05, | |
| "loss": 0.967, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.2801556420233463, | |
| "grad_norm": 0.7459629774093628, | |
| "learning_rate": 3.097982708933718e-05, | |
| "loss": 0.9553, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.311284046692607, | |
| "grad_norm": 0.8133582472801208, | |
| "learning_rate": 3.443804034582133e-05, | |
| "loss": 0.9474, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3424124513618677, | |
| "grad_norm": 0.7748175263404846, | |
| "learning_rate": 3.7896253602305474e-05, | |
| "loss": 0.9404, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3735408560311284, | |
| "grad_norm": 0.9244363903999329, | |
| "learning_rate": 4.135446685878963e-05, | |
| "loss": 0.9326, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.4046692607003891, | |
| "grad_norm": 1.0879474878311157, | |
| "learning_rate": 4.4812680115273775e-05, | |
| "loss": 0.9112, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.4357976653696498, | |
| "grad_norm": 1.3521575927734375, | |
| "learning_rate": 4.827089337175792e-05, | |
| "loss": 0.9033, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.4669260700389105, | |
| "grad_norm": 1.4220598936080933, | |
| "learning_rate": 5.1729106628242076e-05, | |
| "loss": 0.8877, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.4980544747081712, | |
| "grad_norm": 1.345544457435608, | |
| "learning_rate": 5.518731988472623e-05, | |
| "loss": 0.8726, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5291828793774319, | |
| "grad_norm": 1.7283053398132324, | |
| "learning_rate": 5.864553314121038e-05, | |
| "loss": 0.8553, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5603112840466926, | |
| "grad_norm": 1.2822779417037964, | |
| "learning_rate": 6.210374639769453e-05, | |
| "loss": 0.841, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.5914396887159533, | |
| "grad_norm": 2.8578877449035645, | |
| "learning_rate": 6.556195965417868e-05, | |
| "loss": 0.8262, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.622568093385214, | |
| "grad_norm": 1.603874683380127, | |
| "learning_rate": 6.902017291066282e-05, | |
| "loss": 0.7989, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6536964980544747, | |
| "grad_norm": 2.2428903579711914, | |
| "learning_rate": 7.247838616714697e-05, | |
| "loss": 0.7958, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6848249027237354, | |
| "grad_norm": 1.6760625839233398, | |
| "learning_rate": 7.593659942363113e-05, | |
| "loss": 0.7799, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.7159533073929961, | |
| "grad_norm": 2.1145055294036865, | |
| "learning_rate": 7.939481268011528e-05, | |
| "loss": 0.7671, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.7470817120622568, | |
| "grad_norm": 1.3805097341537476, | |
| "learning_rate": 8.285302593659943e-05, | |
| "loss": 0.7563, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.7782101167315175, | |
| "grad_norm": 2.1005349159240723, | |
| "learning_rate": 8.631123919308359e-05, | |
| "loss": 0.7396, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8093385214007782, | |
| "grad_norm": 1.6995466947555542, | |
| "learning_rate": 8.976945244956772e-05, | |
| "loss": 0.738, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.8404669260700389, | |
| "grad_norm": 1.5165631771087646, | |
| "learning_rate": 9.322766570605188e-05, | |
| "loss": 0.7208, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.8715953307392996, | |
| "grad_norm": 1.4923312664031982, | |
| "learning_rate": 9.668587896253603e-05, | |
| "loss": 0.7126, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.9027237354085603, | |
| "grad_norm": 1.7333067655563354, | |
| "learning_rate": 9.999999950982757e-05, | |
| "loss": 0.6998, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.933852140077821, | |
| "grad_norm": 1.5790561437606812, | |
| "learning_rate": 9.999969364253642e-05, | |
| "loss": 0.6943, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9649805447470817, | |
| "grad_norm": 2.4895715713500977, | |
| "learning_rate": 9.999882310058304e-05, | |
| "loss": 0.6887, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.9961089494163424, | |
| "grad_norm": 1.2938724756240845, | |
| "learning_rate": 9.999738789379896e-05, | |
| "loss": 0.6728, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.027237354085603, | |
| "grad_norm": 2.313992738723755, | |
| "learning_rate": 9.999538803839277e-05, | |
| "loss": 0.6704, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.0583657587548638, | |
| "grad_norm": 2.3841969966888428, | |
| "learning_rate": 9.999282355694997e-05, | |
| "loss": 0.6683, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.0894941634241244, | |
| "grad_norm": 1.7238163948059082, | |
| "learning_rate": 9.998969447843267e-05, | |
| "loss": 0.6598, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.1206225680933852, | |
| "grad_norm": 1.889561653137207, | |
| "learning_rate": 9.998600083817934e-05, | |
| "loss": 0.6469, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.1517509727626458, | |
| "grad_norm": 2.078350305557251, | |
| "learning_rate": 9.998174267790433e-05, | |
| "loss": 0.6394, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.1828793774319066, | |
| "grad_norm": 3.088223934173584, | |
| "learning_rate": 9.99769200456974e-05, | |
| "loss": 0.642, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.2140077821011672, | |
| "grad_norm": 1.8867013454437256, | |
| "learning_rate": 9.997153299602332e-05, | |
| "loss": 0.6365, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.245136186770428, | |
| "grad_norm": 2.187405586242676, | |
| "learning_rate": 9.9965581589721e-05, | |
| "loss": 0.6216, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.2762645914396886, | |
| "grad_norm": 1.5248736143112183, | |
| "learning_rate": 9.995906589400307e-05, | |
| "loss": 0.6208, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.3073929961089494, | |
| "grad_norm": 1.3533403873443604, | |
| "learning_rate": 9.995198598245492e-05, | |
| "loss": 0.6143, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.3385214007782102, | |
| "grad_norm": 1.9436872005462646, | |
| "learning_rate": 9.994434193503399e-05, | |
| "loss": 0.6101, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.3696498054474708, | |
| "grad_norm": 1.5890527963638306, | |
| "learning_rate": 9.993613383806879e-05, | |
| "loss": 0.6011, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.4007782101167314, | |
| "grad_norm": 1.6845647096633911, | |
| "learning_rate": 9.9927361784258e-05, | |
| "loss": 0.6022, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.4319066147859922, | |
| "grad_norm": 1.5048511028289795, | |
| "learning_rate": 9.991802587266932e-05, | |
| "loss": 0.6078, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.463035019455253, | |
| "grad_norm": 1.8788032531738281, | |
| "learning_rate": 9.990812620873848e-05, | |
| "loss": 0.6014, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.4941634241245136, | |
| "grad_norm": 2.0226938724517822, | |
| "learning_rate": 9.989766290426795e-05, | |
| "loss": 0.5912, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.5252918287937742, | |
| "grad_norm": 1.9385308027267456, | |
| "learning_rate": 9.98866360774257e-05, | |
| "loss": 0.5812, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.556420233463035, | |
| "grad_norm": 1.2753961086273193, | |
| "learning_rate": 9.98750458527439e-05, | |
| "loss": 0.5825, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.5875486381322959, | |
| "grad_norm": 1.6889104843139648, | |
| "learning_rate": 9.986289236111747e-05, | |
| "loss": 0.58, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.6186770428015564, | |
| "grad_norm": 2.130415916442871, | |
| "learning_rate": 9.985017573980262e-05, | |
| "loss": 0.5853, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.649805447470817, | |
| "grad_norm": 1.8879446983337402, | |
| "learning_rate": 9.983689613241531e-05, | |
| "loss": 0.5806, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.6809338521400778, | |
| "grad_norm": 1.2330986261367798, | |
| "learning_rate": 9.982305368892964e-05, | |
| "loss": 0.574, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.7120622568093387, | |
| "grad_norm": 1.389142632484436, | |
| "learning_rate": 9.980864856567606e-05, | |
| "loss": 0.5743, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.7431906614785992, | |
| "grad_norm": 1.184691309928894, | |
| "learning_rate": 9.979368092533978e-05, | |
| "loss": 0.5691, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.7743190661478598, | |
| "grad_norm": 1.3246943950653076, | |
| "learning_rate": 9.977815093695875e-05, | |
| "loss": 0.5669, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.8054474708171206, | |
| "grad_norm": 1.5033084154129028, | |
| "learning_rate": 9.976205877592189e-05, | |
| "loss": 0.5636, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.8365758754863815, | |
| "grad_norm": 2.675381660461426, | |
| "learning_rate": 9.974540462396697e-05, | |
| "loss": 0.5554, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.867704280155642, | |
| "grad_norm": 1.4676384925842285, | |
| "learning_rate": 9.972818866917877e-05, | |
| "loss": 0.5526, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.8988326848249026, | |
| "grad_norm": 2.269249200820923, | |
| "learning_rate": 9.971041110598669e-05, | |
| "loss": 0.556, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.9299610894941635, | |
| "grad_norm": 1.7739601135253906, | |
| "learning_rate": 9.969207213516279e-05, | |
| "loss": 0.5546, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.9610894941634243, | |
| "grad_norm": 1.2574249505996704, | |
| "learning_rate": 9.967317196381936e-05, | |
| "loss": 0.549, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.9922178988326849, | |
| "grad_norm": 1.65413236618042, | |
| "learning_rate": 9.965371080540666e-05, | |
| "loss": 0.5537, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 2.0233463035019454, | |
| "grad_norm": 1.6155718564987183, | |
| "learning_rate": 9.96336888797105e-05, | |
| "loss": 0.5424, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.054474708171206, | |
| "grad_norm": 1.556755542755127, | |
| "learning_rate": 9.961310641284977e-05, | |
| "loss": 0.5396, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 2.085603112840467, | |
| "grad_norm": 1.5641894340515137, | |
| "learning_rate": 9.959196363727383e-05, | |
| "loss": 0.5465, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 2.1167315175097277, | |
| "grad_norm": 1.4483375549316406, | |
| "learning_rate": 9.957026079175996e-05, | |
| "loss": 0.5401, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 2.1478599221789882, | |
| "grad_norm": 1.8051731586456299, | |
| "learning_rate": 9.954799812141054e-05, | |
| "loss": 0.541, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 2.178988326848249, | |
| "grad_norm": 2.337942361831665, | |
| "learning_rate": 9.952517587765049e-05, | |
| "loss": 0.5359, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.21011673151751, | |
| "grad_norm": 1.5796310901641846, | |
| "learning_rate": 9.950179431822421e-05, | |
| "loss": 0.5361, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 2.2412451361867705, | |
| "grad_norm": 1.3433961868286133, | |
| "learning_rate": 9.947785370719281e-05, | |
| "loss": 0.5254, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 2.272373540856031, | |
| "grad_norm": 1.8424466848373413, | |
| "learning_rate": 9.945335431493108e-05, | |
| "loss": 0.5278, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.3035019455252916, | |
| "grad_norm": 1.280912160873413, | |
| "learning_rate": 9.942829641812445e-05, | |
| "loss": 0.5314, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.3346303501945527, | |
| "grad_norm": 2.389176368713379, | |
| "learning_rate": 9.94026802997658e-05, | |
| "loss": 0.5272, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.3657587548638133, | |
| "grad_norm": 1.804115653038025, | |
| "learning_rate": 9.93765062491524e-05, | |
| "loss": 0.5214, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.396887159533074, | |
| "grad_norm": 2.4799587726593018, | |
| "learning_rate": 9.934977456188253e-05, | |
| "loss": 0.5228, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.4280155642023344, | |
| "grad_norm": 1.3502540588378906, | |
| "learning_rate": 9.932248553985213e-05, | |
| "loss": 0.5269, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.4591439688715955, | |
| "grad_norm": 1.9639521837234497, | |
| "learning_rate": 9.929463949125151e-05, | |
| "loss": 0.5244, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.490272373540856, | |
| "grad_norm": 1.5300196409225464, | |
| "learning_rate": 9.926623673056173e-05, | |
| "loss": 0.5163, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.5214007782101167, | |
| "grad_norm": 1.3195667266845703, | |
| "learning_rate": 9.923727757855117e-05, | |
| "loss": 0.5155, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.5525291828793772, | |
| "grad_norm": 1.3704023361206055, | |
| "learning_rate": 9.920776236227181e-05, | |
| "loss": 0.5164, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.5836575875486383, | |
| "grad_norm": 1.2443211078643799, | |
| "learning_rate": 9.917769141505557e-05, | |
| "loss": 0.5119, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.614785992217899, | |
| "grad_norm": 1.7005102634429932, | |
| "learning_rate": 9.91470650765106e-05, | |
| "loss": 0.5191, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.6459143968871595, | |
| "grad_norm": 1.742263674736023, | |
| "learning_rate": 9.911588369251736e-05, | |
| "loss": 0.5207, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.6770428015564205, | |
| "grad_norm": 2.342224597930908, | |
| "learning_rate": 9.908414761522473e-05, | |
| "loss": 0.5116, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 2.708171206225681, | |
| "grad_norm": 1.481919765472412, | |
| "learning_rate": 9.905185720304612e-05, | |
| "loss": 0.5169, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 2.7392996108949417, | |
| "grad_norm": 2.477743148803711, | |
| "learning_rate": 9.901901282065529e-05, | |
| "loss": 0.5125, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 2.7704280155642023, | |
| "grad_norm": 1.231108546257019, | |
| "learning_rate": 9.898561483898233e-05, | |
| "loss": 0.5119, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 2.801556420233463, | |
| "grad_norm": 1.6876734495162964, | |
| "learning_rate": 9.895166363520943e-05, | |
| "loss": 0.5098, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.832684824902724, | |
| "grad_norm": 1.886053442955017, | |
| "learning_rate": 9.891715959276664e-05, | |
| "loss": 0.509, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 2.8638132295719845, | |
| "grad_norm": 2.044147253036499, | |
| "learning_rate": 9.88821031013275e-05, | |
| "loss": 0.5081, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 2.894941634241245, | |
| "grad_norm": 1.8339983224868774, | |
| "learning_rate": 9.88464945568047e-05, | |
| "loss": 0.5031, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 2.926070038910506, | |
| "grad_norm": 2.0237860679626465, | |
| "learning_rate": 9.881033436134555e-05, | |
| "loss": 0.5026, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 2.9571984435797667, | |
| "grad_norm": 1.222092866897583, | |
| "learning_rate": 9.877362292332749e-05, | |
| "loss": 0.4922, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.9883268482490273, | |
| "grad_norm": 2.8668859004974365, | |
| "learning_rate": 9.873636065735343e-05, | |
| "loss": 0.4978, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 3.019455252918288, | |
| "grad_norm": 1.8704198598861694, | |
| "learning_rate": 9.869854798424709e-05, | |
| "loss": 0.4999, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 3.0505836575875485, | |
| "grad_norm": 1.3280694484710693, | |
| "learning_rate": 9.866018533104826e-05, | |
| "loss": 0.4979, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 3.0817120622568095, | |
| "grad_norm": 1.6099941730499268, | |
| "learning_rate": 9.862127313100795e-05, | |
| "loss": 0.4966, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 3.11284046692607, | |
| "grad_norm": 1.797253131866455, | |
| "learning_rate": 9.858181182358355e-05, | |
| "loss": 0.4913, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.1439688715953307, | |
| "grad_norm": 1.4523372650146484, | |
| "learning_rate": 9.854180185443378e-05, | |
| "loss": 0.494, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 3.1750972762645913, | |
| "grad_norm": 1.665285587310791, | |
| "learning_rate": 9.850124367541371e-05, | |
| "loss": 0.495, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 3.2062256809338523, | |
| "grad_norm": 1.2931227684020996, | |
| "learning_rate": 9.84601377445697e-05, | |
| "loss": 0.4949, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 3.237354085603113, | |
| "grad_norm": 2.0045413970947266, | |
| "learning_rate": 9.841848452613412e-05, | |
| "loss": 0.4901, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 3.2684824902723735, | |
| "grad_norm": 1.2784613370895386, | |
| "learning_rate": 9.83762844905202e-05, | |
| "loss": 0.4967, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 3.299610894941634, | |
| "grad_norm": 1.485795497894287, | |
| "learning_rate": 9.833353811431669e-05, | |
| "loss": 0.4921, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 3.330739299610895, | |
| "grad_norm": 2.1288626194000244, | |
| "learning_rate": 9.829024588028244e-05, | |
| "loss": 0.4912, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 3.3618677042801557, | |
| "grad_norm": 1.5102566480636597, | |
| "learning_rate": 9.824640827734102e-05, | |
| "loss": 0.4938, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 3.3929961089494163, | |
| "grad_norm": 2.126574993133545, | |
| "learning_rate": 9.820202580057512e-05, | |
| "loss": 0.4881, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 3.424124513618677, | |
| "grad_norm": 1.1427215337753296, | |
| "learning_rate": 9.8157098951221e-05, | |
| "loss": 0.4956, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 3.455252918287938, | |
| "grad_norm": 1.847524881362915, | |
| "learning_rate": 9.811162823666287e-05, | |
| "loss": 0.4883, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 3.4863813229571985, | |
| "grad_norm": 1.3941086530685425, | |
| "learning_rate": 9.806561417042706e-05, | |
| "loss": 0.488, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 3.517509727626459, | |
| "grad_norm": 1.7835474014282227, | |
| "learning_rate": 9.801905727217631e-05, | |
| "loss": 0.4796, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 3.5486381322957197, | |
| "grad_norm": 2.4145917892456055, | |
| "learning_rate": 9.797195806770387e-05, | |
| "loss": 0.4856, | |
| "step": 2736 | |
| }, | |
| { | |
| "epoch": 3.5797665369649807, | |
| "grad_norm": 1.6567249298095703, | |
| "learning_rate": 9.792431708892752e-05, | |
| "loss": 0.4799, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 3.6108949416342413, | |
| "grad_norm": 1.7985295057296753, | |
| "learning_rate": 9.787613487388365e-05, | |
| "loss": 0.4886, | |
| "step": 2784 | |
| }, | |
| { | |
| "epoch": 3.642023346303502, | |
| "grad_norm": 1.7581013441085815, | |
| "learning_rate": 9.78274119667211e-05, | |
| "loss": 0.4835, | |
| "step": 2808 | |
| }, | |
| { | |
| "epoch": 3.673151750972763, | |
| "grad_norm": 1.6254545450210571, | |
| "learning_rate": 9.777814891769507e-05, | |
| "loss": 0.4841, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 3.7042801556420235, | |
| "grad_norm": 1.745969295501709, | |
| "learning_rate": 9.772834628316087e-05, | |
| "loss": 0.4848, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 3.735408560311284, | |
| "grad_norm": 1.762830138206482, | |
| "learning_rate": 9.767800462556769e-05, | |
| "loss": 0.476, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 3.7665369649805447, | |
| "grad_norm": 1.6283063888549805, | |
| "learning_rate": 9.762712451345217e-05, | |
| "loss": 0.48, | |
| "step": 2904 | |
| }, | |
| { | |
| "epoch": 3.7976653696498053, | |
| "grad_norm": 1.7204512357711792, | |
| "learning_rate": 9.757570652143202e-05, | |
| "loss": 0.4746, | |
| "step": 2928 | |
| }, | |
| { | |
| "epoch": 3.8287937743190663, | |
| "grad_norm": 2.6043598651885986, | |
| "learning_rate": 9.752375123019956e-05, | |
| "loss": 0.4805, | |
| "step": 2952 | |
| }, | |
| { | |
| "epoch": 3.859922178988327, | |
| "grad_norm": 2.134938955307007, | |
| "learning_rate": 9.74712592265151e-05, | |
| "loss": 0.4776, | |
| "step": 2976 | |
| }, | |
| { | |
| "epoch": 3.8910505836575875, | |
| "grad_norm": 1.4748331308364868, | |
| "learning_rate": 9.741823110320037e-05, | |
| "loss": 0.4725, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 3.9221789883268485, | |
| "grad_norm": 2.2188987731933594, | |
| "learning_rate": 9.73646674591318e-05, | |
| "loss": 0.4781, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 3.953307392996109, | |
| "grad_norm": 1.2936460971832275, | |
| "learning_rate": 9.731056889923374e-05, | |
| "loss": 0.4808, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 3.9844357976653697, | |
| "grad_norm": 2.5133862495422363, | |
| "learning_rate": 9.725593603447166e-05, | |
| "loss": 0.4839, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 4.01556420233463, | |
| "grad_norm": 2.2660224437713623, | |
| "learning_rate": 9.720076948184522e-05, | |
| "loss": 0.4709, | |
| "step": 3096 | |
| }, | |
| { | |
| "epoch": 4.046692607003891, | |
| "grad_norm": 1.573203444480896, | |
| "learning_rate": 9.714506986438134e-05, | |
| "loss": 0.4762, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 4.0778210116731515, | |
| "grad_norm": 1.9054023027420044, | |
| "learning_rate": 9.70888378111271e-05, | |
| "loss": 0.4796, | |
| "step": 3144 | |
| }, | |
| { | |
| "epoch": 4.108949416342412, | |
| "grad_norm": 2.2776753902435303, | |
| "learning_rate": 9.703207395714274e-05, | |
| "loss": 0.4705, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 4.1400778210116735, | |
| "grad_norm": 1.614623785018921, | |
| "learning_rate": 9.697477894349438e-05, | |
| "loss": 0.4713, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 4.171206225680934, | |
| "grad_norm": 2.478569269180298, | |
| "learning_rate": 9.691695341724681e-05, | |
| "loss": 0.4719, | |
| "step": 3216 | |
| }, | |
| { | |
| "epoch": 4.202334630350195, | |
| "grad_norm": 1.3797364234924316, | |
| "learning_rate": 9.685859803145625e-05, | |
| "loss": 0.4663, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 4.233463035019455, | |
| "grad_norm": 2.49601674079895, | |
| "learning_rate": 9.679971344516288e-05, | |
| "loss": 0.4827, | |
| "step": 3264 | |
| }, | |
| { | |
| "epoch": 4.264591439688716, | |
| "grad_norm": 1.5913656949996948, | |
| "learning_rate": 9.674030032338346e-05, | |
| "loss": 0.4869, | |
| "step": 3288 | |
| }, | |
| { | |
| "epoch": 4.2957198443579765, | |
| "grad_norm": 1.5114320516586304, | |
| "learning_rate": 9.668035933710378e-05, | |
| "loss": 0.4794, | |
| "step": 3312 | |
| }, | |
| { | |
| "epoch": 4.326848249027237, | |
| "grad_norm": 1.905714750289917, | |
| "learning_rate": 9.661989116327112e-05, | |
| "loss": 0.4702, | |
| "step": 3336 | |
| }, | |
| { | |
| "epoch": 4.357976653696498, | |
| "grad_norm": 1.6932348012924194, | |
| "learning_rate": 9.655889648478657e-05, | |
| "loss": 0.4693, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 4.389105058365759, | |
| "grad_norm": 1.9976513385772705, | |
| "learning_rate": 9.649737599049736e-05, | |
| "loss": 0.4705, | |
| "step": 3384 | |
| }, | |
| { | |
| "epoch": 4.42023346303502, | |
| "grad_norm": 1.4826905727386475, | |
| "learning_rate": 9.643533037518899e-05, | |
| "loss": 0.4697, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 4.45136186770428, | |
| "grad_norm": 1.617922306060791, | |
| "learning_rate": 9.637276033957755e-05, | |
| "loss": 0.4684, | |
| "step": 3432 | |
| }, | |
| { | |
| "epoch": 4.482490272373541, | |
| "grad_norm": 2.4124162197113037, | |
| "learning_rate": 9.630966659030158e-05, | |
| "loss": 0.462, | |
| "step": 3456 | |
| }, | |
| { | |
| "epoch": 4.5136186770428015, | |
| "grad_norm": 1.8999947309494019, | |
| "learning_rate": 9.624604983991434e-05, | |
| "loss": 0.4614, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 4.544747081712062, | |
| "grad_norm": 2.2038631439208984, | |
| "learning_rate": 9.618191080687552e-05, | |
| "loss": 0.473, | |
| "step": 3504 | |
| }, | |
| { | |
| "epoch": 4.575875486381323, | |
| "grad_norm": 1.5659903287887573, | |
| "learning_rate": 9.611725021554333e-05, | |
| "loss": 0.4632, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 4.607003891050583, | |
| "grad_norm": 2.38783597946167, | |
| "learning_rate": 9.605206879616617e-05, | |
| "loss": 0.4547, | |
| "step": 3552 | |
| }, | |
| { | |
| "epoch": 4.638132295719844, | |
| "grad_norm": 1.5512051582336426, | |
| "learning_rate": 9.59863672848745e-05, | |
| "loss": 0.4623, | |
| "step": 3576 | |
| }, | |
| { | |
| "epoch": 4.669260700389105, | |
| "grad_norm": 3.2371737957000732, | |
| "learning_rate": 9.592014642367243e-05, | |
| "loss": 0.4635, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 4.700389105058366, | |
| "grad_norm": 1.7594435214996338, | |
| "learning_rate": 9.585340696042935e-05, | |
| "loss": 0.4674, | |
| "step": 3624 | |
| }, | |
| { | |
| "epoch": 4.7315175097276265, | |
| "grad_norm": 1.3836287260055542, | |
| "learning_rate": 9.57861496488716e-05, | |
| "loss": 0.4611, | |
| "step": 3648 | |
| }, | |
| { | |
| "epoch": 4.762645914396887, | |
| "grad_norm": 1.7907147407531738, | |
| "learning_rate": 9.571837524857384e-05, | |
| "loss": 0.4609, | |
| "step": 3672 | |
| }, | |
| { | |
| "epoch": 4.793774319066148, | |
| "grad_norm": 1.7246521711349487, | |
| "learning_rate": 9.565008452495046e-05, | |
| "loss": 0.4588, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 4.824902723735408, | |
| "grad_norm": 2.1095101833343506, | |
| "learning_rate": 9.558127824924701e-05, | |
| "loss": 0.4623, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 4.856031128404669, | |
| "grad_norm": 1.1277464628219604, | |
| "learning_rate": 9.551195719853147e-05, | |
| "loss": 0.4568, | |
| "step": 3744 | |
| }, | |
| { | |
| "epoch": 4.88715953307393, | |
| "grad_norm": 1.2232158184051514, | |
| "learning_rate": 9.544212215568547e-05, | |
| "loss": 0.459, | |
| "step": 3768 | |
| }, | |
| { | |
| "epoch": 4.918287937743191, | |
| "grad_norm": 1.9220589399337769, | |
| "learning_rate": 9.53717739093954e-05, | |
| "loss": 0.4539, | |
| "step": 3792 | |
| }, | |
| { | |
| "epoch": 4.9494163424124515, | |
| "grad_norm": 1.6076886653900146, | |
| "learning_rate": 9.530091325414359e-05, | |
| "loss": 0.4583, | |
| "step": 3816 | |
| }, | |
| { | |
| "epoch": 4.980544747081712, | |
| "grad_norm": 1.2246028184890747, | |
| "learning_rate": 9.522954099019927e-05, | |
| "loss": 0.4567, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 5.011673151750973, | |
| "grad_norm": 1.4004205465316772, | |
| "learning_rate": 9.515765792360955e-05, | |
| "loss": 0.4535, | |
| "step": 3864 | |
| }, | |
| { | |
| "epoch": 5.042801556420233, | |
| "grad_norm": 1.30203378200531, | |
| "learning_rate": 9.508526486619036e-05, | |
| "loss": 0.452, | |
| "step": 3888 | |
| }, | |
| { | |
| "epoch": 5.073929961089494, | |
| "grad_norm": 1.538682222366333, | |
| "learning_rate": 9.501236263551719e-05, | |
| "loss": 0.4511, | |
| "step": 3912 | |
| }, | |
| { | |
| "epoch": 5.1050583657587545, | |
| "grad_norm": 1.3054372072219849, | |
| "learning_rate": 9.493895205491595e-05, | |
| "loss": 0.4489, | |
| "step": 3936 | |
| }, | |
| { | |
| "epoch": 5.136186770428016, | |
| "grad_norm": 1.4001922607421875, | |
| "learning_rate": 9.486503395345358e-05, | |
| "loss": 0.4577, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 5.167315175097277, | |
| "grad_norm": 1.3580487966537476, | |
| "learning_rate": 9.47906091659288e-05, | |
| "loss": 0.4519, | |
| "step": 3984 | |
| }, | |
| { | |
| "epoch": 5.198443579766537, | |
| "grad_norm": 1.929853081703186, | |
| "learning_rate": 9.47156785328626e-05, | |
| "loss": 0.4562, | |
| "step": 4008 | |
| }, | |
| { | |
| "epoch": 5.229571984435798, | |
| "grad_norm": 1.9883568286895752, | |
| "learning_rate": 9.464024290048879e-05, | |
| "loss": 0.4573, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 5.260700389105058, | |
| "grad_norm": 1.7795013189315796, | |
| "learning_rate": 9.456430312074432e-05, | |
| "loss": 0.4513, | |
| "step": 4056 | |
| }, | |
| { | |
| "epoch": 5.291828793774319, | |
| "grad_norm": 1.1019718647003174, | |
| "learning_rate": 9.44878600512599e-05, | |
| "loss": 0.4475, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 5.3229571984435795, | |
| "grad_norm": 1.604556918144226, | |
| "learning_rate": 9.441091455535007e-05, | |
| "loss": 0.4466, | |
| "step": 4104 | |
| }, | |
| { | |
| "epoch": 5.35408560311284, | |
| "grad_norm": 1.8707294464111328, | |
| "learning_rate": 9.433346750200363e-05, | |
| "loss": 0.4501, | |
| "step": 4128 | |
| }, | |
| { | |
| "epoch": 5.385214007782102, | |
| "grad_norm": 1.6021867990493774, | |
| "learning_rate": 9.425551976587366e-05, | |
| "loss": 0.4443, | |
| "step": 4152 | |
| }, | |
| { | |
| "epoch": 5.416342412451362, | |
| "grad_norm": 1.7186486721038818, | |
| "learning_rate": 9.417707222726784e-05, | |
| "loss": 0.4374, | |
| "step": 4176 | |
| }, | |
| { | |
| "epoch": 5.447470817120623, | |
| "grad_norm": 2.0640745162963867, | |
| "learning_rate": 9.409812577213833e-05, | |
| "loss": 0.4468, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 5.478599221789883, | |
| "grad_norm": 2.1669087409973145, | |
| "learning_rate": 9.401868129207181e-05, | |
| "loss": 0.4501, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 5.509727626459144, | |
| "grad_norm": 2.237527847290039, | |
| "learning_rate": 9.393873968427953e-05, | |
| "loss": 0.4469, | |
| "step": 4248 | |
| }, | |
| { | |
| "epoch": 5.5408560311284045, | |
| "grad_norm": 1.5120989084243774, | |
| "learning_rate": 9.385830185158701e-05, | |
| "loss": 0.4425, | |
| "step": 4272 | |
| }, | |
| { | |
| "epoch": 5.571984435797665, | |
| "grad_norm": 2.029425621032715, | |
| "learning_rate": 9.377736870242393e-05, | |
| "loss": 0.4509, | |
| "step": 4296 | |
| }, | |
| { | |
| "epoch": 5.603112840466926, | |
| "grad_norm": 1.609480857849121, | |
| "learning_rate": 9.369594115081386e-05, | |
| "loss": 0.4528, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 5.634241245136186, | |
| "grad_norm": 1.126060128211975, | |
| "learning_rate": 9.361402011636395e-05, | |
| "loss": 0.4435, | |
| "step": 4344 | |
| }, | |
| { | |
| "epoch": 5.665369649805448, | |
| "grad_norm": 3.637361526489258, | |
| "learning_rate": 9.353160652425452e-05, | |
| "loss": 0.4466, | |
| "step": 4368 | |
| }, | |
| { | |
| "epoch": 5.696498054474708, | |
| "grad_norm": 3.3293521404266357, | |
| "learning_rate": 9.344870130522863e-05, | |
| "loss": 0.4495, | |
| "step": 4392 | |
| }, | |
| { | |
| "epoch": 5.727626459143969, | |
| "grad_norm": 1.1623419523239136, | |
| "learning_rate": 9.33653053955815e-05, | |
| "loss": 0.4362, | |
| "step": 4416 | |
| }, | |
| { | |
| "epoch": 5.7587548638132295, | |
| "grad_norm": 1.3908815383911133, | |
| "learning_rate": 9.328141973715008e-05, | |
| "loss": 0.445, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 5.78988326848249, | |
| "grad_norm": 1.1905103921890259, | |
| "learning_rate": 9.31970452773023e-05, | |
| "loss": 0.4399, | |
| "step": 4464 | |
| }, | |
| { | |
| "epoch": 5.821011673151751, | |
| "grad_norm": 1.7141236066818237, | |
| "learning_rate": 9.311218296892636e-05, | |
| "loss": 0.4396, | |
| "step": 4488 | |
| }, | |
| { | |
| "epoch": 5.852140077821011, | |
| "grad_norm": 1.5528429746627808, | |
| "learning_rate": 9.302683377042007e-05, | |
| "loss": 0.4369, | |
| "step": 4512 | |
| }, | |
| { | |
| "epoch": 5.883268482490273, | |
| "grad_norm": 1.206060528755188, | |
| "learning_rate": 9.29409986456799e-05, | |
| "loss": 0.443, | |
| "step": 4536 | |
| }, | |
| { | |
| "epoch": 5.914396887159533, | |
| "grad_norm": 1.2948627471923828, | |
| "learning_rate": 9.285467856409023e-05, | |
| "loss": 0.4421, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 5.945525291828794, | |
| "grad_norm": 1.6690411567687988, | |
| "learning_rate": 9.276787450051225e-05, | |
| "loss": 0.4393, | |
| "step": 4584 | |
| }, | |
| { | |
| "epoch": 5.976653696498055, | |
| "grad_norm": 1.4727965593338013, | |
| "learning_rate": 9.26805874352731e-05, | |
| "loss": 0.443, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 6.007782101167315, | |
| "grad_norm": 2.1878299713134766, | |
| "learning_rate": 9.25928183541547e-05, | |
| "loss": 0.4359, | |
| "step": 4632 | |
| }, | |
| { | |
| "epoch": 6.038910505836576, | |
| "grad_norm": 1.5079774856567383, | |
| "learning_rate": 9.250456824838263e-05, | |
| "loss": 0.438, | |
| "step": 4656 | |
| }, | |
| { | |
| "epoch": 6.070038910505836, | |
| "grad_norm": 1.5700092315673828, | |
| "learning_rate": 9.241583811461498e-05, | |
| "loss": 0.4355, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 6.101167315175097, | |
| "grad_norm": 1.0717116594314575, | |
| "learning_rate": 9.232662895493107e-05, | |
| "loss": 0.4337, | |
| "step": 4704 | |
| }, | |
| { | |
| "epoch": 6.132295719844358, | |
| "grad_norm": 1.775414228439331, | |
| "learning_rate": 9.223694177682009e-05, | |
| "loss": 0.4398, | |
| "step": 4728 | |
| }, | |
| { | |
| "epoch": 6.163424124513619, | |
| "grad_norm": 3.057781457901001, | |
| "learning_rate": 9.214677759316982e-05, | |
| "loss": 0.4367, | |
| "step": 4752 | |
| }, | |
| { | |
| "epoch": 6.19455252918288, | |
| "grad_norm": 1.2848880290985107, | |
| "learning_rate": 9.205613742225507e-05, | |
| "loss": 0.433, | |
| "step": 4776 | |
| }, | |
| { | |
| "epoch": 6.22568093385214, | |
| "grad_norm": 1.5465294122695923, | |
| "learning_rate": 9.196502228772626e-05, | |
| "loss": 0.442, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 6.256809338521401, | |
| "grad_norm": 1.1864486932754517, | |
| "learning_rate": 9.18734332185979e-05, | |
| "loss": 0.4356, | |
| "step": 4824 | |
| }, | |
| { | |
| "epoch": 6.287937743190661, | |
| "grad_norm": 1.6817840337753296, | |
| "learning_rate": 9.17813712492368e-05, | |
| "loss": 0.4386, | |
| "step": 4848 | |
| }, | |
| { | |
| "epoch": 6.319066147859922, | |
| "grad_norm": 1.285474181175232, | |
| "learning_rate": 9.16888374193506e-05, | |
| "loss": 0.4306, | |
| "step": 4872 | |
| }, | |
| { | |
| "epoch": 6.3501945525291825, | |
| "grad_norm": 1.5364230871200562, | |
| "learning_rate": 9.159583277397587e-05, | |
| "loss": 0.4333, | |
| "step": 4896 | |
| }, | |
| { | |
| "epoch": 6.381322957198444, | |
| "grad_norm": 1.8164541721343994, | |
| "learning_rate": 9.150235836346639e-05, | |
| "loss": 0.4285, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 6.412451361867705, | |
| "grad_norm": 1.5146026611328125, | |
| "learning_rate": 9.140841524348125e-05, | |
| "loss": 0.4354, | |
| "step": 4944 | |
| }, | |
| { | |
| "epoch": 6.443579766536965, | |
| "grad_norm": 1.238393783569336, | |
| "learning_rate": 9.131400447497294e-05, | |
| "loss": 0.4257, | |
| "step": 4968 | |
| }, | |
| { | |
| "epoch": 6.474708171206226, | |
| "grad_norm": 1.4109466075897217, | |
| "learning_rate": 9.121912712417536e-05, | |
| "loss": 0.43, | |
| "step": 4992 | |
| }, | |
| { | |
| "epoch": 6.505836575875486, | |
| "grad_norm": 1.8265984058380127, | |
| "learning_rate": 9.11237842625918e-05, | |
| "loss": 0.4373, | |
| "step": 5016 | |
| }, | |
| { | |
| "epoch": 6.536964980544747, | |
| "grad_norm": 1.5519527196884155, | |
| "learning_rate": 9.102797696698284e-05, | |
| "loss": 0.4347, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 6.5680933852140075, | |
| "grad_norm": 1.314172387123108, | |
| "learning_rate": 9.093170631935412e-05, | |
| "loss": 0.4348, | |
| "step": 5064 | |
| }, | |
| { | |
| "epoch": 6.599221789883268, | |
| "grad_norm": 1.7968671321868896, | |
| "learning_rate": 9.083497340694425e-05, | |
| "loss": 0.4379, | |
| "step": 5088 | |
| }, | |
| { | |
| "epoch": 6.630350194552529, | |
| "grad_norm": 1.166242003440857, | |
| "learning_rate": 9.073777932221239e-05, | |
| "loss": 0.4313, | |
| "step": 5112 | |
| }, | |
| { | |
| "epoch": 6.66147859922179, | |
| "grad_norm": 1.9698489904403687, | |
| "learning_rate": 9.064012516282601e-05, | |
| "loss": 0.441, | |
| "step": 5136 | |
| }, | |
| { | |
| "epoch": 6.692607003891051, | |
| "grad_norm": 1.2938389778137207, | |
| "learning_rate": 9.054201203164845e-05, | |
| "loss": 0.4301, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 6.723735408560311, | |
| "grad_norm": 5.220723628997803, | |
| "learning_rate": 9.044344103672651e-05, | |
| "loss": 0.4232, | |
| "step": 5184 | |
| }, | |
| { | |
| "epoch": 6.754863813229572, | |
| "grad_norm": 1.7442070245742798, | |
| "learning_rate": 9.034441329127783e-05, | |
| "loss": 0.4343, | |
| "step": 5208 | |
| }, | |
| { | |
| "epoch": 6.785992217898833, | |
| "grad_norm": 4.927098274230957, | |
| "learning_rate": 9.024492991367848e-05, | |
| "loss": 0.4279, | |
| "step": 5232 | |
| }, | |
| { | |
| "epoch": 6.817120622568093, | |
| "grad_norm": 1.1979647874832153, | |
| "learning_rate": 9.014499202745019e-05, | |
| "loss": 0.4312, | |
| "step": 5256 | |
| }, | |
| { | |
| "epoch": 6.848249027237354, | |
| "grad_norm": 1.6905076503753662, | |
| "learning_rate": 9.004460076124768e-05, | |
| "loss": 0.432, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 6.879377431906615, | |
| "grad_norm": 1.388134241104126, | |
| "learning_rate": 8.994375724884604e-05, | |
| "loss": 0.4314, | |
| "step": 5304 | |
| }, | |
| { | |
| "epoch": 6.910505836575876, | |
| "grad_norm": 2.4431025981903076, | |
| "learning_rate": 8.984246262912774e-05, | |
| "loss": 0.4341, | |
| "step": 5328 | |
| }, | |
| { | |
| "epoch": 6.941634241245136, | |
| "grad_norm": 2.5521421432495117, | |
| "learning_rate": 8.974071804606989e-05, | |
| "loss": 0.4251, | |
| "step": 5352 | |
| }, | |
| { | |
| "epoch": 6.972762645914397, | |
| "grad_norm": 1.6180981397628784, | |
| "learning_rate": 8.96385246487313e-05, | |
| "loss": 0.4332, | |
| "step": 5376 | |
| }, | |
| { | |
| "epoch": 7.003891050583658, | |
| "grad_norm": 1.673168659210205, | |
| "learning_rate": 8.95358835912395e-05, | |
| "loss": 0.4258, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 7.035019455252918, | |
| "grad_norm": 2.032773733139038, | |
| "learning_rate": 8.943279603277767e-05, | |
| "loss": 0.4337, | |
| "step": 5424 | |
| }, | |
| { | |
| "epoch": 7.066147859922179, | |
| "grad_norm": 1.7290483713150024, | |
| "learning_rate": 8.932926313757157e-05, | |
| "loss": 0.4312, | |
| "step": 5448 | |
| }, | |
| { | |
| "epoch": 7.097276264591439, | |
| "grad_norm": 4.685028076171875, | |
| "learning_rate": 8.922528607487645e-05, | |
| "loss": 0.4416, | |
| "step": 5472 | |
| }, | |
| { | |
| "epoch": 7.1284046692607, | |
| "grad_norm": 1.5580335855484009, | |
| "learning_rate": 8.912086601896372e-05, | |
| "loss": 0.4358, | |
| "step": 5496 | |
| }, | |
| { | |
| "epoch": 7.159533073929961, | |
| "grad_norm": 1.332607388496399, | |
| "learning_rate": 8.901600414910785e-05, | |
| "loss": 0.4288, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 7.190661478599222, | |
| "grad_norm": 1.2149999141693115, | |
| "learning_rate": 8.891070164957288e-05, | |
| "loss": 0.4238, | |
| "step": 5544 | |
| }, | |
| { | |
| "epoch": 7.221789883268483, | |
| "grad_norm": 1.4633874893188477, | |
| "learning_rate": 8.880495970959917e-05, | |
| "loss": 0.4278, | |
| "step": 5568 | |
| }, | |
| { | |
| "epoch": 7.252918287937743, | |
| "grad_norm": 1.4801607131958008, | |
| "learning_rate": 8.869877952338991e-05, | |
| "loss": 0.4227, | |
| "step": 5592 | |
| }, | |
| { | |
| "epoch": 7.284046692607004, | |
| "grad_norm": 1.8194708824157715, | |
| "learning_rate": 8.85921622900977e-05, | |
| "loss": 0.4192, | |
| "step": 5616 | |
| }, | |
| { | |
| "epoch": 7.315175097276264, | |
| "grad_norm": 1.111076831817627, | |
| "learning_rate": 8.848510921381089e-05, | |
| "loss": 0.4231, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 7.346303501945525, | |
| "grad_norm": 1.4320513010025024, | |
| "learning_rate": 8.83776215035401e-05, | |
| "loss": 0.4224, | |
| "step": 5664 | |
| }, | |
| { | |
| "epoch": 7.377431906614786, | |
| "grad_norm": 1.80966317653656, | |
| "learning_rate": 8.826970037320448e-05, | |
| "loss": 0.4183, | |
| "step": 5688 | |
| }, | |
| { | |
| "epoch": 7.408560311284047, | |
| "grad_norm": 1.843509554862976, | |
| "learning_rate": 8.816134704161807e-05, | |
| "loss": 0.417, | |
| "step": 5712 | |
| }, | |
| { | |
| "epoch": 7.439688715953308, | |
| "grad_norm": 1.2015341520309448, | |
| "learning_rate": 8.805256273247598e-05, | |
| "loss": 0.4177, | |
| "step": 5736 | |
| }, | |
| { | |
| "epoch": 7.470817120622568, | |
| "grad_norm": 1.6432462930679321, | |
| "learning_rate": 8.794334867434059e-05, | |
| "loss": 0.4236, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 7.501945525291829, | |
| "grad_norm": 1.354224443435669, | |
| "learning_rate": 8.783370610062769e-05, | |
| "loss": 0.4142, | |
| "step": 5784 | |
| }, | |
| { | |
| "epoch": 7.533073929961089, | |
| "grad_norm": 1.6838608980178833, | |
| "learning_rate": 8.772363624959255e-05, | |
| "loss": 0.4173, | |
| "step": 5808 | |
| }, | |
| { | |
| "epoch": 7.56420233463035, | |
| "grad_norm": 1.8743314743041992, | |
| "learning_rate": 8.761314036431588e-05, | |
| "loss": 0.4248, | |
| "step": 5832 | |
| }, | |
| { | |
| "epoch": 7.595330739299611, | |
| "grad_norm": 1.4311802387237549, | |
| "learning_rate": 8.750221969268985e-05, | |
| "loss": 0.4204, | |
| "step": 5856 | |
| }, | |
| { | |
| "epoch": 7.626459143968871, | |
| "grad_norm": 1.4219359159469604, | |
| "learning_rate": 8.739087548740404e-05, | |
| "loss": 0.4201, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 7.657587548638133, | |
| "grad_norm": 2.070533275604248, | |
| "learning_rate": 8.727910900593114e-05, | |
| "loss": 0.4229, | |
| "step": 5904 | |
| }, | |
| { | |
| "epoch": 7.688715953307393, | |
| "grad_norm": 1.4531338214874268, | |
| "learning_rate": 8.716692151051293e-05, | |
| "loss": 0.42, | |
| "step": 5928 | |
| }, | |
| { | |
| "epoch": 7.719844357976654, | |
| "grad_norm": 2.2621729373931885, | |
| "learning_rate": 8.705431426814585e-05, | |
| "loss": 0.4171, | |
| "step": 5952 | |
| }, | |
| { | |
| "epoch": 7.750972762645914, | |
| "grad_norm": 1.242394208908081, | |
| "learning_rate": 8.694128855056683e-05, | |
| "loss": 0.4133, | |
| "step": 5976 | |
| }, | |
| { | |
| "epoch": 7.782101167315175, | |
| "grad_norm": 1.2939616441726685, | |
| "learning_rate": 8.68278456342389e-05, | |
| "loss": 0.4185, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 7.813229571984436, | |
| "grad_norm": 2.0788450241088867, | |
| "learning_rate": 8.671398680033668e-05, | |
| "loss": 0.4183, | |
| "step": 6024 | |
| }, | |
| { | |
| "epoch": 7.844357976653696, | |
| "grad_norm": 2.538680076599121, | |
| "learning_rate": 8.659971333473206e-05, | |
| "loss": 0.4246, | |
| "step": 6048 | |
| }, | |
| { | |
| "epoch": 7.875486381322958, | |
| "grad_norm": 2.1128950119018555, | |
| "learning_rate": 8.648502652797954e-05, | |
| "loss": 0.4156, | |
| "step": 6072 | |
| }, | |
| { | |
| "epoch": 7.906614785992218, | |
| "grad_norm": 2.2612478733062744, | |
| "learning_rate": 8.636992767530171e-05, | |
| "loss": 0.409, | |
| "step": 6096 | |
| }, | |
| { | |
| "epoch": 7.937743190661479, | |
| "grad_norm": 2.0751936435699463, | |
| "learning_rate": 8.625441807657471e-05, | |
| "loss": 0.4264, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 7.968871595330739, | |
| "grad_norm": 2.009459972381592, | |
| "learning_rate": 8.613849903631334e-05, | |
| "loss": 0.4255, | |
| "step": 6144 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 1.8576405048370361, | |
| "learning_rate": 8.602217186365655e-05, | |
| "loss": 0.4211, | |
| "step": 6168 | |
| }, | |
| { | |
| "epoch": 8.03112840466926, | |
| "grad_norm": 2.817073345184326, | |
| "learning_rate": 8.590543787235252e-05, | |
| "loss": 0.4156, | |
| "step": 6192 | |
| }, | |
| { | |
| "epoch": 8.062256809338521, | |
| "grad_norm": 1.5011825561523438, | |
| "learning_rate": 8.578829838074389e-05, | |
| "loss": 0.41, | |
| "step": 6216 | |
| }, | |
| { | |
| "epoch": 8.093385214007782, | |
| "grad_norm": 1.2293556928634644, | |
| "learning_rate": 8.567075471175281e-05, | |
| "loss": 0.417, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 8.124513618677042, | |
| "grad_norm": 1.415345549583435, | |
| "learning_rate": 8.555280819286603e-05, | |
| "loss": 0.4148, | |
| "step": 6264 | |
| }, | |
| { | |
| "epoch": 8.155642023346303, | |
| "grad_norm": 2.2379307746887207, | |
| "learning_rate": 8.543446015611995e-05, | |
| "loss": 0.4104, | |
| "step": 6288 | |
| }, | |
| { | |
| "epoch": 8.186770428015564, | |
| "grad_norm": 1.0670602321624756, | |
| "learning_rate": 8.531571193808549e-05, | |
| "loss": 0.4131, | |
| "step": 6312 | |
| }, | |
| { | |
| "epoch": 8.217898832684824, | |
| "grad_norm": 1.0915449857711792, | |
| "learning_rate": 8.519656487985309e-05, | |
| "loss": 0.4073, | |
| "step": 6336 | |
| }, | |
| { | |
| "epoch": 8.249027237354085, | |
| "grad_norm": 1.4844944477081299, | |
| "learning_rate": 8.507702032701748e-05, | |
| "loss": 0.4109, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 8.280155642023347, | |
| "grad_norm": 1.1173604726791382, | |
| "learning_rate": 8.495707962966253e-05, | |
| "loss": 0.4145, | |
| "step": 6384 | |
| }, | |
| { | |
| "epoch": 8.311284046692608, | |
| "grad_norm": 1.5978012084960938, | |
| "learning_rate": 8.4836744142346e-05, | |
| "loss": 0.4108, | |
| "step": 6408 | |
| }, | |
| { | |
| "epoch": 8.342412451361868, | |
| "grad_norm": 1.7912710905075073, | |
| "learning_rate": 8.471601522408422e-05, | |
| "loss": 0.4155, | |
| "step": 6432 | |
| }, | |
| { | |
| "epoch": 8.373540856031129, | |
| "grad_norm": 2.182061195373535, | |
| "learning_rate": 8.459489423833678e-05, | |
| "loss": 0.4117, | |
| "step": 6456 | |
| }, | |
| { | |
| "epoch": 8.40466926070039, | |
| "grad_norm": 1.8379067182540894, | |
| "learning_rate": 8.447338255299106e-05, | |
| "loss": 0.4104, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 8.43579766536965, | |
| "grad_norm": 1.4474197626113892, | |
| "learning_rate": 8.435148154034694e-05, | |
| "loss": 0.4142, | |
| "step": 6504 | |
| }, | |
| { | |
| "epoch": 8.46692607003891, | |
| "grad_norm": 2.309518575668335, | |
| "learning_rate": 8.422919257710104e-05, | |
| "loss": 0.4079, | |
| "step": 6528 | |
| }, | |
| { | |
| "epoch": 8.498054474708171, | |
| "grad_norm": 1.2606794834136963, | |
| "learning_rate": 8.410651704433146e-05, | |
| "loss": 0.4125, | |
| "step": 6552 | |
| }, | |
| { | |
| "epoch": 8.529182879377432, | |
| "grad_norm": 1.683693766593933, | |
| "learning_rate": 8.398345632748194e-05, | |
| "loss": 0.4132, | |
| "step": 6576 | |
| }, | |
| { | |
| "epoch": 8.560311284046692, | |
| "grad_norm": 2.342796802520752, | |
| "learning_rate": 8.386001181634642e-05, | |
| "loss": 0.4125, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 8.591439688715953, | |
| "grad_norm": 0.9687896370887756, | |
| "learning_rate": 8.373618490505315e-05, | |
| "loss": 0.4082, | |
| "step": 6624 | |
| }, | |
| { | |
| "epoch": 8.622568093385214, | |
| "grad_norm": 1.2769346237182617, | |
| "learning_rate": 8.361197699204911e-05, | |
| "loss": 0.413, | |
| "step": 6648 | |
| }, | |
| { | |
| "epoch": 8.653696498054474, | |
| "grad_norm": 1.4064596891403198, | |
| "learning_rate": 8.348738948008413e-05, | |
| "loss": 0.4172, | |
| "step": 6672 | |
| }, | |
| { | |
| "epoch": 8.684824902723735, | |
| "grad_norm": 1.0059700012207031, | |
| "learning_rate": 8.336242377619501e-05, | |
| "loss": 0.4132, | |
| "step": 6696 | |
| }, | |
| { | |
| "epoch": 8.715953307392995, | |
| "grad_norm": 1.5852705240249634, | |
| "learning_rate": 8.323708129168979e-05, | |
| "loss": 0.4129, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 8.747081712062258, | |
| "grad_norm": 1.879469394683838, | |
| "learning_rate": 8.31113634421316e-05, | |
| "loss": 0.4104, | |
| "step": 6744 | |
| }, | |
| { | |
| "epoch": 8.778210116731518, | |
| "grad_norm": 1.1461695432662964, | |
| "learning_rate": 8.298527164732283e-05, | |
| "loss": 0.4068, | |
| "step": 6768 | |
| }, | |
| { | |
| "epoch": 8.809338521400779, | |
| "grad_norm": 1.1254854202270508, | |
| "learning_rate": 8.285880733128907e-05, | |
| "loss": 0.4118, | |
| "step": 6792 | |
| }, | |
| { | |
| "epoch": 8.84046692607004, | |
| "grad_norm": 1.7840899229049683, | |
| "learning_rate": 8.273197192226294e-05, | |
| "loss": 0.4113, | |
| "step": 6816 | |
| }, | |
| { | |
| "epoch": 8.8715953307393, | |
| "grad_norm": 1.618880271911621, | |
| "learning_rate": 8.260476685266807e-05, | |
| "loss": 0.4065, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 8.90272373540856, | |
| "grad_norm": 1.2630411386489868, | |
| "learning_rate": 8.247719355910284e-05, | |
| "loss": 0.4029, | |
| "step": 6864 | |
| }, | |
| { | |
| "epoch": 8.933852140077821, | |
| "grad_norm": 1.138664960861206, | |
| "learning_rate": 8.234925348232421e-05, | |
| "loss": 0.4012, | |
| "step": 6888 | |
| }, | |
| { | |
| "epoch": 8.964980544747082, | |
| "grad_norm": 1.4435471296310425, | |
| "learning_rate": 8.222094806723143e-05, | |
| "loss": 0.4068, | |
| "step": 6912 | |
| }, | |
| { | |
| "epoch": 8.996108949416342, | |
| "grad_norm": 1.9499974250793457, | |
| "learning_rate": 8.209227876284972e-05, | |
| "loss": 0.4092, | |
| "step": 6936 | |
| }, | |
| { | |
| "epoch": 9.027237354085603, | |
| "grad_norm": 2.3621513843536377, | |
| "learning_rate": 8.196324702231389e-05, | |
| "loss": 0.4048, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 9.058365758754864, | |
| "grad_norm": 1.2890691757202148, | |
| "learning_rate": 8.183385430285197e-05, | |
| "loss": 0.3996, | |
| "step": 6984 | |
| }, | |
| { | |
| "epoch": 9.089494163424124, | |
| "grad_norm": 1.3257933855056763, | |
| "learning_rate": 8.170410206576872e-05, | |
| "loss": 0.3985, | |
| "step": 7008 | |
| }, | |
| { | |
| "epoch": 9.120622568093385, | |
| "grad_norm": 1.485418677330017, | |
| "learning_rate": 8.157399177642914e-05, | |
| "loss": 0.3994, | |
| "step": 7032 | |
| }, | |
| { | |
| "epoch": 9.151750972762645, | |
| "grad_norm": 1.115235686302185, | |
| "learning_rate": 8.144352490424187e-05, | |
| "loss": 0.3997, | |
| "step": 7056 | |
| }, | |
| { | |
| "epoch": 9.182879377431906, | |
| "grad_norm": 1.565184473991394, | |
| "learning_rate": 8.131270292264272e-05, | |
| "loss": 0.4059, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 9.214007782101167, | |
| "grad_norm": 1.3453902006149292, | |
| "learning_rate": 8.118152730907788e-05, | |
| "loss": 0.406, | |
| "step": 7104 | |
| }, | |
| { | |
| "epoch": 9.245136186770427, | |
| "grad_norm": 1.4093341827392578, | |
| "learning_rate": 8.104999954498734e-05, | |
| "loss": 0.4029, | |
| "step": 7128 | |
| }, | |
| { | |
| "epoch": 9.27626459143969, | |
| "grad_norm": 1.1250804662704468, | |
| "learning_rate": 8.091812111578812e-05, | |
| "loss": 0.4097, | |
| "step": 7152 | |
| }, | |
| { | |
| "epoch": 9.30739299610895, | |
| "grad_norm": 1.6016291379928589, | |
| "learning_rate": 8.07858935108575e-05, | |
| "loss": 0.4078, | |
| "step": 7176 | |
| }, | |
| { | |
| "epoch": 9.33852140077821, | |
| "grad_norm": 1.8599820137023926, | |
| "learning_rate": 8.065331822351618e-05, | |
| "loss": 0.4029, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 9.369649805447471, | |
| "grad_norm": 1.2994579076766968, | |
| "learning_rate": 8.052039675101143e-05, | |
| "loss": 0.4079, | |
| "step": 7224 | |
| }, | |
| { | |
| "epoch": 9.400778210116732, | |
| "grad_norm": 1.200239896774292, | |
| "learning_rate": 8.038713059450026e-05, | |
| "loss": 0.4017, | |
| "step": 7248 | |
| }, | |
| { | |
| "epoch": 9.431906614785992, | |
| "grad_norm": 3.8246068954467773, | |
| "learning_rate": 8.025352125903227e-05, | |
| "loss": 0.4006, | |
| "step": 7272 | |
| }, | |
| { | |
| "epoch": 9.463035019455253, | |
| "grad_norm": 1.4172035455703735, | |
| "learning_rate": 8.011957025353287e-05, | |
| "loss": 0.4028, | |
| "step": 7296 | |
| }, | |
| { | |
| "epoch": 9.494163424124514, | |
| "grad_norm": 2.0654618740081787, | |
| "learning_rate": 7.998527909078607e-05, | |
| "loss": 0.4014, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 9.525291828793774, | |
| "grad_norm": 1.3547816276550293, | |
| "learning_rate": 7.985064928741754e-05, | |
| "loss": 0.3981, | |
| "step": 7344 | |
| }, | |
| { | |
| "epoch": 9.556420233463035, | |
| "grad_norm": 1.3812025785446167, | |
| "learning_rate": 7.971568236387734e-05, | |
| "loss": 0.406, | |
| "step": 7368 | |
| }, | |
| { | |
| "epoch": 9.587548638132295, | |
| "grad_norm": 1.438240885734558, | |
| "learning_rate": 7.958037984442285e-05, | |
| "loss": 0.4011, | |
| "step": 7392 | |
| }, | |
| { | |
| "epoch": 9.618677042801556, | |
| "grad_norm": 1.7840272188186646, | |
| "learning_rate": 7.944474325710154e-05, | |
| "loss": 0.401, | |
| "step": 7416 | |
| }, | |
| { | |
| "epoch": 9.649805447470817, | |
| "grad_norm": 1.251658320426941, | |
| "learning_rate": 7.930877413373367e-05, | |
| "loss": 0.3969, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 9.680933852140077, | |
| "grad_norm": 2.252761125564575, | |
| "learning_rate": 7.917247400989505e-05, | |
| "loss": 0.4049, | |
| "step": 7464 | |
| }, | |
| { | |
| "epoch": 9.712062256809338, | |
| "grad_norm": 1.476012110710144, | |
| "learning_rate": 7.903584442489958e-05, | |
| "loss": 0.401, | |
| "step": 7488 | |
| }, | |
| { | |
| "epoch": 9.7431906614786, | |
| "grad_norm": 2.692723035812378, | |
| "learning_rate": 7.889888692178207e-05, | |
| "loss": 0.4017, | |
| "step": 7512 | |
| }, | |
| { | |
| "epoch": 9.77431906614786, | |
| "grad_norm": 3.0412638187408447, | |
| "learning_rate": 7.87616030472806e-05, | |
| "loss": 0.4093, | |
| "step": 7536 | |
| }, | |
| { | |
| "epoch": 9.805447470817121, | |
| "grad_norm": 1.527076244354248, | |
| "learning_rate": 7.862399435181917e-05, | |
| "loss": 0.3988, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 9.836575875486382, | |
| "grad_norm": 1.2038588523864746, | |
| "learning_rate": 7.848606238949021e-05, | |
| "loss": 0.4058, | |
| "step": 7584 | |
| }, | |
| { | |
| "epoch": 9.867704280155642, | |
| "grad_norm": 1.9050565958023071, | |
| "learning_rate": 7.834780871803693e-05, | |
| "loss": 0.3943, | |
| "step": 7608 | |
| }, | |
| { | |
| "epoch": 9.898832684824903, | |
| "grad_norm": 1.483185887336731, | |
| "learning_rate": 7.82092348988358e-05, | |
| "loss": 0.3992, | |
| "step": 7632 | |
| }, | |
| { | |
| "epoch": 9.929961089494164, | |
| "grad_norm": 1.5043606758117676, | |
| "learning_rate": 7.80703424968789e-05, | |
| "loss": 0.3989, | |
| "step": 7656 | |
| }, | |
| { | |
| "epoch": 9.961089494163424, | |
| "grad_norm": 1.194094181060791, | |
| "learning_rate": 7.793113308075626e-05, | |
| "loss": 0.4007, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 9.992217898832685, | |
| "grad_norm": 1.5360095500946045, | |
| "learning_rate": 7.77916082226381e-05, | |
| "loss": 0.395, | |
| "step": 7704 | |
| }, | |
| { | |
| "epoch": 10.023346303501945, | |
| "grad_norm": 1.1073459386825562, | |
| "learning_rate": 7.76517694982571e-05, | |
| "loss": 0.3989, | |
| "step": 7728 | |
| }, | |
| { | |
| "epoch": 10.054474708171206, | |
| "grad_norm": 1.4059771299362183, | |
| "learning_rate": 7.751161848689063e-05, | |
| "loss": 0.3964, | |
| "step": 7752 | |
| }, | |
| { | |
| "epoch": 10.085603112840467, | |
| "grad_norm": 1.8619714975357056, | |
| "learning_rate": 7.737115677134294e-05, | |
| "loss": 0.3964, | |
| "step": 7776 | |
| }, | |
| { | |
| "epoch": 10.116731517509727, | |
| "grad_norm": 0.8621863722801208, | |
| "learning_rate": 7.723038593792712e-05, | |
| "loss": 0.4019, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 10.147859922178988, | |
| "grad_norm": 1.542912483215332, | |
| "learning_rate": 7.708930757644739e-05, | |
| "loss": 0.3957, | |
| "step": 7824 | |
| }, | |
| { | |
| "epoch": 10.178988326848248, | |
| "grad_norm": 1.8078597784042358, | |
| "learning_rate": 7.694792328018106e-05, | |
| "loss": 0.3991, | |
| "step": 7848 | |
| }, | |
| { | |
| "epoch": 10.210116731517509, | |
| "grad_norm": 1.4210093021392822, | |
| "learning_rate": 7.680623464586048e-05, | |
| "loss": 0.3925, | |
| "step": 7872 | |
| }, | |
| { | |
| "epoch": 10.24124513618677, | |
| "grad_norm": 1.6985816955566406, | |
| "learning_rate": 7.66642432736551e-05, | |
| "loss": 0.3984, | |
| "step": 7896 | |
| }, | |
| { | |
| "epoch": 10.272373540856032, | |
| "grad_norm": 1.4291504621505737, | |
| "learning_rate": 7.652195076715332e-05, | |
| "loss": 0.4016, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 10.303501945525293, | |
| "grad_norm": 1.3934870958328247, | |
| "learning_rate": 7.637935873334448e-05, | |
| "loss": 0.3992, | |
| "step": 7944 | |
| }, | |
| { | |
| "epoch": 10.334630350194553, | |
| "grad_norm": 1.5841765403747559, | |
| "learning_rate": 7.623646878260062e-05, | |
| "loss": 0.3989, | |
| "step": 7968 | |
| }, | |
| { | |
| "epoch": 10.365758754863814, | |
| "grad_norm": 1.1344020366668701, | |
| "learning_rate": 7.60932825286583e-05, | |
| "loss": 0.3934, | |
| "step": 7992 | |
| }, | |
| { | |
| "epoch": 10.396887159533074, | |
| "grad_norm": 1.1252238750457764, | |
| "learning_rate": 7.594980158860043e-05, | |
| "loss": 0.3947, | |
| "step": 8016 | |
| }, | |
| { | |
| "epoch": 10.428015564202335, | |
| "grad_norm": 1.5455870628356934, | |
| "learning_rate": 7.580602758283796e-05, | |
| "loss": 0.3897, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 10.459143968871595, | |
| "grad_norm": 2.1351683139801025, | |
| "learning_rate": 7.566196213509163e-05, | |
| "loss": 0.3911, | |
| "step": 8064 | |
| }, | |
| { | |
| "epoch": 10.490272373540856, | |
| "grad_norm": 1.9759098291397095, | |
| "learning_rate": 7.551760687237351e-05, | |
| "loss": 0.3973, | |
| "step": 8088 | |
| }, | |
| { | |
| "epoch": 10.521400778210117, | |
| "grad_norm": 1.0132018327713013, | |
| "learning_rate": 7.537296342496884e-05, | |
| "loss": 0.3957, | |
| "step": 8112 | |
| }, | |
| { | |
| "epoch": 10.552529182879377, | |
| "grad_norm": 2.219759464263916, | |
| "learning_rate": 7.522803342641737e-05, | |
| "loss": 0.3887, | |
| "step": 8136 | |
| }, | |
| { | |
| "epoch": 10.583657587548638, | |
| "grad_norm": 2.361774206161499, | |
| "learning_rate": 7.508281851349512e-05, | |
| "loss": 0.3975, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 10.614785992217898, | |
| "grad_norm": 1.4584128856658936, | |
| "learning_rate": 7.493732032619578e-05, | |
| "loss": 0.4, | |
| "step": 8184 | |
| }, | |
| { | |
| "epoch": 10.645914396887159, | |
| "grad_norm": 1.375190019607544, | |
| "learning_rate": 7.47915405077122e-05, | |
| "loss": 0.4021, | |
| "step": 8208 | |
| }, | |
| { | |
| "epoch": 10.67704280155642, | |
| "grad_norm": 1.5501540899276733, | |
| "learning_rate": 7.464548070441785e-05, | |
| "loss": 0.3943, | |
| "step": 8232 | |
| }, | |
| { | |
| "epoch": 10.70817120622568, | |
| "grad_norm": 1.5805977582931519, | |
| "learning_rate": 7.449914256584828e-05, | |
| "loss": 0.3915, | |
| "step": 8256 | |
| }, | |
| { | |
| "epoch": 10.739299610894943, | |
| "grad_norm": 1.0127402544021606, | |
| "learning_rate": 7.435252774468237e-05, | |
| "loss": 0.3899, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 10.770428015564203, | |
| "grad_norm": 1.5114730596542358, | |
| "learning_rate": 7.420563789672375e-05, | |
| "loss": 0.3922, | |
| "step": 8304 | |
| }, | |
| { | |
| "epoch": 10.801556420233464, | |
| "grad_norm": 1.1805211305618286, | |
| "learning_rate": 7.405847468088209e-05, | |
| "loss": 0.3951, | |
| "step": 8328 | |
| }, | |
| { | |
| "epoch": 10.832684824902724, | |
| "grad_norm": 1.1337734460830688, | |
| "learning_rate": 7.391103975915436e-05, | |
| "loss": 0.3954, | |
| "step": 8352 | |
| }, | |
| { | |
| "epoch": 10.863813229571985, | |
| "grad_norm": 1.024134874343872, | |
| "learning_rate": 7.376333479660607e-05, | |
| "loss": 0.3829, | |
| "step": 8376 | |
| }, | |
| { | |
| "epoch": 10.894941634241246, | |
| "grad_norm": 1.2885181903839111, | |
| "learning_rate": 7.361536146135243e-05, | |
| "loss": 0.3904, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 10.926070038910506, | |
| "grad_norm": 1.2240935564041138, | |
| "learning_rate": 7.346712142453954e-05, | |
| "loss": 0.3904, | |
| "step": 8424 | |
| }, | |
| { | |
| "epoch": 10.957198443579767, | |
| "grad_norm": 1.2982319593429565, | |
| "learning_rate": 7.33186163603255e-05, | |
| "loss": 0.3944, | |
| "step": 8448 | |
| }, | |
| { | |
| "epoch": 10.988326848249027, | |
| "grad_norm": 1.0359567403793335, | |
| "learning_rate": 7.316984794586155e-05, | |
| "loss": 0.3989, | |
| "step": 8472 | |
| }, | |
| { | |
| "epoch": 11.019455252918288, | |
| "grad_norm": 2.0623931884765625, | |
| "learning_rate": 7.302081786127304e-05, | |
| "loss": 0.3853, | |
| "step": 8496 | |
| }, | |
| { | |
| "epoch": 11.050583657587548, | |
| "grad_norm": 1.2377070188522339, | |
| "learning_rate": 7.287152778964055e-05, | |
| "loss": 0.3913, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 11.081712062256809, | |
| "grad_norm": 1.016614556312561, | |
| "learning_rate": 7.272197941698084e-05, | |
| "loss": 0.3882, | |
| "step": 8544 | |
| }, | |
| { | |
| "epoch": 11.11284046692607, | |
| "grad_norm": 1.5649337768554688, | |
| "learning_rate": 7.257217443222777e-05, | |
| "loss": 0.378, | |
| "step": 8568 | |
| }, | |
| { | |
| "epoch": 11.14396887159533, | |
| "grad_norm": 1.4619653224945068, | |
| "learning_rate": 7.242211452721331e-05, | |
| "loss": 0.3874, | |
| "step": 8592 | |
| }, | |
| { | |
| "epoch": 11.17509727626459, | |
| "grad_norm": 1.6870439052581787, | |
| "learning_rate": 7.227180139664836e-05, | |
| "loss": 0.3867, | |
| "step": 8616 | |
| }, | |
| { | |
| "epoch": 11.206225680933851, | |
| "grad_norm": 1.0460180044174194, | |
| "learning_rate": 7.212123673810363e-05, | |
| "loss": 0.394, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 11.237354085603112, | |
| "grad_norm": 1.0444591045379639, | |
| "learning_rate": 7.19704222519905e-05, | |
| "loss": 0.3877, | |
| "step": 8664 | |
| }, | |
| { | |
| "epoch": 11.268482490272374, | |
| "grad_norm": 1.3924522399902344, | |
| "learning_rate": 7.181935964154182e-05, | |
| "loss": 0.3836, | |
| "step": 8688 | |
| }, | |
| { | |
| "epoch": 11.299610894941635, | |
| "grad_norm": 2.0957131385803223, | |
| "learning_rate": 7.166805061279257e-05, | |
| "loss": 0.3879, | |
| "step": 8712 | |
| }, | |
| { | |
| "epoch": 11.330739299610896, | |
| "grad_norm": 1.5147196054458618, | |
| "learning_rate": 7.151649687456074e-05, | |
| "loss": 0.3888, | |
| "step": 8736 | |
| }, | |
| { | |
| "epoch": 11.361867704280156, | |
| "grad_norm": 1.5958192348480225, | |
| "learning_rate": 7.136470013842791e-05, | |
| "loss": 0.3883, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 11.392996108949417, | |
| "grad_norm": 1.494354248046875, | |
| "learning_rate": 7.121266211872004e-05, | |
| "loss": 0.3847, | |
| "step": 8784 | |
| }, | |
| { | |
| "epoch": 11.424124513618677, | |
| "grad_norm": 1.3116648197174072, | |
| "learning_rate": 7.106038453248794e-05, | |
| "loss": 0.3913, | |
| "step": 8808 | |
| }, | |
| { | |
| "epoch": 11.455252918287938, | |
| "grad_norm": 2.947636842727661, | |
| "learning_rate": 7.090786909948809e-05, | |
| "loss": 0.3837, | |
| "step": 8832 | |
| }, | |
| { | |
| "epoch": 11.486381322957198, | |
| "grad_norm": 1.8480781316757202, | |
| "learning_rate": 7.075511754216304e-05, | |
| "loss": 0.3816, | |
| "step": 8856 | |
| }, | |
| { | |
| "epoch": 11.517509727626459, | |
| "grad_norm": 1.5083237886428833, | |
| "learning_rate": 7.060213158562205e-05, | |
| "loss": 0.3856, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 11.54863813229572, | |
| "grad_norm": 1.2127504348754883, | |
| "learning_rate": 7.044891295762154e-05, | |
| "loss": 0.3861, | |
| "step": 8904 | |
| }, | |
| { | |
| "epoch": 11.57976653696498, | |
| "grad_norm": 1.0090476274490356, | |
| "learning_rate": 7.029546338854569e-05, | |
| "loss": 0.3894, | |
| "step": 8928 | |
| }, | |
| { | |
| "epoch": 11.61089494163424, | |
| "grad_norm": 0.9990460872650146, | |
| "learning_rate": 7.014178461138676e-05, | |
| "loss": 0.388, | |
| "step": 8952 | |
| }, | |
| { | |
| "epoch": 11.642023346303501, | |
| "grad_norm": 1.7229726314544678, | |
| "learning_rate": 6.998787836172564e-05, | |
| "loss": 0.3883, | |
| "step": 8976 | |
| }, | |
| { | |
| "epoch": 11.673151750972762, | |
| "grad_norm": 1.0046260356903076, | |
| "learning_rate": 6.983374637771217e-05, | |
| "loss": 0.3853, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 11.704280155642023, | |
| "grad_norm": 1.4152393341064453, | |
| "learning_rate": 6.967939040004551e-05, | |
| "loss": 0.3829, | |
| "step": 9024 | |
| }, | |
| { | |
| "epoch": 11.735408560311285, | |
| "grad_norm": 1.2723467350006104, | |
| "learning_rate": 6.952481217195456e-05, | |
| "loss": 0.3879, | |
| "step": 9048 | |
| }, | |
| { | |
| "epoch": 11.766536964980546, | |
| "grad_norm": 1.7674216032028198, | |
| "learning_rate": 6.937001343917818e-05, | |
| "loss": 0.3909, | |
| "step": 9072 | |
| }, | |
| { | |
| "epoch": 11.797665369649806, | |
| "grad_norm": 1.4604827165603638, | |
| "learning_rate": 6.92149959499455e-05, | |
| "loss": 0.3878, | |
| "step": 9096 | |
| }, | |
| { | |
| "epoch": 11.828793774319067, | |
| "grad_norm": 1.5532753467559814, | |
| "learning_rate": 6.905976145495628e-05, | |
| "loss": 0.3884, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 11.859922178988327, | |
| "grad_norm": 1.1423866748809814, | |
| "learning_rate": 6.890431170736091e-05, | |
| "loss": 0.3861, | |
| "step": 9144 | |
| }, | |
| { | |
| "epoch": 11.891050583657588, | |
| "grad_norm": 1.350380778312683, | |
| "learning_rate": 6.874864846274087e-05, | |
| "loss": 0.3813, | |
| "step": 9168 | |
| }, | |
| { | |
| "epoch": 11.922178988326849, | |
| "grad_norm": 1.2758312225341797, | |
| "learning_rate": 6.85927734790887e-05, | |
| "loss": 0.3877, | |
| "step": 9192 | |
| }, | |
| { | |
| "epoch": 11.95330739299611, | |
| "grad_norm": 1.970986247062683, | |
| "learning_rate": 6.843668851678831e-05, | |
| "loss": 0.3828, | |
| "step": 9216 | |
| }, | |
| { | |
| "epoch": 11.98443579766537, | |
| "grad_norm": 1.340889811515808, | |
| "learning_rate": 6.828039533859489e-05, | |
| "loss": 0.3875, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 12.01556420233463, | |
| "grad_norm": 1.2335118055343628, | |
| "learning_rate": 6.812389570961525e-05, | |
| "loss": 0.3809, | |
| "step": 9264 | |
| }, | |
| { | |
| "epoch": 12.04669260700389, | |
| "grad_norm": 1.2043426036834717, | |
| "learning_rate": 6.796719139728777e-05, | |
| "loss": 0.3835, | |
| "step": 9288 | |
| }, | |
| { | |
| "epoch": 12.077821011673151, | |
| "grad_norm": 1.197809100151062, | |
| "learning_rate": 6.781028417136231e-05, | |
| "loss": 0.3792, | |
| "step": 9312 | |
| }, | |
| { | |
| "epoch": 12.108949416342412, | |
| "grad_norm": 1.2524584531784058, | |
| "learning_rate": 6.765317580388046e-05, | |
| "loss": 0.3842, | |
| "step": 9336 | |
| }, | |
| { | |
| "epoch": 12.140077821011673, | |
| "grad_norm": 1.082410454750061, | |
| "learning_rate": 6.749586806915535e-05, | |
| "loss": 0.3827, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 12.171206225680933, | |
| "grad_norm": 1.2853772640228271, | |
| "learning_rate": 6.733836274375176e-05, | |
| "loss": 0.3755, | |
| "step": 9384 | |
| }, | |
| { | |
| "epoch": 12.202334630350194, | |
| "grad_norm": 1.6849515438079834, | |
| "learning_rate": 6.718066160646585e-05, | |
| "loss": 0.38, | |
| "step": 9408 | |
| }, | |
| { | |
| "epoch": 12.233463035019454, | |
| "grad_norm": 2.0715172290802, | |
| "learning_rate": 6.702276643830531e-05, | |
| "loss": 0.3799, | |
| "step": 9432 | |
| }, | |
| { | |
| "epoch": 12.264591439688717, | |
| "grad_norm": 1.7511128187179565, | |
| "learning_rate": 6.686467902246909e-05, | |
| "loss": 0.3752, | |
| "step": 9456 | |
| }, | |
| { | |
| "epoch": 12.295719844357977, | |
| "grad_norm": 1.1407638788223267, | |
| "learning_rate": 6.670640114432724e-05, | |
| "loss": 0.3834, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 12.326848249027238, | |
| "grad_norm": 1.0695194005966187, | |
| "learning_rate": 6.654793459140089e-05, | |
| "loss": 0.3835, | |
| "step": 9504 | |
| }, | |
| { | |
| "epoch": 12.357976653696499, | |
| "grad_norm": 1.285834789276123, | |
| "learning_rate": 6.638928115334196e-05, | |
| "loss": 0.3904, | |
| "step": 9528 | |
| }, | |
| { | |
| "epoch": 12.38910505836576, | |
| "grad_norm": 1.508699893951416, | |
| "learning_rate": 6.623044262191293e-05, | |
| "loss": 0.3964, | |
| "step": 9552 | |
| }, | |
| { | |
| "epoch": 12.42023346303502, | |
| "grad_norm": 1.287642002105713, | |
| "learning_rate": 6.607142079096668e-05, | |
| "loss": 0.3819, | |
| "step": 9576 | |
| }, | |
| { | |
| "epoch": 12.45136186770428, | |
| "grad_norm": 2.893951892852783, | |
| "learning_rate": 6.591221745642621e-05, | |
| "loss": 0.3805, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 12.482490272373541, | |
| "grad_norm": 1.4402974843978882, | |
| "learning_rate": 6.575283441626433e-05, | |
| "loss": 0.376, | |
| "step": 9624 | |
| }, | |
| { | |
| "epoch": 12.513618677042802, | |
| "grad_norm": 1.156258225440979, | |
| "learning_rate": 6.559327347048331e-05, | |
| "loss": 0.3778, | |
| "step": 9648 | |
| }, | |
| { | |
| "epoch": 12.544747081712062, | |
| "grad_norm": 1.5183446407318115, | |
| "learning_rate": 6.543353642109469e-05, | |
| "loss": 0.382, | |
| "step": 9672 | |
| }, | |
| { | |
| "epoch": 12.575875486381323, | |
| "grad_norm": 1.611879825592041, | |
| "learning_rate": 6.527362507209879e-05, | |
| "loss": 0.3791, | |
| "step": 9696 | |
| }, | |
| { | |
| "epoch": 12.607003891050583, | |
| "grad_norm": 1.3625446557998657, | |
| "learning_rate": 6.511354122946443e-05, | |
| "loss": 0.379, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 12.638132295719844, | |
| "grad_norm": 1.2298206090927124, | |
| "learning_rate": 6.495328670110848e-05, | |
| "loss": 0.3773, | |
| "step": 9744 | |
| }, | |
| { | |
| "epoch": 12.669260700389104, | |
| "grad_norm": 1.0427093505859375, | |
| "learning_rate": 6.479286329687543e-05, | |
| "loss": 0.3752, | |
| "step": 9768 | |
| }, | |
| { | |
| "epoch": 12.700389105058365, | |
| "grad_norm": 1.6555167436599731, | |
| "learning_rate": 6.463227282851708e-05, | |
| "loss": 0.3771, | |
| "step": 9792 | |
| }, | |
| { | |
| "epoch": 12.731517509727626, | |
| "grad_norm": 1.3086024522781372, | |
| "learning_rate": 6.447151710967187e-05, | |
| "loss": 0.377, | |
| "step": 9816 | |
| }, | |
| { | |
| "epoch": 12.762645914396888, | |
| "grad_norm": 1.3003504276275635, | |
| "learning_rate": 6.431059795584453e-05, | |
| "loss": 0.3812, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 12.793774319066149, | |
| "grad_norm": 1.4847590923309326, | |
| "learning_rate": 6.414951718438561e-05, | |
| "loss": 0.3778, | |
| "step": 9864 | |
| }, | |
| { | |
| "epoch": 12.82490272373541, | |
| "grad_norm": 1.3426965475082397, | |
| "learning_rate": 6.398827661447084e-05, | |
| "loss": 0.3794, | |
| "step": 9888 | |
| }, | |
| { | |
| "epoch": 12.85603112840467, | |
| "grad_norm": 1.2530086040496826, | |
| "learning_rate": 6.382687806708067e-05, | |
| "loss": 0.3728, | |
| "step": 9912 | |
| }, | |
| { | |
| "epoch": 12.88715953307393, | |
| "grad_norm": 1.8029588460922241, | |
| "learning_rate": 6.366532336497968e-05, | |
| "loss": 0.3795, | |
| "step": 9936 | |
| }, | |
| { | |
| "epoch": 12.918287937743191, | |
| "grad_norm": 1.9585580825805664, | |
| "learning_rate": 6.350361433269599e-05, | |
| "loss": 0.3769, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 12.949416342412452, | |
| "grad_norm": 1.7418956756591797, | |
| "learning_rate": 6.334175279650062e-05, | |
| "loss": 0.3778, | |
| "step": 9984 | |
| }, | |
| { | |
| "epoch": 12.980544747081712, | |
| "grad_norm": 1.6264042854309082, | |
| "learning_rate": 6.317974058438697e-05, | |
| "loss": 0.3821, | |
| "step": 10008 | |
| }, | |
| { | |
| "epoch": 13.011673151750973, | |
| "grad_norm": 0.9489176869392395, | |
| "learning_rate": 6.301757952605007e-05, | |
| "loss": 0.374, | |
| "step": 10032 | |
| }, | |
| { | |
| "epoch": 13.042801556420233, | |
| "grad_norm": 2.183706045150757, | |
| "learning_rate": 6.285527145286594e-05, | |
| "loss": 0.3736, | |
| "step": 10056 | |
| }, | |
| { | |
| "epoch": 13.073929961089494, | |
| "grad_norm": 1.3998112678527832, | |
| "learning_rate": 6.269281819787095e-05, | |
| "loss": 0.3726, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 13.105058365758754, | |
| "grad_norm": 1.5030006170272827, | |
| "learning_rate": 6.253022159574108e-05, | |
| "loss": 0.3741, | |
| "step": 10104 | |
| }, | |
| { | |
| "epoch": 13.136186770428015, | |
| "grad_norm": 2.579502820968628, | |
| "learning_rate": 6.23674834827712e-05, | |
| "loss": 0.373, | |
| "step": 10128 | |
| }, | |
| { | |
| "epoch": 13.167315175097276, | |
| "grad_norm": 1.5349212884902954, | |
| "learning_rate": 6.220460569685437e-05, | |
| "loss": 0.3739, | |
| "step": 10152 | |
| }, | |
| { | |
| "epoch": 13.198443579766536, | |
| "grad_norm": 1.6323474645614624, | |
| "learning_rate": 6.204159007746103e-05, | |
| "loss": 0.3729, | |
| "step": 10176 | |
| }, | |
| { | |
| "epoch": 13.229571984435797, | |
| "grad_norm": 1.1729427576065063, | |
| "learning_rate": 6.187843846561824e-05, | |
| "loss": 0.3759, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 13.26070038910506, | |
| "grad_norm": 2.276395320892334, | |
| "learning_rate": 6.171515270388892e-05, | |
| "loss": 0.3657, | |
| "step": 10224 | |
| }, | |
| { | |
| "epoch": 13.29182879377432, | |
| "grad_norm": 0.9925207495689392, | |
| "learning_rate": 6.155173463635103e-05, | |
| "loss": 0.3724, | |
| "step": 10248 | |
| }, | |
| { | |
| "epoch": 13.32295719844358, | |
| "grad_norm": 0.9079545140266418, | |
| "learning_rate": 6.13881861085767e-05, | |
| "loss": 0.3675, | |
| "step": 10272 | |
| }, | |
| { | |
| "epoch": 13.354085603112841, | |
| "grad_norm": 2.5486135482788086, | |
| "learning_rate": 6.122450896761147e-05, | |
| "loss": 0.3684, | |
| "step": 10296 | |
| }, | |
| { | |
| "epoch": 13.385214007782102, | |
| "grad_norm": 1.5650309324264526, | |
| "learning_rate": 6.106070506195332e-05, | |
| "loss": 0.3765, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 13.416342412451362, | |
| "grad_norm": 0.9130122065544128, | |
| "learning_rate": 6.0896776241531916e-05, | |
| "loss": 0.3788, | |
| "step": 10344 | |
| }, | |
| { | |
| "epoch": 13.447470817120623, | |
| "grad_norm": 1.1227184534072876, | |
| "learning_rate": 6.073272435768761e-05, | |
| "loss": 0.3717, | |
| "step": 10368 | |
| }, | |
| { | |
| "epoch": 13.478599221789883, | |
| "grad_norm": 2.312488079071045, | |
| "learning_rate": 6.0568551263150606e-05, | |
| "loss": 0.3775, | |
| "step": 10392 | |
| }, | |
| { | |
| "epoch": 13.509727626459144, | |
| "grad_norm": 1.1797654628753662, | |
| "learning_rate": 6.040425881201998e-05, | |
| "loss": 0.3721, | |
| "step": 10416 | |
| }, | |
| { | |
| "epoch": 13.540856031128405, | |
| "grad_norm": 3.0446395874023438, | |
| "learning_rate": 6.0239848859742795e-05, | |
| "loss": 0.3698, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 13.571984435797665, | |
| "grad_norm": 1.0386089086532593, | |
| "learning_rate": 6.007532326309313e-05, | |
| "loss": 0.3724, | |
| "step": 10464 | |
| }, | |
| { | |
| "epoch": 13.603112840466926, | |
| "grad_norm": 1.4335585832595825, | |
| "learning_rate": 5.9910683880151064e-05, | |
| "loss": 0.3749, | |
| "step": 10488 | |
| }, | |
| { | |
| "epoch": 13.634241245136186, | |
| "grad_norm": 1.4243568181991577, | |
| "learning_rate": 5.974593257028176e-05, | |
| "loss": 0.3714, | |
| "step": 10512 | |
| }, | |
| { | |
| "epoch": 13.665369649805447, | |
| "grad_norm": 1.3887135982513428, | |
| "learning_rate": 5.958107119411441e-05, | |
| "loss": 0.3763, | |
| "step": 10536 | |
| }, | |
| { | |
| "epoch": 13.696498054474707, | |
| "grad_norm": 1.4939093589782715, | |
| "learning_rate": 5.941610161352128e-05, | |
| "loss": 0.3689, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 13.727626459143968, | |
| "grad_norm": 1.3950523138046265, | |
| "learning_rate": 5.925102569159661e-05, | |
| "loss": 0.3721, | |
| "step": 10584 | |
| }, | |
| { | |
| "epoch": 13.75875486381323, | |
| "grad_norm": 1.5457286834716797, | |
| "learning_rate": 5.9085845292635645e-05, | |
| "loss": 0.3736, | |
| "step": 10608 | |
| }, | |
| { | |
| "epoch": 13.789883268482491, | |
| "grad_norm": 1.7134722471237183, | |
| "learning_rate": 5.8920562282113534e-05, | |
| "loss": 0.3705, | |
| "step": 10632 | |
| }, | |
| { | |
| "epoch": 13.821011673151752, | |
| "grad_norm": 1.9264869689941406, | |
| "learning_rate": 5.875517852666428e-05, | |
| "loss": 0.3731, | |
| "step": 10656 | |
| }, | |
| { | |
| "epoch": 13.852140077821012, | |
| "grad_norm": 1.9957599639892578, | |
| "learning_rate": 5.8589695894059626e-05, | |
| "loss": 0.3727, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 13.883268482490273, | |
| "grad_norm": 1.0721269845962524, | |
| "learning_rate": 5.842411625318805e-05, | |
| "loss": 0.3717, | |
| "step": 10704 | |
| }, | |
| { | |
| "epoch": 13.914396887159533, | |
| "grad_norm": 1.339650273323059, | |
| "learning_rate": 5.825844147403353e-05, | |
| "loss": 0.3781, | |
| "step": 10728 | |
| }, | |
| { | |
| "epoch": 13.945525291828794, | |
| "grad_norm": 1.0256425142288208, | |
| "learning_rate": 5.809267342765456e-05, | |
| "loss": 0.3743, | |
| "step": 10752 | |
| }, | |
| { | |
| "epoch": 13.976653696498055, | |
| "grad_norm": 1.1623256206512451, | |
| "learning_rate": 5.792681398616293e-05, | |
| "loss": 0.372, | |
| "step": 10776 | |
| }, | |
| { | |
| "epoch": 14.007782101167315, | |
| "grad_norm": 2.1772332191467285, | |
| "learning_rate": 5.776086502270258e-05, | |
| "loss": 0.3768, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 14.038910505836576, | |
| "grad_norm": 1.4126263856887817, | |
| "learning_rate": 5.759482841142848e-05, | |
| "loss": 0.3689, | |
| "step": 10824 | |
| }, | |
| { | |
| "epoch": 14.070038910505836, | |
| "grad_norm": 1.1903387308120728, | |
| "learning_rate": 5.742870602748547e-05, | |
| "loss": 0.3667, | |
| "step": 10848 | |
| }, | |
| { | |
| "epoch": 14.101167315175097, | |
| "grad_norm": 1.1915792226791382, | |
| "learning_rate": 5.7262499746987094e-05, | |
| "loss": 0.372, | |
| "step": 10872 | |
| }, | |
| { | |
| "epoch": 14.132295719844358, | |
| "grad_norm": 1.3118023872375488, | |
| "learning_rate": 5.7096211446994344e-05, | |
| "loss": 0.3673, | |
| "step": 10896 | |
| }, | |
| { | |
| "epoch": 14.163424124513618, | |
| "grad_norm": 1.0034823417663574, | |
| "learning_rate": 5.692984300549451e-05, | |
| "loss": 0.3743, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 14.194552529182879, | |
| "grad_norm": 1.1173166036605835, | |
| "learning_rate": 5.6763396301379976e-05, | |
| "loss": 0.3722, | |
| "step": 10944 | |
| }, | |
| { | |
| "epoch": 14.22568093385214, | |
| "grad_norm": 1.1479343175888062, | |
| "learning_rate": 5.659687321442701e-05, | |
| "loss": 0.3691, | |
| "step": 10968 | |
| }, | |
| { | |
| "epoch": 14.2568093385214, | |
| "grad_norm": 1.3507132530212402, | |
| "learning_rate": 5.6430275625274456e-05, | |
| "loss": 0.3655, | |
| "step": 10992 | |
| }, | |
| { | |
| "epoch": 14.287937743190662, | |
| "grad_norm": 1.1012446880340576, | |
| "learning_rate": 5.626360541540261e-05, | |
| "loss": 0.366, | |
| "step": 11016 | |
| }, | |
| { | |
| "epoch": 14.319066147859923, | |
| "grad_norm": 1.2122224569320679, | |
| "learning_rate": 5.609686446711191e-05, | |
| "loss": 0.3608, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 14.350194552529183, | |
| "grad_norm": 0.9675916433334351, | |
| "learning_rate": 5.593005466350164e-05, | |
| "loss": 0.3677, | |
| "step": 11064 | |
| }, | |
| { | |
| "epoch": 14.381322957198444, | |
| "grad_norm": 1.0538902282714844, | |
| "learning_rate": 5.576317788844875e-05, | |
| "loss": 0.369, | |
| "step": 11088 | |
| }, | |
| { | |
| "epoch": 14.412451361867705, | |
| "grad_norm": 2.077829122543335, | |
| "learning_rate": 5.55962360265865e-05, | |
| "loss": 0.3642, | |
| "step": 11112 | |
| }, | |
| { | |
| "epoch": 14.443579766536965, | |
| "grad_norm": 1.2885998487472534, | |
| "learning_rate": 5.542923096328325e-05, | |
| "loss": 0.3685, | |
| "step": 11136 | |
| }, | |
| { | |
| "epoch": 14.474708171206226, | |
| "grad_norm": 2.953463077545166, | |
| "learning_rate": 5.526216458462111e-05, | |
| "loss": 0.3683, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 14.505836575875486, | |
| "grad_norm": 1.336449384689331, | |
| "learning_rate": 5.509503877737465e-05, | |
| "loss": 0.3627, | |
| "step": 11184 | |
| }, | |
| { | |
| "epoch": 14.536964980544747, | |
| "grad_norm": 4.623841762542725, | |
| "learning_rate": 5.4927855428989624e-05, | |
| "loss": 0.3738, | |
| "step": 11208 | |
| }, | |
| { | |
| "epoch": 14.568093385214008, | |
| "grad_norm": 1.4652122259140015, | |
| "learning_rate": 5.476061642756161e-05, | |
| "loss": 0.3722, | |
| "step": 11232 | |
| }, | |
| { | |
| "epoch": 14.599221789883268, | |
| "grad_norm": 1.3524249792099, | |
| "learning_rate": 5.4593323661814686e-05, | |
| "loss": 0.3586, | |
| "step": 11256 | |
| }, | |
| { | |
| "epoch": 14.630350194552529, | |
| "grad_norm": 1.833708643913269, | |
| "learning_rate": 5.442597902108019e-05, | |
| "loss": 0.3568, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 14.66147859922179, | |
| "grad_norm": 1.4893455505371094, | |
| "learning_rate": 5.425858439527525e-05, | |
| "loss": 0.3698, | |
| "step": 11304 | |
| }, | |
| { | |
| "epoch": 14.69260700389105, | |
| "grad_norm": 1.7463867664337158, | |
| "learning_rate": 5.409114167488152e-05, | |
| "loss": 0.3726, | |
| "step": 11328 | |
| }, | |
| { | |
| "epoch": 14.72373540856031, | |
| "grad_norm": 1.5364842414855957, | |
| "learning_rate": 5.392365275092383e-05, | |
| "loss": 0.3656, | |
| "step": 11352 | |
| }, | |
| { | |
| "epoch": 14.754863813229573, | |
| "grad_norm": 1.4161092042922974, | |
| "learning_rate": 5.37561195149488e-05, | |
| "loss": 0.3636, | |
| "step": 11376 | |
| }, | |
| { | |
| "epoch": 14.785992217898833, | |
| "grad_norm": 1.125667691230774, | |
| "learning_rate": 5.358854385900348e-05, | |
| "loss": 0.3636, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 14.817120622568094, | |
| "grad_norm": 1.9482998847961426, | |
| "learning_rate": 5.342092767561402e-05, | |
| "loss": 0.3646, | |
| "step": 11424 | |
| }, | |
| { | |
| "epoch": 14.848249027237355, | |
| "grad_norm": 1.8707369565963745, | |
| "learning_rate": 5.325327285776425e-05, | |
| "loss": 0.3657, | |
| "step": 11448 | |
| }, | |
| { | |
| "epoch": 14.879377431906615, | |
| "grad_norm": 1.7567267417907715, | |
| "learning_rate": 5.308558129887431e-05, | |
| "loss": 0.3628, | |
| "step": 11472 | |
| }, | |
| { | |
| "epoch": 14.910505836575876, | |
| "grad_norm": 1.5714308023452759, | |
| "learning_rate": 5.2917854892779304e-05, | |
| "loss": 0.3667, | |
| "step": 11496 | |
| }, | |
| { | |
| "epoch": 14.941634241245136, | |
| "grad_norm": 2.1905322074890137, | |
| "learning_rate": 5.275009553370788e-05, | |
| "loss": 0.371, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 14.972762645914397, | |
| "grad_norm": 2.8119211196899414, | |
| "learning_rate": 5.2582305116260835e-05, | |
| "loss": 0.3704, | |
| "step": 11544 | |
| }, | |
| { | |
| "epoch": 15.003891050583658, | |
| "grad_norm": 1.1872552633285522, | |
| "learning_rate": 5.241448553538968e-05, | |
| "loss": 0.3755, | |
| "step": 11568 | |
| }, | |
| { | |
| "epoch": 15.035019455252918, | |
| "grad_norm": 1.4244314432144165, | |
| "learning_rate": 5.224663868637538e-05, | |
| "loss": 0.3599, | |
| "step": 11592 | |
| }, | |
| { | |
| "epoch": 15.066147859922179, | |
| "grad_norm": 1.2808740139007568, | |
| "learning_rate": 5.2078766464806796e-05, | |
| "loss": 0.3683, | |
| "step": 11616 | |
| }, | |
| { | |
| "epoch": 15.09727626459144, | |
| "grad_norm": 1.0528135299682617, | |
| "learning_rate": 5.191087076655935e-05, | |
| "loss": 0.3598, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 15.1284046692607, | |
| "grad_norm": 1.8377207517623901, | |
| "learning_rate": 5.174295348777357e-05, | |
| "loss": 0.3553, | |
| "step": 11664 | |
| }, | |
| { | |
| "epoch": 15.15953307392996, | |
| "grad_norm": 1.7853907346725464, | |
| "learning_rate": 5.1575016524833754e-05, | |
| "loss": 0.3614, | |
| "step": 11688 | |
| }, | |
| { | |
| "epoch": 15.190661478599221, | |
| "grad_norm": 1.7978260517120361, | |
| "learning_rate": 5.140706177434645e-05, | |
| "loss": 0.3608, | |
| "step": 11712 | |
| }, | |
| { | |
| "epoch": 15.221789883268482, | |
| "grad_norm": 1.1315481662750244, | |
| "learning_rate": 5.123909113311915e-05, | |
| "loss": 0.3635, | |
| "step": 11736 | |
| }, | |
| { | |
| "epoch": 15.252918287937742, | |
| "grad_norm": 1.6177383661270142, | |
| "learning_rate": 5.1071106498138764e-05, | |
| "loss": 0.3624, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 15.284046692607005, | |
| "grad_norm": 1.2278454303741455, | |
| "learning_rate": 5.0903109766550264e-05, | |
| "loss": 0.3658, | |
| "step": 11784 | |
| }, | |
| { | |
| "epoch": 15.315175097276265, | |
| "grad_norm": 1.3733409643173218, | |
| "learning_rate": 5.073510283563523e-05, | |
| "loss": 0.3612, | |
| "step": 11808 | |
| }, | |
| { | |
| "epoch": 15.346303501945526, | |
| "grad_norm": 1.3404691219329834, | |
| "learning_rate": 5.05670876027904e-05, | |
| "loss": 0.3629, | |
| "step": 11832 | |
| }, | |
| { | |
| "epoch": 15.377431906614786, | |
| "grad_norm": 1.2201738357543945, | |
| "learning_rate": 5.039906596550633e-05, | |
| "loss": 0.3666, | |
| "step": 11856 | |
| }, | |
| { | |
| "epoch": 15.408560311284047, | |
| "grad_norm": 2.0148181915283203, | |
| "learning_rate": 5.023103982134586e-05, | |
| "loss": 0.3665, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 15.439688715953308, | |
| "grad_norm": 1.249961256980896, | |
| "learning_rate": 5.006301106792274e-05, | |
| "loss": 0.3647, | |
| "step": 11904 | |
| }, | |
| { | |
| "epoch": 15.470817120622568, | |
| "grad_norm": 1.5822800397872925, | |
| "learning_rate": 4.989498160288019e-05, | |
| "loss": 0.3659, | |
| "step": 11928 | |
| }, | |
| { | |
| "epoch": 15.501945525291829, | |
| "grad_norm": 1.1686407327651978, | |
| "learning_rate": 4.9726953323869456e-05, | |
| "loss": 0.363, | |
| "step": 11952 | |
| }, | |
| { | |
| "epoch": 15.53307392996109, | |
| "grad_norm": 1.8801552057266235, | |
| "learning_rate": 4.9558928128528414e-05, | |
| "loss": 0.3623, | |
| "step": 11976 | |
| }, | |
| { | |
| "epoch": 15.56420233463035, | |
| "grad_norm": 1.2335692644119263, | |
| "learning_rate": 4.9390907914460105e-05, | |
| "loss": 0.3664, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 15.59533073929961, | |
| "grad_norm": 1.496955156326294, | |
| "learning_rate": 4.9222894579211276e-05, | |
| "loss": 0.3644, | |
| "step": 12024 | |
| }, | |
| { | |
| "epoch": 15.626459143968871, | |
| "grad_norm": 1.6293377876281738, | |
| "learning_rate": 4.905489002025106e-05, | |
| "loss": 0.3605, | |
| "step": 12048 | |
| }, | |
| { | |
| "epoch": 15.657587548638132, | |
| "grad_norm": 1.2555320262908936, | |
| "learning_rate": 4.8886896134949415e-05, | |
| "loss": 0.3594, | |
| "step": 12072 | |
| }, | |
| { | |
| "epoch": 15.688715953307392, | |
| "grad_norm": 1.2741057872772217, | |
| "learning_rate": 4.871891482055575e-05, | |
| "loss": 0.3622, | |
| "step": 12096 | |
| }, | |
| { | |
| "epoch": 15.719844357976653, | |
| "grad_norm": 2.100410223007202, | |
| "learning_rate": 4.855094797417758e-05, | |
| "loss": 0.3612, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 15.750972762645915, | |
| "grad_norm": 0.88619464635849, | |
| "learning_rate": 4.8382997492758936e-05, | |
| "loss": 0.3589, | |
| "step": 12144 | |
| }, | |
| { | |
| "epoch": 15.782101167315176, | |
| "grad_norm": 1.5951071977615356, | |
| "learning_rate": 4.8215065273059085e-05, | |
| "loss": 0.3613, | |
| "step": 12168 | |
| }, | |
| { | |
| "epoch": 15.813229571984436, | |
| "grad_norm": 1.1034135818481445, | |
| "learning_rate": 4.8047153211631e-05, | |
| "loss": 0.3609, | |
| "step": 12192 | |
| }, | |
| { | |
| "epoch": 15.844357976653697, | |
| "grad_norm": 1.9069421291351318, | |
| "learning_rate": 4.787926320480009e-05, | |
| "loss": 0.3617, | |
| "step": 12216 | |
| }, | |
| { | |
| "epoch": 15.875486381322958, | |
| "grad_norm": 2.139292001724243, | |
| "learning_rate": 4.7711397148642583e-05, | |
| "loss": 0.3582, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 15.906614785992218, | |
| "grad_norm": 1.134293556213379, | |
| "learning_rate": 4.7543556938964275e-05, | |
| "loss": 0.361, | |
| "step": 12264 | |
| }, | |
| { | |
| "epoch": 15.937743190661479, | |
| "grad_norm": 1.2520484924316406, | |
| "learning_rate": 4.7375744471279084e-05, | |
| "loss": 0.3613, | |
| "step": 12288 | |
| }, | |
| { | |
| "epoch": 15.96887159533074, | |
| "grad_norm": 1.2001314163208008, | |
| "learning_rate": 4.720796164078755e-05, | |
| "loss": 0.363, | |
| "step": 12312 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 1.0038580894470215, | |
| "learning_rate": 4.7040210342355584e-05, | |
| "loss": 0.3566, | |
| "step": 12336 | |
| }, | |
| { | |
| "epoch": 16.03112840466926, | |
| "grad_norm": 1.0586698055267334, | |
| "learning_rate": 4.6872492470492914e-05, | |
| "loss": 0.3554, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 16.06225680933852, | |
| "grad_norm": 1.4238923788070679, | |
| "learning_rate": 4.670480991933182e-05, | |
| "loss": 0.3598, | |
| "step": 12384 | |
| }, | |
| { | |
| "epoch": 16.09338521400778, | |
| "grad_norm": 1.7448209524154663, | |
| "learning_rate": 4.6537164582605674e-05, | |
| "loss": 0.3523, | |
| "step": 12408 | |
| }, | |
| { | |
| "epoch": 16.124513618677042, | |
| "grad_norm": 0.9236373901367188, | |
| "learning_rate": 4.6369558353627517e-05, | |
| "loss": 0.3556, | |
| "step": 12432 | |
| }, | |
| { | |
| "epoch": 16.155642023346303, | |
| "grad_norm": 1.2013592720031738, | |
| "learning_rate": 4.6201993125268804e-05, | |
| "loss": 0.352, | |
| "step": 12456 | |
| }, | |
| { | |
| "epoch": 16.186770428015564, | |
| "grad_norm": 1.267756700515747, | |
| "learning_rate": 4.603447078993788e-05, | |
| "loss": 0.3578, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 16.217898832684824, | |
| "grad_norm": 1.0369305610656738, | |
| "learning_rate": 4.586699323955871e-05, | |
| "loss": 0.3476, | |
| "step": 12504 | |
| }, | |
| { | |
| "epoch": 16.249027237354085, | |
| "grad_norm": 1.4075908660888672, | |
| "learning_rate": 4.569956236554945e-05, | |
| "loss": 0.3544, | |
| "step": 12528 | |
| }, | |
| { | |
| "epoch": 16.280155642023345, | |
| "grad_norm": 1.3998584747314453, | |
| "learning_rate": 4.5532180058801145e-05, | |
| "loss": 0.3596, | |
| "step": 12552 | |
| }, | |
| { | |
| "epoch": 16.311284046692606, | |
| "grad_norm": 1.5231702327728271, | |
| "learning_rate": 4.5364848209656336e-05, | |
| "loss": 0.3542, | |
| "step": 12576 | |
| }, | |
| { | |
| "epoch": 16.342412451361866, | |
| "grad_norm": 1.283345103263855, | |
| "learning_rate": 4.5197568707887675e-05, | |
| "loss": 0.3526, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 16.373540856031127, | |
| "grad_norm": 1.3944894075393677, | |
| "learning_rate": 4.503034344267671e-05, | |
| "loss": 0.357, | |
| "step": 12624 | |
| }, | |
| { | |
| "epoch": 16.404669260700388, | |
| "grad_norm": 1.9900680780410767, | |
| "learning_rate": 4.486317430259238e-05, | |
| "loss": 0.3603, | |
| "step": 12648 | |
| }, | |
| { | |
| "epoch": 16.43579766536965, | |
| "grad_norm": 0.9823328852653503, | |
| "learning_rate": 4.4696063175569804e-05, | |
| "loss": 0.3545, | |
| "step": 12672 | |
| }, | |
| { | |
| "epoch": 16.46692607003891, | |
| "grad_norm": 1.634529709815979, | |
| "learning_rate": 4.452901194888897e-05, | |
| "loss": 0.3543, | |
| "step": 12696 | |
| }, | |
| { | |
| "epoch": 16.49805447470817, | |
| "grad_norm": 1.4010380506515503, | |
| "learning_rate": 4.436202250915329e-05, | |
| "loss": 0.3524, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 16.529182879377434, | |
| "grad_norm": 1.239943504333496, | |
| "learning_rate": 4.419509674226846e-05, | |
| "loss": 0.3648, | |
| "step": 12744 | |
| }, | |
| { | |
| "epoch": 16.560311284046694, | |
| "grad_norm": 3.315246820449829, | |
| "learning_rate": 4.4028236533421016e-05, | |
| "loss": 0.3624, | |
| "step": 12768 | |
| }, | |
| { | |
| "epoch": 16.591439688715955, | |
| "grad_norm": 1.0445722341537476, | |
| "learning_rate": 4.3861443767057205e-05, | |
| "loss": 0.3536, | |
| "step": 12792 | |
| }, | |
| { | |
| "epoch": 16.622568093385215, | |
| "grad_norm": 1.154893398284912, | |
| "learning_rate": 4.369472032686149e-05, | |
| "loss": 0.3608, | |
| "step": 12816 | |
| }, | |
| { | |
| "epoch": 16.653696498054476, | |
| "grad_norm": 2.0033769607543945, | |
| "learning_rate": 4.352806809573547e-05, | |
| "loss": 0.3511, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 16.684824902723737, | |
| "grad_norm": 1.4693876504898071, | |
| "learning_rate": 4.336148895577656e-05, | |
| "loss": 0.3531, | |
| "step": 12864 | |
| }, | |
| { | |
| "epoch": 16.715953307392997, | |
| "grad_norm": 1.8765549659729004, | |
| "learning_rate": 4.319498478825663e-05, | |
| "loss": 0.3563, | |
| "step": 12888 | |
| }, | |
| { | |
| "epoch": 16.747081712062258, | |
| "grad_norm": 1.6893914937973022, | |
| "learning_rate": 4.302855747360092e-05, | |
| "loss": 0.3579, | |
| "step": 12912 | |
| }, | |
| { | |
| "epoch": 16.77821011673152, | |
| "grad_norm": 1.183452844619751, | |
| "learning_rate": 4.286220889136668e-05, | |
| "loss": 0.3637, | |
| "step": 12936 | |
| }, | |
| { | |
| "epoch": 16.80933852140078, | |
| "grad_norm": 1.102815866470337, | |
| "learning_rate": 4.269594092022203e-05, | |
| "loss": 0.3561, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 16.84046692607004, | |
| "grad_norm": 0.9764434695243835, | |
| "learning_rate": 4.252975543792468e-05, | |
| "loss": 0.3581, | |
| "step": 12984 | |
| }, | |
| { | |
| "epoch": 16.8715953307393, | |
| "grad_norm": 2.3779425621032715, | |
| "learning_rate": 4.2363654321300735e-05, | |
| "loss": 0.3531, | |
| "step": 13008 | |
| }, | |
| { | |
| "epoch": 16.90272373540856, | |
| "grad_norm": 1.463118076324463, | |
| "learning_rate": 4.219763944622356e-05, | |
| "loss": 0.3562, | |
| "step": 13032 | |
| }, | |
| { | |
| "epoch": 16.93385214007782, | |
| "grad_norm": 1.756101369857788, | |
| "learning_rate": 4.203171268759248e-05, | |
| "loss": 0.3566, | |
| "step": 13056 | |
| }, | |
| { | |
| "epoch": 16.964980544747082, | |
| "grad_norm": 1.5917153358459473, | |
| "learning_rate": 4.1865875919311726e-05, | |
| "loss": 0.3504, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 16.996108949416342, | |
| "grad_norm": 2.404031753540039, | |
| "learning_rate": 4.170013101426917e-05, | |
| "loss": 0.3581, | |
| "step": 13104 | |
| }, | |
| { | |
| "epoch": 17.027237354085603, | |
| "grad_norm": 1.3285900354385376, | |
| "learning_rate": 4.153447984431527e-05, | |
| "loss": 0.3499, | |
| "step": 13128 | |
| }, | |
| { | |
| "epoch": 17.058365758754864, | |
| "grad_norm": 1.0520793199539185, | |
| "learning_rate": 4.136892428024187e-05, | |
| "loss": 0.3547, | |
| "step": 13152 | |
| }, | |
| { | |
| "epoch": 17.089494163424124, | |
| "grad_norm": 1.0784560441970825, | |
| "learning_rate": 4.120346619176102e-05, | |
| "loss": 0.3525, | |
| "step": 13176 | |
| }, | |
| { | |
| "epoch": 17.120622568093385, | |
| "grad_norm": 1.9099761247634888, | |
| "learning_rate": 4.103810744748403e-05, | |
| "loss": 0.3531, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 17.151750972762645, | |
| "grad_norm": 1.4144366979599, | |
| "learning_rate": 4.0872849914900175e-05, | |
| "loss": 0.3431, | |
| "step": 13224 | |
| }, | |
| { | |
| "epoch": 17.182879377431906, | |
| "grad_norm": 1.078682541847229, | |
| "learning_rate": 4.070769546035571e-05, | |
| "loss": 0.3563, | |
| "step": 13248 | |
| }, | |
| { | |
| "epoch": 17.214007782101167, | |
| "grad_norm": 2.5183982849121094, | |
| "learning_rate": 4.054264594903281e-05, | |
| "loss": 0.3534, | |
| "step": 13272 | |
| }, | |
| { | |
| "epoch": 17.245136186770427, | |
| "grad_norm": 1.3110893964767456, | |
| "learning_rate": 4.037770324492841e-05, | |
| "loss": 0.351, | |
| "step": 13296 | |
| }, | |
| { | |
| "epoch": 17.276264591439688, | |
| "grad_norm": 1.4684545993804932, | |
| "learning_rate": 4.021286921083326e-05, | |
| "loss": 0.3525, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 17.30739299610895, | |
| "grad_norm": 1.3898323774337769, | |
| "learning_rate": 4.004814570831078e-05, | |
| "loss": 0.353, | |
| "step": 13344 | |
| }, | |
| { | |
| "epoch": 17.33852140077821, | |
| "grad_norm": 1.7565838098526, | |
| "learning_rate": 3.9883534597676177e-05, | |
| "loss": 0.3566, | |
| "step": 13368 | |
| }, | |
| { | |
| "epoch": 17.36964980544747, | |
| "grad_norm": 1.3672667741775513, | |
| "learning_rate": 3.971903773797528e-05, | |
| "loss": 0.3502, | |
| "step": 13392 | |
| }, | |
| { | |
| "epoch": 17.40077821011673, | |
| "grad_norm": 1.2242878675460815, | |
| "learning_rate": 3.955465698696363e-05, | |
| "loss": 0.3518, | |
| "step": 13416 | |
| }, | |
| { | |
| "epoch": 17.43190661478599, | |
| "grad_norm": 2.410991907119751, | |
| "learning_rate": 3.939039420108556e-05, | |
| "loss": 0.3503, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 17.46303501945525, | |
| "grad_norm": 1.4282727241516113, | |
| "learning_rate": 3.922625123545305e-05, | |
| "loss": 0.3488, | |
| "step": 13464 | |
| }, | |
| { | |
| "epoch": 17.494163424124515, | |
| "grad_norm": 1.5992825031280518, | |
| "learning_rate": 3.906222994382495e-05, | |
| "loss": 0.3567, | |
| "step": 13488 | |
| }, | |
| { | |
| "epoch": 17.525291828793776, | |
| "grad_norm": 2.398169994354248, | |
| "learning_rate": 3.889833217858594e-05, | |
| "loss": 0.3542, | |
| "step": 13512 | |
| }, | |
| { | |
| "epoch": 17.556420233463037, | |
| "grad_norm": 1.140195608139038, | |
| "learning_rate": 3.873455979072569e-05, | |
| "loss": 0.3493, | |
| "step": 13536 | |
| }, | |
| { | |
| "epoch": 17.587548638132297, | |
| "grad_norm": 1.305156946182251, | |
| "learning_rate": 3.8570914629817886e-05, | |
| "loss": 0.3504, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 17.618677042801558, | |
| "grad_norm": 9.382534980773926, | |
| "learning_rate": 3.840739854399934e-05, | |
| "loss": 0.3534, | |
| "step": 13584 | |
| }, | |
| { | |
| "epoch": 17.64980544747082, | |
| "grad_norm": 1.1403177976608276, | |
| "learning_rate": 3.824401337994923e-05, | |
| "loss": 0.3461, | |
| "step": 13608 | |
| }, | |
| { | |
| "epoch": 17.68093385214008, | |
| "grad_norm": 2.1274640560150146, | |
| "learning_rate": 3.808076098286806e-05, | |
| "loss": 0.3521, | |
| "step": 13632 | |
| }, | |
| { | |
| "epoch": 17.71206225680934, | |
| "grad_norm": 1.9969298839569092, | |
| "learning_rate": 3.7917643196457e-05, | |
| "loss": 0.3521, | |
| "step": 13656 | |
| }, | |
| { | |
| "epoch": 17.7431906614786, | |
| "grad_norm": 1.2433438301086426, | |
| "learning_rate": 3.775466186289693e-05, | |
| "loss": 0.3565, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 17.77431906614786, | |
| "grad_norm": 1.7864729166030884, | |
| "learning_rate": 3.7591818822827745e-05, | |
| "loss": 0.3508, | |
| "step": 13704 | |
| }, | |
| { | |
| "epoch": 17.80544747081712, | |
| "grad_norm": 1.7596447467803955, | |
| "learning_rate": 3.7429115915327484e-05, | |
| "loss": 0.3533, | |
| "step": 13728 | |
| }, | |
| { | |
| "epoch": 17.836575875486382, | |
| "grad_norm": 1.7605047225952148, | |
| "learning_rate": 3.726655497789156e-05, | |
| "loss": 0.3553, | |
| "step": 13752 | |
| }, | |
| { | |
| "epoch": 17.867704280155642, | |
| "grad_norm": 1.5380836725234985, | |
| "learning_rate": 3.710413784641212e-05, | |
| "loss": 0.3526, | |
| "step": 13776 | |
| }, | |
| { | |
| "epoch": 17.898832684824903, | |
| "grad_norm": 1.448866844177246, | |
| "learning_rate": 3.694186635515714e-05, | |
| "loss": 0.3516, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 17.929961089494164, | |
| "grad_norm": 1.527550458908081, | |
| "learning_rate": 3.677974233674983e-05, | |
| "loss": 0.3438, | |
| "step": 13824 | |
| }, | |
| { | |
| "epoch": 17.961089494163424, | |
| "grad_norm": 1.3250521421432495, | |
| "learning_rate": 3.661776762214797e-05, | |
| "loss": 0.3551, | |
| "step": 13848 | |
| }, | |
| { | |
| "epoch": 17.992217898832685, | |
| "grad_norm": 1.4741333723068237, | |
| "learning_rate": 3.6455944040623075e-05, | |
| "loss": 0.3529, | |
| "step": 13872 | |
| }, | |
| { | |
| "epoch": 18.023346303501945, | |
| "grad_norm": 2.2234058380126953, | |
| "learning_rate": 3.6294273419739874e-05, | |
| "loss": 0.3486, | |
| "step": 13896 | |
| }, | |
| { | |
| "epoch": 18.054474708171206, | |
| "grad_norm": 1.4099419116973877, | |
| "learning_rate": 3.613275758533561e-05, | |
| "loss": 0.3473, | |
| "step": 13920 | |
| }, | |
| { | |
| "epoch": 18.085603112840467, | |
| "grad_norm": 1.9094316959381104, | |
| "learning_rate": 3.5971398361499466e-05, | |
| "loss": 0.3548, | |
| "step": 13944 | |
| }, | |
| { | |
| "epoch": 18.116731517509727, | |
| "grad_norm": 1.2845815420150757, | |
| "learning_rate": 3.581019757055188e-05, | |
| "loss": 0.345, | |
| "step": 13968 | |
| }, | |
| { | |
| "epoch": 18.147859922178988, | |
| "grad_norm": 2.0491998195648193, | |
| "learning_rate": 3.564915703302407e-05, | |
| "loss": 0.3474, | |
| "step": 13992 | |
| }, | |
| { | |
| "epoch": 18.17898832684825, | |
| "grad_norm": 1.3620078563690186, | |
| "learning_rate": 3.5488278567637426e-05, | |
| "loss": 0.3452, | |
| "step": 14016 | |
| }, | |
| { | |
| "epoch": 18.21011673151751, | |
| "grad_norm": 4.295355796813965, | |
| "learning_rate": 3.53275639912829e-05, | |
| "loss": 0.3474, | |
| "step": 14040 | |
| }, | |
| { | |
| "epoch": 18.24124513618677, | |
| "grad_norm": 2.150200366973877, | |
| "learning_rate": 3.516701511900062e-05, | |
| "loss": 0.3465, | |
| "step": 14064 | |
| }, | |
| { | |
| "epoch": 18.27237354085603, | |
| "grad_norm": 1.407614827156067, | |
| "learning_rate": 3.500663376395927e-05, | |
| "loss": 0.3453, | |
| "step": 14088 | |
| }, | |
| { | |
| "epoch": 18.30350194552529, | |
| "grad_norm": 1.2066164016723633, | |
| "learning_rate": 3.484642173743575e-05, | |
| "loss": 0.3477, | |
| "step": 14112 | |
| }, | |
| { | |
| "epoch": 18.33463035019455, | |
| "grad_norm": 1.1473839282989502, | |
| "learning_rate": 3.4686380848794544e-05, | |
| "loss": 0.3448, | |
| "step": 14136 | |
| }, | |
| { | |
| "epoch": 18.365758754863812, | |
| "grad_norm": 2.0838565826416016, | |
| "learning_rate": 3.452651290546742e-05, | |
| "loss": 0.3451, | |
| "step": 14160 | |
| }, | |
| { | |
| "epoch": 18.396887159533073, | |
| "grad_norm": 1.3917421102523804, | |
| "learning_rate": 3.436681971293301e-05, | |
| "loss": 0.3442, | |
| "step": 14184 | |
| }, | |
| { | |
| "epoch": 18.428015564202333, | |
| "grad_norm": 1.2915924787521362, | |
| "learning_rate": 3.420730307469632e-05, | |
| "loss": 0.3409, | |
| "step": 14208 | |
| }, | |
| { | |
| "epoch": 18.459143968871594, | |
| "grad_norm": 2.337096691131592, | |
| "learning_rate": 3.404796479226852e-05, | |
| "loss": 0.3471, | |
| "step": 14232 | |
| }, | |
| { | |
| "epoch": 18.490272373540854, | |
| "grad_norm": 1.732359528541565, | |
| "learning_rate": 3.3888806665146374e-05, | |
| "loss": 0.3478, | |
| "step": 14256 | |
| }, | |
| { | |
| "epoch": 18.52140077821012, | |
| "grad_norm": 1.1314399242401123, | |
| "learning_rate": 3.3729830490792166e-05, | |
| "loss": 0.345, | |
| "step": 14280 | |
| }, | |
| { | |
| "epoch": 18.55252918287938, | |
| "grad_norm": 1.5127285718917847, | |
| "learning_rate": 3.357103806461328e-05, | |
| "loss": 0.3405, | |
| "step": 14304 | |
| }, | |
| { | |
| "epoch": 18.58365758754864, | |
| "grad_norm": 1.306648850440979, | |
| "learning_rate": 3.3412431179941847e-05, | |
| "loss": 0.3443, | |
| "step": 14328 | |
| }, | |
| { | |
| "epoch": 18.6147859922179, | |
| "grad_norm": 1.189726710319519, | |
| "learning_rate": 3.3254011628014656e-05, | |
| "loss": 0.3447, | |
| "step": 14352 | |
| }, | |
| { | |
| "epoch": 18.64591439688716, | |
| "grad_norm": 1.2058913707733154, | |
| "learning_rate": 3.309578119795278e-05, | |
| "loss": 0.347, | |
| "step": 14376 | |
| }, | |
| { | |
| "epoch": 18.67704280155642, | |
| "grad_norm": 1.702572226524353, | |
| "learning_rate": 3.293774167674149e-05, | |
| "loss": 0.3496, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 18.708171206225682, | |
| "grad_norm": 1.8515872955322266, | |
| "learning_rate": 3.277989484920996e-05, | |
| "loss": 0.344, | |
| "step": 14424 | |
| }, | |
| { | |
| "epoch": 18.739299610894943, | |
| "grad_norm": 1.8190243244171143, | |
| "learning_rate": 3.26222424980112e-05, | |
| "loss": 0.3499, | |
| "step": 14448 | |
| }, | |
| { | |
| "epoch": 18.770428015564203, | |
| "grad_norm": 1.261648416519165, | |
| "learning_rate": 3.246478640360191e-05, | |
| "loss": 0.345, | |
| "step": 14472 | |
| }, | |
| { | |
| "epoch": 18.801556420233464, | |
| "grad_norm": 1.3052914142608643, | |
| "learning_rate": 3.2307528344222296e-05, | |
| "loss": 0.3505, | |
| "step": 14496 | |
| }, | |
| { | |
| "epoch": 18.832684824902724, | |
| "grad_norm": 1.5217386484146118, | |
| "learning_rate": 3.215047009587609e-05, | |
| "loss": 0.3507, | |
| "step": 14520 | |
| }, | |
| { | |
| "epoch": 18.863813229571985, | |
| "grad_norm": 1.2934740781784058, | |
| "learning_rate": 3.1993613432310384e-05, | |
| "loss": 0.3459, | |
| "step": 14544 | |
| }, | |
| { | |
| "epoch": 18.894941634241246, | |
| "grad_norm": 1.5978559255599976, | |
| "learning_rate": 3.183696012499574e-05, | |
| "loss": 0.3464, | |
| "step": 14568 | |
| }, | |
| { | |
| "epoch": 18.926070038910506, | |
| "grad_norm": 1.2306820154190063, | |
| "learning_rate": 3.168051194310609e-05, | |
| "loss": 0.3446, | |
| "step": 14592 | |
| }, | |
| { | |
| "epoch": 18.957198443579767, | |
| "grad_norm": 1.1488240957260132, | |
| "learning_rate": 3.152427065349867e-05, | |
| "loss": 0.3475, | |
| "step": 14616 | |
| }, | |
| { | |
| "epoch": 18.988326848249027, | |
| "grad_norm": 3.1832704544067383, | |
| "learning_rate": 3.1368238020694316e-05, | |
| "loss": 0.3437, | |
| "step": 14640 | |
| }, | |
| { | |
| "epoch": 19.019455252918288, | |
| "grad_norm": 2.3371617794036865, | |
| "learning_rate": 3.121241580685727e-05, | |
| "loss": 0.3465, | |
| "step": 14664 | |
| }, | |
| { | |
| "epoch": 19.05058365758755, | |
| "grad_norm": 2.816099166870117, | |
| "learning_rate": 3.1056805771775436e-05, | |
| "loss": 0.3435, | |
| "step": 14688 | |
| }, | |
| { | |
| "epoch": 19.08171206225681, | |
| "grad_norm": 1.3421522378921509, | |
| "learning_rate": 3.090140967284046e-05, | |
| "loss": 0.3418, | |
| "step": 14712 | |
| }, | |
| { | |
| "epoch": 19.11284046692607, | |
| "grad_norm": 1.8488672971725464, | |
| "learning_rate": 3.07462292650279e-05, | |
| "loss": 0.348, | |
| "step": 14736 | |
| }, | |
| { | |
| "epoch": 19.14396887159533, | |
| "grad_norm": 1.2293037176132202, | |
| "learning_rate": 3.05912663008774e-05, | |
| "loss": 0.342, | |
| "step": 14760 | |
| }, | |
| { | |
| "epoch": 19.17509727626459, | |
| "grad_norm": 1.7620015144348145, | |
| "learning_rate": 3.043652253047281e-05, | |
| "loss": 0.3454, | |
| "step": 14784 | |
| }, | |
| { | |
| "epoch": 19.20622568093385, | |
| "grad_norm": 1.6479402780532837, | |
| "learning_rate": 3.0281999701422637e-05, | |
| "loss": 0.3427, | |
| "step": 14808 | |
| }, | |
| { | |
| "epoch": 19.237354085603112, | |
| "grad_norm": 1.5058902502059937, | |
| "learning_rate": 3.012769955884005e-05, | |
| "loss": 0.3328, | |
| "step": 14832 | |
| }, | |
| { | |
| "epoch": 19.268482490272373, | |
| "grad_norm": 1.6616445779800415, | |
| "learning_rate": 2.9973623845323347e-05, | |
| "loss": 0.3441, | |
| "step": 14856 | |
| }, | |
| { | |
| "epoch": 19.299610894941633, | |
| "grad_norm": 1.5390020608901978, | |
| "learning_rate": 2.9819774300936255e-05, | |
| "loss": 0.3434, | |
| "step": 14880 | |
| }, | |
| { | |
| "epoch": 19.330739299610894, | |
| "grad_norm": 1.7172026634216309, | |
| "learning_rate": 2.9666152663188172e-05, | |
| "loss": 0.3439, | |
| "step": 14904 | |
| }, | |
| { | |
| "epoch": 19.361867704280154, | |
| "grad_norm": 1.134320855140686, | |
| "learning_rate": 2.9512760667014682e-05, | |
| "loss": 0.3431, | |
| "step": 14928 | |
| }, | |
| { | |
| "epoch": 19.392996108949415, | |
| "grad_norm": 4.418805122375488, | |
| "learning_rate": 2.935960004475784e-05, | |
| "loss": 0.344, | |
| "step": 14952 | |
| }, | |
| { | |
| "epoch": 19.424124513618676, | |
| "grad_norm": 1.3951141834259033, | |
| "learning_rate": 2.920667252614674e-05, | |
| "loss": 0.3334, | |
| "step": 14976 | |
| }, | |
| { | |
| "epoch": 19.455252918287936, | |
| "grad_norm": 2.0081377029418945, | |
| "learning_rate": 2.9053979838277834e-05, | |
| "loss": 0.3413, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 19.486381322957197, | |
| "grad_norm": 1.0862860679626465, | |
| "learning_rate": 2.890152370559552e-05, | |
| "loss": 0.3406, | |
| "step": 15024 | |
| }, | |
| { | |
| "epoch": 19.51750972762646, | |
| "grad_norm": 1.3487762212753296, | |
| "learning_rate": 2.8749305849872686e-05, | |
| "loss": 0.3335, | |
| "step": 15048 | |
| }, | |
| { | |
| "epoch": 19.54863813229572, | |
| "grad_norm": 1.122753381729126, | |
| "learning_rate": 2.8597327990191146e-05, | |
| "loss": 0.3491, | |
| "step": 15072 | |
| }, | |
| { | |
| "epoch": 19.579766536964982, | |
| "grad_norm": 1.518355131149292, | |
| "learning_rate": 2.844559184292239e-05, | |
| "loss": 0.3405, | |
| "step": 15096 | |
| }, | |
| { | |
| "epoch": 19.610894941634243, | |
| "grad_norm": 1.0469350814819336, | |
| "learning_rate": 2.829409912170806e-05, | |
| "loss": 0.3395, | |
| "step": 15120 | |
| }, | |
| { | |
| "epoch": 19.642023346303503, | |
| "grad_norm": 1.915490984916687, | |
| "learning_rate": 2.814285153744064e-05, | |
| "loss": 0.3426, | |
| "step": 15144 | |
| }, | |
| { | |
| "epoch": 19.673151750972764, | |
| "grad_norm": 1.477184772491455, | |
| "learning_rate": 2.7991850798244197e-05, | |
| "loss": 0.3463, | |
| "step": 15168 | |
| }, | |
| { | |
| "epoch": 19.704280155642024, | |
| "grad_norm": 1.3598774671554565, | |
| "learning_rate": 2.7841098609454976e-05, | |
| "loss": 0.3454, | |
| "step": 15192 | |
| }, | |
| { | |
| "epoch": 19.735408560311285, | |
| "grad_norm": 2.6406991481781006, | |
| "learning_rate": 2.769059667360227e-05, | |
| "loss": 0.3422, | |
| "step": 15216 | |
| }, | |
| { | |
| "epoch": 19.766536964980546, | |
| "grad_norm": 1.2698395252227783, | |
| "learning_rate": 2.754034669038905e-05, | |
| "loss": 0.3473, | |
| "step": 15240 | |
| }, | |
| { | |
| "epoch": 19.797665369649806, | |
| "grad_norm": 1.3700004816055298, | |
| "learning_rate": 2.7390350356672934e-05, | |
| "loss": 0.3434, | |
| "step": 15264 | |
| }, | |
| { | |
| "epoch": 19.828793774319067, | |
| "grad_norm": 1.1726247072219849, | |
| "learning_rate": 2.7240609366446845e-05, | |
| "loss": 0.3421, | |
| "step": 15288 | |
| }, | |
| { | |
| "epoch": 19.859922178988327, | |
| "grad_norm": 1.5183639526367188, | |
| "learning_rate": 2.709112541082e-05, | |
| "loss": 0.3418, | |
| "step": 15312 | |
| }, | |
| { | |
| "epoch": 19.891050583657588, | |
| "grad_norm": 1.1311919689178467, | |
| "learning_rate": 2.6941900177998824e-05, | |
| "loss": 0.3411, | |
| "step": 15336 | |
| }, | |
| { | |
| "epoch": 19.92217898832685, | |
| "grad_norm": 1.6014869213104248, | |
| "learning_rate": 2.6792935353267757e-05, | |
| "loss": 0.339, | |
| "step": 15360 | |
| }, | |
| { | |
| "epoch": 19.95330739299611, | |
| "grad_norm": 1.8378218412399292, | |
| "learning_rate": 2.6644232618970382e-05, | |
| "loss": 0.3464, | |
| "step": 15384 | |
| }, | |
| { | |
| "epoch": 19.98443579766537, | |
| "grad_norm": 2.1291933059692383, | |
| "learning_rate": 2.6495793654490292e-05, | |
| "loss": 0.3409, | |
| "step": 15408 | |
| }, | |
| { | |
| "epoch": 20.01556420233463, | |
| "grad_norm": 1.1774524450302124, | |
| "learning_rate": 2.6347620136232232e-05, | |
| "loss": 0.339, | |
| "step": 15432 | |
| }, | |
| { | |
| "epoch": 20.04669260700389, | |
| "grad_norm": 1.3319616317749023, | |
| "learning_rate": 2.6199713737603055e-05, | |
| "loss": 0.3376, | |
| "step": 15456 | |
| }, | |
| { | |
| "epoch": 20.07782101167315, | |
| "grad_norm": 1.488239049911499, | |
| "learning_rate": 2.60520761289929e-05, | |
| "loss": 0.3379, | |
| "step": 15480 | |
| }, | |
| { | |
| "epoch": 20.108949416342412, | |
| "grad_norm": 1.2733827829360962, | |
| "learning_rate": 2.590470897775636e-05, | |
| "loss": 0.3352, | |
| "step": 15504 | |
| }, | |
| { | |
| "epoch": 20.140077821011673, | |
| "grad_norm": 2.291374921798706, | |
| "learning_rate": 2.575761394819351e-05, | |
| "loss": 0.3395, | |
| "step": 15528 | |
| }, | |
| { | |
| "epoch": 20.171206225680933, | |
| "grad_norm": 1.3169567584991455, | |
| "learning_rate": 2.5610792701531298e-05, | |
| "loss": 0.3365, | |
| "step": 15552 | |
| }, | |
| { | |
| "epoch": 20.202334630350194, | |
| "grad_norm": 1.0463300943374634, | |
| "learning_rate": 2.54642468959046e-05, | |
| "loss": 0.337, | |
| "step": 15576 | |
| }, | |
| { | |
| "epoch": 20.233463035019454, | |
| "grad_norm": 1.5346705913543701, | |
| "learning_rate": 2.5317978186337664e-05, | |
| "loss": 0.3394, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 20.264591439688715, | |
| "grad_norm": 1.6092703342437744, | |
| "learning_rate": 2.5171988224725267e-05, | |
| "loss": 0.3308, | |
| "step": 15624 | |
| }, | |
| { | |
| "epoch": 20.295719844357976, | |
| "grad_norm": 1.3011606931686401, | |
| "learning_rate": 2.5026278659814144e-05, | |
| "loss": 0.339, | |
| "step": 15648 | |
| }, | |
| { | |
| "epoch": 20.326848249027236, | |
| "grad_norm": 1.2459102869033813, | |
| "learning_rate": 2.4880851137184403e-05, | |
| "loss": 0.3308, | |
| "step": 15672 | |
| }, | |
| { | |
| "epoch": 20.357976653696497, | |
| "grad_norm": 1.4810408353805542, | |
| "learning_rate": 2.4735707299230808e-05, | |
| "loss": 0.3376, | |
| "step": 15696 | |
| }, | |
| { | |
| "epoch": 20.389105058365757, | |
| "grad_norm": 1.2645267248153687, | |
| "learning_rate": 2.4590848785144386e-05, | |
| "loss": 0.3402, | |
| "step": 15720 | |
| }, | |
| { | |
| "epoch": 20.420233463035018, | |
| "grad_norm": 2.001779556274414, | |
| "learning_rate": 2.4446277230893823e-05, | |
| "loss": 0.3358, | |
| "step": 15744 | |
| }, | |
| { | |
| "epoch": 20.45136186770428, | |
| "grad_norm": 3.0970067977905273, | |
| "learning_rate": 2.4301994269206968e-05, | |
| "loss": 0.334, | |
| "step": 15768 | |
| }, | |
| { | |
| "epoch": 20.48249027237354, | |
| "grad_norm": 1.4983640909194946, | |
| "learning_rate": 2.415800152955247e-05, | |
| "loss": 0.3424, | |
| "step": 15792 | |
| }, | |
| { | |
| "epoch": 20.5136186770428, | |
| "grad_norm": 1.3392024040222168, | |
| "learning_rate": 2.40143006381213e-05, | |
| "loss": 0.3463, | |
| "step": 15816 | |
| }, | |
| { | |
| "epoch": 20.544747081712064, | |
| "grad_norm": 1.4383450746536255, | |
| "learning_rate": 2.3870893217808495e-05, | |
| "loss": 0.3354, | |
| "step": 15840 | |
| }, | |
| { | |
| "epoch": 20.575875486381324, | |
| "grad_norm": 1.4223530292510986, | |
| "learning_rate": 2.3727780888194658e-05, | |
| "loss": 0.333, | |
| "step": 15864 | |
| }, | |
| { | |
| "epoch": 20.607003891050585, | |
| "grad_norm": 1.5441044569015503, | |
| "learning_rate": 2.3584965265527847e-05, | |
| "loss": 0.3335, | |
| "step": 15888 | |
| }, | |
| { | |
| "epoch": 20.638132295719846, | |
| "grad_norm": 0.8291170597076416, | |
| "learning_rate": 2.344244796270524e-05, | |
| "loss": 0.3389, | |
| "step": 15912 | |
| }, | |
| { | |
| "epoch": 20.669260700389106, | |
| "grad_norm": 2.7805609703063965, | |
| "learning_rate": 2.330023058925486e-05, | |
| "loss": 0.3353, | |
| "step": 15936 | |
| }, | |
| { | |
| "epoch": 20.700389105058367, | |
| "grad_norm": 1.6097582578659058, | |
| "learning_rate": 2.3158314751317513e-05, | |
| "loss": 0.339, | |
| "step": 15960 | |
| }, | |
| { | |
| "epoch": 20.731517509727627, | |
| "grad_norm": 1.4149878025054932, | |
| "learning_rate": 2.3016702051628547e-05, | |
| "loss": 0.3375, | |
| "step": 15984 | |
| }, | |
| { | |
| "epoch": 20.762645914396888, | |
| "grad_norm": 1.2236443758010864, | |
| "learning_rate": 2.2875394089499847e-05, | |
| "loss": 0.3358, | |
| "step": 16008 | |
| }, | |
| { | |
| "epoch": 20.79377431906615, | |
| "grad_norm": 1.0645393133163452, | |
| "learning_rate": 2.2734392460801727e-05, | |
| "loss": 0.3377, | |
| "step": 16032 | |
| }, | |
| { | |
| "epoch": 20.82490272373541, | |
| "grad_norm": 1.2843340635299683, | |
| "learning_rate": 2.259369875794485e-05, | |
| "loss": 0.3332, | |
| "step": 16056 | |
| }, | |
| { | |
| "epoch": 20.85603112840467, | |
| "grad_norm": 1.735514760017395, | |
| "learning_rate": 2.2453314569862366e-05, | |
| "loss": 0.3364, | |
| "step": 16080 | |
| }, | |
| { | |
| "epoch": 20.88715953307393, | |
| "grad_norm": 1.3856208324432373, | |
| "learning_rate": 2.2313241481991855e-05, | |
| "loss": 0.3389, | |
| "step": 16104 | |
| }, | |
| { | |
| "epoch": 20.91828793774319, | |
| "grad_norm": 1.7546725273132324, | |
| "learning_rate": 2.217348107625748e-05, | |
| "loss": 0.3373, | |
| "step": 16128 | |
| }, | |
| { | |
| "epoch": 20.94941634241245, | |
| "grad_norm": 1.3664530515670776, | |
| "learning_rate": 2.2034034931052096e-05, | |
| "loss": 0.3398, | |
| "step": 16152 | |
| }, | |
| { | |
| "epoch": 20.980544747081712, | |
| "grad_norm": 5.165532112121582, | |
| "learning_rate": 2.1894904621219463e-05, | |
| "loss": 0.3372, | |
| "step": 16176 | |
| }, | |
| { | |
| "epoch": 21.011673151750973, | |
| "grad_norm": 1.3261635303497314, | |
| "learning_rate": 2.175609171803644e-05, | |
| "loss": 0.3381, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 21.042801556420233, | |
| "grad_norm": 1.8854881525039673, | |
| "learning_rate": 2.1617597789195193e-05, | |
| "loss": 0.3347, | |
| "step": 16224 | |
| }, | |
| { | |
| "epoch": 21.073929961089494, | |
| "grad_norm": 1.3904035091400146, | |
| "learning_rate": 2.1479424398785573e-05, | |
| "loss": 0.3346, | |
| "step": 16248 | |
| }, | |
| { | |
| "epoch": 21.105058365758754, | |
| "grad_norm": 1.318601369857788, | |
| "learning_rate": 2.1341573107277392e-05, | |
| "loss": 0.3347, | |
| "step": 16272 | |
| }, | |
| { | |
| "epoch": 21.136186770428015, | |
| "grad_norm": 1.0564274787902832, | |
| "learning_rate": 2.1204045471502803e-05, | |
| "loss": 0.3295, | |
| "step": 16296 | |
| }, | |
| { | |
| "epoch": 21.167315175097276, | |
| "grad_norm": 0.9953235387802124, | |
| "learning_rate": 2.106684304463874e-05, | |
| "loss": 0.3339, | |
| "step": 16320 | |
| }, | |
| { | |
| "epoch": 21.198443579766536, | |
| "grad_norm": 1.0253063440322876, | |
| "learning_rate": 2.092996737618939e-05, | |
| "loss": 0.3271, | |
| "step": 16344 | |
| }, | |
| { | |
| "epoch": 21.229571984435797, | |
| "grad_norm": 1.5001134872436523, | |
| "learning_rate": 2.079342001196869e-05, | |
| "loss": 0.3359, | |
| "step": 16368 | |
| }, | |
| { | |
| "epoch": 21.260700389105057, | |
| "grad_norm": 1.1106650829315186, | |
| "learning_rate": 2.0657202494082773e-05, | |
| "loss": 0.327, | |
| "step": 16392 | |
| }, | |
| { | |
| "epoch": 21.291828793774318, | |
| "grad_norm": 1.0053423643112183, | |
| "learning_rate": 2.052131636091273e-05, | |
| "loss": 0.3398, | |
| "step": 16416 | |
| }, | |
| { | |
| "epoch": 21.32295719844358, | |
| "grad_norm": 1.3083621263504028, | |
| "learning_rate": 2.038576314709707e-05, | |
| "loss": 0.3306, | |
| "step": 16440 | |
| }, | |
| { | |
| "epoch": 21.35408560311284, | |
| "grad_norm": 1.4561755657196045, | |
| "learning_rate": 2.0250544383514457e-05, | |
| "loss": 0.3364, | |
| "step": 16464 | |
| }, | |
| { | |
| "epoch": 21.3852140077821, | |
| "grad_norm": 1.0885835886001587, | |
| "learning_rate": 2.0115661597266476e-05, | |
| "loss": 0.3355, | |
| "step": 16488 | |
| }, | |
| { | |
| "epoch": 21.41634241245136, | |
| "grad_norm": 1.3506430387496948, | |
| "learning_rate": 1.998111631166027e-05, | |
| "loss": 0.3334, | |
| "step": 16512 | |
| }, | |
| { | |
| "epoch": 21.44747081712062, | |
| "grad_norm": 1.0331530570983887, | |
| "learning_rate": 1.9846910046191446e-05, | |
| "loss": 0.3303, | |
| "step": 16536 | |
| }, | |
| { | |
| "epoch": 21.47859922178988, | |
| "grad_norm": 1.0616254806518555, | |
| "learning_rate": 1.9713044316526813e-05, | |
| "loss": 0.3348, | |
| "step": 16560 | |
| }, | |
| { | |
| "epoch": 21.509727626459146, | |
| "grad_norm": 2.5577657222747803, | |
| "learning_rate": 1.9579520634487386e-05, | |
| "loss": 0.335, | |
| "step": 16584 | |
| }, | |
| { | |
| "epoch": 21.540856031128406, | |
| "grad_norm": 1.5290476083755493, | |
| "learning_rate": 1.9446340508031185e-05, | |
| "loss": 0.3382, | |
| "step": 16608 | |
| }, | |
| { | |
| "epoch": 21.571984435797667, | |
| "grad_norm": 0.8804724216461182, | |
| "learning_rate": 1.931350544123627e-05, | |
| "loss": 0.3257, | |
| "step": 16632 | |
| }, | |
| { | |
| "epoch": 21.603112840466927, | |
| "grad_norm": 1.1799284219741821, | |
| "learning_rate": 1.918101693428379e-05, | |
| "loss": 0.3298, | |
| "step": 16656 | |
| }, | |
| { | |
| "epoch": 21.634241245136188, | |
| "grad_norm": 1.3328742980957031, | |
| "learning_rate": 1.9048876483440942e-05, | |
| "loss": 0.3373, | |
| "step": 16680 | |
| }, | |
| { | |
| "epoch": 21.66536964980545, | |
| "grad_norm": 0.9985073208808899, | |
| "learning_rate": 1.8917085581044193e-05, | |
| "loss": 0.3313, | |
| "step": 16704 | |
| }, | |
| { | |
| "epoch": 21.69649805447471, | |
| "grad_norm": 1.498244047164917, | |
| "learning_rate": 1.8785645715482285e-05, | |
| "loss": 0.3303, | |
| "step": 16728 | |
| }, | |
| { | |
| "epoch": 21.72762645914397, | |
| "grad_norm": 1.6468580961227417, | |
| "learning_rate": 1.8654558371179583e-05, | |
| "loss": 0.3252, | |
| "step": 16752 | |
| }, | |
| { | |
| "epoch": 21.75875486381323, | |
| "grad_norm": 1.6541725397109985, | |
| "learning_rate": 1.8523825028579212e-05, | |
| "loss": 0.3299, | |
| "step": 16776 | |
| }, | |
| { | |
| "epoch": 21.78988326848249, | |
| "grad_norm": 0.9805202484130859, | |
| "learning_rate": 1.8393447164126282e-05, | |
| "loss": 0.3342, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 21.82101167315175, | |
| "grad_norm": 0.9097315073013306, | |
| "learning_rate": 1.8263426250251388e-05, | |
| "loss": 0.3309, | |
| "step": 16824 | |
| }, | |
| { | |
| "epoch": 21.852140077821012, | |
| "grad_norm": 1.2603996992111206, | |
| "learning_rate": 1.8133763755353816e-05, | |
| "loss": 0.3387, | |
| "step": 16848 | |
| }, | |
| { | |
| "epoch": 21.883268482490273, | |
| "grad_norm": 1.0283710956573486, | |
| "learning_rate": 1.800446114378508e-05, | |
| "loss": 0.3325, | |
| "step": 16872 | |
| }, | |
| { | |
| "epoch": 21.914396887159533, | |
| "grad_norm": 2.601137399673462, | |
| "learning_rate": 1.7875519875832254e-05, | |
| "loss": 0.3356, | |
| "step": 16896 | |
| }, | |
| { | |
| "epoch": 21.945525291828794, | |
| "grad_norm": 1.0405902862548828, | |
| "learning_rate": 1.774694140770163e-05, | |
| "loss": 0.3339, | |
| "step": 16920 | |
| }, | |
| { | |
| "epoch": 21.976653696498055, | |
| "grad_norm": 1.504928708076477, | |
| "learning_rate": 1.7618727191502188e-05, | |
| "loss": 0.3329, | |
| "step": 16944 | |
| }, | |
| { | |
| "epoch": 22.007782101167315, | |
| "grad_norm": 1.1356394290924072, | |
| "learning_rate": 1.749087867522912e-05, | |
| "loss": 0.331, | |
| "step": 16968 | |
| }, | |
| { | |
| "epoch": 22.038910505836576, | |
| "grad_norm": 1.3053059577941895, | |
| "learning_rate": 1.7363397302747687e-05, | |
| "loss": 0.3316, | |
| "step": 16992 | |
| }, | |
| { | |
| "epoch": 22.070038910505836, | |
| "grad_norm": 1.8512986898422241, | |
| "learning_rate": 1.723628451377669e-05, | |
| "loss": 0.3286, | |
| "step": 17016 | |
| }, | |
| { | |
| "epoch": 22.101167315175097, | |
| "grad_norm": 1.1379419565200806, | |
| "learning_rate": 1.7109541743872366e-05, | |
| "loss": 0.3311, | |
| "step": 17040 | |
| }, | |
| { | |
| "epoch": 22.132295719844358, | |
| "grad_norm": 1.0137568712234497, | |
| "learning_rate": 1.698317042441211e-05, | |
| "loss": 0.3294, | |
| "step": 17064 | |
| }, | |
| { | |
| "epoch": 22.163424124513618, | |
| "grad_norm": 1.1163158416748047, | |
| "learning_rate": 1.6857171982578286e-05, | |
| "loss": 0.3247, | |
| "step": 17088 | |
| }, | |
| { | |
| "epoch": 22.19455252918288, | |
| "grad_norm": 0.992064893245697, | |
| "learning_rate": 1.6731547841342193e-05, | |
| "loss": 0.3331, | |
| "step": 17112 | |
| }, | |
| { | |
| "epoch": 22.22568093385214, | |
| "grad_norm": 1.2021843194961548, | |
| "learning_rate": 1.6606299419447894e-05, | |
| "loss": 0.3284, | |
| "step": 17136 | |
| }, | |
| { | |
| "epoch": 22.2568093385214, | |
| "grad_norm": 2.352348566055298, | |
| "learning_rate": 1.6481428131396275e-05, | |
| "loss": 0.3315, | |
| "step": 17160 | |
| }, | |
| { | |
| "epoch": 22.28793774319066, | |
| "grad_norm": 1.283078908920288, | |
| "learning_rate": 1.6356935387428996e-05, | |
| "loss": 0.3262, | |
| "step": 17184 | |
| }, | |
| { | |
| "epoch": 22.31906614785992, | |
| "grad_norm": 1.2125391960144043, | |
| "learning_rate": 1.6232822593512654e-05, | |
| "loss": 0.3312, | |
| "step": 17208 | |
| }, | |
| { | |
| "epoch": 22.35019455252918, | |
| "grad_norm": 1.2397364377975464, | |
| "learning_rate": 1.610909115132286e-05, | |
| "loss": 0.3268, | |
| "step": 17232 | |
| }, | |
| { | |
| "epoch": 22.381322957198442, | |
| "grad_norm": 1.4817135334014893, | |
| "learning_rate": 1.5985742458228338e-05, | |
| "loss": 0.3283, | |
| "step": 17256 | |
| }, | |
| { | |
| "epoch": 22.412451361867703, | |
| "grad_norm": 2.0548017024993896, | |
| "learning_rate": 1.58627779072753e-05, | |
| "loss": 0.3249, | |
| "step": 17280 | |
| }, | |
| { | |
| "epoch": 22.443579766536963, | |
| "grad_norm": 1.4913387298583984, | |
| "learning_rate": 1.574019888717155e-05, | |
| "loss": 0.3277, | |
| "step": 17304 | |
| }, | |
| { | |
| "epoch": 22.474708171206224, | |
| "grad_norm": 1.2476876974105835, | |
| "learning_rate": 1.5618006782270904e-05, | |
| "loss": 0.3298, | |
| "step": 17328 | |
| }, | |
| { | |
| "epoch": 22.505836575875485, | |
| "grad_norm": 1.2181342840194702, | |
| "learning_rate": 1.5496202972557556e-05, | |
| "loss": 0.329, | |
| "step": 17352 | |
| }, | |
| { | |
| "epoch": 22.53696498054475, | |
| "grad_norm": 1.3082391023635864, | |
| "learning_rate": 1.5374788833630404e-05, | |
| "loss": 0.328, | |
| "step": 17376 | |
| }, | |
| { | |
| "epoch": 22.56809338521401, | |
| "grad_norm": 1.217458963394165, | |
| "learning_rate": 1.5253765736687636e-05, | |
| "loss": 0.3273, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 22.59922178988327, | |
| "grad_norm": 1.1426113843917847, | |
| "learning_rate": 1.5133135048511127e-05, | |
| "loss": 0.3314, | |
| "step": 17424 | |
| }, | |
| { | |
| "epoch": 22.63035019455253, | |
| "grad_norm": 1.8684285879135132, | |
| "learning_rate": 1.5012898131451114e-05, | |
| "loss": 0.3301, | |
| "step": 17448 | |
| }, | |
| { | |
| "epoch": 22.66147859922179, | |
| "grad_norm": 1.1370235681533813, | |
| "learning_rate": 1.489305634341071e-05, | |
| "loss": 0.3315, | |
| "step": 17472 | |
| }, | |
| { | |
| "epoch": 22.69260700389105, | |
| "grad_norm": 1.1359672546386719, | |
| "learning_rate": 1.4773611037830626e-05, | |
| "loss": 0.3283, | |
| "step": 17496 | |
| }, | |
| { | |
| "epoch": 22.723735408560312, | |
| "grad_norm": 1.3090800046920776, | |
| "learning_rate": 1.4654563563673901e-05, | |
| "loss": 0.3282, | |
| "step": 17520 | |
| }, | |
| { | |
| "epoch": 22.754863813229573, | |
| "grad_norm": 1.2736905813217163, | |
| "learning_rate": 1.4535915265410593e-05, | |
| "loss": 0.33, | |
| "step": 17544 | |
| }, | |
| { | |
| "epoch": 22.785992217898833, | |
| "grad_norm": 1.189782977104187, | |
| "learning_rate": 1.4417667483002688e-05, | |
| "loss": 0.3267, | |
| "step": 17568 | |
| }, | |
| { | |
| "epoch": 22.817120622568094, | |
| "grad_norm": 2.092562437057495, | |
| "learning_rate": 1.4299821551888881e-05, | |
| "loss": 0.3276, | |
| "step": 17592 | |
| }, | |
| { | |
| "epoch": 22.848249027237355, | |
| "grad_norm": 1.8085280656814575, | |
| "learning_rate": 1.4182378802969582e-05, | |
| "loss": 0.3267, | |
| "step": 17616 | |
| }, | |
| { | |
| "epoch": 22.879377431906615, | |
| "grad_norm": 1.2389247417449951, | |
| "learning_rate": 1.4065340562591784e-05, | |
| "loss": 0.3322, | |
| "step": 17640 | |
| }, | |
| { | |
| "epoch": 22.910505836575876, | |
| "grad_norm": 2.3639073371887207, | |
| "learning_rate": 1.3948708152534162e-05, | |
| "loss": 0.3286, | |
| "step": 17664 | |
| }, | |
| { | |
| "epoch": 22.941634241245136, | |
| "grad_norm": 1.4584684371948242, | |
| "learning_rate": 1.3832482889992138e-05, | |
| "loss": 0.3275, | |
| "step": 17688 | |
| }, | |
| { | |
| "epoch": 22.972762645914397, | |
| "grad_norm": 1.2135454416275024, | |
| "learning_rate": 1.3716666087562951e-05, | |
| "loss": 0.3331, | |
| "step": 17712 | |
| }, | |
| { | |
| "epoch": 23.003891050583658, | |
| "grad_norm": 1.1459728479385376, | |
| "learning_rate": 1.3601259053230924e-05, | |
| "loss": 0.3259, | |
| "step": 17736 | |
| }, | |
| { | |
| "epoch": 23.035019455252918, | |
| "grad_norm": 1.1459057331085205, | |
| "learning_rate": 1.3486263090352563e-05, | |
| "loss": 0.3229, | |
| "step": 17760 | |
| }, | |
| { | |
| "epoch": 23.06614785992218, | |
| "grad_norm": 1.3186362981796265, | |
| "learning_rate": 1.3371679497641997e-05, | |
| "loss": 0.3242, | |
| "step": 17784 | |
| }, | |
| { | |
| "epoch": 23.09727626459144, | |
| "grad_norm": 0.9882354736328125, | |
| "learning_rate": 1.3257509569156162e-05, | |
| "loss": 0.3263, | |
| "step": 17808 | |
| }, | |
| { | |
| "epoch": 23.1284046692607, | |
| "grad_norm": 1.146543264389038, | |
| "learning_rate": 1.3143754594280266e-05, | |
| "loss": 0.3239, | |
| "step": 17832 | |
| }, | |
| { | |
| "epoch": 23.15953307392996, | |
| "grad_norm": 1.5829049348831177, | |
| "learning_rate": 1.3030415857713246e-05, | |
| "loss": 0.3274, | |
| "step": 17856 | |
| }, | |
| { | |
| "epoch": 23.19066147859922, | |
| "grad_norm": 1.1690993309020996, | |
| "learning_rate": 1.2917494639453171e-05, | |
| "loss": 0.3266, | |
| "step": 17880 | |
| }, | |
| { | |
| "epoch": 23.22178988326848, | |
| "grad_norm": 2.0189902782440186, | |
| "learning_rate": 1.280499221478289e-05, | |
| "loss": 0.3277, | |
| "step": 17904 | |
| }, | |
| { | |
| "epoch": 23.252918287937742, | |
| "grad_norm": 2.8502254486083984, | |
| "learning_rate": 1.269290985425557e-05, | |
| "loss": 0.3309, | |
| "step": 17928 | |
| }, | |
| { | |
| "epoch": 23.284046692607003, | |
| "grad_norm": 1.144399881362915, | |
| "learning_rate": 1.2581248823680336e-05, | |
| "loss": 0.3302, | |
| "step": 17952 | |
| }, | |
| { | |
| "epoch": 23.315175097276263, | |
| "grad_norm": 1.0023480653762817, | |
| "learning_rate": 1.2470010384108012e-05, | |
| "loss": 0.3259, | |
| "step": 17976 | |
| }, | |
| { | |
| "epoch": 23.346303501945524, | |
| "grad_norm": 1.0780220031738281, | |
| "learning_rate": 1.2359195791816841e-05, | |
| "loss": 0.3274, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 23.377431906614785, | |
| "grad_norm": 1.4481017589569092, | |
| "learning_rate": 1.2248806298298372e-05, | |
| "loss": 0.3191, | |
| "step": 18024 | |
| }, | |
| { | |
| "epoch": 23.408560311284045, | |
| "grad_norm": 0.9282727837562561, | |
| "learning_rate": 1.2138843150243212e-05, | |
| "loss": 0.326, | |
| "step": 18048 | |
| }, | |
| { | |
| "epoch": 23.439688715953306, | |
| "grad_norm": 1.2329308986663818, | |
| "learning_rate": 1.2029307589527062e-05, | |
| "loss": 0.3245, | |
| "step": 18072 | |
| }, | |
| { | |
| "epoch": 23.470817120622566, | |
| "grad_norm": 1.535043478012085, | |
| "learning_rate": 1.1920200853196623e-05, | |
| "loss": 0.3273, | |
| "step": 18096 | |
| }, | |
| { | |
| "epoch": 23.50194552529183, | |
| "grad_norm": 1.5993396043777466, | |
| "learning_rate": 1.1811524173455618e-05, | |
| "loss": 0.3242, | |
| "step": 18120 | |
| }, | |
| { | |
| "epoch": 23.53307392996109, | |
| "grad_norm": 2.646594762802124, | |
| "learning_rate": 1.1703278777650929e-05, | |
| "loss": 0.3323, | |
| "step": 18144 | |
| }, | |
| { | |
| "epoch": 23.56420233463035, | |
| "grad_norm": 1.254061222076416, | |
| "learning_rate": 1.1595465888258661e-05, | |
| "loss": 0.3238, | |
| "step": 18168 | |
| }, | |
| { | |
| "epoch": 23.595330739299612, | |
| "grad_norm": 1.3275645971298218, | |
| "learning_rate": 1.1488086722870439e-05, | |
| "loss": 0.328, | |
| "step": 18192 | |
| }, | |
| { | |
| "epoch": 23.626459143968873, | |
| "grad_norm": 1.366665244102478, | |
| "learning_rate": 1.1381142494179586e-05, | |
| "loss": 0.3275, | |
| "step": 18216 | |
| }, | |
| { | |
| "epoch": 23.657587548638134, | |
| "grad_norm": 1.2128342390060425, | |
| "learning_rate": 1.1274634409967389e-05, | |
| "loss": 0.3247, | |
| "step": 18240 | |
| }, | |
| { | |
| "epoch": 23.688715953307394, | |
| "grad_norm": 1.168764591217041, | |
| "learning_rate": 1.1168563673089589e-05, | |
| "loss": 0.3239, | |
| "step": 18264 | |
| }, | |
| { | |
| "epoch": 23.719844357976655, | |
| "grad_norm": 1.2446372509002686, | |
| "learning_rate": 1.1062931481462647e-05, | |
| "loss": 0.32, | |
| "step": 18288 | |
| }, | |
| { | |
| "epoch": 23.750972762645915, | |
| "grad_norm": 1.4571527242660522, | |
| "learning_rate": 1.095773902805033e-05, | |
| "loss": 0.3272, | |
| "step": 18312 | |
| }, | |
| { | |
| "epoch": 23.782101167315176, | |
| "grad_norm": 1.1576392650604248, | |
| "learning_rate": 1.0852987500850148e-05, | |
| "loss": 0.3251, | |
| "step": 18336 | |
| }, | |
| { | |
| "epoch": 23.813229571984436, | |
| "grad_norm": 1.3691147565841675, | |
| "learning_rate": 1.0748678082880049e-05, | |
| "loss": 0.3253, | |
| "step": 18360 | |
| }, | |
| { | |
| "epoch": 23.844357976653697, | |
| "grad_norm": 1.859039068222046, | |
| "learning_rate": 1.0644811952164957e-05, | |
| "loss": 0.3293, | |
| "step": 18384 | |
| }, | |
| { | |
| "epoch": 23.875486381322958, | |
| "grad_norm": 1.2036535739898682, | |
| "learning_rate": 1.0541390281723478e-05, | |
| "loss": 0.3269, | |
| "step": 18408 | |
| }, | |
| { | |
| "epoch": 23.90661478599222, | |
| "grad_norm": 1.459100365638733, | |
| "learning_rate": 1.043841423955474e-05, | |
| "loss": 0.3276, | |
| "step": 18432 | |
| }, | |
| { | |
| "epoch": 23.93774319066148, | |
| "grad_norm": 1.2927861213684082, | |
| "learning_rate": 1.0335884988625084e-05, | |
| "loss": 0.3263, | |
| "step": 18456 | |
| }, | |
| { | |
| "epoch": 23.96887159533074, | |
| "grad_norm": 1.4151058197021484, | |
| "learning_rate": 1.0233803686855014e-05, | |
| "loss": 0.321, | |
| "step": 18480 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 1.434226393699646, | |
| "learning_rate": 1.0132171487106068e-05, | |
| "loss": 0.3202, | |
| "step": 18504 | |
| }, | |
| { | |
| "epoch": 24.03112840466926, | |
| "grad_norm": 1.2331753969192505, | |
| "learning_rate": 1.0030989537167857e-05, | |
| "loss": 0.3242, | |
| "step": 18528 | |
| }, | |
| { | |
| "epoch": 24.06225680933852, | |
| "grad_norm": 1.6305173635482788, | |
| "learning_rate": 9.930258979745055e-06, | |
| "loss": 0.3221, | |
| "step": 18552 | |
| }, | |
| { | |
| "epoch": 24.09338521400778, | |
| "grad_norm": 1.1515713930130005, | |
| "learning_rate": 9.82998095244449e-06, | |
| "loss": 0.3217, | |
| "step": 18576 | |
| }, | |
| { | |
| "epoch": 24.124513618677042, | |
| "grad_norm": 1.1086283922195435, | |
| "learning_rate": 9.730156587762335e-06, | |
| "loss": 0.3225, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 24.155642023346303, | |
| "grad_norm": 1.256364107131958, | |
| "learning_rate": 9.630787013071286e-06, | |
| "loss": 0.3218, | |
| "step": 18624 | |
| }, | |
| { | |
| "epoch": 24.186770428015564, | |
| "grad_norm": 1.2893520593643188, | |
| "learning_rate": 9.531873350607823e-06, | |
| "loss": 0.3285, | |
| "step": 18648 | |
| }, | |
| { | |
| "epoch": 24.217898832684824, | |
| "grad_norm": 1.1564453840255737, | |
| "learning_rate": 9.433416717459592e-06, | |
| "loss": 0.3234, | |
| "step": 18672 | |
| }, | |
| { | |
| "epoch": 24.249027237354085, | |
| "grad_norm": 1.6299091577529907, | |
| "learning_rate": 9.3354182255527e-06, | |
| "loss": 0.3237, | |
| "step": 18696 | |
| }, | |
| { | |
| "epoch": 24.280155642023345, | |
| "grad_norm": 0.9497871994972229, | |
| "learning_rate": 9.237878981639264e-06, | |
| "loss": 0.3226, | |
| "step": 18720 | |
| }, | |
| { | |
| "epoch": 24.311284046692606, | |
| "grad_norm": 1.3882777690887451, | |
| "learning_rate": 9.140800087284801e-06, | |
| "loss": 0.322, | |
| "step": 18744 | |
| }, | |
| { | |
| "epoch": 24.342412451361866, | |
| "grad_norm": 1.1506375074386597, | |
| "learning_rate": 9.044182638855891e-06, | |
| "loss": 0.3274, | |
| "step": 18768 | |
| }, | |
| { | |
| "epoch": 24.373540856031127, | |
| "grad_norm": 0.8968532681465149, | |
| "learning_rate": 8.948027727507708e-06, | |
| "loss": 0.319, | |
| "step": 18792 | |
| }, | |
| { | |
| "epoch": 24.404669260700388, | |
| "grad_norm": 1.5157815217971802, | |
| "learning_rate": 8.852336439171733e-06, | |
| "loss": 0.3254, | |
| "step": 18816 | |
| }, | |
| { | |
| "epoch": 24.43579766536965, | |
| "grad_norm": 0.9984537959098816, | |
| "learning_rate": 8.757109854543533e-06, | |
| "loss": 0.3244, | |
| "step": 18840 | |
| }, | |
| { | |
| "epoch": 24.46692607003891, | |
| "grad_norm": 1.8151588439941406, | |
| "learning_rate": 8.662349049070463e-06, | |
| "loss": 0.3198, | |
| "step": 18864 | |
| }, | |
| { | |
| "epoch": 24.49805447470817, | |
| "grad_norm": 1.1167311668395996, | |
| "learning_rate": 8.568055092939615e-06, | |
| "loss": 0.3179, | |
| "step": 18888 | |
| }, | |
| { | |
| "epoch": 24.529182879377434, | |
| "grad_norm": 1.3895347118377686, | |
| "learning_rate": 8.474229051065657e-06, | |
| "loss": 0.3211, | |
| "step": 18912 | |
| }, | |
| { | |
| "epoch": 24.560311284046694, | |
| "grad_norm": 1.2524361610412598, | |
| "learning_rate": 8.38087198307887e-06, | |
| "loss": 0.32, | |
| "step": 18936 | |
| }, | |
| { | |
| "epoch": 24.591439688715955, | |
| "grad_norm": 1.389087200164795, | |
| "learning_rate": 8.287984943313114e-06, | |
| "loss": 0.3251, | |
| "step": 18960 | |
| }, | |
| { | |
| "epoch": 24.622568093385215, | |
| "grad_norm": 1.6150294542312622, | |
| "learning_rate": 8.195568980793967e-06, | |
| "loss": 0.3275, | |
| "step": 18984 | |
| }, | |
| { | |
| "epoch": 24.653696498054476, | |
| "grad_norm": 1.6251153945922852, | |
| "learning_rate": 8.103625139226895e-06, | |
| "loss": 0.3225, | |
| "step": 19008 | |
| }, | |
| { | |
| "epoch": 24.684824902723737, | |
| "grad_norm": 1.5373034477233887, | |
| "learning_rate": 8.012154456985388e-06, | |
| "loss": 0.3253, | |
| "step": 19032 | |
| }, | |
| { | |
| "epoch": 24.715953307392997, | |
| "grad_norm": 0.9456262588500977, | |
| "learning_rate": 7.921157967099336e-06, | |
| "loss": 0.3151, | |
| "step": 19056 | |
| }, | |
| { | |
| "epoch": 24.747081712062258, | |
| "grad_norm": 0.9828768372535706, | |
| "learning_rate": 7.830636697243254e-06, | |
| "loss": 0.3252, | |
| "step": 19080 | |
| }, | |
| { | |
| "epoch": 24.77821011673152, | |
| "grad_norm": 1.8610461950302124, | |
| "learning_rate": 7.740591669724772e-06, | |
| "loss": 0.325, | |
| "step": 19104 | |
| }, | |
| { | |
| "epoch": 24.80933852140078, | |
| "grad_norm": 1.8049260377883911, | |
| "learning_rate": 7.651023901473032e-06, | |
| "loss": 0.3204, | |
| "step": 19128 | |
| }, | |
| { | |
| "epoch": 24.84046692607004, | |
| "grad_norm": 1.1601166725158691, | |
| "learning_rate": 7.561934404027193e-06, | |
| "loss": 0.3231, | |
| "step": 19152 | |
| }, | |
| { | |
| "epoch": 24.8715953307393, | |
| "grad_norm": 1.2389658689498901, | |
| "learning_rate": 7.473324183525088e-06, | |
| "loss": 0.329, | |
| "step": 19176 | |
| }, | |
| { | |
| "epoch": 24.90272373540856, | |
| "grad_norm": 1.0001511573791504, | |
| "learning_rate": 7.385194240691751e-06, | |
| "loss": 0.319, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 24.93385214007782, | |
| "grad_norm": 1.7757816314697266, | |
| "learning_rate": 7.297545570828207e-06, | |
| "loss": 0.3267, | |
| "step": 19224 | |
| }, | |
| { | |
| "epoch": 24.964980544747082, | |
| "grad_norm": 1.1014970541000366, | |
| "learning_rate": 7.210379163800185e-06, | |
| "loss": 0.3223, | |
| "step": 19248 | |
| }, | |
| { | |
| "epoch": 24.996108949416342, | |
| "grad_norm": 1.6188836097717285, | |
| "learning_rate": 7.123696004026947e-06, | |
| "loss": 0.3227, | |
| "step": 19272 | |
| }, | |
| { | |
| "epoch": 25.027237354085603, | |
| "grad_norm": 1.2841421365737915, | |
| "learning_rate": 7.037497070470167e-06, | |
| "loss": 0.32, | |
| "step": 19296 | |
| }, | |
| { | |
| "epoch": 25.058365758754864, | |
| "grad_norm": 1.2222139835357666, | |
| "learning_rate": 6.951783336622864e-06, | |
| "loss": 0.3217, | |
| "step": 19320 | |
| }, | |
| { | |
| "epoch": 25.089494163424124, | |
| "grad_norm": 1.0179907083511353, | |
| "learning_rate": 6.866555770498473e-06, | |
| "loss": 0.3182, | |
| "step": 19344 | |
| }, | |
| { | |
| "epoch": 25.120622568093385, | |
| "grad_norm": 0.9595916271209717, | |
| "learning_rate": 6.781815334619812e-06, | |
| "loss": 0.3195, | |
| "step": 19368 | |
| }, | |
| { | |
| "epoch": 25.151750972762645, | |
| "grad_norm": 1.2857320308685303, | |
| "learning_rate": 6.6975629860082935e-06, | |
| "loss": 0.3177, | |
| "step": 19392 | |
| }, | |
| { | |
| "epoch": 25.182879377431906, | |
| "grad_norm": 1.7358510494232178, | |
| "learning_rate": 6.613799676173088e-06, | |
| "loss": 0.3208, | |
| "step": 19416 | |
| }, | |
| { | |
| "epoch": 25.214007782101167, | |
| "grad_norm": 1.8369121551513672, | |
| "learning_rate": 6.530526351100347e-06, | |
| "loss": 0.3196, | |
| "step": 19440 | |
| }, | |
| { | |
| "epoch": 25.245136186770427, | |
| "grad_norm": 2.4744224548339844, | |
| "learning_rate": 6.447743951242591e-06, | |
| "loss": 0.3239, | |
| "step": 19464 | |
| }, | |
| { | |
| "epoch": 25.276264591439688, | |
| "grad_norm": 1.2925540208816528, | |
| "learning_rate": 6.3654534115079936e-06, | |
| "loss": 0.3157, | |
| "step": 19488 | |
| }, | |
| { | |
| "epoch": 25.30739299610895, | |
| "grad_norm": 1.1039607524871826, | |
| "learning_rate": 6.28365566124991e-06, | |
| "loss": 0.3229, | |
| "step": 19512 | |
| }, | |
| { | |
| "epoch": 25.33852140077821, | |
| "grad_norm": 0.8712733387947083, | |
| "learning_rate": 6.202351624256359e-06, | |
| "loss": 0.3181, | |
| "step": 19536 | |
| }, | |
| { | |
| "epoch": 25.36964980544747, | |
| "grad_norm": 1.236718773841858, | |
| "learning_rate": 6.1215422187395345e-06, | |
| "loss": 0.3172, | |
| "step": 19560 | |
| }, | |
| { | |
| "epoch": 25.40077821011673, | |
| "grad_norm": 1.4729557037353516, | |
| "learning_rate": 6.041228357325529e-06, | |
| "loss": 0.3244, | |
| "step": 19584 | |
| }, | |
| { | |
| "epoch": 25.43190661478599, | |
| "grad_norm": 1.1015067100524902, | |
| "learning_rate": 5.961410947043927e-06, | |
| "loss": 0.3227, | |
| "step": 19608 | |
| }, | |
| { | |
| "epoch": 25.46303501945525, | |
| "grad_norm": 1.4798215627670288, | |
| "learning_rate": 5.882090889317671e-06, | |
| "loss": 0.3208, | |
| "step": 19632 | |
| }, | |
| { | |
| "epoch": 25.494163424124515, | |
| "grad_norm": 1.9315009117126465, | |
| "learning_rate": 5.803269079952739e-06, | |
| "loss": 0.3158, | |
| "step": 19656 | |
| }, | |
| { | |
| "epoch": 25.525291828793776, | |
| "grad_norm": 1.1661323308944702, | |
| "learning_rate": 5.724946409128179e-06, | |
| "loss": 0.3194, | |
| "step": 19680 | |
| }, | |
| { | |
| "epoch": 25.556420233463037, | |
| "grad_norm": 1.796525239944458, | |
| "learning_rate": 5.647123761385975e-06, | |
| "loss": 0.3236, | |
| "step": 19704 | |
| }, | |
| { | |
| "epoch": 25.587548638132297, | |
| "grad_norm": 1.251969814300537, | |
| "learning_rate": 5.569802015621039e-06, | |
| "loss": 0.3228, | |
| "step": 19728 | |
| }, | |
| { | |
| "epoch": 25.618677042801558, | |
| "grad_norm": 1.9998018741607666, | |
| "learning_rate": 5.492982045071355e-06, | |
| "loss": 0.3248, | |
| "step": 19752 | |
| }, | |
| { | |
| "epoch": 25.64980544747082, | |
| "grad_norm": 1.0044583082199097, | |
| "learning_rate": 5.4166647173080345e-06, | |
| "loss": 0.3246, | |
| "step": 19776 | |
| }, | |
| { | |
| "epoch": 25.68093385214008, | |
| "grad_norm": 1.0275497436523438, | |
| "learning_rate": 5.340850894225607e-06, | |
| "loss": 0.3253, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 25.71206225680934, | |
| "grad_norm": 1.0156971216201782, | |
| "learning_rate": 5.265541432032212e-06, | |
| "loss": 0.3171, | |
| "step": 19824 | |
| }, | |
| { | |
| "epoch": 25.7431906614786, | |
| "grad_norm": 1.4596341848373413, | |
| "learning_rate": 5.190737181239941e-06, | |
| "loss": 0.3212, | |
| "step": 19848 | |
| }, | |
| { | |
| "epoch": 25.77431906614786, | |
| "grad_norm": 1.2357956171035767, | |
| "learning_rate": 5.116438986655303e-06, | |
| "loss": 0.3268, | |
| "step": 19872 | |
| }, | |
| { | |
| "epoch": 25.80544747081712, | |
| "grad_norm": 1.335877537727356, | |
| "learning_rate": 5.042647687369573e-06, | |
| "loss": 0.3218, | |
| "step": 19896 | |
| }, | |
| { | |
| "epoch": 25.836575875486382, | |
| "grad_norm": 1.5729907751083374, | |
| "learning_rate": 4.969364116749414e-06, | |
| "loss": 0.3205, | |
| "step": 19920 | |
| }, | |
| { | |
| "epoch": 25.867704280155642, | |
| "grad_norm": 1.5255457162857056, | |
| "learning_rate": 4.89658910242739e-06, | |
| "loss": 0.3165, | |
| "step": 19944 | |
| }, | |
| { | |
| "epoch": 25.898832684824903, | |
| "grad_norm": 1.195453405380249, | |
| "learning_rate": 4.8243234662926905e-06, | |
| "loss": 0.323, | |
| "step": 19968 | |
| }, | |
| { | |
| "epoch": 25.929961089494164, | |
| "grad_norm": 1.1830676794052124, | |
| "learning_rate": 4.75256802448178e-06, | |
| "loss": 0.3173, | |
| "step": 19992 | |
| }, | |
| { | |
| "epoch": 25.961089494163424, | |
| "grad_norm": 0.9383173584938049, | |
| "learning_rate": 4.681323587369213e-06, | |
| "loss": 0.3159, | |
| "step": 20016 | |
| }, | |
| { | |
| "epoch": 25.992217898832685, | |
| "grad_norm": 1.3204113245010376, | |
| "learning_rate": 4.610590959558497e-06, | |
| "loss": 0.3217, | |
| "step": 20040 | |
| }, | |
| { | |
| "epoch": 26.023346303501945, | |
| "grad_norm": 1.1940529346466064, | |
| "learning_rate": 4.540370939872974e-06, | |
| "loss": 0.3188, | |
| "step": 20064 | |
| }, | |
| { | |
| "epoch": 26.054474708171206, | |
| "grad_norm": 1.7250840663909912, | |
| "learning_rate": 4.470664321346829e-06, | |
| "loss": 0.3192, | |
| "step": 20088 | |
| }, | |
| { | |
| "epoch": 26.085603112840467, | |
| "grad_norm": 0.9612188339233398, | |
| "learning_rate": 4.401471891216114e-06, | |
| "loss": 0.3183, | |
| "step": 20112 | |
| }, | |
| { | |
| "epoch": 26.116731517509727, | |
| "grad_norm": 1.175308108329773, | |
| "learning_rate": 4.332794430909854e-06, | |
| "loss": 0.3162, | |
| "step": 20136 | |
| }, | |
| { | |
| "epoch": 26.147859922178988, | |
| "grad_norm": 1.3628140687942505, | |
| "learning_rate": 4.264632716041234e-06, | |
| "loss": 0.3173, | |
| "step": 20160 | |
| }, | |
| { | |
| "epoch": 26.17898832684825, | |
| "grad_norm": 0.9504318237304688, | |
| "learning_rate": 4.196987516398831e-06, | |
| "loss": 0.3259, | |
| "step": 20184 | |
| }, | |
| { | |
| "epoch": 26.21011673151751, | |
| "grad_norm": 1.6836086511611938, | |
| "learning_rate": 4.129859595937946e-06, | |
| "loss": 0.3188, | |
| "step": 20208 | |
| }, | |
| { | |
| "epoch": 26.24124513618677, | |
| "grad_norm": 1.2717008590698242, | |
| "learning_rate": 4.063249712771922e-06, | |
| "loss": 0.321, | |
| "step": 20232 | |
| }, | |
| { | |
| "epoch": 26.27237354085603, | |
| "grad_norm": 1.989966869354248, | |
| "learning_rate": 3.997158619163644e-06, | |
| "loss": 0.3215, | |
| "step": 20256 | |
| }, | |
| { | |
| "epoch": 26.30350194552529, | |
| "grad_norm": 1.1739614009857178, | |
| "learning_rate": 3.931587061517011e-06, | |
| "loss": 0.3193, | |
| "step": 20280 | |
| }, | |
| { | |
| "epoch": 26.33463035019455, | |
| "grad_norm": 1.1167713403701782, | |
| "learning_rate": 3.8665357803685025e-06, | |
| "loss": 0.3174, | |
| "step": 20304 | |
| }, | |
| { | |
| "epoch": 26.365758754863812, | |
| "grad_norm": 1.379565715789795, | |
| "learning_rate": 3.8020055103788144e-06, | |
| "loss": 0.3218, | |
| "step": 20328 | |
| }, | |
| { | |
| "epoch": 26.396887159533073, | |
| "grad_norm": 1.4840023517608643, | |
| "learning_rate": 3.7379969803245763e-06, | |
| "loss": 0.3213, | |
| "step": 20352 | |
| }, | |
| { | |
| "epoch": 26.428015564202333, | |
| "grad_norm": 1.1443723440170288, | |
| "learning_rate": 3.6745109130901288e-06, | |
| "loss": 0.3141, | |
| "step": 20376 | |
| }, | |
| { | |
| "epoch": 26.459143968871594, | |
| "grad_norm": 1.090888500213623, | |
| "learning_rate": 3.6115480256593394e-06, | |
| "loss": 0.3212, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 26.490272373540854, | |
| "grad_norm": 1.472679615020752, | |
| "learning_rate": 3.5491090291075004e-06, | |
| "loss": 0.3151, | |
| "step": 20424 | |
| }, | |
| { | |
| "epoch": 26.52140077821012, | |
| "grad_norm": 0.9774566292762756, | |
| "learning_rate": 3.487194628593332e-06, | |
| "loss": 0.3214, | |
| "step": 20448 | |
| }, | |
| { | |
| "epoch": 26.55252918287938, | |
| "grad_norm": 2.1687231063842773, | |
| "learning_rate": 3.4258055233509665e-06, | |
| "loss": 0.324, | |
| "step": 20472 | |
| }, | |
| { | |
| "epoch": 26.58365758754864, | |
| "grad_norm": 1.2352170944213867, | |
| "learning_rate": 3.364942406682109e-06, | |
| "loss": 0.3101, | |
| "step": 20496 | |
| }, | |
| { | |
| "epoch": 26.6147859922179, | |
| "grad_norm": 2.996083974838257, | |
| "learning_rate": 3.304605965948149e-06, | |
| "loss": 0.3141, | |
| "step": 20520 | |
| }, | |
| { | |
| "epoch": 26.64591439688716, | |
| "grad_norm": 1.5926743745803833, | |
| "learning_rate": 3.244796882562462e-06, | |
| "loss": 0.3229, | |
| "step": 20544 | |
| }, | |
| { | |
| "epoch": 26.67704280155642, | |
| "grad_norm": 1.1748905181884766, | |
| "learning_rate": 3.1855158319826774e-06, | |
| "loss": 0.3213, | |
| "step": 20568 | |
| }, | |
| { | |
| "epoch": 26.708171206225682, | |
| "grad_norm": 1.1093063354492188, | |
| "learning_rate": 3.126763483703016e-06, | |
| "loss": 0.3178, | |
| "step": 20592 | |
| }, | |
| { | |
| "epoch": 26.739299610894943, | |
| "grad_norm": 1.1090799570083618, | |
| "learning_rate": 3.0685405012468137e-06, | |
| "loss": 0.3198, | |
| "step": 20616 | |
| }, | |
| { | |
| "epoch": 26.770428015564203, | |
| "grad_norm": 1.0905050039291382, | |
| "learning_rate": 3.010847542158951e-06, | |
| "loss": 0.3192, | |
| "step": 20640 | |
| }, | |
| { | |
| "epoch": 26.801556420233464, | |
| "grad_norm": 1.8493279218673706, | |
| "learning_rate": 2.953685257998451e-06, | |
| "loss": 0.3204, | |
| "step": 20664 | |
| }, | |
| { | |
| "epoch": 26.832684824902724, | |
| "grad_norm": 1.2924058437347412, | |
| "learning_rate": 2.8970542943311583e-06, | |
| "loss": 0.3261, | |
| "step": 20688 | |
| }, | |
| { | |
| "epoch": 26.863813229571985, | |
| "grad_norm": 0.9771651029586792, | |
| "learning_rate": 2.8409552907223804e-06, | |
| "loss": 0.3132, | |
| "step": 20712 | |
| }, | |
| { | |
| "epoch": 26.894941634241246, | |
| "grad_norm": 1.0269138813018799, | |
| "learning_rate": 2.785388880729739e-06, | |
| "loss": 0.3199, | |
| "step": 20736 | |
| }, | |
| { | |
| "epoch": 26.926070038910506, | |
| "grad_norm": 1.309114933013916, | |
| "learning_rate": 2.7303556918959305e-06, | |
| "loss": 0.3145, | |
| "step": 20760 | |
| }, | |
| { | |
| "epoch": 26.957198443579767, | |
| "grad_norm": 1.0709702968597412, | |
| "learning_rate": 2.6758563457417286e-06, | |
| "loss": 0.3192, | |
| "step": 20784 | |
| }, | |
| { | |
| "epoch": 26.988326848249027, | |
| "grad_norm": 1.4049859046936035, | |
| "learning_rate": 2.621891457758896e-06, | |
| "loss": 0.3206, | |
| "step": 20808 | |
| }, | |
| { | |
| "epoch": 27.019455252918288, | |
| "grad_norm": 1.3224713802337646, | |
| "learning_rate": 2.568461637403252e-06, | |
| "loss": 0.312, | |
| "step": 20832 | |
| }, | |
| { | |
| "epoch": 27.05058365758755, | |
| "grad_norm": 1.3082164525985718, | |
| "learning_rate": 2.5155674880878334e-06, | |
| "loss": 0.3108, | |
| "step": 20856 | |
| }, | |
| { | |
| "epoch": 27.08171206225681, | |
| "grad_norm": 0.991944432258606, | |
| "learning_rate": 2.4632096071759925e-06, | |
| "loss": 0.3188, | |
| "step": 20880 | |
| }, | |
| { | |
| "epoch": 27.11284046692607, | |
| "grad_norm": 1.2203731536865234, | |
| "learning_rate": 2.4113885859747497e-06, | |
| "loss": 0.3108, | |
| "step": 20904 | |
| }, | |
| { | |
| "epoch": 27.14396887159533, | |
| "grad_norm": 1.203995704650879, | |
| "learning_rate": 2.360105009728025e-06, | |
| "loss": 0.3102, | |
| "step": 20928 | |
| }, | |
| { | |
| "epoch": 27.17509727626459, | |
| "grad_norm": 1.6264797449111938, | |
| "learning_rate": 2.3093594576101107e-06, | |
| "loss": 0.3174, | |
| "step": 20952 | |
| }, | |
| { | |
| "epoch": 27.20622568093385, | |
| "grad_norm": 1.3530755043029785, | |
| "learning_rate": 2.2591525027190473e-06, | |
| "loss": 0.3252, | |
| "step": 20976 | |
| }, | |
| { | |
| "epoch": 27.237354085603112, | |
| "grad_norm": 2.048307418823242, | |
| "learning_rate": 2.20948471207022e-06, | |
| "loss": 0.3184, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 27.268482490272373, | |
| "grad_norm": 1.320873737335205, | |
| "learning_rate": 2.160356646589934e-06, | |
| "loss": 0.3191, | |
| "step": 21024 | |
| }, | |
| { | |
| "epoch": 27.299610894941633, | |
| "grad_norm": 1.1831213235855103, | |
| "learning_rate": 2.111768861109048e-06, | |
| "loss": 0.3183, | |
| "step": 21048 | |
| }, | |
| { | |
| "epoch": 27.330739299610894, | |
| "grad_norm": 1.0811506509780884, | |
| "learning_rate": 2.0637219043567636e-06, | |
| "loss": 0.3177, | |
| "step": 21072 | |
| }, | |
| { | |
| "epoch": 27.361867704280154, | |
| "grad_norm": 1.1472513675689697, | |
| "learning_rate": 2.0162163189543838e-06, | |
| "loss": 0.3171, | |
| "step": 21096 | |
| }, | |
| { | |
| "epoch": 27.392996108949415, | |
| "grad_norm": 1.6906425952911377, | |
| "learning_rate": 1.9692526414092084e-06, | |
| "loss": 0.3223, | |
| "step": 21120 | |
| }, | |
| { | |
| "epoch": 27.424124513618676, | |
| "grad_norm": 1.600865364074707, | |
| "learning_rate": 1.9228314021084548e-06, | |
| "loss": 0.3151, | |
| "step": 21144 | |
| }, | |
| { | |
| "epoch": 27.455252918287936, | |
| "grad_norm": 1.7052664756774902, | |
| "learning_rate": 1.8769531253132854e-06, | |
| "loss": 0.3172, | |
| "step": 21168 | |
| }, | |
| { | |
| "epoch": 27.486381322957197, | |
| "grad_norm": 1.2754665613174438, | |
| "learning_rate": 1.83161832915289e-06, | |
| "loss": 0.3181, | |
| "step": 21192 | |
| }, | |
| { | |
| "epoch": 27.51750972762646, | |
| "grad_norm": 0.9670736193656921, | |
| "learning_rate": 1.7868275256186174e-06, | |
| "loss": 0.3209, | |
| "step": 21216 | |
| }, | |
| { | |
| "epoch": 27.54863813229572, | |
| "grad_norm": 1.7570668458938599, | |
| "learning_rate": 1.7425812205582147e-06, | |
| "loss": 0.3151, | |
| "step": 21240 | |
| }, | |
| { | |
| "epoch": 27.579766536964982, | |
| "grad_norm": 1.1468702554702759, | |
| "learning_rate": 1.6988799136700706e-06, | |
| "loss": 0.32, | |
| "step": 21264 | |
| }, | |
| { | |
| "epoch": 27.610894941634243, | |
| "grad_norm": 1.837241768836975, | |
| "learning_rate": 1.6557240984976408e-06, | |
| "loss": 0.3176, | |
| "step": 21288 | |
| }, | |
| { | |
| "epoch": 27.642023346303503, | |
| "grad_norm": 1.050024151802063, | |
| "learning_rate": 1.613114262423815e-06, | |
| "loss": 0.3169, | |
| "step": 21312 | |
| }, | |
| { | |
| "epoch": 27.673151750972764, | |
| "grad_norm": 1.0731110572814941, | |
| "learning_rate": 1.5710508866654261e-06, | |
| "loss": 0.3204, | |
| "step": 21336 | |
| }, | |
| { | |
| "epoch": 27.704280155642024, | |
| "grad_norm": 1.2539221048355103, | |
| "learning_rate": 1.5295344462678495e-06, | |
| "loss": 0.3168, | |
| "step": 21360 | |
| }, | |
| { | |
| "epoch": 27.735408560311285, | |
| "grad_norm": 1.4090372323989868, | |
| "learning_rate": 1.488565410099585e-06, | |
| "loss": 0.3164, | |
| "step": 21384 | |
| }, | |
| { | |
| "epoch": 27.766536964980546, | |
| "grad_norm": 1.5965330600738525, | |
| "learning_rate": 1.4481442408470047e-06, | |
| "loss": 0.3216, | |
| "step": 21408 | |
| }, | |
| { | |
| "epoch": 27.797665369649806, | |
| "grad_norm": 1.1138761043548584, | |
| "learning_rate": 1.4082713950091198e-06, | |
| "loss": 0.3206, | |
| "step": 21432 | |
| }, | |
| { | |
| "epoch": 27.828793774319067, | |
| "grad_norm": 1.1677641868591309, | |
| "learning_rate": 1.3689473228923944e-06, | |
| "loss": 0.3241, | |
| "step": 21456 | |
| }, | |
| { | |
| "epoch": 27.859922178988327, | |
| "grad_norm": 2.1310067176818848, | |
| "learning_rate": 1.3301724686056894e-06, | |
| "loss": 0.3187, | |
| "step": 21480 | |
| }, | |
| { | |
| "epoch": 27.891050583657588, | |
| "grad_norm": 1.3181018829345703, | |
| "learning_rate": 1.2919472700552382e-06, | |
| "loss": 0.3164, | |
| "step": 21504 | |
| }, | |
| { | |
| "epoch": 27.92217898832685, | |
| "grad_norm": 1.476120114326477, | |
| "learning_rate": 1.2542721589397234e-06, | |
| "loss": 0.3184, | |
| "step": 21528 | |
| }, | |
| { | |
| "epoch": 27.95330739299611, | |
| "grad_norm": 1.1621023416519165, | |
| "learning_rate": 1.217147560745352e-06, | |
| "loss": 0.319, | |
| "step": 21552 | |
| }, | |
| { | |
| "epoch": 27.98443579766537, | |
| "grad_norm": 1.1426842212677002, | |
| "learning_rate": 1.1805738947410938e-06, | |
| "loss": 0.3155, | |
| "step": 21576 | |
| }, | |
| { | |
| "epoch": 28.01556420233463, | |
| "grad_norm": 2.4093399047851562, | |
| "learning_rate": 1.1445515739739399e-06, | |
| "loss": 0.3135, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 28.04669260700389, | |
| "grad_norm": 1.5340672731399536, | |
| "learning_rate": 1.1090810052642064e-06, | |
| "loss": 0.3181, | |
| "step": 21624 | |
| }, | |
| { | |
| "epoch": 28.07782101167315, | |
| "grad_norm": 1.0847253799438477, | |
| "learning_rate": 1.0741625892009833e-06, | |
| "loss": 0.3165, | |
| "step": 21648 | |
| }, | |
| { | |
| "epoch": 28.108949416342412, | |
| "grad_norm": 1.3261409997940063, | |
| "learning_rate": 1.0397967201375814e-06, | |
| "loss": 0.3204, | |
| "step": 21672 | |
| }, | |
| { | |
| "epoch": 28.140077821011673, | |
| "grad_norm": 1.0757031440734863, | |
| "learning_rate": 1.0059837861870812e-06, | |
| "loss": 0.3187, | |
| "step": 21696 | |
| }, | |
| { | |
| "epoch": 28.171206225680933, | |
| "grad_norm": 1.2534974813461304, | |
| "learning_rate": 9.727241692179756e-07, | |
| "loss": 0.3096, | |
| "step": 21720 | |
| }, | |
| { | |
| "epoch": 28.202334630350194, | |
| "grad_norm": 1.2287142276763916, | |
| "learning_rate": 9.400182448498163e-07, | |
| "loss": 0.3169, | |
| "step": 21744 | |
| }, | |
| { | |
| "epoch": 28.233463035019454, | |
| "grad_norm": 0.9463332891464233, | |
| "learning_rate": 9.078663824490131e-07, | |
| "loss": 0.3185, | |
| "step": 21768 | |
| }, | |
| { | |
| "epoch": 28.264591439688715, | |
| "grad_norm": 2.7430317401885986, | |
| "learning_rate": 8.762689451246198e-07, | |
| "loss": 0.3178, | |
| "step": 21792 | |
| }, | |
| { | |
| "epoch": 28.295719844357976, | |
| "grad_norm": 1.1905908584594727, | |
| "learning_rate": 8.452262897242768e-07, | |
| "loss": 0.3197, | |
| "step": 21816 | |
| }, | |
| { | |
| "epoch": 28.326848249027236, | |
| "grad_norm": 0.894260823726654, | |
| "learning_rate": 8.147387668301421e-07, | |
| "loss": 0.3201, | |
| "step": 21840 | |
| }, | |
| { | |
| "epoch": 28.357976653696497, | |
| "grad_norm": 1.122759222984314, | |
| "learning_rate": 7.848067207549603e-07, | |
| "loss": 0.3102, | |
| "step": 21864 | |
| }, | |
| { | |
| "epoch": 28.389105058365757, | |
| "grad_norm": 1.454839825630188, | |
| "learning_rate": 7.554304895381781e-07, | |
| "loss": 0.3156, | |
| "step": 21888 | |
| }, | |
| { | |
| "epoch": 28.420233463035018, | |
| "grad_norm": 1.348819613456726, | |
| "learning_rate": 7.266104049420797e-07, | |
| "loss": 0.3173, | |
| "step": 21912 | |
| }, | |
| { | |
| "epoch": 28.45136186770428, | |
| "grad_norm": 1.397900104522705, | |
| "learning_rate": 6.983467924480957e-07, | |
| "loss": 0.3206, | |
| "step": 21936 | |
| }, | |
| { | |
| "epoch": 28.48249027237354, | |
| "grad_norm": 2.4935896396636963, | |
| "learning_rate": 6.706399712531009e-07, | |
| "loss": 0.3227, | |
| "step": 21960 | |
| }, | |
| { | |
| "epoch": 28.5136186770428, | |
| "grad_norm": 1.3364354372024536, | |
| "learning_rate": 6.434902542658106e-07, | |
| "loss": 0.3143, | |
| "step": 21984 | |
| }, | |
| { | |
| "epoch": 28.544747081712064, | |
| "grad_norm": 1.0415703058242798, | |
| "learning_rate": 6.168979481032455e-07, | |
| "loss": 0.3204, | |
| "step": 22008 | |
| }, | |
| { | |
| "epoch": 28.575875486381324, | |
| "grad_norm": 1.0268234014511108, | |
| "learning_rate": 5.908633530872732e-07, | |
| "loss": 0.3163, | |
| "step": 22032 | |
| }, | |
| { | |
| "epoch": 28.607003891050585, | |
| "grad_norm": 1.0088456869125366, | |
| "learning_rate": 5.653867632412269e-07, | |
| "loss": 0.3118, | |
| "step": 22056 | |
| }, | |
| { | |
| "epoch": 28.638132295719846, | |
| "grad_norm": 1.52815842628479, | |
| "learning_rate": 5.404684662865589e-07, | |
| "loss": 0.3166, | |
| "step": 22080 | |
| }, | |
| { | |
| "epoch": 28.669260700389106, | |
| "grad_norm": 1.0740587711334229, | |
| "learning_rate": 5.161087436396095e-07, | |
| "loss": 0.3157, | |
| "step": 22104 | |
| }, | |
| { | |
| "epoch": 28.700389105058367, | |
| "grad_norm": 1.263934850692749, | |
| "learning_rate": 4.923078704084372e-07, | |
| "loss": 0.3169, | |
| "step": 22128 | |
| }, | |
| { | |
| "epoch": 28.731517509727627, | |
| "grad_norm": 1.1837375164031982, | |
| "learning_rate": 4.690661153896825e-07, | |
| "loss": 0.3177, | |
| "step": 22152 | |
| }, | |
| { | |
| "epoch": 28.762645914396888, | |
| "grad_norm": 1.1407973766326904, | |
| "learning_rate": 4.463837410655536e-07, | |
| "loss": 0.3161, | |
| "step": 22176 | |
| }, | |
| { | |
| "epoch": 28.79377431906615, | |
| "grad_norm": 1.019492268562317, | |
| "learning_rate": 4.242610036008676e-07, | |
| "loss": 0.3135, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 28.82490272373541, | |
| "grad_norm": 1.7875498533248901, | |
| "learning_rate": 4.026981528401419e-07, | |
| "loss": 0.3213, | |
| "step": 22224 | |
| }, | |
| { | |
| "epoch": 28.85603112840467, | |
| "grad_norm": 0.9684593677520752, | |
| "learning_rate": 3.8169543230477387e-07, | |
| "loss": 0.3151, | |
| "step": 22248 | |
| }, | |
| { | |
| "epoch": 28.88715953307393, | |
| "grad_norm": 1.086421012878418, | |
| "learning_rate": 3.612530791903046e-07, | |
| "loss": 0.3172, | |
| "step": 22272 | |
| }, | |
| { | |
| "epoch": 28.91828793774319, | |
| "grad_norm": 1.9420697689056396, | |
| "learning_rate": 3.4137132436372064e-07, | |
| "loss": 0.3181, | |
| "step": 22296 | |
| }, | |
| { | |
| "epoch": 28.94941634241245, | |
| "grad_norm": 1.217786192893982, | |
| "learning_rate": 3.2205039236086197e-07, | |
| "loss": 0.3151, | |
| "step": 22320 | |
| }, | |
| { | |
| "epoch": 28.980544747081712, | |
| "grad_norm": 1.1275442838668823, | |
| "learning_rate": 3.0329050138388494e-07, | |
| "loss": 0.3193, | |
| "step": 22344 | |
| }, | |
| { | |
| "epoch": 29.011673151750973, | |
| "grad_norm": 0.9701781272888184, | |
| "learning_rate": 2.850918632987809e-07, | |
| "loss": 0.316, | |
| "step": 22368 | |
| }, | |
| { | |
| "epoch": 29.042801556420233, | |
| "grad_norm": 1.0859931707382202, | |
| "learning_rate": 2.674546836330172e-07, | |
| "loss": 0.3169, | |
| "step": 22392 | |
| }, | |
| { | |
| "epoch": 29.073929961089494, | |
| "grad_norm": 0.9976264834403992, | |
| "learning_rate": 2.503791615731721e-07, | |
| "loss": 0.3172, | |
| "step": 22416 | |
| }, | |
| { | |
| "epoch": 29.105058365758754, | |
| "grad_norm": 2.1112818717956543, | |
| "learning_rate": 2.3386548996272572e-07, | |
| "loss": 0.3202, | |
| "step": 22440 | |
| }, | |
| { | |
| "epoch": 29.136186770428015, | |
| "grad_norm": 1.3070718050003052, | |
| "learning_rate": 2.1791385529986163e-07, | |
| "loss": 0.3163, | |
| "step": 22464 | |
| }, | |
| { | |
| "epoch": 29.167315175097276, | |
| "grad_norm": 1.5637389421463013, | |
| "learning_rate": 2.02524437735363e-07, | |
| "loss": 0.3183, | |
| "step": 22488 | |
| }, | |
| { | |
| "epoch": 29.198443579766536, | |
| "grad_norm": 1.19569730758667, | |
| "learning_rate": 1.876974110705698e-07, | |
| "loss": 0.3176, | |
| "step": 22512 | |
| }, | |
| { | |
| "epoch": 29.229571984435797, | |
| "grad_norm": 2.7948904037475586, | |
| "learning_rate": 1.7343294275543599e-07, | |
| "loss": 0.3181, | |
| "step": 22536 | |
| }, | |
| { | |
| "epoch": 29.260700389105057, | |
| "grad_norm": 2.1853528022766113, | |
| "learning_rate": 1.597311938866308e-07, | |
| "loss": 0.3144, | |
| "step": 22560 | |
| }, | |
| { | |
| "epoch": 29.291828793774318, | |
| "grad_norm": 1.4694305658340454, | |
| "learning_rate": 1.4659231920571282e-07, | |
| "loss": 0.318, | |
| "step": 22584 | |
| }, | |
| { | |
| "epoch": 29.32295719844358, | |
| "grad_norm": 1.037607192993164, | |
| "learning_rate": 1.3401646709736983e-07, | |
| "loss": 0.3142, | |
| "step": 22608 | |
| }, | |
| { | |
| "epoch": 29.35408560311284, | |
| "grad_norm": 0.9353266358375549, | |
| "learning_rate": 1.2200377958778708e-07, | |
| "loss": 0.3133, | |
| "step": 22632 | |
| }, | |
| { | |
| "epoch": 29.3852140077821, | |
| "grad_norm": 1.4458966255187988, | |
| "learning_rate": 1.1055439234299858e-07, | |
| "loss": 0.3164, | |
| "step": 22656 | |
| }, | |
| { | |
| "epoch": 29.41634241245136, | |
| "grad_norm": 0.9110085368156433, | |
| "learning_rate": 9.966843466736597e-08, | |
| "loss": 0.3157, | |
| "step": 22680 | |
| }, | |
| { | |
| "epoch": 29.44747081712062, | |
| "grad_norm": 1.0257847309112549, | |
| "learning_rate": 8.934602950213533e-08, | |
| "loss": 0.319, | |
| "step": 22704 | |
| }, | |
| { | |
| "epoch": 29.47859922178988, | |
| "grad_norm": 1.2331140041351318, | |
| "learning_rate": 7.958729342403826e-08, | |
| "loss": 0.3177, | |
| "step": 22728 | |
| }, | |
| { | |
| "epoch": 29.509727626459146, | |
| "grad_norm": 2.199601650238037, | |
| "learning_rate": 7.039233664396516e-08, | |
| "loss": 0.3164, | |
| "step": 22752 | |
| }, | |
| { | |
| "epoch": 29.540856031128406, | |
| "grad_norm": 1.1412527561187744, | |
| "learning_rate": 6.176126300573848e-08, | |
| "loss": 0.3127, | |
| "step": 22776 | |
| }, | |
| { | |
| "epoch": 29.571984435797667, | |
| "grad_norm": 1.556688904762268, | |
| "learning_rate": 5.369416998492471e-08, | |
| "loss": 0.3181, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 29.603112840466927, | |
| "grad_norm": 1.2471084594726562, | |
| "learning_rate": 4.619114868774643e-08, | |
| "loss": 0.3152, | |
| "step": 22824 | |
| }, | |
| { | |
| "epoch": 29.634241245136188, | |
| "grad_norm": 1.3103766441345215, | |
| "learning_rate": 3.92522838500331e-08, | |
| "loss": 0.3171, | |
| "step": 22848 | |
| }, | |
| { | |
| "epoch": 29.66536964980545, | |
| "grad_norm": 1.0881154537200928, | |
| "learning_rate": 3.2877653836299594e-08, | |
| "loss": 0.3162, | |
| "step": 22872 | |
| }, | |
| { | |
| "epoch": 29.69649805447471, | |
| "grad_norm": 0.981332004070282, | |
| "learning_rate": 2.7067330638824718e-08, | |
| "loss": 0.3152, | |
| "step": 22896 | |
| }, | |
| { | |
| "epoch": 29.72762645914397, | |
| "grad_norm": 2.1748950481414795, | |
| "learning_rate": 2.1821379876851845e-08, | |
| "loss": 0.3138, | |
| "step": 22920 | |
| }, | |
| { | |
| "epoch": 29.75875486381323, | |
| "grad_norm": 1.0983901023864746, | |
| "learning_rate": 1.7139860795861717e-08, | |
| "loss": 0.3194, | |
| "step": 22944 | |
| }, | |
| { | |
| "epoch": 29.78988326848249, | |
| "grad_norm": 0.9180955290794373, | |
| "learning_rate": 1.3022826266873012e-08, | |
| "loss": 0.3155, | |
| "step": 22968 | |
| }, | |
| { | |
| "epoch": 29.82101167315175, | |
| "grad_norm": 4.426241397857666, | |
| "learning_rate": 9.470322785881668e-09, | |
| "loss": 0.3176, | |
| "step": 22992 | |
| }, | |
| { | |
| "epoch": 29.852140077821012, | |
| "grad_norm": 1.521730661392212, | |
| "learning_rate": 6.482390473294686e-09, | |
| "loss": 0.3179, | |
| "step": 23016 | |
| }, | |
| { | |
| "epoch": 29.883268482490273, | |
| "grad_norm": 1.1130119562149048, | |
| "learning_rate": 4.059063073524882e-09, | |
| "loss": 0.3199, | |
| "step": 23040 | |
| }, | |
| { | |
| "epoch": 29.914396887159533, | |
| "grad_norm": 1.0622695684432983, | |
| "learning_rate": 2.2003679545690158e-09, | |
| "loss": 0.3167, | |
| "step": 23064 | |
| }, | |
| { | |
| "epoch": 29.945525291828794, | |
| "grad_norm": 1.495850920677185, | |
| "learning_rate": 9.063261077080221e-10, | |
| "loss": 0.3201, | |
| "step": 23088 | |
| }, | |
| { | |
| "epoch": 29.976653696498055, | |
| "grad_norm": 1.2298061847686768, | |
| "learning_rate": 1.7695214729607224e-10, | |
| "loss": 0.3134, | |
| "step": 23112 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 23130, | |
| "total_flos": 9.11148472281858e+17, | |
| "train_loss": 0.3991138265909079, | |
| "train_runtime": 54856.7027, | |
| "train_samples_per_second": 107.912, | |
| "train_steps_per_second": 0.422 | |
| } | |
| ], | |
| "logging_steps": 24, | |
| "max_steps": 23130, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 1157, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9.11148472281858e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |