Instructions for using fals3/bigcode-starcoder2-15b-unit-test-lora with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.
- Libraries
- PEFT
How to use fals3/bigcode-starcoder2-15b-unit-test-lora with PEFT:

from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("bigcode/starcoder2-15b")
model = PeftModel.from_pretrained(base_model, "fals3/bigcode-starcoder2-15b-unit-test-lora")

- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9987642455032266, | |
| "eval_steps": 500, | |
| "global_step": 2730, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0010984484415762735, | |
| "grad_norm": 0.13173329830169678, | |
| "learning_rate": 1.0989010989010988e-06, | |
| "loss": 0.8751, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.002196896883152547, | |
| "grad_norm": 0.19401921331882477, | |
| "learning_rate": 2.1978021978021976e-06, | |
| "loss": 1.3488, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0032953453247288205, | |
| "grad_norm": 0.142131969332695, | |
| "learning_rate": 3.2967032967032968e-06, | |
| "loss": 0.8371, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.004393793766305094, | |
| "grad_norm": 0.1124999076128006, | |
| "learning_rate": 4.395604395604395e-06, | |
| "loss": 1.0039, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.005492242207881368, | |
| "grad_norm": 0.20683947205543518, | |
| "learning_rate": 5.494505494505494e-06, | |
| "loss": 1.4423, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.006590690649457641, | |
| "grad_norm": 0.2007640153169632, | |
| "learning_rate": 6.5934065934065935e-06, | |
| "loss": 0.9797, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.007689139091033915, | |
| "grad_norm": 0.1362670361995697, | |
| "learning_rate": 7.692307692307692e-06, | |
| "loss": 1.0443, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.008787587532610188, | |
| "grad_norm": 0.21512511372566223, | |
| "learning_rate": 8.79120879120879e-06, | |
| "loss": 1.2888, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.009886035974186462, | |
| "grad_norm": 0.13403186202049255, | |
| "learning_rate": 9.89010989010989e-06, | |
| "loss": 0.9637, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.010984484415762736, | |
| "grad_norm": 0.16911157965660095, | |
| "learning_rate": 1.0989010989010989e-05, | |
| "loss": 0.8824, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.012082932857339008, | |
| "grad_norm": 0.19280359148979187, | |
| "learning_rate": 1.2087912087912087e-05, | |
| "loss": 0.9843, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.013181381298915282, | |
| "grad_norm": 0.15720519423484802, | |
| "learning_rate": 1.3186813186813187e-05, | |
| "loss": 0.9769, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.014279829740491556, | |
| "grad_norm": 0.18622402846813202, | |
| "learning_rate": 1.4285714285714284e-05, | |
| "loss": 0.903, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.01537827818206783, | |
| "grad_norm": 0.1491895169019699, | |
| "learning_rate": 1.5384615384615384e-05, | |
| "loss": 1.065, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.016476726623644102, | |
| "grad_norm": 0.16883142292499542, | |
| "learning_rate": 1.6483516483516482e-05, | |
| "loss": 0.9916, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.017575175065220376, | |
| "grad_norm": 0.155453160405159, | |
| "learning_rate": 1.758241758241758e-05, | |
| "loss": 1.1048, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.01867362350679665, | |
| "grad_norm": 0.12869666516780853, | |
| "learning_rate": 1.868131868131868e-05, | |
| "loss": 0.9355, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.019772071948372924, | |
| "grad_norm": 0.18860433995723724, | |
| "learning_rate": 1.978021978021978e-05, | |
| "loss": 1.1779, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.020870520389949198, | |
| "grad_norm": 0.30738529562950134, | |
| "learning_rate": 2.087912087912088e-05, | |
| "loss": 0.905, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.021968968831525472, | |
| "grad_norm": 0.30248674750328064, | |
| "learning_rate": 2.1978021978021977e-05, | |
| "loss": 1.0749, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.023067417273101742, | |
| "grad_norm": 0.17005079984664917, | |
| "learning_rate": 2.3076923076923076e-05, | |
| "loss": 1.0141, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.024165865714678016, | |
| "grad_norm": 0.5497377514839172, | |
| "learning_rate": 2.4175824175824174e-05, | |
| "loss": 0.804, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.02526431415625429, | |
| "grad_norm": 0.23464925587177277, | |
| "learning_rate": 2.5274725274725276e-05, | |
| "loss": 1.0592, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.026362762597830564, | |
| "grad_norm": 0.2906591594219208, | |
| "learning_rate": 2.6373626373626374e-05, | |
| "loss": 1.4096, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.027461211039406838, | |
| "grad_norm": 0.14552968740463257, | |
| "learning_rate": 2.747252747252747e-05, | |
| "loss": 0.8827, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.028559659480983112, | |
| "grad_norm": 0.26139914989471436, | |
| "learning_rate": 2.8571428571428567e-05, | |
| "loss": 1.1081, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.029658107922559386, | |
| "grad_norm": 0.16122505068778992, | |
| "learning_rate": 2.9670329670329666e-05, | |
| "loss": 0.8967, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.03075655636413566, | |
| "grad_norm": 0.19174647331237793, | |
| "learning_rate": 3.076923076923077e-05, | |
| "loss": 0.7527, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.031855004805711934, | |
| "grad_norm": 0.24506032466888428, | |
| "learning_rate": 3.1868131868131866e-05, | |
| "loss": 1.0981, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.032953453247288204, | |
| "grad_norm": 0.18928349018096924, | |
| "learning_rate": 3.2967032967032964e-05, | |
| "loss": 1.2955, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03405190168886448, | |
| "grad_norm": 0.20482106506824493, | |
| "learning_rate": 3.406593406593406e-05, | |
| "loss": 0.886, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.03515035013044075, | |
| "grad_norm": 0.17304010689258575, | |
| "learning_rate": 3.516483516483516e-05, | |
| "loss": 1.0062, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.03624879857201702, | |
| "grad_norm": 0.17006444931030273, | |
| "learning_rate": 3.626373626373626e-05, | |
| "loss": 0.76, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0373472470135933, | |
| "grad_norm": 0.16570955514907837, | |
| "learning_rate": 3.736263736263736e-05, | |
| "loss": 0.7512, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.03844569545516957, | |
| "grad_norm": 0.4470347464084625, | |
| "learning_rate": 3.8461538461538456e-05, | |
| "loss": 1.051, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.03954414389674585, | |
| "grad_norm": 0.3013080060482025, | |
| "learning_rate": 3.956043956043956e-05, | |
| "loss": 1.1269, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.04064259233832212, | |
| "grad_norm": 0.33114469051361084, | |
| "learning_rate": 4.065934065934065e-05, | |
| "loss": 1.046, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.041741040779898396, | |
| "grad_norm": 0.3496829867362976, | |
| "learning_rate": 4.175824175824176e-05, | |
| "loss": 0.9139, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.042839489221474666, | |
| "grad_norm": 0.36173877120018005, | |
| "learning_rate": 4.285714285714285e-05, | |
| "loss": 1.16, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.043937937663050944, | |
| "grad_norm": 0.23047995567321777, | |
| "learning_rate": 4.3956043956043955e-05, | |
| "loss": 0.8623, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.045036386104627214, | |
| "grad_norm": 0.33733946084976196, | |
| "learning_rate": 4.5054945054945046e-05, | |
| "loss": 0.873, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.046134834546203485, | |
| "grad_norm": 0.43975624442100525, | |
| "learning_rate": 4.615384615384615e-05, | |
| "loss": 0.9374, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.04723328298777976, | |
| "grad_norm": 0.5429202318191528, | |
| "learning_rate": 4.725274725274725e-05, | |
| "loss": 1.0699, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.04833173142935603, | |
| "grad_norm": 0.39317595958709717, | |
| "learning_rate": 4.835164835164835e-05, | |
| "loss": 0.7719, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.04943017987093231, | |
| "grad_norm": 0.41328710317611694, | |
| "learning_rate": 4.9450549450549446e-05, | |
| "loss": 1.112, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.05052862831250858, | |
| "grad_norm": 0.5977774858474731, | |
| "learning_rate": 5.054945054945055e-05, | |
| "loss": 0.9408, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.05162707675408486, | |
| "grad_norm": 0.6984797716140747, | |
| "learning_rate": 5.164835164835164e-05, | |
| "loss": 0.9766, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.05272552519566113, | |
| "grad_norm": 0.5161548256874084, | |
| "learning_rate": 5.274725274725275e-05, | |
| "loss": 1.3705, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0538239736372374, | |
| "grad_norm": 0.5750108361244202, | |
| "learning_rate": 5.384615384615384e-05, | |
| "loss": 0.9492, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.054922422078813676, | |
| "grad_norm": 0.7861920595169067, | |
| "learning_rate": 5.494505494505494e-05, | |
| "loss": 1.1495, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05602087052038995, | |
| "grad_norm": 0.5992287993431091, | |
| "learning_rate": 5.6043956043956037e-05, | |
| "loss": 1.2818, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.057119318961966224, | |
| "grad_norm": 0.5470016598701477, | |
| "learning_rate": 5.7142857142857135e-05, | |
| "loss": 1.0385, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.058217767403542495, | |
| "grad_norm": 0.7035269141197205, | |
| "learning_rate": 5.824175824175824e-05, | |
| "loss": 0.785, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.05931621584511877, | |
| "grad_norm": 0.5253639817237854, | |
| "learning_rate": 5.934065934065933e-05, | |
| "loss": 0.6092, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06041466428669504, | |
| "grad_norm": 0.5233064293861389, | |
| "learning_rate": 6.043956043956044e-05, | |
| "loss": 0.7853, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06151311272827132, | |
| "grad_norm": 0.4508589804172516, | |
| "learning_rate": 6.153846153846154e-05, | |
| "loss": 0.5737, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.06261156116984759, | |
| "grad_norm": 1.0521594285964966, | |
| "learning_rate": 6.263736263736263e-05, | |
| "loss": 1.0132, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.06371000961142387, | |
| "grad_norm": 0.3572557866573334, | |
| "learning_rate": 6.373626373626373e-05, | |
| "loss": 0.655, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.06480845805300013, | |
| "grad_norm": 0.600371241569519, | |
| "learning_rate": 6.483516483516483e-05, | |
| "loss": 0.8897, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.06590690649457641, | |
| "grad_norm": 0.6430579423904419, | |
| "learning_rate": 6.593406593406593e-05, | |
| "loss": 0.8058, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06700535493615269, | |
| "grad_norm": 0.5309410095214844, | |
| "learning_rate": 6.703296703296703e-05, | |
| "loss": 0.7312, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.06810380337772896, | |
| "grad_norm": 0.46225860714912415, | |
| "learning_rate": 6.813186813186813e-05, | |
| "loss": 0.8607, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.06920225181930523, | |
| "grad_norm": 0.8889493346214294, | |
| "learning_rate": 6.923076923076922e-05, | |
| "loss": 0.7791, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.0703007002608815, | |
| "grad_norm": 0.5721575617790222, | |
| "learning_rate": 7.032967032967032e-05, | |
| "loss": 0.9426, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.07139914870245778, | |
| "grad_norm": 0.8355056047439575, | |
| "learning_rate": 7.142857142857142e-05, | |
| "loss": 0.621, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.07249759714403405, | |
| "grad_norm": 1.3048707246780396, | |
| "learning_rate": 7.252747252747252e-05, | |
| "loss": 0.8869, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.07359604558561032, | |
| "grad_norm": 0.5817797183990479, | |
| "learning_rate": 7.362637362637362e-05, | |
| "loss": 0.8385, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.0746944940271866, | |
| "grad_norm": 1.2051454782485962, | |
| "learning_rate": 7.472527472527472e-05, | |
| "loss": 0.7566, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.07579294246876288, | |
| "grad_norm": 0.8565987944602966, | |
| "learning_rate": 7.582417582417581e-05, | |
| "loss": 0.8374, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.07689139091033914, | |
| "grad_norm": 0.7503894567489624, | |
| "learning_rate": 7.692307692307691e-05, | |
| "loss": 0.6749, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07798983935191542, | |
| "grad_norm": 0.6298589706420898, | |
| "learning_rate": 7.802197802197802e-05, | |
| "loss": 0.9096, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0790882877934917, | |
| "grad_norm": 0.8327789306640625, | |
| "learning_rate": 7.912087912087912e-05, | |
| "loss": 0.9836, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.08018673623506796, | |
| "grad_norm": 1.0001461505889893, | |
| "learning_rate": 8.021978021978021e-05, | |
| "loss": 0.6917, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.08128518467664424, | |
| "grad_norm": 0.8373435735702515, | |
| "learning_rate": 8.13186813186813e-05, | |
| "loss": 0.7703, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.08238363311822051, | |
| "grad_norm": 0.9785758256912231, | |
| "learning_rate": 8.241758241758242e-05, | |
| "loss": 0.8004, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.08348208155979679, | |
| "grad_norm": 0.8900540471076965, | |
| "learning_rate": 8.351648351648352e-05, | |
| "loss": 0.8238, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.08458053000137306, | |
| "grad_norm": 0.7411159873008728, | |
| "learning_rate": 8.46153846153846e-05, | |
| "loss": 1.0364, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.08567897844294933, | |
| "grad_norm": 0.4975040555000305, | |
| "learning_rate": 8.57142857142857e-05, | |
| "loss": 0.4814, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.08677742688452561, | |
| "grad_norm": 0.6698398590087891, | |
| "learning_rate": 8.681318681318681e-05, | |
| "loss": 0.6828, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.08787587532610189, | |
| "grad_norm": 0.5883696675300598, | |
| "learning_rate": 8.791208791208791e-05, | |
| "loss": 0.92, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08897432376767815, | |
| "grad_norm": 0.9050906896591187, | |
| "learning_rate": 8.901098901098901e-05, | |
| "loss": 0.7229, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.09007277220925443, | |
| "grad_norm": 0.5996706485748291, | |
| "learning_rate": 9.010989010989009e-05, | |
| "loss": 0.699, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.0911712206508307, | |
| "grad_norm": 2.0782630443573, | |
| "learning_rate": 9.120879120879119e-05, | |
| "loss": 1.2118, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.09226966909240697, | |
| "grad_norm": 0.759730875492096, | |
| "learning_rate": 9.23076923076923e-05, | |
| "loss": 0.6397, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.09336811753398325, | |
| "grad_norm": 1.1138097047805786, | |
| "learning_rate": 9.34065934065934e-05, | |
| "loss": 0.8973, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.09446656597555952, | |
| "grad_norm": 0.9852680563926697, | |
| "learning_rate": 9.45054945054945e-05, | |
| "loss": 1.0733, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.0955650144171358, | |
| "grad_norm": 0.8435002565383911, | |
| "learning_rate": 9.560439560439558e-05, | |
| "loss": 0.8977, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.09666346285871207, | |
| "grad_norm": 1.3031998872756958, | |
| "learning_rate": 9.67032967032967e-05, | |
| "loss": 0.9852, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.09776191130028834, | |
| "grad_norm": 0.6343463063240051, | |
| "learning_rate": 9.78021978021978e-05, | |
| "loss": 0.6147, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.09886035974186462, | |
| "grad_norm": 0.7061794996261597, | |
| "learning_rate": 9.890109890109889e-05, | |
| "loss": 0.7437, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.09995880818344088, | |
| "grad_norm": 1.2231422662734985, | |
| "learning_rate": 9.999999999999999e-05, | |
| "loss": 0.7944, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.10105725662501716, | |
| "grad_norm": 0.7199704647064209, | |
| "learning_rate": 0.0001010989010989011, | |
| "loss": 0.7355, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.10215570506659344, | |
| "grad_norm": 1.2740516662597656, | |
| "learning_rate": 0.00010219780219780219, | |
| "loss": 0.7622, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.10325415350816972, | |
| "grad_norm": 0.7762659788131714, | |
| "learning_rate": 0.00010329670329670329, | |
| "loss": 0.7074, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.10435260194974598, | |
| "grad_norm": 0.6618936061859131, | |
| "learning_rate": 0.00010439560439560438, | |
| "loss": 0.7667, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.10545105039132226, | |
| "grad_norm": 0.7244533896446228, | |
| "learning_rate": 0.0001054945054945055, | |
| "loss": 0.6451, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.10654949883289853, | |
| "grad_norm": 0.6391953229904175, | |
| "learning_rate": 0.0001065934065934066, | |
| "loss": 0.5637, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.1076479472744748, | |
| "grad_norm": 0.6992442607879639, | |
| "learning_rate": 0.00010769230769230768, | |
| "loss": 0.7112, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.10874639571605108, | |
| "grad_norm": 1.0820791721343994, | |
| "learning_rate": 0.00010879120879120878, | |
| "loss": 0.9199, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.10984484415762735, | |
| "grad_norm": 0.6012185215950012, | |
| "learning_rate": 0.00010989010989010988, | |
| "loss": 0.5574, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11094329259920363, | |
| "grad_norm": 0.822455644607544, | |
| "learning_rate": 0.00011098901098901099, | |
| "loss": 0.5185, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.1120417410407799, | |
| "grad_norm": 0.9417555332183838, | |
| "learning_rate": 0.00011208791208791207, | |
| "loss": 0.6883, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.11314018948235617, | |
| "grad_norm": 1.0258208513259888, | |
| "learning_rate": 0.00011318681318681317, | |
| "loss": 0.7588, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.11423863792393245, | |
| "grad_norm": 1.904179573059082, | |
| "learning_rate": 0.00011428571428571427, | |
| "loss": 0.7425, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.11533708636550873, | |
| "grad_norm": 1.5453238487243652, | |
| "learning_rate": 0.00011538461538461538, | |
| "loss": 0.658, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.11643553480708499, | |
| "grad_norm": 0.8801619410514832, | |
| "learning_rate": 0.00011648351648351648, | |
| "loss": 0.8432, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.11753398324866127, | |
| "grad_norm": 0.8567579388618469, | |
| "learning_rate": 0.00011758241758241756, | |
| "loss": 0.5904, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.11863243169023754, | |
| "grad_norm": 0.9351131319999695, | |
| "learning_rate": 0.00011868131868131866, | |
| "loss": 0.7228, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.11973088013181381, | |
| "grad_norm": 0.8817545175552368, | |
| "learning_rate": 0.00011978021978021978, | |
| "loss": 0.7853, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.12082932857339009, | |
| "grad_norm": 1.0484094619750977, | |
| "learning_rate": 0.00012087912087912087, | |
| "loss": 0.7049, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12192777701496636, | |
| "grad_norm": 1.80658757686615, | |
| "learning_rate": 0.00012197802197802197, | |
| "loss": 0.669, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.12302622545654264, | |
| "grad_norm": 1.5311473608016968, | |
| "learning_rate": 0.00012307692307692307, | |
| "loss": 0.8342, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.1241246738981189, | |
| "grad_norm": 0.8968105912208557, | |
| "learning_rate": 0.00012417582417582416, | |
| "loss": 0.7199, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.12522312233969518, | |
| "grad_norm": 0.6149659156799316, | |
| "learning_rate": 0.00012527472527472527, | |
| "loss": 0.4961, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.12632157078127146, | |
| "grad_norm": 8.04592227935791, | |
| "learning_rate": 0.00012637362637362635, | |
| "loss": 0.7515, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.12742001922284774, | |
| "grad_norm": 0.7797659039497375, | |
| "learning_rate": 0.00012747252747252746, | |
| "loss": 0.7281, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.128518467664424, | |
| "grad_norm": 0.6414046883583069, | |
| "learning_rate": 0.00012857142857142855, | |
| "loss": 0.6655, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.12961691610600026, | |
| "grad_norm": 4.678529262542725, | |
| "learning_rate": 0.00012967032967032966, | |
| "loss": 0.9165, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.13071536454757654, | |
| "grad_norm": 0.8540724515914917, | |
| "learning_rate": 0.00013076923076923077, | |
| "loss": 0.7064, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.13181381298915282, | |
| "grad_norm": 1.057844638824463, | |
| "learning_rate": 0.00013186813186813186, | |
| "loss": 0.6617, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1329122614307291, | |
| "grad_norm": 0.8429140448570251, | |
| "learning_rate": 0.00013296703296703294, | |
| "loss": 0.8156, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.13401070987230537, | |
| "grad_norm": 0.9944230914115906, | |
| "learning_rate": 0.00013406593406593405, | |
| "loss": 0.5851, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.13510915831388165, | |
| "grad_norm": 0.6582810878753662, | |
| "learning_rate": 0.00013516483516483517, | |
| "loss": 0.5819, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.13620760675545793, | |
| "grad_norm": 1.3106951713562012, | |
| "learning_rate": 0.00013626373626373625, | |
| "loss": 0.7598, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.13730605519703418, | |
| "grad_norm": 1.0464080572128296, | |
| "learning_rate": 0.00013736263736263734, | |
| "loss": 0.7241, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.13840450363861045, | |
| "grad_norm": 0.8519262075424194, | |
| "learning_rate": 0.00013846153846153845, | |
| "loss": 0.7001, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.13950295208018673, | |
| "grad_norm": 1.2764228582382202, | |
| "learning_rate": 0.00013956043956043956, | |
| "loss": 0.7152, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.140601400521763, | |
| "grad_norm": 1.157472014427185, | |
| "learning_rate": 0.00014065934065934064, | |
| "loss": 0.697, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.1416998489633393, | |
| "grad_norm": 0.7153847813606262, | |
| "learning_rate": 0.00014175824175824173, | |
| "loss": 0.6897, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.14279829740491556, | |
| "grad_norm": 0.7254152297973633, | |
| "learning_rate": 0.00014285714285714284, | |
| "loss": 0.5263, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.14389674584649184, | |
| "grad_norm": 1.3370522260665894, | |
| "learning_rate": 0.00014395604395604395, | |
| "loss": 0.7587, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.1449951942880681, | |
| "grad_norm": 1.092029333114624, | |
| "learning_rate": 0.00014505494505494504, | |
| "loss": 0.8674, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.14609364272964437, | |
| "grad_norm": 0.6123655438423157, | |
| "learning_rate": 0.00014615384615384615, | |
| "loss": 0.7163, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.14719209117122065, | |
| "grad_norm": 0.8476639986038208, | |
| "learning_rate": 0.00014725274725274723, | |
| "loss": 0.7241, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.14829053961279692, | |
| "grad_norm": 0.9986979961395264, | |
| "learning_rate": 0.00014835164835164835, | |
| "loss": 0.6229, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.1493889880543732, | |
| "grad_norm": 0.8208728432655334, | |
| "learning_rate": 0.00014945054945054943, | |
| "loss": 0.5441, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.15048743649594948, | |
| "grad_norm": 0.742091953754425, | |
| "learning_rate": 0.00015054945054945054, | |
| "loss": 0.6047, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.15158588493752576, | |
| "grad_norm": 1.6566306352615356, | |
| "learning_rate": 0.00015164835164835163, | |
| "loss": 0.6381, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.152684333379102, | |
| "grad_norm": 0.7735741138458252, | |
| "learning_rate": 0.0001527472527472527, | |
| "loss": 0.5842, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.15378278182067828, | |
| "grad_norm": 0.7116795778274536, | |
| "learning_rate": 0.00015384615384615382, | |
| "loss": 0.7117, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.15488123026225456, | |
| "grad_norm": 0.6912885904312134, | |
| "learning_rate": 0.00015494505494505494, | |
| "loss": 0.763, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.15597967870383084, | |
| "grad_norm": 1.0789505243301392, | |
| "learning_rate": 0.00015604395604395605, | |
| "loss": 0.5534, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.15707812714540711, | |
| "grad_norm": 1.0304033756256104, | |
| "learning_rate": 0.00015714285714285713, | |
| "loss": 0.4961, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.1581765755869834, | |
| "grad_norm": 1.0216940641403198, | |
| "learning_rate": 0.00015824175824175824, | |
| "loss": 0.8167, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.15927502402855967, | |
| "grad_norm": 0.7767283916473389, | |
| "learning_rate": 0.00015934065934065933, | |
| "loss": 0.649, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.16037347247013592, | |
| "grad_norm": 0.6125204563140869, | |
| "learning_rate": 0.00016043956043956041, | |
| "loss": 0.6596, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.1614719209117122, | |
| "grad_norm": 2.113314390182495, | |
| "learning_rate": 0.00016153846153846153, | |
| "loss": 0.6825, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.16257036935328847, | |
| "grad_norm": 1.3892889022827148, | |
| "learning_rate": 0.0001626373626373626, | |
| "loss": 0.5162, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.16366881779486475, | |
| "grad_norm": 1.2544710636138916, | |
| "learning_rate": 0.0001637362637362637, | |
| "loss": 0.5992, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.16476726623644103, | |
| "grad_norm": 1.2952786684036255, | |
| "learning_rate": 0.00016483516483516484, | |
| "loss": 0.5968, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.1658657146780173, | |
| "grad_norm": 0.9910382628440857, | |
| "learning_rate": 0.00016593406593406592, | |
| "loss": 0.6138, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.16696416311959358, | |
| "grad_norm": 0.7291635870933533, | |
| "learning_rate": 0.00016703296703296703, | |
| "loss": 0.8957, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.16806261156116986, | |
| "grad_norm": 0.7290105819702148, | |
| "learning_rate": 0.00016813186813186812, | |
| "loss": 0.4864, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.1691610600027461, | |
| "grad_norm": 1.1888444423675537, | |
| "learning_rate": 0.0001692307692307692, | |
| "loss": 0.913, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.1702595084443224, | |
| "grad_norm": 0.8183659315109253, | |
| "learning_rate": 0.0001703296703296703, | |
| "loss": 0.6405, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.17135795688589867, | |
| "grad_norm": 0.8549530506134033, | |
| "learning_rate": 0.0001714285714285714, | |
| "loss": 0.7019, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.17245640532747494, | |
| "grad_norm": 0.5960697531700134, | |
| "learning_rate": 0.0001725274725274725, | |
| "loss": 0.6728, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.17355485376905122, | |
| "grad_norm": 0.6802973747253418, | |
| "learning_rate": 0.00017362637362637362, | |
| "loss": 0.6462, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.1746533022106275, | |
| "grad_norm": 0.5056049823760986, | |
| "learning_rate": 0.00017472527472527473, | |
| "loss": 0.5155, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.17575175065220378, | |
| "grad_norm": 0.8181887865066528, | |
| "learning_rate": 0.00017582417582417582, | |
| "loss": 0.6631, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.17685019909378003, | |
| "grad_norm": 0.5748574137687683, | |
| "learning_rate": 0.0001769230769230769, | |
| "loss": 0.5807, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.1779486475353563, | |
| "grad_norm": 0.8585043549537659, | |
| "learning_rate": 0.00017802197802197802, | |
| "loss": 0.5412, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.17904709597693258, | |
| "grad_norm": 0.8763203620910645, | |
| "learning_rate": 0.0001791208791208791, | |
| "loss": 1.0859, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.18014554441850886, | |
| "grad_norm": 0.7327267527580261, | |
| "learning_rate": 0.00018021978021978018, | |
| "loss": 0.8034, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.18124399286008513, | |
| "grad_norm": 0.6813991665840149, | |
| "learning_rate": 0.0001813186813186813, | |
| "loss": 0.9236, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.1823424413016614, | |
| "grad_norm": 2.9234185218811035, | |
| "learning_rate": 0.00018241758241758238, | |
| "loss": 0.9148, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.1834408897432377, | |
| "grad_norm": 0.8117207884788513, | |
| "learning_rate": 0.00018351648351648352, | |
| "loss": 1.0514, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.18453933818481394, | |
| "grad_norm": 0.6485300064086914, | |
| "learning_rate": 0.0001846153846153846, | |
| "loss": 0.4764, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.18563778662639022, | |
| "grad_norm": 0.43059054017066956, | |
| "learning_rate": 0.00018571428571428572, | |
| "loss": 0.6289, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.1867362350679665, | |
| "grad_norm": 1.007095456123352, | |
| "learning_rate": 0.0001868131868131868, | |
| "loss": 0.5889, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.18783468350954277, | |
| "grad_norm": 1.6733218431472778, | |
| "learning_rate": 0.0001879120879120879, | |
| "loss": 0.8036, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.18893313195111905, | |
| "grad_norm": 0.7533760666847229, | |
| "learning_rate": 0.000189010989010989, | |
| "loss": 0.7282, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.19003158039269533, | |
| "grad_norm": 0.45892444252967834, | |
| "learning_rate": 0.00019010989010989008, | |
| "loss": 0.6273, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.1911300288342716, | |
| "grad_norm": 0.54690021276474, | |
| "learning_rate": 0.00019120879120879117, | |
| "loss": 0.669, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.19222847727584785, | |
| "grad_norm": 0.7361836433410645, | |
| "learning_rate": 0.0001923076923076923, | |
| "loss": 0.8945, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.19332692571742413, | |
| "grad_norm": 0.5876324772834778, | |
| "learning_rate": 0.0001934065934065934, | |
| "loss": 0.7557, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.1944253741590004, | |
| "grad_norm": 0.7753897309303284, | |
| "learning_rate": 0.0001945054945054945, | |
| "loss": 0.7904, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.19552382260057669, | |
| "grad_norm": 0.6244968771934509, | |
| "learning_rate": 0.0001956043956043956, | |
| "loss": 0.7617, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.19662227104215296, | |
| "grad_norm": 0.6300948262214661, | |
| "learning_rate": 0.00019670329670329667, | |
| "loss": 0.5884, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.19772071948372924, | |
| "grad_norm": 0.5845354795455933, | |
| "learning_rate": 0.00019780219780219779, | |
| "loss": 0.8034, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.19881916792530552, | |
| "grad_norm": 0.5231277942657471, | |
| "learning_rate": 0.00019890109890109887, | |
| "loss": 0.5302, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.19991761636688177, | |
| "grad_norm": 0.8393481969833374, | |
| "learning_rate": 0.00019999999999999998, | |
| "loss": 0.6376, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.20101606480845804, | |
| "grad_norm": 0.5777038335800171, | |
| "learning_rate": 0.00020109890109890107, | |
| "loss": 0.5777, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.20211451325003432, | |
| "grad_norm": 0.7751956582069397, | |
| "learning_rate": 0.0002021978021978022, | |
| "loss": 0.8368, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2032129616916106, | |
| "grad_norm": 1.5582187175750732, | |
| "learning_rate": 0.0002032967032967033, | |
| "loss": 0.5087, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.20431141013318688, | |
| "grad_norm": 0.8304231762886047, | |
| "learning_rate": 0.00020439560439560438, | |
| "loss": 0.5512, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.20540985857476315, | |
| "grad_norm": 0.8545000553131104, | |
| "learning_rate": 0.0002054945054945055, | |
| "loss": 1.2533, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.20650830701633943, | |
| "grad_norm": 0.4891647696495056, | |
| "learning_rate": 0.00020659340659340657, | |
| "loss": 0.5738, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.20760675545791568, | |
| "grad_norm": 0.7159665822982788, | |
| "learning_rate": 0.00020769230769230766, | |
| "loss": 0.9266, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.20870520389949196, | |
| "grad_norm": 0.5053237080574036, | |
| "learning_rate": 0.00020879120879120877, | |
| "loss": 0.4574, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.20980365234106824, | |
| "grad_norm": 0.728336751461029, | |
| "learning_rate": 0.00020989010989010985, | |
| "loss": 0.6871, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.2109021007826445, | |
| "grad_norm": 0.8593311309814453, | |
| "learning_rate": 0.000210989010989011, | |
| "loss": 0.6788, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.2120005492242208, | |
| "grad_norm": 1.247111201286316, | |
| "learning_rate": 0.00021208791208791208, | |
| "loss": 0.5428, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.21309899766579707, | |
| "grad_norm": 0.6636946201324463, | |
| "learning_rate": 0.0002131868131868132, | |
| "loss": 0.7935, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.21419744610737335, | |
| "grad_norm": 0.5811622738838196, | |
| "learning_rate": 0.00021428571428571427, | |
| "loss": 0.4322, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.2152958945489496, | |
| "grad_norm": 0.5329126715660095, | |
| "learning_rate": 0.00021538461538461536, | |
| "loss": 0.7037, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.21639434299052587, | |
| "grad_norm": 1.730969786643982, | |
| "learning_rate": 0.00021648351648351647, | |
| "loss": 1.0315, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.21749279143210215, | |
| "grad_norm": 0.5242175459861755, | |
| "learning_rate": 0.00021758241758241756, | |
| "loss": 0.9285, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.21859123987367843, | |
| "grad_norm": 0.4745596945285797, | |
| "learning_rate": 0.00021868131868131864, | |
| "loss": 0.5414, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.2196896883152547, | |
| "grad_norm": 0.8693228363990784, | |
| "learning_rate": 0.00021978021978021975, | |
| "loss": 0.4576, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.22078813675683098, | |
| "grad_norm": 0.7073357105255127, | |
| "learning_rate": 0.00022087912087912086, | |
| "loss": 0.778, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.22188658519840726, | |
| "grad_norm": 0.535009503364563, | |
| "learning_rate": 0.00022197802197802198, | |
| "loss": 0.7734, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.2229850336399835, | |
| "grad_norm": 0.5862578749656677, | |
| "learning_rate": 0.00022307692307692306, | |
| "loss": 0.8612, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.2240834820815598, | |
| "grad_norm": 0.5167233943939209, | |
| "learning_rate": 0.00022417582417582415, | |
| "loss": 0.6122, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.22518193052313606, | |
| "grad_norm": 0.8982027769088745, | |
| "learning_rate": 0.00022527472527472526, | |
| "loss": 0.8905, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.22628037896471234, | |
| "grad_norm": 0.7311340570449829, | |
| "learning_rate": 0.00022637362637362634, | |
| "loss": 1.0151, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.22737882740628862, | |
| "grad_norm": 0.45674124360084534, | |
| "learning_rate": 0.00022747252747252745, | |
| "loss": 0.7056, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.2284772758478649, | |
| "grad_norm": 0.6916844844818115, | |
| "learning_rate": 0.00022857142857142854, | |
| "loss": 0.5977, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.22957572428944117, | |
| "grad_norm": 0.6632958650588989, | |
| "learning_rate": 0.00022967032967032962, | |
| "loss": 0.8228, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.23067417273101745, | |
| "grad_norm": 0.3243491053581238, | |
| "learning_rate": 0.00023076923076923076, | |
| "loss": 0.4823, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2317726211725937, | |
| "grad_norm": 0.45630499720573425, | |
| "learning_rate": 0.00023186813186813185, | |
| "loss": 0.7206, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.23287106961416998, | |
| "grad_norm": 0.6726184487342834, | |
| "learning_rate": 0.00023296703296703296, | |
| "loss": 0.8211, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.23396951805574626, | |
| "grad_norm": 0.45092982053756714, | |
| "learning_rate": 0.00023406593406593405, | |
| "loss": 0.6812, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.23506796649732253, | |
| "grad_norm": 0.5624651312828064, | |
| "learning_rate": 0.00023516483516483513, | |
| "loss": 0.726, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.2361664149388988, | |
| "grad_norm": 1.1685765981674194, | |
| "learning_rate": 0.00023626373626373624, | |
| "loss": 0.7906, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.2372648633804751, | |
| "grad_norm": 0.581599771976471, | |
| "learning_rate": 0.00023736263736263733, | |
| "loss": 0.7049, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.23836331182205137, | |
| "grad_norm": 0.7660847902297974, | |
| "learning_rate": 0.00023846153846153844, | |
| "loss": 0.6105, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.23946176026362762, | |
| "grad_norm": 0.5126472115516663, | |
| "learning_rate": 0.00023956043956043955, | |
| "loss": 0.7134, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2405602087052039, | |
| "grad_norm": 0.48460498452186584, | |
| "learning_rate": 0.00024065934065934066, | |
| "loss": 0.5578, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.24165865714678017, | |
| "grad_norm": 0.41463029384613037, | |
| "learning_rate": 0.00024175824175824175, | |
| "loss": 0.5589, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.24275710558835645, | |
| "grad_norm": 2.0703623294830322, | |
| "learning_rate": 0.00024285714285714283, | |
| "loss": 0.7128, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.24385555402993273, | |
| "grad_norm": 1.5641820430755615, | |
| "learning_rate": 0.00024395604395604394, | |
| "loss": 0.4439, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.244954002471509, | |
| "grad_norm": 0.34634652733802795, | |
| "learning_rate": 0.00024505494505494503, | |
| "loss": 0.5389, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.24605245091308528, | |
| "grad_norm": 0.5669183135032654, | |
| "learning_rate": 0.00024615384615384614, | |
| "loss": 0.5699, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.24715089935466153, | |
| "grad_norm": 0.6459633111953735, | |
| "learning_rate": 0.0002472527472527472, | |
| "loss": 0.7904, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.2482493477962378, | |
| "grad_norm": 0.9719502925872803, | |
| "learning_rate": 0.0002483516483516483, | |
| "loss": 0.7354, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.24934779623781408, | |
| "grad_norm": 0.7433357834815979, | |
| "learning_rate": 0.0002494505494505494, | |
| "loss": 0.5772, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.25044624467939036, | |
| "grad_norm": 0.42272481322288513, | |
| "learning_rate": 0.00025054945054945053, | |
| "loss": 0.5609, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.2515446931209666, | |
| "grad_norm": 1.2868828773498535, | |
| "learning_rate": 0.00025164835164835165, | |
| "loss": 0.5775, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.2526431415625429, | |
| "grad_norm": 0.40398430824279785, | |
| "learning_rate": 0.0002527472527472527, | |
| "loss": 0.742, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.25374159000411917, | |
| "grad_norm": 0.46501678228378296, | |
| "learning_rate": 0.0002538461538461538, | |
| "loss": 0.69, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.25484003844569547, | |
| "grad_norm": 0.46631869673728943, | |
| "learning_rate": 0.00025494505494505493, | |
| "loss": 0.7712, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2559384868872717, | |
| "grad_norm": 0.6761367321014404, | |
| "learning_rate": 0.000256043956043956, | |
| "loss": 0.64, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.257036935328848, | |
| "grad_norm": 0.6253519654273987, | |
| "learning_rate": 0.0002571428571428571, | |
| "loss": 0.5499, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.2581353837704243, | |
| "grad_norm": 1.0556268692016602, | |
| "learning_rate": 0.0002582417582417582, | |
| "loss": 0.869, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.2592338322120005, | |
| "grad_norm": 0.4816044867038727, | |
| "learning_rate": 0.0002593406593406593, | |
| "loss": 0.6061, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.26033228065357683, | |
| "grad_norm": 1.1049383878707886, | |
| "learning_rate": 0.00026043956043956043, | |
| "loss": 0.7695, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.2614307290951531, | |
| "grad_norm": 0.44643181562423706, | |
| "learning_rate": 0.00026153846153846154, | |
| "loss": 0.7849, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.2625291775367294, | |
| "grad_norm": 0.5231640338897705, | |
| "learning_rate": 0.0002626373626373626, | |
| "loss": 0.8033, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.26362762597830564, | |
| "grad_norm": 0.5537316799163818, | |
| "learning_rate": 0.0002637362637362637, | |
| "loss": 0.7317, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.26472607441988194, | |
| "grad_norm": 0.42069998383522034, | |
| "learning_rate": 0.0002648351648351648, | |
| "loss": 0.6325, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.2658245228614582, | |
| "grad_norm": 0.8009732365608215, | |
| "learning_rate": 0.0002659340659340659, | |
| "loss": 0.6589, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.26692297130303444, | |
| "grad_norm": 1.2626444101333618, | |
| "learning_rate": 0.000267032967032967, | |
| "loss": 0.5845, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.26802141974461074, | |
| "grad_norm": 0.4783913195133209, | |
| "learning_rate": 0.0002681318681318681, | |
| "loss": 0.8844, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.269119868186187, | |
| "grad_norm": 1.098160982131958, | |
| "learning_rate": 0.0002692307692307692, | |
| "loss": 0.6134, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.2702183166277633, | |
| "grad_norm": 1.0397273302078247, | |
| "learning_rate": 0.00027032967032967033, | |
| "loss": 0.7861, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.27131676506933955, | |
| "grad_norm": 0.9729229807853699, | |
| "learning_rate": 0.0002714285714285714, | |
| "loss": 0.7691, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.27241521351091585, | |
| "grad_norm": 0.44837963581085205, | |
| "learning_rate": 0.0002725274725274725, | |
| "loss": 0.9414, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.2735136619524921, | |
| "grad_norm": 1.4863499402999878, | |
| "learning_rate": 0.0002736263736263736, | |
| "loss": 0.5825, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.27461211039406835, | |
| "grad_norm": 0.5948237180709839, | |
| "learning_rate": 0.00027472527472527467, | |
| "loss": 0.4934, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.27571055883564466, | |
| "grad_norm": 0.5448721051216125, | |
| "learning_rate": 0.0002758241758241758, | |
| "loss": 0.6295, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.2768090072772209, | |
| "grad_norm": 0.4309394657611847, | |
| "learning_rate": 0.0002769230769230769, | |
| "loss": 0.6561, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2779074557187972, | |
| "grad_norm": 0.7659335136413574, | |
| "learning_rate": 0.000278021978021978, | |
| "loss": 0.7588, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.27900590416037346, | |
| "grad_norm": 0.45655715465545654, | |
| "learning_rate": 0.0002791208791208791, | |
| "loss": 0.5257, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.28010435260194977, | |
| "grad_norm": 0.5390630960464478, | |
| "learning_rate": 0.0002802197802197802, | |
| "loss": 0.7051, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.281202801043526, | |
| "grad_norm": 0.39703306555747986, | |
| "learning_rate": 0.0002813186813186813, | |
| "loss": 0.6137, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.28230124948510227, | |
| "grad_norm": 0.4662924110889435, | |
| "learning_rate": 0.0002824175824175824, | |
| "loss": 0.4897, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.2833996979266786, | |
| "grad_norm": 0.39399877190589905, | |
| "learning_rate": 0.00028351648351648346, | |
| "loss": 0.6235, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.2844981463682548, | |
| "grad_norm": 0.497549444437027, | |
| "learning_rate": 0.00028461538461538457, | |
| "loss": 0.5134, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.28559659480983113, | |
| "grad_norm": 0.6597803235054016, | |
| "learning_rate": 0.0002857142857142857, | |
| "loss": 0.7955, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2866950432514074, | |
| "grad_norm": 0.5545711517333984, | |
| "learning_rate": 0.0002868131868131868, | |
| "loss": 0.833, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.2877934916929837, | |
| "grad_norm": 1.0227786302566528, | |
| "learning_rate": 0.0002879120879120879, | |
| "loss": 0.5249, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.28889194013455993, | |
| "grad_norm": 0.5727143883705139, | |
| "learning_rate": 0.000289010989010989, | |
| "loss": 0.6319, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.2899903885761362, | |
| "grad_norm": 0.39322397112846375, | |
| "learning_rate": 0.0002901098901098901, | |
| "loss": 0.7003, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.2910888370177125, | |
| "grad_norm": 0.5657737851142883, | |
| "learning_rate": 0.0002912087912087912, | |
| "loss": 0.7085, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.29218728545928874, | |
| "grad_norm": 0.4305976927280426, | |
| "learning_rate": 0.0002923076923076923, | |
| "loss": 0.5931, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.29328573390086504, | |
| "grad_norm": 0.5300284624099731, | |
| "learning_rate": 0.00029340659340659336, | |
| "loss": 0.7881, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.2943841823424413, | |
| "grad_norm": 0.5922349095344543, | |
| "learning_rate": 0.00029450549450549447, | |
| "loss": 0.8688, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.2954826307840176, | |
| "grad_norm": 0.5700828433036804, | |
| "learning_rate": 0.0002956043956043956, | |
| "loss": 1.1328, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.29658107922559385, | |
| "grad_norm": 0.6773694753646851, | |
| "learning_rate": 0.0002967032967032967, | |
| "loss": 0.7821, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2976795276671701, | |
| "grad_norm": 0.5200739502906799, | |
| "learning_rate": 0.0002978021978021978, | |
| "loss": 0.8775, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.2987779761087464, | |
| "grad_norm": 0.9860020875930786, | |
| "learning_rate": 0.00029890109890109886, | |
| "loss": 0.9141, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.29987642455032265, | |
| "grad_norm": 0.7012956142425537, | |
| "learning_rate": 0.0003, | |
| "loss": 0.7672, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.30097487299189896, | |
| "grad_norm": 0.4128098785877228, | |
| "learning_rate": 0.0002998778998778999, | |
| "loss": 0.3969, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.3020733214334752, | |
| "grad_norm": 0.366597980260849, | |
| "learning_rate": 0.00029975579975579974, | |
| "loss": 0.639, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.3031717698750515, | |
| "grad_norm": 0.5208033919334412, | |
| "learning_rate": 0.0002996336996336996, | |
| "loss": 0.664, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.30427021831662776, | |
| "grad_norm": 0.45519202947616577, | |
| "learning_rate": 0.0002995115995115995, | |
| "loss": 0.8495, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.305368666758204, | |
| "grad_norm": 0.6617010831832886, | |
| "learning_rate": 0.0002993894993894994, | |
| "loss": 1.0204, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.3064671151997803, | |
| "grad_norm": 1.4151723384857178, | |
| "learning_rate": 0.00029926739926739923, | |
| "loss": 0.8289, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.30756556364135657, | |
| "grad_norm": 0.6531035900115967, | |
| "learning_rate": 0.00029914529914529915, | |
| "loss": 0.7571, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.30866401208293287, | |
| "grad_norm": 0.8595600724220276, | |
| "learning_rate": 0.000299023199023199, | |
| "loss": 0.9668, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.3097624605245091, | |
| "grad_norm": 0.50210040807724, | |
| "learning_rate": 0.00029890109890109886, | |
| "loss": 0.6662, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.3108609089660854, | |
| "grad_norm": 0.6004669666290283, | |
| "learning_rate": 0.0002987789987789988, | |
| "loss": 0.7127, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.3119593574076617, | |
| "grad_norm": 0.8085057139396667, | |
| "learning_rate": 0.00029865689865689863, | |
| "loss": 0.9266, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3130578058492379, | |
| "grad_norm": 0.44965627789497375, | |
| "learning_rate": 0.0002985347985347985, | |
| "loss": 0.7118, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.31415625429081423, | |
| "grad_norm": 0.5758265852928162, | |
| "learning_rate": 0.00029841269841269835, | |
| "loss": 0.6915, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3152547027323905, | |
| "grad_norm": 0.5623393058776855, | |
| "learning_rate": 0.00029829059829059826, | |
| "loss": 0.6962, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.3163531511739668, | |
| "grad_norm": 0.857796311378479, | |
| "learning_rate": 0.0002981684981684982, | |
| "loss": 0.676, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.31745159961554303, | |
| "grad_norm": 0.36431241035461426, | |
| "learning_rate": 0.000298046398046398, | |
| "loss": 0.5475, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.31855004805711934, | |
| "grad_norm": 0.4778802692890167, | |
| "learning_rate": 0.0002979242979242979, | |
| "loss": 0.7198, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.3196484964986956, | |
| "grad_norm": 0.4887610673904419, | |
| "learning_rate": 0.0002978021978021978, | |
| "loss": 0.5559, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.32074694494027184, | |
| "grad_norm": 0.745379626750946, | |
| "learning_rate": 0.00029768009768009766, | |
| "loss": 1.0509, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.32184539338184814, | |
| "grad_norm": 0.40081167221069336, | |
| "learning_rate": 0.0002975579975579975, | |
| "loss": 0.6564, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.3229438418234244, | |
| "grad_norm": 0.5133034586906433, | |
| "learning_rate": 0.00029743589743589743, | |
| "loss": 0.6765, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3240422902650007, | |
| "grad_norm": 0.5123881697654724, | |
| "learning_rate": 0.0002973137973137973, | |
| "loss": 0.8001, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.32514073870657695, | |
| "grad_norm": 0.3771597743034363, | |
| "learning_rate": 0.00029719169719169715, | |
| "loss": 0.785, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.32623918714815325, | |
| "grad_norm": 0.38929086923599243, | |
| "learning_rate": 0.00029706959706959706, | |
| "loss": 0.7273, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.3273376355897295, | |
| "grad_norm": 0.47761446237564087, | |
| "learning_rate": 0.0002969474969474969, | |
| "loss": 0.6997, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3284360840313058, | |
| "grad_norm": 0.4798452854156494, | |
| "learning_rate": 0.0002968253968253968, | |
| "loss": 0.7171, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.32953453247288206, | |
| "grad_norm": 0.5864073038101196, | |
| "learning_rate": 0.0002967032967032967, | |
| "loss": 0.7075, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3306329809144583, | |
| "grad_norm": 0.6298258900642395, | |
| "learning_rate": 0.00029658119658119655, | |
| "loss": 0.8659, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.3317314293560346, | |
| "grad_norm": 0.9764651656150818, | |
| "learning_rate": 0.0002964590964590964, | |
| "loss": 0.7451, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.33282987779761086, | |
| "grad_norm": 0.7084535360336304, | |
| "learning_rate": 0.0002963369963369963, | |
| "loss": 0.7896, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.33392832623918717, | |
| "grad_norm": 0.3226016163825989, | |
| "learning_rate": 0.0002962148962148962, | |
| "loss": 0.5614, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.3350267746807634, | |
| "grad_norm": 0.5515668988227844, | |
| "learning_rate": 0.0002960927960927961, | |
| "loss": 0.6981, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3361252231223397, | |
| "grad_norm": 0.42776307463645935, | |
| "learning_rate": 0.00029597069597069595, | |
| "loss": 0.5911, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.33722367156391597, | |
| "grad_norm": 0.36645814776420593, | |
| "learning_rate": 0.0002958485958485958, | |
| "loss": 0.5584, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.3383221200054922, | |
| "grad_norm": 0.4089672565460205, | |
| "learning_rate": 0.0002957264957264957, | |
| "loss": 0.6814, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3394205684470685, | |
| "grad_norm": 0.4406324326992035, | |
| "learning_rate": 0.0002956043956043956, | |
| "loss": 0.5426, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.3405190168886448, | |
| "grad_norm": 0.4138193726539612, | |
| "learning_rate": 0.00029548229548229544, | |
| "loss": 0.7554, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3416174653302211, | |
| "grad_norm": 0.45647338032722473, | |
| "learning_rate": 0.00029536019536019535, | |
| "loss": 0.4871, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.34271591377179733, | |
| "grad_norm": 0.44362974166870117, | |
| "learning_rate": 0.0002952380952380952, | |
| "loss": 0.7254, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.34381436221337364, | |
| "grad_norm": 0.5832559466362, | |
| "learning_rate": 0.00029511599511599507, | |
| "loss": 0.64, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.3449128106549499, | |
| "grad_norm": 0.6754651665687561, | |
| "learning_rate": 0.000294993894993895, | |
| "loss": 0.7046, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.34601125909652614, | |
| "grad_norm": 0.6487123370170593, | |
| "learning_rate": 0.00029487179487179484, | |
| "loss": 0.5934, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.34710970753810244, | |
| "grad_norm": 0.24118930101394653, | |
| "learning_rate": 0.0002947496947496947, | |
| "loss": 0.5241, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.3482081559796787, | |
| "grad_norm": 0.4580494165420532, | |
| "learning_rate": 0.0002946275946275946, | |
| "loss": 0.6733, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.349306604421255, | |
| "grad_norm": 0.4770609736442566, | |
| "learning_rate": 0.00029450549450549447, | |
| "loss": 0.5758, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.35040505286283125, | |
| "grad_norm": 0.40334221720695496, | |
| "learning_rate": 0.0002943833943833944, | |
| "loss": 0.5365, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.35150350130440755, | |
| "grad_norm": 0.5605480074882507, | |
| "learning_rate": 0.00029426129426129424, | |
| "loss": 0.5967, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.3526019497459838, | |
| "grad_norm": 0.6031836271286011, | |
| "learning_rate": 0.0002941391941391941, | |
| "loss": 0.6397, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.35370039818756005, | |
| "grad_norm": 0.5602075457572937, | |
| "learning_rate": 0.000294017094017094, | |
| "loss": 0.7253, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.35479884662913636, | |
| "grad_norm": 1.5055879354476929, | |
| "learning_rate": 0.00029389499389499387, | |
| "loss": 0.6066, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.3558972950707126, | |
| "grad_norm": 1.969072699546814, | |
| "learning_rate": 0.0002937728937728937, | |
| "loss": 0.9263, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.3569957435122889, | |
| "grad_norm": 0.43139147758483887, | |
| "learning_rate": 0.00029365079365079364, | |
| "loss": 0.6462, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.35809419195386516, | |
| "grad_norm": 0.40423595905303955, | |
| "learning_rate": 0.0002935286935286935, | |
| "loss": 0.4278, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.35919264039544146, | |
| "grad_norm": 0.41983166337013245, | |
| "learning_rate": 0.00029340659340659336, | |
| "loss": 0.7527, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.3602910888370177, | |
| "grad_norm": 0.6624807715415955, | |
| "learning_rate": 0.00029328449328449327, | |
| "loss": 0.7381, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.36138953727859396, | |
| "grad_norm": 0.6173990964889526, | |
| "learning_rate": 0.00029316239316239313, | |
| "loss": 0.6838, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.36248798572017027, | |
| "grad_norm": 1.1278433799743652, | |
| "learning_rate": 0.000293040293040293, | |
| "loss": 0.8439, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3635864341617465, | |
| "grad_norm": 0.3453993797302246, | |
| "learning_rate": 0.0002929181929181929, | |
| "loss": 0.5324, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.3646848826033228, | |
| "grad_norm": 0.4151187241077423, | |
| "learning_rate": 0.0002927960927960928, | |
| "loss": 0.7019, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.3657833310448991, | |
| "grad_norm": 0.4247313439846039, | |
| "learning_rate": 0.0002926739926739926, | |
| "loss": 0.6362, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.3668817794864754, | |
| "grad_norm": 1.5250136852264404, | |
| "learning_rate": 0.00029255189255189253, | |
| "loss": 0.5885, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.36798022792805163, | |
| "grad_norm": 0.43669968843460083, | |
| "learning_rate": 0.00029242979242979244, | |
| "loss": 0.9191, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.3690786763696279, | |
| "grad_norm": 0.8063925504684448, | |
| "learning_rate": 0.0002923076923076923, | |
| "loss": 0.6813, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.3701771248112042, | |
| "grad_norm": 0.6002399325370789, | |
| "learning_rate": 0.00029218559218559216, | |
| "loss": 0.5859, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.37127557325278043, | |
| "grad_norm": 0.9405462145805359, | |
| "learning_rate": 0.000292063492063492, | |
| "loss": 0.7476, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.37237402169435674, | |
| "grad_norm": 0.5050615072250366, | |
| "learning_rate": 0.00029194139194139193, | |
| "loss": 0.5172, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.373472470135933, | |
| "grad_norm": 0.4593801200389862, | |
| "learning_rate": 0.0002918192918192918, | |
| "loss": 0.5405, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3745709185775093, | |
| "grad_norm": 0.5275060534477234, | |
| "learning_rate": 0.00029169719169719164, | |
| "loss": 0.4537, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.37566936701908554, | |
| "grad_norm": 0.8907522559165955, | |
| "learning_rate": 0.00029157509157509156, | |
| "loss": 0.6826, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.3767678154606618, | |
| "grad_norm": 0.7229670882225037, | |
| "learning_rate": 0.0002914529914529914, | |
| "loss": 0.6072, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.3778662639022381, | |
| "grad_norm": 1.7154827117919922, | |
| "learning_rate": 0.0002913308913308913, | |
| "loss": 0.6956, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.37896471234381435, | |
| "grad_norm": 1.012902021408081, | |
| "learning_rate": 0.0002912087912087912, | |
| "loss": 0.5337, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.38006316078539065, | |
| "grad_norm": 0.6467313170433044, | |
| "learning_rate": 0.00029108669108669105, | |
| "loss": 0.7652, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.3811616092269669, | |
| "grad_norm": 0.5594947338104248, | |
| "learning_rate": 0.0002909645909645909, | |
| "loss": 0.578, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.3822600576685432, | |
| "grad_norm": 0.5808854699134827, | |
| "learning_rate": 0.0002908424908424908, | |
| "loss": 0.6142, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.38335850611011946, | |
| "grad_norm": 0.6067795157432556, | |
| "learning_rate": 0.00029072039072039073, | |
| "loss": 0.7682, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.3844569545516957, | |
| "grad_norm": 0.392993301153183, | |
| "learning_rate": 0.0002905982905982906, | |
| "loss": 0.6599, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.385555402993272, | |
| "grad_norm": 0.3963404893875122, | |
| "learning_rate": 0.00029047619047619045, | |
| "loss": 0.7079, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.38665385143484826, | |
| "grad_norm": 0.3471222221851349, | |
| "learning_rate": 0.00029035409035409036, | |
| "loss": 0.463, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.38775229987642457, | |
| "grad_norm": 0.5496531128883362, | |
| "learning_rate": 0.0002902319902319902, | |
| "loss": 0.7639, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.3888507483180008, | |
| "grad_norm": 0.5482885241508484, | |
| "learning_rate": 0.0002901098901098901, | |
| "loss": 0.4198, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.3899491967595771, | |
| "grad_norm": 0.7329181432723999, | |
| "learning_rate": 0.00028998778998779, | |
| "loss": 0.6057, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.39104764520115337, | |
| "grad_norm": 0.41850918531417847, | |
| "learning_rate": 0.00028986568986568985, | |
| "loss": 0.605, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.3921460936427296, | |
| "grad_norm": 0.4463609457015991, | |
| "learning_rate": 0.0002897435897435897, | |
| "loss": 0.7381, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.3932445420843059, | |
| "grad_norm": 0.7207491397857666, | |
| "learning_rate": 0.0002896214896214896, | |
| "loss": 0.6892, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.3943429905258822, | |
| "grad_norm": 0.3715958595275879, | |
| "learning_rate": 0.0002894993894993895, | |
| "loss": 0.5426, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.3954414389674585, | |
| "grad_norm": 0.7077822685241699, | |
| "learning_rate": 0.00028937728937728933, | |
| "loss": 0.5923, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.39653988740903473, | |
| "grad_norm": 0.5109585523605347, | |
| "learning_rate": 0.00028925518925518925, | |
| "loss": 0.5939, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.39763833585061104, | |
| "grad_norm": 0.6105355024337769, | |
| "learning_rate": 0.0002891330891330891, | |
| "loss": 1.0345, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.3987367842921873, | |
| "grad_norm": 0.479732871055603, | |
| "learning_rate": 0.000289010989010989, | |
| "loss": 0.71, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.39983523273376353, | |
| "grad_norm": 0.8600007891654968, | |
| "learning_rate": 0.0002888888888888888, | |
| "loss": 0.7406, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.40093368117533984, | |
| "grad_norm": 0.6584550738334656, | |
| "learning_rate": 0.00028876678876678873, | |
| "loss": 0.6658, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.4020321296169161, | |
| "grad_norm": 0.7251041531562805, | |
| "learning_rate": 0.00028864468864468865, | |
| "loss": 0.8425, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.4031305780584924, | |
| "grad_norm": 0.5729238390922546, | |
| "learning_rate": 0.0002885225885225885, | |
| "loss": 0.9054, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.40422902650006864, | |
| "grad_norm": 1.1829932928085327, | |
| "learning_rate": 0.00028840048840048836, | |
| "loss": 0.9232, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.40532747494164495, | |
| "grad_norm": 0.37746721506118774, | |
| "learning_rate": 0.0002882783882783883, | |
| "loss": 0.9619, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.4064259233832212, | |
| "grad_norm": 0.5653749108314514, | |
| "learning_rate": 0.00028815628815628813, | |
| "loss": 0.7182, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.40752437182479745, | |
| "grad_norm": 0.6024563312530518, | |
| "learning_rate": 0.000288034188034188, | |
| "loss": 0.6881, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.40862282026637375, | |
| "grad_norm": 0.485350102186203, | |
| "learning_rate": 0.0002879120879120879, | |
| "loss": 0.6451, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.40972126870795, | |
| "grad_norm": 0.5762611627578735, | |
| "learning_rate": 0.00028778998778998776, | |
| "loss": 0.7818, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.4108197171495263, | |
| "grad_norm": 0.7961844801902771, | |
| "learning_rate": 0.0002876678876678876, | |
| "loss": 0.6682, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.41191816559110256, | |
| "grad_norm": 0.4630587697029114, | |
| "learning_rate": 0.00028754578754578753, | |
| "loss": 0.9015, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.41301661403267886, | |
| "grad_norm": 0.6592808961868286, | |
| "learning_rate": 0.0002874236874236874, | |
| "loss": 0.5738, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.4141150624742551, | |
| "grad_norm": 0.4788278639316559, | |
| "learning_rate": 0.00028730158730158725, | |
| "loss": 0.7022, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.41521351091583136, | |
| "grad_norm": 0.5041861534118652, | |
| "learning_rate": 0.00028717948717948716, | |
| "loss": 0.6137, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.41631195935740767, | |
| "grad_norm": 0.5436013340950012, | |
| "learning_rate": 0.000287057387057387, | |
| "loss": 0.6621, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.4174104077989839, | |
| "grad_norm": 0.5102400183677673, | |
| "learning_rate": 0.00028693528693528694, | |
| "loss": 0.6627, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4185088562405602, | |
| "grad_norm": 0.43655040860176086, | |
| "learning_rate": 0.0002868131868131868, | |
| "loss": 0.6475, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.4196073046821365, | |
| "grad_norm": 0.3989826738834381, | |
| "learning_rate": 0.00028669108669108665, | |
| "loss": 0.5483, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.4207057531237128, | |
| "grad_norm": 0.7781158685684204, | |
| "learning_rate": 0.00028656898656898656, | |
| "loss": 0.6475, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.421804201565289, | |
| "grad_norm": 0.8119930624961853, | |
| "learning_rate": 0.0002864468864468864, | |
| "loss": 0.8122, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4229026500068653, | |
| "grad_norm": 0.7233585119247437, | |
| "learning_rate": 0.0002863247863247863, | |
| "loss": 0.7837, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.4240010984484416, | |
| "grad_norm": 0.41249507665634155, | |
| "learning_rate": 0.0002862026862026862, | |
| "loss": 0.6916, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.42509954689001783, | |
| "grad_norm": 0.4865298867225647, | |
| "learning_rate": 0.00028608058608058605, | |
| "loss": 0.595, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.42619799533159414, | |
| "grad_norm": 0.6057963371276855, | |
| "learning_rate": 0.0002859584859584859, | |
| "loss": 0.7214, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.4272964437731704, | |
| "grad_norm": 0.5390968918800354, | |
| "learning_rate": 0.0002858363858363858, | |
| "loss": 0.805, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.4283948922147467, | |
| "grad_norm": 0.5944109559059143, | |
| "learning_rate": 0.0002857142857142857, | |
| "loss": 0.9953, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.42949334065632294, | |
| "grad_norm": 0.5480278134346008, | |
| "learning_rate": 0.00028559218559218554, | |
| "loss": 0.8406, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.4305917890978992, | |
| "grad_norm": 0.5168552994728088, | |
| "learning_rate": 0.00028547008547008545, | |
| "loss": 0.9715, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4316902375394755, | |
| "grad_norm": 0.4859452247619629, | |
| "learning_rate": 0.0002853479853479853, | |
| "loss": 0.7368, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.43278868598105175, | |
| "grad_norm": 0.4697234034538269, | |
| "learning_rate": 0.0002852258852258852, | |
| "loss": 0.4801, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.43388713442262805, | |
| "grad_norm": 0.6198891401290894, | |
| "learning_rate": 0.0002851037851037851, | |
| "loss": 0.5184, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.4349855828642043, | |
| "grad_norm": 0.531563401222229, | |
| "learning_rate": 0.00028498168498168494, | |
| "loss": 0.8047, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.4360840313057806, | |
| "grad_norm": 0.4610724449157715, | |
| "learning_rate": 0.00028485958485958485, | |
| "loss": 0.4583, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.43718247974735686, | |
| "grad_norm": 0.5609697699546814, | |
| "learning_rate": 0.0002847374847374847, | |
| "loss": 0.7362, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.4382809281889331, | |
| "grad_norm": 0.5257968306541443, | |
| "learning_rate": 0.00028461538461538457, | |
| "loss": 0.8173, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.4393793766305094, | |
| "grad_norm": 0.8307009339332581, | |
| "learning_rate": 0.0002844932844932845, | |
| "loss": 0.5507, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.44047782507208566, | |
| "grad_norm": 0.36615508794784546, | |
| "learning_rate": 0.00028437118437118434, | |
| "loss": 0.6605, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.44157627351366197, | |
| "grad_norm": 0.35138362646102905, | |
| "learning_rate": 0.0002842490842490842, | |
| "loss": 0.6614, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4426747219552382, | |
| "grad_norm": 0.5054494738578796, | |
| "learning_rate": 0.0002841269841269841, | |
| "loss": 0.799, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.4437731703968145, | |
| "grad_norm": 0.4711816608905792, | |
| "learning_rate": 0.00028400488400488397, | |
| "loss": 0.8892, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.44487161883839077, | |
| "grad_norm": 0.5073884725570679, | |
| "learning_rate": 0.00028388278388278383, | |
| "loss": 0.8156, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.445970067279967, | |
| "grad_norm": 0.29938632249832153, | |
| "learning_rate": 0.00028376068376068374, | |
| "loss": 0.7598, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4470685157215433, | |
| "grad_norm": 1.745937466621399, | |
| "learning_rate": 0.00028363858363858365, | |
| "loss": 0.7829, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.4481669641631196, | |
| "grad_norm": 0.46887943148612976, | |
| "learning_rate": 0.00028351648351648346, | |
| "loss": 0.7798, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.4492654126046959, | |
| "grad_norm": 0.4274987280368805, | |
| "learning_rate": 0.00028339438339438337, | |
| "loss": 0.8407, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.45036386104627213, | |
| "grad_norm": 0.4445902109146118, | |
| "learning_rate": 0.0002832722832722833, | |
| "loss": 0.7394, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.45146230948784843, | |
| "grad_norm": 0.3842466175556183, | |
| "learning_rate": 0.00028315018315018314, | |
| "loss": 0.7781, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.4525607579294247, | |
| "grad_norm": 0.5660600066184998, | |
| "learning_rate": 0.000283028083028083, | |
| "loss": 0.8058, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.45365920637100093, | |
| "grad_norm": 0.442911297082901, | |
| "learning_rate": 0.0002829059829059829, | |
| "loss": 0.808, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.45475765481257724, | |
| "grad_norm": 0.9051260352134705, | |
| "learning_rate": 0.00028278388278388277, | |
| "loss": 0.9427, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.4558561032541535, | |
| "grad_norm": 0.8027593493461609, | |
| "learning_rate": 0.00028266178266178263, | |
| "loss": 0.531, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.4569545516957298, | |
| "grad_norm": 0.36242446303367615, | |
| "learning_rate": 0.0002825396825396825, | |
| "loss": 0.5609, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.45805300013730604, | |
| "grad_norm": 0.6095871925354004, | |
| "learning_rate": 0.0002824175824175824, | |
| "loss": 0.7424, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.45915144857888235, | |
| "grad_norm": 0.5102814435958862, | |
| "learning_rate": 0.00028229548229548226, | |
| "loss": 0.8861, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.4602498970204586, | |
| "grad_norm": 0.375265896320343, | |
| "learning_rate": 0.0002821733821733821, | |
| "loss": 0.6235, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.4613483454620349, | |
| "grad_norm": 0.4506315588951111, | |
| "learning_rate": 0.00028205128205128203, | |
| "loss": 0.6059, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.46244679390361115, | |
| "grad_norm": 0.8119642734527588, | |
| "learning_rate": 0.0002819291819291819, | |
| "loss": 0.7821, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.4635452423451874, | |
| "grad_norm": 0.42945513129234314, | |
| "learning_rate": 0.00028180708180708175, | |
| "loss": 0.9503, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.4646436907867637, | |
| "grad_norm": 0.35567665100097656, | |
| "learning_rate": 0.00028168498168498166, | |
| "loss": 0.5243, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.46574213922833996, | |
| "grad_norm": 0.5160343647003174, | |
| "learning_rate": 0.00028156288156288157, | |
| "loss": 0.5767, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.46684058766991626, | |
| "grad_norm": 0.37530624866485596, | |
| "learning_rate": 0.00028144078144078143, | |
| "loss": 1.2016, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.4679390361114925, | |
| "grad_norm": 0.5283146500587463, | |
| "learning_rate": 0.0002813186813186813, | |
| "loss": 0.5958, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.4690374845530688, | |
| "grad_norm": 0.5217192769050598, | |
| "learning_rate": 0.0002811965811965812, | |
| "loss": 0.715, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.47013593299464507, | |
| "grad_norm": 0.5092077851295471, | |
| "learning_rate": 0.00028107448107448106, | |
| "loss": 0.6942, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.4712343814362213, | |
| "grad_norm": 0.7683324813842773, | |
| "learning_rate": 0.0002809523809523809, | |
| "loss": 1.0185, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.4723328298777976, | |
| "grad_norm": 0.3117397725582123, | |
| "learning_rate": 0.00028083028083028083, | |
| "loss": 0.6949, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.47343127831937387, | |
| "grad_norm": 0.3218965232372284, | |
| "learning_rate": 0.0002807081807081807, | |
| "loss": 0.6872, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.4745297267609502, | |
| "grad_norm": 1.104121446609497, | |
| "learning_rate": 0.00028058608058608055, | |
| "loss": 0.6628, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.4756281752025264, | |
| "grad_norm": 0.3224816620349884, | |
| "learning_rate": 0.00028046398046398046, | |
| "loss": 0.5974, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.47672662364410273, | |
| "grad_norm": 0.5742220878601074, | |
| "learning_rate": 0.0002803418803418803, | |
| "loss": 0.7248, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.477825072085679, | |
| "grad_norm": 0.5449275374412537, | |
| "learning_rate": 0.0002802197802197802, | |
| "loss": 0.8552, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.47892352052725523, | |
| "grad_norm": 0.44660067558288574, | |
| "learning_rate": 0.0002800976800976801, | |
| "loss": 0.6968, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.48002196896883154, | |
| "grad_norm": 0.4287508428096771, | |
| "learning_rate": 0.00027997557997557995, | |
| "loss": 0.8101, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.4811204174104078, | |
| "grad_norm": 0.4142225384712219, | |
| "learning_rate": 0.00027985347985347986, | |
| "loss": 0.5379, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.4822188658519841, | |
| "grad_norm": 1.246833324432373, | |
| "learning_rate": 0.0002797313797313797, | |
| "loss": 0.7116, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.48331731429356034, | |
| "grad_norm": 0.3845030963420868, | |
| "learning_rate": 0.0002796092796092796, | |
| "loss": 0.8088, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.48441576273513665, | |
| "grad_norm": 1.4492995738983154, | |
| "learning_rate": 0.0002794871794871795, | |
| "loss": 0.7358, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.4855142111767129, | |
| "grad_norm": 0.40994521975517273, | |
| "learning_rate": 0.00027936507936507935, | |
| "loss": 0.6228, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.48661265961828915, | |
| "grad_norm": 0.4782777428627014, | |
| "learning_rate": 0.0002792429792429792, | |
| "loss": 0.4944, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.48771110805986545, | |
| "grad_norm": 0.47269922494888306, | |
| "learning_rate": 0.0002791208791208791, | |
| "loss": 0.7023, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.4888095565014417, | |
| "grad_norm": 0.5529118776321411, | |
| "learning_rate": 0.000278998778998779, | |
| "loss": 0.7717, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.489908004943018, | |
| "grad_norm": 0.4244072139263153, | |
| "learning_rate": 0.00027887667887667884, | |
| "loss": 0.7902, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.49100645338459425, | |
| "grad_norm": 1.4737539291381836, | |
| "learning_rate": 0.00027875457875457875, | |
| "loss": 0.5784, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.49210490182617056, | |
| "grad_norm": 0.40120208263397217, | |
| "learning_rate": 0.0002786324786324786, | |
| "loss": 0.7974, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.4932033502677468, | |
| "grad_norm": 0.5481031537055969, | |
| "learning_rate": 0.00027851037851037846, | |
| "loss": 0.7867, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.49430179870932306, | |
| "grad_norm": 0.36719343066215515, | |
| "learning_rate": 0.0002783882783882784, | |
| "loss": 0.6543, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.49540024715089936, | |
| "grad_norm": 0.3980066776275635, | |
| "learning_rate": 0.00027826617826617824, | |
| "loss": 0.5395, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.4964986955924756, | |
| "grad_norm": 0.45570313930511475, | |
| "learning_rate": 0.0002781440781440781, | |
| "loss": 0.7908, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.4975971440340519, | |
| "grad_norm": 0.41858601570129395, | |
| "learning_rate": 0.000278021978021978, | |
| "loss": 0.5248, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.49869559247562817, | |
| "grad_norm": 0.5019702315330505, | |
| "learning_rate": 0.00027789987789987786, | |
| "loss": 0.8006, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.4997940409172045, | |
| "grad_norm": 0.4589880108833313, | |
| "learning_rate": 0.0002777777777777778, | |
| "loss": 0.7294, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.5008924893587807, | |
| "grad_norm": 0.5679266452789307, | |
| "learning_rate": 0.00027765567765567764, | |
| "loss": 0.651, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.501990937800357, | |
| "grad_norm": 0.4854479134082794, | |
| "learning_rate": 0.0002775335775335775, | |
| "loss": 0.9908, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.5030893862419332, | |
| "grad_norm": 0.4964112341403961, | |
| "learning_rate": 0.0002774114774114774, | |
| "loss": 0.8084, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5041878346835096, | |
| "grad_norm": 0.5130513906478882, | |
| "learning_rate": 0.00027728937728937727, | |
| "loss": 0.8389, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.5052862831250858, | |
| "grad_norm": 0.4784137010574341, | |
| "learning_rate": 0.0002771672771672771, | |
| "loss": 0.5497, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5063847315666621, | |
| "grad_norm": 0.28685998916625977, | |
| "learning_rate": 0.00027704517704517704, | |
| "loss": 0.491, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.5074831800082383, | |
| "grad_norm": 0.5337100625038147, | |
| "learning_rate": 0.0002769230769230769, | |
| "loss": 0.8315, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5085816284498146, | |
| "grad_norm": 0.5431344509124756, | |
| "learning_rate": 0.00027680097680097675, | |
| "loss": 0.5996, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.5096800768913909, | |
| "grad_norm": 0.4546130299568176, | |
| "learning_rate": 0.00027667887667887667, | |
| "loss": 0.5647, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5107785253329672, | |
| "grad_norm": 0.6298655271530151, | |
| "learning_rate": 0.0002765567765567765, | |
| "loss": 0.7684, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.5118769737745434, | |
| "grad_norm": 0.44330841302871704, | |
| "learning_rate": 0.0002764346764346764, | |
| "loss": 0.4906, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5129754222161197, | |
| "grad_norm": 0.3824306130409241, | |
| "learning_rate": 0.0002763125763125763, | |
| "loss": 0.6123, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.514073870657696, | |
| "grad_norm": 0.3225514590740204, | |
| "learning_rate": 0.00027619047619047615, | |
| "loss": 0.7535, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5151723190992723, | |
| "grad_norm": 0.701239824295044, | |
| "learning_rate": 0.00027606837606837607, | |
| "loss": 0.9643, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.5162707675408486, | |
| "grad_norm": 0.37800920009613037, | |
| "learning_rate": 0.0002759462759462759, | |
| "loss": 0.543, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5173692159824248, | |
| "grad_norm": 0.3521328568458557, | |
| "learning_rate": 0.0002758241758241758, | |
| "loss": 0.7157, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.518467664424001, | |
| "grad_norm": 0.2659924626350403, | |
| "learning_rate": 0.0002757020757020757, | |
| "loss": 0.7334, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5195661128655774, | |
| "grad_norm": 0.42815065383911133, | |
| "learning_rate": 0.00027557997557997555, | |
| "loss": 1.2015, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.5206645613071537, | |
| "grad_norm": 0.7758998870849609, | |
| "learning_rate": 0.0002754578754578754, | |
| "loss": 0.9493, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5217630097487299, | |
| "grad_norm": 0.46281251311302185, | |
| "learning_rate": 0.0002753357753357753, | |
| "loss": 0.9159, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.5228614581903062, | |
| "grad_norm": 0.3668971061706543, | |
| "learning_rate": 0.0002752136752136752, | |
| "loss": 0.4869, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5239599066318824, | |
| "grad_norm": 0.462534099817276, | |
| "learning_rate": 0.00027509157509157504, | |
| "loss": 0.6439, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.5250583550734588, | |
| "grad_norm": 0.6341688632965088, | |
| "learning_rate": 0.00027496947496947495, | |
| "loss": 0.6948, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.526156803515035, | |
| "grad_norm": 0.5469139814376831, | |
| "learning_rate": 0.0002748473748473748, | |
| "loss": 1.016, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.5272552519566113, | |
| "grad_norm": 0.438204288482666, | |
| "learning_rate": 0.00027472527472527467, | |
| "loss": 0.6941, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5283537003981875, | |
| "grad_norm": 0.586700975894928, | |
| "learning_rate": 0.0002746031746031746, | |
| "loss": 0.6649, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.5294521488397639, | |
| "grad_norm": 0.4077949523925781, | |
| "learning_rate": 0.0002744810744810745, | |
| "loss": 0.5948, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.5305505972813401, | |
| "grad_norm": 0.3756411373615265, | |
| "learning_rate": 0.0002743589743589743, | |
| "loss": 0.4915, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.5316490457229164, | |
| "grad_norm": 1.2067008018493652, | |
| "learning_rate": 0.0002742368742368742, | |
| "loss": 0.8795, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5327474941644926, | |
| "grad_norm": 0.3097778260707855, | |
| "learning_rate": 0.0002741147741147741, | |
| "loss": 0.5478, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.5338459426060689, | |
| "grad_norm": 0.5536866188049316, | |
| "learning_rate": 0.000273992673992674, | |
| "loss": 0.7042, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5349443910476452, | |
| "grad_norm": 0.5930231809616089, | |
| "learning_rate": 0.00027387057387057384, | |
| "loss": 0.7108, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.5360428394892215, | |
| "grad_norm": 0.39304253458976746, | |
| "learning_rate": 0.00027374847374847375, | |
| "loss": 0.788, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5371412879307977, | |
| "grad_norm": 0.5238274335861206, | |
| "learning_rate": 0.0002736263736263736, | |
| "loss": 0.9887, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.538239736372374, | |
| "grad_norm": 0.5993770956993103, | |
| "learning_rate": 0.00027350427350427347, | |
| "loss": 0.7819, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5393381848139503, | |
| "grad_norm": 0.4601563811302185, | |
| "learning_rate": 0.00027338217338217333, | |
| "loss": 0.4347, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.5404366332555266, | |
| "grad_norm": 0.5292415022850037, | |
| "learning_rate": 0.00027326007326007324, | |
| "loss": 0.5248, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.5415350816971028, | |
| "grad_norm": 0.37247565388679504, | |
| "learning_rate": 0.0002731379731379731, | |
| "loss": 0.5412, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.5426335301386791, | |
| "grad_norm": 0.6865994930267334, | |
| "learning_rate": 0.00027301587301587296, | |
| "loss": 0.8263, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.5437319785802553, | |
| "grad_norm": 0.5019715428352356, | |
| "learning_rate": 0.00027289377289377287, | |
| "loss": 0.7084, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5448304270218317, | |
| "grad_norm": 0.8432828783988953, | |
| "learning_rate": 0.00027277167277167273, | |
| "loss": 0.6188, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.545928875463408, | |
| "grad_norm": 0.594881534576416, | |
| "learning_rate": 0.0002726495726495726, | |
| "loss": 0.8923, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.5470273239049842, | |
| "grad_norm": 0.5573694705963135, | |
| "learning_rate": 0.0002725274725274725, | |
| "loss": 0.6351, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.5481257723465605, | |
| "grad_norm": 0.30426710844039917, | |
| "learning_rate": 0.0002724053724053724, | |
| "loss": 0.6359, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.5492242207881367, | |
| "grad_norm": 0.759385883808136, | |
| "learning_rate": 0.00027228327228327227, | |
| "loss": 0.6131, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5503226692297131, | |
| "grad_norm": 0.5436901450157166, | |
| "learning_rate": 0.00027216117216117213, | |
| "loss": 0.5232, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.5514211176712893, | |
| "grad_norm": 0.5924163460731506, | |
| "learning_rate": 0.00027203907203907204, | |
| "loss": 0.9594, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5525195661128656, | |
| "grad_norm": 0.49177658557891846, | |
| "learning_rate": 0.0002719169719169719, | |
| "loss": 0.842, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.5536180145544418, | |
| "grad_norm": 0.4437295198440552, | |
| "learning_rate": 0.00027179487179487176, | |
| "loss": 1.0338, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.5547164629960182, | |
| "grad_norm": 0.426213800907135, | |
| "learning_rate": 0.00027167277167277167, | |
| "loss": 0.6375, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.5558149114375944, | |
| "grad_norm": 0.4599516689777374, | |
| "learning_rate": 0.00027155067155067153, | |
| "loss": 0.5005, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.5569133598791707, | |
| "grad_norm": 0.647957980632782, | |
| "learning_rate": 0.0002714285714285714, | |
| "loss": 0.6292, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.5580118083207469, | |
| "grad_norm": 0.7891755104064941, | |
| "learning_rate": 0.0002713064713064713, | |
| "loss": 0.697, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.5591102567623232, | |
| "grad_norm": 0.5290817618370056, | |
| "learning_rate": 0.00027118437118437116, | |
| "loss": 0.4547, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.5602087052038995, | |
| "grad_norm": 0.4025941789150238, | |
| "learning_rate": 0.000271062271062271, | |
| "loss": 0.6299, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5613071536454758, | |
| "grad_norm": 0.7768287658691406, | |
| "learning_rate": 0.00027094017094017093, | |
| "loss": 0.6813, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.562405602087052, | |
| "grad_norm": 0.6977662444114685, | |
| "learning_rate": 0.0002708180708180708, | |
| "loss": 0.8217, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.5635040505286283, | |
| "grad_norm": 0.5238949060440063, | |
| "learning_rate": 0.0002706959706959707, | |
| "loss": 0.7348, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.5646024989702045, | |
| "grad_norm": 0.5099830627441406, | |
| "learning_rate": 0.00027057387057387056, | |
| "loss": 0.9894, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5657009474117809, | |
| "grad_norm": 0.6254756450653076, | |
| "learning_rate": 0.0002704517704517704, | |
| "loss": 0.9258, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.5667993958533571, | |
| "grad_norm": 0.40313196182250977, | |
| "learning_rate": 0.00027032967032967033, | |
| "loss": 0.8115, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5678978442949334, | |
| "grad_norm": 0.9706575274467468, | |
| "learning_rate": 0.0002702075702075702, | |
| "loss": 0.5204, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.5689962927365096, | |
| "grad_norm": 0.36777085065841675, | |
| "learning_rate": 0.00027008547008547005, | |
| "loss": 0.7716, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.570094741178086, | |
| "grad_norm": 0.48726886510849, | |
| "learning_rate": 0.00026996336996336996, | |
| "loss": 0.7745, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.5711931896196623, | |
| "grad_norm": 0.3590470850467682, | |
| "learning_rate": 0.0002698412698412698, | |
| "loss": 0.7038, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5722916380612385, | |
| "grad_norm": 0.7103118896484375, | |
| "learning_rate": 0.0002697191697191697, | |
| "loss": 0.8368, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.5733900865028148, | |
| "grad_norm": 0.5503933429718018, | |
| "learning_rate": 0.0002695970695970696, | |
| "loss": 0.6164, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.574488534944391, | |
| "grad_norm": 0.5255150198936462, | |
| "learning_rate": 0.00026947496947496945, | |
| "loss": 0.8886, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.5755869833859674, | |
| "grad_norm": 0.4872569739818573, | |
| "learning_rate": 0.0002693528693528693, | |
| "loss": 0.6277, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5766854318275436, | |
| "grad_norm": 0.3748464584350586, | |
| "learning_rate": 0.0002692307692307692, | |
| "loss": 0.6471, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5777838802691199, | |
| "grad_norm": 0.4401276111602783, | |
| "learning_rate": 0.0002691086691086691, | |
| "loss": 0.9846, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.5788823287106961, | |
| "grad_norm": 0.9565305709838867, | |
| "learning_rate": 0.00026898656898656894, | |
| "loss": 0.9471, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.5799807771522724, | |
| "grad_norm": 0.6307245492935181, | |
| "learning_rate": 0.00026886446886446885, | |
| "loss": 0.9168, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.5810792255938487, | |
| "grad_norm": 0.49177634716033936, | |
| "learning_rate": 0.0002687423687423687, | |
| "loss": 0.5464, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.582177674035425, | |
| "grad_norm": 0.68553626537323, | |
| "learning_rate": 0.0002686202686202686, | |
| "loss": 0.5874, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5832761224770012, | |
| "grad_norm": 0.3811597228050232, | |
| "learning_rate": 0.0002684981684981685, | |
| "loss": 0.766, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.5843745709185775, | |
| "grad_norm": 0.6634503602981567, | |
| "learning_rate": 0.00026837606837606834, | |
| "loss": 0.6438, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.5854730193601538, | |
| "grad_norm": 0.6115571856498718, | |
| "learning_rate": 0.00026825396825396825, | |
| "loss": 0.8757, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.5865714678017301, | |
| "grad_norm": 0.3011985719203949, | |
| "learning_rate": 0.0002681318681318681, | |
| "loss": 0.6188, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.5876699162433063, | |
| "grad_norm": 0.7029386162757874, | |
| "learning_rate": 0.00026800976800976797, | |
| "loss": 0.8681, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5887683646848826, | |
| "grad_norm": 0.4796508550643921, | |
| "learning_rate": 0.0002678876678876679, | |
| "loss": 0.7207, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.5898668131264588, | |
| "grad_norm": 0.542948842048645, | |
| "learning_rate": 0.00026776556776556774, | |
| "loss": 0.5587, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.5909652615680352, | |
| "grad_norm": 0.7566731572151184, | |
| "learning_rate": 0.0002676434676434676, | |
| "loss": 0.8562, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.5920637100096114, | |
| "grad_norm": 0.6411837339401245, | |
| "learning_rate": 0.0002675213675213675, | |
| "loss": 0.4516, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.5931621584511877, | |
| "grad_norm": 0.41434159874916077, | |
| "learning_rate": 0.00026739926739926737, | |
| "loss": 0.7069, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5942606068927639, | |
| "grad_norm": 0.29941752552986145, | |
| "learning_rate": 0.0002672771672771672, | |
| "loss": 0.7444, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.5953590553343402, | |
| "grad_norm": 1.8168927431106567, | |
| "learning_rate": 0.00026715506715506714, | |
| "loss": 0.4947, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.5964575037759166, | |
| "grad_norm": 0.5639868974685669, | |
| "learning_rate": 0.000267032967032967, | |
| "loss": 0.6749, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.5975559522174928, | |
| "grad_norm": 0.5054119229316711, | |
| "learning_rate": 0.0002669108669108669, | |
| "loss": 0.8075, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.598654400659069, | |
| "grad_norm": 0.3531246483325958, | |
| "learning_rate": 0.00026678876678876677, | |
| "loss": 0.6986, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5997528491006453, | |
| "grad_norm": 0.36428287625312805, | |
| "learning_rate": 0.0002666666666666666, | |
| "loss": 0.6496, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.6008512975422217, | |
| "grad_norm": 0.45706960558891296, | |
| "learning_rate": 0.00026654456654456654, | |
| "loss": 0.5646, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.6019497459837979, | |
| "grad_norm": 0.39326363801956177, | |
| "learning_rate": 0.0002664224664224664, | |
| "loss": 0.5037, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.6030481944253742, | |
| "grad_norm": 0.7158151268959045, | |
| "learning_rate": 0.00026630036630036625, | |
| "loss": 0.5643, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.6041466428669504, | |
| "grad_norm": 0.398335337638855, | |
| "learning_rate": 0.00026617826617826617, | |
| "loss": 0.5462, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6052450913085267, | |
| "grad_norm": 0.8625812530517578, | |
| "learning_rate": 0.000266056166056166, | |
| "loss": 0.7898, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.606343539750103, | |
| "grad_norm": 0.5558099150657654, | |
| "learning_rate": 0.0002659340659340659, | |
| "loss": 0.7968, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6074419881916793, | |
| "grad_norm": 0.6244741678237915, | |
| "learning_rate": 0.0002658119658119658, | |
| "loss": 0.9085, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.6085404366332555, | |
| "grad_norm": 0.4907127916812897, | |
| "learning_rate": 0.00026568986568986565, | |
| "loss": 0.5683, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6096388850748318, | |
| "grad_norm": 0.6140159964561462, | |
| "learning_rate": 0.0002655677655677655, | |
| "loss": 0.5693, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.610737333516408, | |
| "grad_norm": 0.41251274943351746, | |
| "learning_rate": 0.0002654456654456654, | |
| "loss": 0.728, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6118357819579844, | |
| "grad_norm": 0.43427684903144836, | |
| "learning_rate": 0.00026532356532356534, | |
| "loss": 0.5692, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.6129342303995606, | |
| "grad_norm": 0.41471078991889954, | |
| "learning_rate": 0.00026520146520146514, | |
| "loss": 0.6616, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6140326788411369, | |
| "grad_norm": 0.4406953752040863, | |
| "learning_rate": 0.00026507936507936506, | |
| "loss": 0.4764, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.6151311272827131, | |
| "grad_norm": 7.233060359954834, | |
| "learning_rate": 0.00026495726495726497, | |
| "loss": 0.6111, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6162295757242895, | |
| "grad_norm": 0.47008857131004333, | |
| "learning_rate": 0.0002648351648351648, | |
| "loss": 0.8145, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.6173280241658657, | |
| "grad_norm": 0.47636717557907104, | |
| "learning_rate": 0.0002647130647130647, | |
| "loss": 0.8036, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.618426472607442, | |
| "grad_norm": 0.526971161365509, | |
| "learning_rate": 0.0002645909645909646, | |
| "loss": 0.7559, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.6195249210490182, | |
| "grad_norm": 0.5027382373809814, | |
| "learning_rate": 0.00026446886446886446, | |
| "loss": 0.7765, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6206233694905945, | |
| "grad_norm": 0.4222506284713745, | |
| "learning_rate": 0.0002643467643467643, | |
| "loss": 0.6376, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.6217218179321709, | |
| "grad_norm": 0.6390372514724731, | |
| "learning_rate": 0.0002642246642246642, | |
| "loss": 0.8224, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6228202663737471, | |
| "grad_norm": 0.44495514035224915, | |
| "learning_rate": 0.0002641025641025641, | |
| "loss": 0.5995, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.6239187148153233, | |
| "grad_norm": 0.7005137205123901, | |
| "learning_rate": 0.00026398046398046394, | |
| "loss": 0.4986, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6250171632568996, | |
| "grad_norm": 0.40745365619659424, | |
| "learning_rate": 0.0002638583638583638, | |
| "loss": 0.608, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.6261156116984758, | |
| "grad_norm": 0.3449142277240753, | |
| "learning_rate": 0.0002637362637362637, | |
| "loss": 0.6253, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6272140601400522, | |
| "grad_norm": 0.4318457841873169, | |
| "learning_rate": 0.00026361416361416357, | |
| "loss": 0.6376, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.6283125085816285, | |
| "grad_norm": 2.2202258110046387, | |
| "learning_rate": 0.00026349206349206343, | |
| "loss": 0.5477, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.6294109570232047, | |
| "grad_norm": 0.6759721040725708, | |
| "learning_rate": 0.00026336996336996334, | |
| "loss": 1.1176, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.630509405464781, | |
| "grad_norm": 1.7796927690505981, | |
| "learning_rate": 0.00026324786324786326, | |
| "loss": 0.8713, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.6316078539063573, | |
| "grad_norm": 0.32952558994293213, | |
| "learning_rate": 0.0002631257631257631, | |
| "loss": 0.4711, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.6327063023479336, | |
| "grad_norm": 0.40390628576278687, | |
| "learning_rate": 0.000263003663003663, | |
| "loss": 0.5412, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.6338047507895098, | |
| "grad_norm": 0.7439208030700684, | |
| "learning_rate": 0.0002628815628815629, | |
| "loss": 0.7094, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.6349031992310861, | |
| "grad_norm": 0.34505775570869446, | |
| "learning_rate": 0.00026275946275946274, | |
| "loss": 0.5939, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.6360016476726623, | |
| "grad_norm": 0.9452011585235596, | |
| "learning_rate": 0.0002626373626373626, | |
| "loss": 0.5108, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.6371000961142387, | |
| "grad_norm": 0.42789551615715027, | |
| "learning_rate": 0.0002625152625152625, | |
| "loss": 0.5661, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6381985445558149, | |
| "grad_norm": 0.3460575044155121, | |
| "learning_rate": 0.0002623931623931624, | |
| "loss": 0.8333, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.6392969929973912, | |
| "grad_norm": 0.8932168483734131, | |
| "learning_rate": 0.00026227106227106223, | |
| "loss": 0.7058, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.6403954414389674, | |
| "grad_norm": 0.8588842749595642, | |
| "learning_rate": 0.00026214896214896214, | |
| "loss": 0.6905, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.6414938898805437, | |
| "grad_norm": 0.5097251534461975, | |
| "learning_rate": 0.000262026862026862, | |
| "loss": 0.8189, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.64259233832212, | |
| "grad_norm": 0.45746755599975586, | |
| "learning_rate": 0.00026190476190476186, | |
| "loss": 0.7212, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.6436907867636963, | |
| "grad_norm": 0.9576689600944519, | |
| "learning_rate": 0.0002617826617826618, | |
| "loss": 0.6159, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.6447892352052725, | |
| "grad_norm": 0.5721899271011353, | |
| "learning_rate": 0.00026166056166056163, | |
| "loss": 0.6083, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.6458876836468488, | |
| "grad_norm": 0.4851115942001343, | |
| "learning_rate": 0.00026153846153846154, | |
| "loss": 0.7678, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6469861320884251, | |
| "grad_norm": 0.6631761193275452, | |
| "learning_rate": 0.0002614163614163614, | |
| "loss": 0.7068, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.6480845805300014, | |
| "grad_norm": 0.6862382292747498, | |
| "learning_rate": 0.00026129426129426126, | |
| "loss": 0.5766, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6491830289715776, | |
| "grad_norm": 0.3754968047142029, | |
| "learning_rate": 0.0002611721611721612, | |
| "loss": 0.7254, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.6502814774131539, | |
| "grad_norm": 0.5239700078964233, | |
| "learning_rate": 0.00026105006105006103, | |
| "loss": 0.5777, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.6513799258547301, | |
| "grad_norm": 0.5103443264961243, | |
| "learning_rate": 0.0002609279609279609, | |
| "loss": 1.0006, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.6524783742963065, | |
| "grad_norm": 0.4733884632587433, | |
| "learning_rate": 0.0002608058608058608, | |
| "loss": 0.6851, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6535768227378828, | |
| "grad_norm": 0.5982065796852112, | |
| "learning_rate": 0.00026068376068376066, | |
| "loss": 0.6295, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.654675271179459, | |
| "grad_norm": 1.2408190965652466, | |
| "learning_rate": 0.0002605616605616605, | |
| "loss": 0.8806, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.6557737196210353, | |
| "grad_norm": 0.6005455851554871, | |
| "learning_rate": 0.00026043956043956043, | |
| "loss": 0.7186, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.6568721680626116, | |
| "grad_norm": 0.33777105808258057, | |
| "learning_rate": 0.0002603174603174603, | |
| "loss": 0.4599, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.6579706165041879, | |
| "grad_norm": 0.5336529612541199, | |
| "learning_rate": 0.00026019536019536015, | |
| "loss": 0.553, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.6590690649457641, | |
| "grad_norm": 0.6930931806564331, | |
| "learning_rate": 0.00026007326007326006, | |
| "loss": 0.5686, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6601675133873404, | |
| "grad_norm": 1.1340439319610596, | |
| "learning_rate": 0.0002599511599511599, | |
| "loss": 0.5886, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.6612659618289166, | |
| "grad_norm": 0.9833797812461853, | |
| "learning_rate": 0.0002598290598290598, | |
| "loss": 0.7109, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.662364410270493, | |
| "grad_norm": 0.9305315017700195, | |
| "learning_rate": 0.0002597069597069597, | |
| "loss": 0.8341, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.6634628587120692, | |
| "grad_norm": 0.9753265380859375, | |
| "learning_rate": 0.00025958485958485955, | |
| "loss": 0.7102, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.6645613071536455, | |
| "grad_norm": 2.2342822551727295, | |
| "learning_rate": 0.00025946275946275946, | |
| "loss": 0.6784, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.6656597555952217, | |
| "grad_norm": 0.6815157532691956, | |
| "learning_rate": 0.0002593406593406593, | |
| "loss": 0.7689, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.666758204036798, | |
| "grad_norm": 0.7792591452598572, | |
| "learning_rate": 0.0002592185592185592, | |
| "loss": 0.9444, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.6678566524783743, | |
| "grad_norm": 0.668251097202301, | |
| "learning_rate": 0.0002590964590964591, | |
| "loss": 0.6899, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6689551009199506, | |
| "grad_norm": 0.5041349530220032, | |
| "learning_rate": 0.00025897435897435895, | |
| "loss": 0.652, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.6700535493615268, | |
| "grad_norm": 0.35069939494132996, | |
| "learning_rate": 0.0002588522588522588, | |
| "loss": 0.8102, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6711519978031031, | |
| "grad_norm": 3.324793577194214, | |
| "learning_rate": 0.0002587301587301587, | |
| "loss": 0.7936, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.6722504462446794, | |
| "grad_norm": 0.6778903007507324, | |
| "learning_rate": 0.0002586080586080586, | |
| "loss": 0.6258, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.6733488946862557, | |
| "grad_norm": 3.034745454788208, | |
| "learning_rate": 0.00025848595848595844, | |
| "loss": 0.697, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.6744473431278319, | |
| "grad_norm": 2.563870429992676, | |
| "learning_rate": 0.00025836385836385835, | |
| "loss": 0.7596, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.6755457915694082, | |
| "grad_norm": 0.45592913031578064, | |
| "learning_rate": 0.0002582417582417582, | |
| "loss": 0.7753, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6766442400109844, | |
| "grad_norm": 0.7209720015525818, | |
| "learning_rate": 0.00025811965811965807, | |
| "loss": 0.6907, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.6777426884525608, | |
| "grad_norm": 0.4611949026584625, | |
| "learning_rate": 0.000257997557997558, | |
| "loss": 0.5896, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.678841136894137, | |
| "grad_norm": 1.3885395526885986, | |
| "learning_rate": 0.0002578754578754579, | |
| "loss": 0.6344, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.6799395853357133, | |
| "grad_norm": 0.544572651386261, | |
| "learning_rate": 0.00025775335775335775, | |
| "loss": 0.586, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.6810380337772896, | |
| "grad_norm": 0.5637034177780151, | |
| "learning_rate": 0.0002576312576312576, | |
| "loss": 0.8284, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6821364822188658, | |
| "grad_norm": 1.170779824256897, | |
| "learning_rate": 0.00025750915750915747, | |
| "loss": 0.8818, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.6832349306604422, | |
| "grad_norm": 0.4877263605594635, | |
| "learning_rate": 0.0002573870573870574, | |
| "loss": 0.9179, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.6843333791020184, | |
| "grad_norm": 0.6684415340423584, | |
| "learning_rate": 0.00025726495726495724, | |
| "loss": 0.7358, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.6854318275435947, | |
| "grad_norm": 0.6679075956344604, | |
| "learning_rate": 0.0002571428571428571, | |
| "loss": 0.6342, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.6865302759851709, | |
| "grad_norm": 0.65242600440979, | |
| "learning_rate": 0.000257020757020757, | |
| "loss": 0.4762, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.6876287244267473, | |
| "grad_norm": 0.806523859500885, | |
| "learning_rate": 0.00025689865689865687, | |
| "loss": 0.7621, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.6887271728683235, | |
| "grad_norm": 1.09652578830719, | |
| "learning_rate": 0.0002567765567765567, | |
| "loss": 0.6594, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.6898256213098998, | |
| "grad_norm": 0.412505179643631, | |
| "learning_rate": 0.00025665445665445664, | |
| "loss": 0.8026, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.690924069751476, | |
| "grad_norm": 0.5801676511764526, | |
| "learning_rate": 0.0002565323565323565, | |
| "loss": 0.7026, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.6920225181930523, | |
| "grad_norm": 0.6822883486747742, | |
| "learning_rate": 0.00025641025641025636, | |
| "loss": 0.4372, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6931209666346286, | |
| "grad_norm": 0.3455508351325989, | |
| "learning_rate": 0.00025628815628815627, | |
| "loss": 0.5624, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.6942194150762049, | |
| "grad_norm": 0.3533216714859009, | |
| "learning_rate": 0.0002561660561660562, | |
| "loss": 0.7493, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.6953178635177811, | |
| "grad_norm": 1.4306656122207642, | |
| "learning_rate": 0.000256043956043956, | |
| "loss": 0.7537, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.6964163119593574, | |
| "grad_norm": 0.336393266916275, | |
| "learning_rate": 0.0002559218559218559, | |
| "loss": 0.787, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.6975147604009336, | |
| "grad_norm": 0.5303547382354736, | |
| "learning_rate": 0.0002557997557997558, | |
| "loss": 0.5604, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.69861320884251, | |
| "grad_norm": 0.5421821475028992, | |
| "learning_rate": 0.00025567765567765567, | |
| "loss": 0.6905, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.6997116572840862, | |
| "grad_norm": 0.5445061922073364, | |
| "learning_rate": 0.00025555555555555553, | |
| "loss": 0.6389, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.7008101057256625, | |
| "grad_norm": 0.42832881212234497, | |
| "learning_rate": 0.00025543345543345544, | |
| "loss": 0.7825, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.7019085541672387, | |
| "grad_norm": 1.4624862670898438, | |
| "learning_rate": 0.0002553113553113553, | |
| "loss": 0.4964, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.7030070026088151, | |
| "grad_norm": 0.38657426834106445, | |
| "learning_rate": 0.00025518925518925516, | |
| "loss": 0.5299, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7041054510503914, | |
| "grad_norm": 14.422834396362305, | |
| "learning_rate": 0.00025506715506715507, | |
| "loss": 0.5008, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.7052038994919676, | |
| "grad_norm": 0.591106653213501, | |
| "learning_rate": 0.00025494505494505493, | |
| "loss": 0.6732, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7063023479335439, | |
| "grad_norm": 1.6697375774383545, | |
| "learning_rate": 0.0002548229548229548, | |
| "loss": 0.6782, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.7074007963751201, | |
| "grad_norm": 1.670777678489685, | |
| "learning_rate": 0.0002547008547008547, | |
| "loss": 0.5275, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7084992448166965, | |
| "grad_norm": 2.3361563682556152, | |
| "learning_rate": 0.00025457875457875456, | |
| "loss": 0.4177, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.7095976932582727, | |
| "grad_norm": 1.823844313621521, | |
| "learning_rate": 0.0002544566544566544, | |
| "loss": 0.5438, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.710696141699849, | |
| "grad_norm": 0.5374146699905396, | |
| "learning_rate": 0.0002543345543345543, | |
| "loss": 0.6704, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.7117945901414252, | |
| "grad_norm": 0.9709361791610718, | |
| "learning_rate": 0.0002542124542124542, | |
| "loss": 0.8896, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7128930385830015, | |
| "grad_norm": 0.7118197083473206, | |
| "learning_rate": 0.0002540903540903541, | |
| "loss": 0.766, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.7139914870245778, | |
| "grad_norm": 0.4597225487232208, | |
| "learning_rate": 0.00025396825396825396, | |
| "loss": 0.7498, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.7150899354661541, | |
| "grad_norm": 0.9708977937698364, | |
| "learning_rate": 0.0002538461538461538, | |
| "loss": 0.7602, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.7161883839077303, | |
| "grad_norm": 0.8156960606575012, | |
| "learning_rate": 0.00025372405372405373, | |
| "loss": 1.1105, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7172868323493066, | |
| "grad_norm": 1.4135644435882568, | |
| "learning_rate": 0.0002536019536019536, | |
| "loss": 0.9203, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.7183852807908829, | |
| "grad_norm": 0.5754226446151733, | |
| "learning_rate": 0.00025347985347985344, | |
| "loss": 0.5368, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.7194837292324592, | |
| "grad_norm": 1.7644588947296143, | |
| "learning_rate": 0.00025335775335775336, | |
| "loss": 0.6451, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.7205821776740354, | |
| "grad_norm": 4.35576868057251, | |
| "learning_rate": 0.0002532356532356532, | |
| "loss": 0.6732, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.7216806261156117, | |
| "grad_norm": 1.1072558164596558, | |
| "learning_rate": 0.0002531135531135531, | |
| "loss": 0.7901, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.7227790745571879, | |
| "grad_norm": 0.3916113078594208, | |
| "learning_rate": 0.000252991452991453, | |
| "loss": 0.7153, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.7238775229987643, | |
| "grad_norm": 1.055137276649475, | |
| "learning_rate": 0.00025286935286935285, | |
| "loss": 0.8664, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.7249759714403405, | |
| "grad_norm": 0.5966087579727173, | |
| "learning_rate": 0.0002527472527472527, | |
| "loss": 0.933, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.7260744198819168, | |
| "grad_norm": 0.40958529710769653, | |
| "learning_rate": 0.0002526251526251526, | |
| "loss": 0.7196, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.727172868323493, | |
| "grad_norm": 0.4636710584163666, | |
| "learning_rate": 0.0002525030525030525, | |
| "loss": 0.7039, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.7282713167650693, | |
| "grad_norm": 0.6967337131500244, | |
| "learning_rate": 0.0002523809523809524, | |
| "loss": 0.8981, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.7293697652066456, | |
| "grad_norm": 0.49781784415245056, | |
| "learning_rate": 0.00025225885225885225, | |
| "loss": 0.7239, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.7304682136482219, | |
| "grad_norm": 0.940851628780365, | |
| "learning_rate": 0.0002521367521367521, | |
| "loss": 0.8199, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.7315666620897981, | |
| "grad_norm": 1.0271226167678833, | |
| "learning_rate": 0.000252014652014652, | |
| "loss": 0.6757, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.7326651105313744, | |
| "grad_norm": 0.5299912095069885, | |
| "learning_rate": 0.0002518925518925519, | |
| "loss": 0.8464, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.7337635589729508, | |
| "grad_norm": 0.7060052156448364, | |
| "learning_rate": 0.00025177045177045173, | |
| "loss": 0.6541, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.734862007414527, | |
| "grad_norm": 0.5419691205024719, | |
| "learning_rate": 0.00025164835164835165, | |
| "loss": 0.8741, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.7359604558561033, | |
| "grad_norm": 0.6363463401794434, | |
| "learning_rate": 0.0002515262515262515, | |
| "loss": 0.7224, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7370589042976795, | |
| "grad_norm": 0.7622922658920288, | |
| "learning_rate": 0.00025140415140415136, | |
| "loss": 0.9402, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.7381573527392558, | |
| "grad_norm": 0.7477490305900574, | |
| "learning_rate": 0.0002512820512820513, | |
| "loss": 0.6036, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.7392558011808321, | |
| "grad_norm": 0.4813562333583832, | |
| "learning_rate": 0.00025115995115995113, | |
| "loss": 0.5982, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.7403542496224084, | |
| "grad_norm": 3.112766981124878, | |
| "learning_rate": 0.000251037851037851, | |
| "loss": 0.5825, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.7414526980639846, | |
| "grad_norm": 0.9523088932037354, | |
| "learning_rate": 0.0002509157509157509, | |
| "loss": 0.5698, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.7425511465055609, | |
| "grad_norm": 0.3426001965999603, | |
| "learning_rate": 0.00025079365079365076, | |
| "loss": 0.5516, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.7436495949471371, | |
| "grad_norm": 0.4866350591182709, | |
| "learning_rate": 0.0002506715506715506, | |
| "loss": 0.5466, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.7447480433887135, | |
| "grad_norm": 0.6590595245361328, | |
| "learning_rate": 0.00025054945054945053, | |
| "loss": 0.7579, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.7458464918302897, | |
| "grad_norm": 0.36733704805374146, | |
| "learning_rate": 0.0002504273504273504, | |
| "loss": 0.5114, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.746944940271866, | |
| "grad_norm": 0.5890951156616211, | |
| "learning_rate": 0.0002503052503052503, | |
| "loss": 0.7196, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7480433887134422, | |
| "grad_norm": 0.8393438458442688, | |
| "learning_rate": 0.00025018315018315016, | |
| "loss": 0.6291, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.7491418371550186, | |
| "grad_norm": 0.9745636582374573, | |
| "learning_rate": 0.00025006105006105, | |
| "loss": 0.8675, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7502402855965948, | |
| "grad_norm": 1.1764310598373413, | |
| "learning_rate": 0.00024993894993894993, | |
| "loss": 0.9384, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.7513387340381711, | |
| "grad_norm": 0.6199970245361328, | |
| "learning_rate": 0.0002498168498168498, | |
| "loss": 0.5984, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7524371824797473, | |
| "grad_norm": 2.2708802223205566, | |
| "learning_rate": 0.00024969474969474965, | |
| "loss": 0.7867, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.7535356309213236, | |
| "grad_norm": 0.6731462478637695, | |
| "learning_rate": 0.00024957264957264956, | |
| "loss": 0.5377, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.7546340793629, | |
| "grad_norm": 0.991669774055481, | |
| "learning_rate": 0.0002494505494505494, | |
| "loss": 0.7015, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.7557325278044762, | |
| "grad_norm": 0.5873506665229797, | |
| "learning_rate": 0.0002493284493284493, | |
| "loss": 0.567, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.7568309762460524, | |
| "grad_norm": 1.5025473833084106, | |
| "learning_rate": 0.0002492063492063492, | |
| "loss": 0.6264, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.7579294246876287, | |
| "grad_norm": 0.4942665696144104, | |
| "learning_rate": 0.00024908424908424905, | |
| "loss": 0.7623, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7590278731292049, | |
| "grad_norm": 0.5522105693817139, | |
| "learning_rate": 0.0002489621489621489, | |
| "loss": 0.6192, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.7601263215707813, | |
| "grad_norm": 1.25243079662323, | |
| "learning_rate": 0.0002488400488400488, | |
| "loss": 0.8547, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.7612247700123576, | |
| "grad_norm": 0.5228685140609741, | |
| "learning_rate": 0.00024871794871794874, | |
| "loss": 0.7365, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.7623232184539338, | |
| "grad_norm": 1.5090827941894531, | |
| "learning_rate": 0.0002485958485958486, | |
| "loss": 0.9226, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.76342166689551, | |
| "grad_norm": 3.3617379665374756, | |
| "learning_rate": 0.00024847374847374845, | |
| "loss": 0.7942, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.7645201153370864, | |
| "grad_norm": 0.5350137948989868, | |
| "learning_rate": 0.0002483516483516483, | |
| "loss": 0.6254, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.7656185637786627, | |
| "grad_norm": 0.8871312141418457, | |
| "learning_rate": 0.0002482295482295482, | |
| "loss": 0.8241, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.7667170122202389, | |
| "grad_norm": 0.48593926429748535, | |
| "learning_rate": 0.0002481074481074481, | |
| "loss": 0.5707, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.7678154606618152, | |
| "grad_norm": 0.7460000514984131, | |
| "learning_rate": 0.00024798534798534794, | |
| "loss": 0.9521, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.7689139091033914, | |
| "grad_norm": 0.7105034589767456, | |
| "learning_rate": 0.00024786324786324785, | |
| "loss": 0.7513, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7700123575449678, | |
| "grad_norm": 0.40251481533050537, | |
| "learning_rate": 0.0002477411477411477, | |
| "loss": 0.6067, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.771110805986544, | |
| "grad_norm": 0.452709436416626, | |
| "learning_rate": 0.00024761904761904757, | |
| "loss": 0.671, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.7722092544281203, | |
| "grad_norm": 0.581453263759613, | |
| "learning_rate": 0.0002474969474969475, | |
| "loss": 0.5356, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.7733077028696965, | |
| "grad_norm": 0.8013669848442078, | |
| "learning_rate": 0.00024737484737484734, | |
| "loss": 0.6889, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.7744061513112728, | |
| "grad_norm": 1.1480565071105957, | |
| "learning_rate": 0.0002472527472527472, | |
| "loss": 0.7456, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.7755045997528491, | |
| "grad_norm": 0.7568329572677612, | |
| "learning_rate": 0.0002471306471306471, | |
| "loss": 0.7455, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.7766030481944254, | |
| "grad_norm": 0.4223226308822632, | |
| "learning_rate": 0.000247008547008547, | |
| "loss": 0.7138, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.7777014966360016, | |
| "grad_norm": 0.372872531414032, | |
| "learning_rate": 0.00024688644688644683, | |
| "loss": 0.8037, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.7787999450775779, | |
| "grad_norm": 0.968614399433136, | |
| "learning_rate": 0.00024676434676434674, | |
| "loss": 0.5943, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.7798983935191542, | |
| "grad_norm": 0.801157534122467, | |
| "learning_rate": 0.00024664224664224665, | |
| "loss": 0.9467, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7809968419607305, | |
| "grad_norm": 0.7115808129310608, | |
| "learning_rate": 0.0002465201465201465, | |
| "loss": 0.7828, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.7820952904023067, | |
| "grad_norm": 1.2951349020004272, | |
| "learning_rate": 0.00024639804639804637, | |
| "loss": 0.6221, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.783193738843883, | |
| "grad_norm": 0.47706693410873413, | |
| "learning_rate": 0.0002462759462759463, | |
| "loss": 0.3641, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.7842921872854592, | |
| "grad_norm": 0.8871097564697266, | |
| "learning_rate": 0.00024615384615384614, | |
| "loss": 0.6177, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.7853906357270356, | |
| "grad_norm": 0.7920973896980286, | |
| "learning_rate": 0.000246031746031746, | |
| "loss": 0.5858, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.7864890841686119, | |
| "grad_norm": 0.49732694029808044, | |
| "learning_rate": 0.0002459096459096459, | |
| "loss": 0.5176, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.7875875326101881, | |
| "grad_norm": 0.34965720772743225, | |
| "learning_rate": 0.00024578754578754577, | |
| "loss": 0.4983, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.7886859810517644, | |
| "grad_norm": 0.45963025093078613, | |
| "learning_rate": 0.00024566544566544563, | |
| "loss": 0.7756, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.7897844294933407, | |
| "grad_norm": 0.5802373290061951, | |
| "learning_rate": 0.00024554334554334554, | |
| "loss": 0.5773, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.790882877934917, | |
| "grad_norm": 1.8482742309570312, | |
| "learning_rate": 0.0002454212454212454, | |
| "loss": 0.7978, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7919813263764932, | |
| "grad_norm": 0.5821959972381592, | |
| "learning_rate": 0.00024529914529914526, | |
| "loss": 0.7483, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.7930797748180695, | |
| "grad_norm": 0.9352701306343079, | |
| "learning_rate": 0.0002451770451770451, | |
| "loss": 0.6979, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.7941782232596457, | |
| "grad_norm": 0.554032564163208, | |
| "learning_rate": 0.00024505494505494503, | |
| "loss": 0.6773, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.7952766717012221, | |
| "grad_norm": 0.6914504766464233, | |
| "learning_rate": 0.00024493284493284494, | |
| "loss": 0.6548, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.7963751201427983, | |
| "grad_norm": 0.40804949402809143, | |
| "learning_rate": 0.0002448107448107448, | |
| "loss": 0.4634, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.7974735685843746, | |
| "grad_norm": 0.4965716302394867, | |
| "learning_rate": 0.00024468864468864466, | |
| "loss": 0.4879, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.7985720170259508, | |
| "grad_norm": 0.48798999190330505, | |
| "learning_rate": 0.00024456654456654457, | |
| "loss": 0.7003, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.7996704654675271, | |
| "grad_norm": 0.6946013569831848, | |
| "learning_rate": 0.00024444444444444443, | |
| "loss": 0.7508, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.8007689139091034, | |
| "grad_norm": 0.4310678243637085, | |
| "learning_rate": 0.0002443223443223443, | |
| "loss": 0.5765, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.8018673623506797, | |
| "grad_norm": 0.5407636761665344, | |
| "learning_rate": 0.0002442002442002442, | |
| "loss": 0.5445, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8029658107922559, | |
| "grad_norm": 0.6281490921974182, | |
| "learning_rate": 0.00024407814407814403, | |
| "loss": 0.9319, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.8040642592338322, | |
| "grad_norm": 1.2027008533477783, | |
| "learning_rate": 0.00024395604395604394, | |
| "loss": 0.3957, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.8051627076754085, | |
| "grad_norm": 0.543230414390564, | |
| "learning_rate": 0.00024383394383394383, | |
| "loss": 0.7919, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.8062611561169848, | |
| "grad_norm": 0.4269828498363495, | |
| "learning_rate": 0.0002437118437118437, | |
| "loss": 0.6081, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.807359604558561, | |
| "grad_norm": 1.2857966423034668, | |
| "learning_rate": 0.00024358974358974357, | |
| "loss": 0.8654, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.8084580530001373, | |
| "grad_norm": 0.6370485424995422, | |
| "learning_rate": 0.00024346764346764346, | |
| "loss": 0.8053, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.8095565014417135, | |
| "grad_norm": 1.1288559436798096, | |
| "learning_rate": 0.00024334554334554332, | |
| "loss": 0.8709, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.8106549498832899, | |
| "grad_norm": 0.5601497292518616, | |
| "learning_rate": 0.0002432234432234432, | |
| "loss": 0.7982, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.8117533983248661, | |
| "grad_norm": 0.476745069026947, | |
| "learning_rate": 0.0002431013431013431, | |
| "loss": 0.7372, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.8128518467664424, | |
| "grad_norm": 0.4287762939929962, | |
| "learning_rate": 0.00024297924297924295, | |
| "loss": 0.5686, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.8139502952080186, | |
| "grad_norm": 0.7039306163787842, | |
| "learning_rate": 0.00024285714285714283, | |
| "loss": 0.7976, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.8150487436495949, | |
| "grad_norm": 0.47433528304100037, | |
| "learning_rate": 0.00024273504273504272, | |
| "loss": 0.6375, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.8161471920911713, | |
| "grad_norm": 0.5443944931030273, | |
| "learning_rate": 0.00024261294261294258, | |
| "loss": 0.6793, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.8172456405327475, | |
| "grad_norm": 0.516094982624054, | |
| "learning_rate": 0.00024249084249084246, | |
| "loss": 0.785, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.8183440889743238, | |
| "grad_norm": 0.6694304347038269, | |
| "learning_rate": 0.00024236874236874237, | |
| "loss": 0.5431, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.8194425374159, | |
| "grad_norm": 0.5309669375419617, | |
| "learning_rate": 0.00024224664224664223, | |
| "loss": 0.5806, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.8205409858574764, | |
| "grad_norm": 0.5502971410751343, | |
| "learning_rate": 0.00024212454212454212, | |
| "loss": 0.5053, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.8216394342990526, | |
| "grad_norm": 0.5242869853973389, | |
| "learning_rate": 0.00024200244200244198, | |
| "loss": 0.8189, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.8227378827406289, | |
| "grad_norm": 0.4131311774253845, | |
| "learning_rate": 0.00024188034188034186, | |
| "loss": 0.7074, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.8238363311822051, | |
| "grad_norm": 0.599915087223053, | |
| "learning_rate": 0.00024175824175824175, | |
| "loss": 0.9408, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.8249347796237814, | |
| "grad_norm": 0.3683515191078186, | |
| "learning_rate": 0.0002416361416361416, | |
| "loss": 0.6675, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.8260332280653577, | |
| "grad_norm": 1.633415699005127, | |
| "learning_rate": 0.0002415140415140415, | |
| "loss": 0.6768, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.827131676506934, | |
| "grad_norm": 0.3848377764225006, | |
| "learning_rate": 0.00024139194139194138, | |
| "loss": 0.485, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.8282301249485102, | |
| "grad_norm": 0.4116027355194092, | |
| "learning_rate": 0.00024126984126984123, | |
| "loss": 0.8253, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.8293285733900865, | |
| "grad_norm": 0.5805407762527466, | |
| "learning_rate": 0.00024114774114774112, | |
| "loss": 0.825, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.8304270218316627, | |
| "grad_norm": 1.2401742935180664, | |
| "learning_rate": 0.000241025641025641, | |
| "loss": 0.6394, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.8315254702732391, | |
| "grad_norm": 0.42345038056373596, | |
| "learning_rate": 0.00024090354090354086, | |
| "loss": 0.6958, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.8326239187148153, | |
| "grad_norm": 1.3758116960525513, | |
| "learning_rate": 0.00024078144078144075, | |
| "loss": 0.6997, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.8337223671563916, | |
| "grad_norm": 1.1826672554016113, | |
| "learning_rate": 0.00024065934065934066, | |
| "loss": 0.7908, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.8348208155979678, | |
| "grad_norm": 1.0752373933792114, | |
| "learning_rate": 0.0002405372405372405, | |
| "loss": 0.8896, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8359192640395442, | |
| "grad_norm": 0.3347112834453583, | |
| "learning_rate": 0.0002404151404151404, | |
| "loss": 0.8202, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.8370177124811204, | |
| "grad_norm": 0.5837082266807556, | |
| "learning_rate": 0.0002402930402930403, | |
| "loss": 0.7502, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.8381161609226967, | |
| "grad_norm": 0.5439388751983643, | |
| "learning_rate": 0.00024017094017094015, | |
| "loss": 0.6928, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.839214609364273, | |
| "grad_norm": 0.35348060727119446, | |
| "learning_rate": 0.00024004884004884004, | |
| "loss": 0.5495, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.8403130578058492, | |
| "grad_norm": 0.4943974018096924, | |
| "learning_rate": 0.00023992673992673992, | |
| "loss": 0.9218, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.8414115062474256, | |
| "grad_norm": 0.628667414188385, | |
| "learning_rate": 0.00023980463980463978, | |
| "loss": 0.6266, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.8425099546890018, | |
| "grad_norm": 0.822575032711029, | |
| "learning_rate": 0.00023968253968253966, | |
| "loss": 0.791, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.843608403130578, | |
| "grad_norm": 0.3044184446334839, | |
| "learning_rate": 0.00023956043956043955, | |
| "loss": 0.6048, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.8447068515721543, | |
| "grad_norm": 0.40807369351387024, | |
| "learning_rate": 0.0002394383394383394, | |
| "loss": 0.6286, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.8458053000137306, | |
| "grad_norm": 1.2373838424682617, | |
| "learning_rate": 0.0002393162393162393, | |
| "loss": 0.5133, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8469037484553069, | |
| "grad_norm": 0.5104987025260925, | |
| "learning_rate": 0.00023919413919413918, | |
| "loss": 0.591, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.8480021968968832, | |
| "grad_norm": 0.6644220352172852, | |
| "learning_rate": 0.00023907203907203904, | |
| "loss": 0.7039, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.8491006453384594, | |
| "grad_norm": 0.5887960195541382, | |
| "learning_rate": 0.00023894993894993892, | |
| "loss": 0.7017, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.8501990937800357, | |
| "grad_norm": 0.6568577885627747, | |
| "learning_rate": 0.00023882783882783878, | |
| "loss": 0.6131, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.851297542221612, | |
| "grad_norm": 0.6594721674919128, | |
| "learning_rate": 0.00023870573870573867, | |
| "loss": 0.6079, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.8523959906631883, | |
| "grad_norm": 12.29937744140625, | |
| "learning_rate": 0.00023858363858363858, | |
| "loss": 1.1068, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.8534944391047645, | |
| "grad_norm": 1.175355315208435, | |
| "learning_rate": 0.00023846153846153844, | |
| "loss": 0.734, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.8545928875463408, | |
| "grad_norm": 1.7128019332885742, | |
| "learning_rate": 0.00023833943833943832, | |
| "loss": 0.6395, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.855691335987917, | |
| "grad_norm": 0.6479717493057251, | |
| "learning_rate": 0.0002382173382173382, | |
| "loss": 0.8572, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.8567897844294934, | |
| "grad_norm": 0.9646544456481934, | |
| "learning_rate": 0.00023809523809523807, | |
| "loss": 1.1168, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8578882328710696, | |
| "grad_norm": 0.8290930986404419, | |
| "learning_rate": 0.00023797313797313795, | |
| "loss": 0.4413, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.8589866813126459, | |
| "grad_norm": 0.6690389513969421, | |
| "learning_rate": 0.00023785103785103784, | |
| "loss": 1.1878, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.8600851297542221, | |
| "grad_norm": 0.6602356433868408, | |
| "learning_rate": 0.0002377289377289377, | |
| "loss": 0.5862, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.8611835781957984, | |
| "grad_norm": 0.612316370010376, | |
| "learning_rate": 0.00023760683760683758, | |
| "loss": 0.7971, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.8622820266373747, | |
| "grad_norm": 0.7429434657096863, | |
| "learning_rate": 0.00023748473748473747, | |
| "loss": 0.6265, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.863380475078951, | |
| "grad_norm": 0.40107640624046326, | |
| "learning_rate": 0.00023736263736263733, | |
| "loss": 0.6697, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.8644789235205272, | |
| "grad_norm": 0.45808035135269165, | |
| "learning_rate": 0.0002372405372405372, | |
| "loss": 0.7443, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.8655773719621035, | |
| "grad_norm": 0.36327049136161804, | |
| "learning_rate": 0.0002371184371184371, | |
| "loss": 0.6518, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.8666758204036799, | |
| "grad_norm": 0.45617833733558655, | |
| "learning_rate": 0.00023699633699633696, | |
| "loss": 0.792, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.8677742688452561, | |
| "grad_norm": 0.5354835391044617, | |
| "learning_rate": 0.00023687423687423687, | |
| "loss": 0.7788, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8688727172868324, | |
| "grad_norm": 0.9770327210426331, | |
| "learning_rate": 0.00023675213675213675, | |
| "loss": 0.7267, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.8699711657284086, | |
| "grad_norm": 0.646757960319519, | |
| "learning_rate": 0.0002366300366300366, | |
| "loss": 0.7234, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.8710696141699849, | |
| "grad_norm": 0.4694693982601166, | |
| "learning_rate": 0.0002365079365079365, | |
| "loss": 0.8261, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.8721680626115612, | |
| "grad_norm": 0.9923954606056213, | |
| "learning_rate": 0.00023638583638583638, | |
| "loss": 0.703, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.8732665110531375, | |
| "grad_norm": 1.6440534591674805, | |
| "learning_rate": 0.00023626373626373624, | |
| "loss": 0.7654, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.8743649594947137, | |
| "grad_norm": 0.3947128653526306, | |
| "learning_rate": 0.00023614163614163613, | |
| "loss": 0.637, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.87546340793629, | |
| "grad_norm": 3.4264323711395264, | |
| "learning_rate": 0.000236019536019536, | |
| "loss": 0.7325, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.8765618563778662, | |
| "grad_norm": 0.5469256043434143, | |
| "learning_rate": 0.00023589743589743587, | |
| "loss": 0.8203, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.8776603048194426, | |
| "grad_norm": 0.5184471011161804, | |
| "learning_rate": 0.00023577533577533576, | |
| "loss": 0.7895, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.8787587532610188, | |
| "grad_norm": 0.8231347799301147, | |
| "learning_rate": 0.00023565323565323562, | |
| "loss": 0.7888, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8798572017025951, | |
| "grad_norm": 14.826855659484863, | |
| "learning_rate": 0.0002355311355311355, | |
| "loss": 0.7564, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.8809556501441713, | |
| "grad_norm": 0.5809927582740784, | |
| "learning_rate": 0.00023540903540903539, | |
| "loss": 0.6702, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.8820540985857477, | |
| "grad_norm": 0.7244674563407898, | |
| "learning_rate": 0.00023528693528693524, | |
| "loss": 0.6475, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.8831525470273239, | |
| "grad_norm": 0.8071272373199463, | |
| "learning_rate": 0.00023516483516483513, | |
| "loss": 0.7434, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.8842509954689002, | |
| "grad_norm": 0.6872429847717285, | |
| "learning_rate": 0.00023504273504273504, | |
| "loss": 0.5968, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.8853494439104764, | |
| "grad_norm": 9.353965759277344, | |
| "learning_rate": 0.00023492063492063487, | |
| "loss": 0.4228, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.8864478923520527, | |
| "grad_norm": 0.47151222825050354, | |
| "learning_rate": 0.00023479853479853479, | |
| "loss": 0.6832, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.887546340793629, | |
| "grad_norm": 1.4599422216415405, | |
| "learning_rate": 0.00023467643467643467, | |
| "loss": 0.6692, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.8886447892352053, | |
| "grad_norm": 0.45811519026756287, | |
| "learning_rate": 0.00023455433455433453, | |
| "loss": 0.787, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.8897432376767815, | |
| "grad_norm": 1.077709674835205, | |
| "learning_rate": 0.00023443223443223442, | |
| "loss": 0.6695, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.8908416861183578, | |
| "grad_norm": 0.5702061057090759, | |
| "learning_rate": 0.0002343101343101343, | |
| "loss": 0.5858, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.891940134559934, | |
| "grad_norm": 2.2391059398651123, | |
| "learning_rate": 0.00023418803418803416, | |
| "loss": 0.6688, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.8930385830015104, | |
| "grad_norm": 1.6974279880523682, | |
| "learning_rate": 0.00023406593406593405, | |
| "loss": 0.8545, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.8941370314430866, | |
| "grad_norm": 0.983435869216919, | |
| "learning_rate": 0.00023394383394383393, | |
| "loss": 0.8128, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.8952354798846629, | |
| "grad_norm": 0.44103240966796875, | |
| "learning_rate": 0.0002338217338217338, | |
| "loss": 0.7968, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.8963339283262391, | |
| "grad_norm": 1.0707038640975952, | |
| "learning_rate": 0.00023369963369963367, | |
| "loss": 0.6996, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.8974323767678155, | |
| "grad_norm": 0.8029122352600098, | |
| "learning_rate": 0.00023357753357753356, | |
| "loss": 0.7911, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.8985308252093918, | |
| "grad_norm": 0.46339499950408936, | |
| "learning_rate": 0.00023345543345543342, | |
| "loss": 0.7712, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.899629273650968, | |
| "grad_norm": 1.020947813987732, | |
| "learning_rate": 0.0002333333333333333, | |
| "loss": 0.6865, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.9007277220925443, | |
| "grad_norm": 0.5332039594650269, | |
| "learning_rate": 0.00023321123321123322, | |
| "loss": 0.8352, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.9018261705341205, | |
| "grad_norm": 0.40052923560142517, | |
| "learning_rate": 0.00023308913308913307, | |
| "loss": 0.5435, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.9029246189756969, | |
| "grad_norm": 0.6643521189689636, | |
| "learning_rate": 0.00023296703296703296, | |
| "loss": 0.7406, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.9040230674172731, | |
| "grad_norm": 0.7514997720718384, | |
| "learning_rate": 0.00023284493284493285, | |
| "loss": 0.7595, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.9051215158588494, | |
| "grad_norm": 0.7124571204185486, | |
| "learning_rate": 0.0002327228327228327, | |
| "loss": 0.5736, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.9062199643004256, | |
| "grad_norm": 0.6757075786590576, | |
| "learning_rate": 0.0002326007326007326, | |
| "loss": 0.6275, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.9073184127420019, | |
| "grad_norm": 0.4200783669948578, | |
| "learning_rate": 0.00023247863247863245, | |
| "loss": 0.6267, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.9084168611835782, | |
| "grad_norm": 0.5442836284637451, | |
| "learning_rate": 0.00023235653235653233, | |
| "loss": 0.6814, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.9095153096251545, | |
| "grad_norm": 0.4859601557254791, | |
| "learning_rate": 0.00023223443223443222, | |
| "loss": 0.6451, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.9106137580667307, | |
| "grad_norm": 0.7353097200393677, | |
| "learning_rate": 0.00023211233211233208, | |
| "loss": 0.6723, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.911712206508307, | |
| "grad_norm": 0.6389304995536804, | |
| "learning_rate": 0.00023199023199023196, | |
| "loss": 0.9429, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.9128106549498833, | |
| "grad_norm": 0.6813933849334717, | |
| "learning_rate": 0.00023186813186813185, | |
| "loss": 0.5319, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.9139091033914596, | |
| "grad_norm": 0.40023690462112427, | |
| "learning_rate": 0.0002317460317460317, | |
| "loss": 0.5808, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.9150075518330358, | |
| "grad_norm": 0.5327205657958984, | |
| "learning_rate": 0.0002316239316239316, | |
| "loss": 0.6666, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.9161060002746121, | |
| "grad_norm": 1.672450065612793, | |
| "learning_rate": 0.0002315018315018315, | |
| "loss": 0.7758, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.9172044487161883, | |
| "grad_norm": 0.5022990703582764, | |
| "learning_rate": 0.00023137973137973134, | |
| "loss": 0.6309, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.9183028971577647, | |
| "grad_norm": 0.43023642897605896, | |
| "learning_rate": 0.00023125763125763125, | |
| "loss": 0.5343, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.919401345599341, | |
| "grad_norm": 0.6878641843795776, | |
| "learning_rate": 0.00023113553113553113, | |
| "loss": 0.7268, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.9204997940409172, | |
| "grad_norm": 0.40551453828811646, | |
| "learning_rate": 0.000231013431013431, | |
| "loss": 0.5784, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.9215982424824934, | |
| "grad_norm": 0.412356436252594, | |
| "learning_rate": 0.00023089133089133088, | |
| "loss": 0.7685, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.9226966909240698, | |
| "grad_norm": 1.1603305339813232, | |
| "learning_rate": 0.00023076923076923076, | |
| "loss": 0.518, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.9237951393656461, | |
| "grad_norm": 0.6733229756355286, | |
| "learning_rate": 0.00023064713064713062, | |
| "loss": 0.5883, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.9248935878072223, | |
| "grad_norm": 0.619434654712677, | |
| "learning_rate": 0.0002305250305250305, | |
| "loss": 0.6244, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.9259920362487986, | |
| "grad_norm": 0.6989772319793701, | |
| "learning_rate": 0.0002304029304029304, | |
| "loss": 0.5763, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.9270904846903748, | |
| "grad_norm": 0.6276418566703796, | |
| "learning_rate": 0.00023028083028083025, | |
| "loss": 0.4762, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.9281889331319512, | |
| "grad_norm": 0.5577360987663269, | |
| "learning_rate": 0.00023015873015873014, | |
| "loss": 0.6254, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.9292873815735274, | |
| "grad_norm": 0.6185848116874695, | |
| "learning_rate": 0.00023003663003663002, | |
| "loss": 1.0182, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.9303858300151037, | |
| "grad_norm": 1.2415262460708618, | |
| "learning_rate": 0.00022991452991452988, | |
| "loss": 0.4677, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.9314842784566799, | |
| "grad_norm": 0.4582594335079193, | |
| "learning_rate": 0.00022979242979242977, | |
| "loss": 0.6308, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.9325827268982562, | |
| "grad_norm": 0.4749620258808136, | |
| "learning_rate": 0.00022967032967032962, | |
| "loss": 0.6217, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.9336811753398325, | |
| "grad_norm": 0.48614588379859924, | |
| "learning_rate": 0.0002295482295482295, | |
| "loss": 0.7469, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.9347796237814088, | |
| "grad_norm": 0.7357453107833862, | |
| "learning_rate": 0.00022942612942612942, | |
| "loss": 0.5978, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.935878072222985, | |
| "grad_norm": 0.53326815366745, | |
| "learning_rate": 0.00022930402930402928, | |
| "loss": 0.7678, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.9369765206645613, | |
| "grad_norm": 0.4853271245956421, | |
| "learning_rate": 0.00022918192918192917, | |
| "loss": 0.4888, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.9380749691061376, | |
| "grad_norm": 1.6529743671417236, | |
| "learning_rate": 0.00022905982905982905, | |
| "loss": 0.6103, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.9391734175477139, | |
| "grad_norm": 0.8255143165588379, | |
| "learning_rate": 0.0002289377289377289, | |
| "loss": 0.6977, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.9402718659892901, | |
| "grad_norm": 0.3999016284942627, | |
| "learning_rate": 0.0002288156288156288, | |
| "loss": 0.5398, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.9413703144308664, | |
| "grad_norm": 1.933090329170227, | |
| "learning_rate": 0.00022869352869352868, | |
| "loss": 1.0827, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.9424687628724426, | |
| "grad_norm": 0.8884105682373047, | |
| "learning_rate": 0.00022857142857142854, | |
| "loss": 0.702, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.943567211314019, | |
| "grad_norm": 0.4555901885032654, | |
| "learning_rate": 0.00022844932844932843, | |
| "loss": 0.8737, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.9446656597555952, | |
| "grad_norm": 0.535915732383728, | |
| "learning_rate": 0.0002283272283272283, | |
| "loss": 0.7036, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9457641081971715, | |
| "grad_norm": 0.7607597708702087, | |
| "learning_rate": 0.00022820512820512817, | |
| "loss": 0.8707, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.9468625566387477, | |
| "grad_norm": 0.4056457579135895, | |
| "learning_rate": 0.00022808302808302805, | |
| "loss": 0.6658, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.947961005080324, | |
| "grad_norm": 0.5472984313964844, | |
| "learning_rate": 0.00022796092796092794, | |
| "loss": 0.5429, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.9490594535219004, | |
| "grad_norm": 0.6866592764854431, | |
| "learning_rate": 0.0002278388278388278, | |
| "loss": 0.7343, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.9501579019634766, | |
| "grad_norm": 0.5244406461715698, | |
| "learning_rate": 0.0002277167277167277, | |
| "loss": 0.669, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.9512563504050529, | |
| "grad_norm": 0.45024383068084717, | |
| "learning_rate": 0.0002275946275946276, | |
| "loss": 0.9062, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.9523547988466291, | |
| "grad_norm": 0.4252873659133911, | |
| "learning_rate": 0.00022747252747252745, | |
| "loss": 0.6109, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.9534532472882055, | |
| "grad_norm": 0.50081467628479, | |
| "learning_rate": 0.00022735042735042734, | |
| "loss": 0.5266, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.9545516957297817, | |
| "grad_norm": 0.9674072861671448, | |
| "learning_rate": 0.00022722832722832723, | |
| "loss": 0.7197, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.955650144171358, | |
| "grad_norm": 1.572348952293396, | |
| "learning_rate": 0.00022710622710622708, | |
| "loss": 0.4728, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9567485926129342, | |
| "grad_norm": 0.6033158898353577, | |
| "learning_rate": 0.00022698412698412697, | |
| "loss": 0.6394, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.9578470410545105, | |
| "grad_norm": 0.5810523629188538, | |
| "learning_rate": 0.00022686202686202686, | |
| "loss": 0.8813, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.9589454894960868, | |
| "grad_norm": 0.46345213055610657, | |
| "learning_rate": 0.00022673992673992671, | |
| "loss": 0.5828, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.9600439379376631, | |
| "grad_norm": 0.5414748191833496, | |
| "learning_rate": 0.0002266178266178266, | |
| "loss": 0.6311, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.9611423863792393, | |
| "grad_norm": 0.9083818197250366, | |
| "learning_rate": 0.00022649572649572646, | |
| "loss": 0.961, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.9622408348208156, | |
| "grad_norm": 0.786993145942688, | |
| "learning_rate": 0.00022637362637362634, | |
| "loss": 0.7825, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.9633392832623918, | |
| "grad_norm": 0.7639968991279602, | |
| "learning_rate": 0.00022625152625152623, | |
| "loss": 0.8989, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.9644377317039682, | |
| "grad_norm": 0.43360400199890137, | |
| "learning_rate": 0.0002261294261294261, | |
| "loss": 0.6747, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.9655361801455444, | |
| "grad_norm": 0.8512898683547974, | |
| "learning_rate": 0.00022600732600732597, | |
| "loss": 0.7152, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.9666346285871207, | |
| "grad_norm": 0.46903684735298157, | |
| "learning_rate": 0.00022588522588522589, | |
| "loss": 0.7594, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9677330770286969, | |
| "grad_norm": 1.9560080766677856, | |
| "learning_rate": 0.00022576312576312572, | |
| "loss": 0.598, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.9688315254702733, | |
| "grad_norm": 1.1595470905303955, | |
| "learning_rate": 0.00022564102564102563, | |
| "loss": 0.6005, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.9699299739118495, | |
| "grad_norm": 0.7318668365478516, | |
| "learning_rate": 0.00022551892551892551, | |
| "loss": 0.7327, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.9710284223534258, | |
| "grad_norm": 0.6557647585868835, | |
| "learning_rate": 0.00022539682539682537, | |
| "loss": 0.5858, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.972126870795002, | |
| "grad_norm": 0.5645928382873535, | |
| "learning_rate": 0.00022527472527472526, | |
| "loss": 0.5818, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.9732253192365783, | |
| "grad_norm": 0.4630253314971924, | |
| "learning_rate": 0.00022515262515262514, | |
| "loss": 0.8363, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.9743237676781547, | |
| "grad_norm": 0.6750912666320801, | |
| "learning_rate": 0.000225030525030525, | |
| "loss": 0.8865, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.9754222161197309, | |
| "grad_norm": 0.6309487819671631, | |
| "learning_rate": 0.0002249084249084249, | |
| "loss": 0.5596, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.9765206645613072, | |
| "grad_norm": 0.9696050882339478, | |
| "learning_rate": 0.00022478632478632477, | |
| "loss": 0.7752, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.9776191130028834, | |
| "grad_norm": 0.7614735960960388, | |
| "learning_rate": 0.00022466422466422463, | |
| "loss": 0.7131, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.9787175614444596, | |
| "grad_norm": 0.4971006214618683, | |
| "learning_rate": 0.00022454212454212452, | |
| "loss": 0.6218, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.979816009886036, | |
| "grad_norm": 0.47809773683547974, | |
| "learning_rate": 0.0002244200244200244, | |
| "loss": 0.5678, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.9809144583276123, | |
| "grad_norm": 0.5959337949752808, | |
| "learning_rate": 0.00022429792429792426, | |
| "loss": 1.0002, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.9820129067691885, | |
| "grad_norm": 0.45277753472328186, | |
| "learning_rate": 0.00022417582417582415, | |
| "loss": 0.7321, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.9831113552107648, | |
| "grad_norm": 1.279405951499939, | |
| "learning_rate": 0.00022405372405372406, | |
| "loss": 0.7912, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.9842098036523411, | |
| "grad_norm": 0.49885687232017517, | |
| "learning_rate": 0.00022393162393162392, | |
| "loss": 0.5558, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.9853082520939174, | |
| "grad_norm": 0.474979430437088, | |
| "learning_rate": 0.0002238095238095238, | |
| "loss": 0.7095, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.9864067005354936, | |
| "grad_norm": 0.3826389014720917, | |
| "learning_rate": 0.0002236874236874237, | |
| "loss": 0.5695, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.9875051489770699, | |
| "grad_norm": 0.33514517545700073, | |
| "learning_rate": 0.00022356532356532355, | |
| "loss": 0.6341, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.9886035974186461, | |
| "grad_norm": 0.5049251914024353, | |
| "learning_rate": 0.00022344322344322343, | |
| "loss": 0.5577, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9897020458602225, | |
| "grad_norm": 0.5179988145828247, | |
| "learning_rate": 0.0002233211233211233, | |
| "loss": 0.5769, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.9908004943017987, | |
| "grad_norm": 0.5194469094276428, | |
| "learning_rate": 0.00022319902319902318, | |
| "loss": 0.5466, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.991898942743375, | |
| "grad_norm": 0.46941491961479187, | |
| "learning_rate": 0.00022307692307692306, | |
| "loss": 0.642, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.9929973911849512, | |
| "grad_norm": 0.379682719707489, | |
| "learning_rate": 0.00022295482295482292, | |
| "loss": 0.5508, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.9940958396265275, | |
| "grad_norm": 1.3844119310379028, | |
| "learning_rate": 0.0002228327228327228, | |
| "loss": 0.8814, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.9951942880681038, | |
| "grad_norm": 2.497697114944458, | |
| "learning_rate": 0.0002227106227106227, | |
| "loss": 0.8116, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.9962927365096801, | |
| "grad_norm": 0.36689239740371704, | |
| "learning_rate": 0.00022258852258852255, | |
| "loss": 0.5001, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.9973911849512563, | |
| "grad_norm": 0.39868447184562683, | |
| "learning_rate": 0.00022246642246642243, | |
| "loss": 0.6913, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.9984896333928326, | |
| "grad_norm": 0.5270336270332336, | |
| "learning_rate": 0.00022234432234432235, | |
| "loss": 0.5401, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.999588081834409, | |
| "grad_norm": 0.4079851508140564, | |
| "learning_rate": 0.00022222222222222218, | |
| "loss": 0.471, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.000686530275985, | |
| "grad_norm": 0.43189048767089844, | |
| "learning_rate": 0.0002221001221001221, | |
| "loss": 0.8237, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.0017849787175614, | |
| "grad_norm": 0.52342289686203, | |
| "learning_rate": 0.00022197802197802198, | |
| "loss": 0.6363, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.0028834271591378, | |
| "grad_norm": 0.38078904151916504, | |
| "learning_rate": 0.00022185592185592184, | |
| "loss": 0.4411, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.003981875600714, | |
| "grad_norm": 0.5302817821502686, | |
| "learning_rate": 0.00022173382173382172, | |
| "loss": 0.858, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.0050803240422903, | |
| "grad_norm": 0.3696751892566681, | |
| "learning_rate": 0.0002216117216117216, | |
| "loss": 0.8766, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.0061787724838664, | |
| "grad_norm": 0.7566766738891602, | |
| "learning_rate": 0.00022148962148962146, | |
| "loss": 1.067, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.0072772209254428, | |
| "grad_norm": 0.7399318218231201, | |
| "learning_rate": 0.00022136752136752135, | |
| "loss": 0.6683, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.0083756693670192, | |
| "grad_norm": 0.5435899496078491, | |
| "learning_rate": 0.00022124542124542124, | |
| "loss": 0.6045, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.0094741178085953, | |
| "grad_norm": 0.9680571556091309, | |
| "learning_rate": 0.0002211233211233211, | |
| "loss": 0.7546, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.0105725662501717, | |
| "grad_norm": 0.6131067872047424, | |
| "learning_rate": 0.00022100122100122098, | |
| "loss": 0.6655, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.0116710146917478, | |
| "grad_norm": 0.8093316555023193, | |
| "learning_rate": 0.00022087912087912086, | |
| "loss": 0.4812, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.0127694631333242, | |
| "grad_norm": 0.5077763199806213, | |
| "learning_rate": 0.00022075702075702072, | |
| "loss": 0.5357, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.0138679115749005, | |
| "grad_norm": 0.4767695963382721, | |
| "learning_rate": 0.0002206349206349206, | |
| "loss": 0.5807, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.0149663600164767, | |
| "grad_norm": 0.3215581178665161, | |
| "learning_rate": 0.00022051282051282052, | |
| "loss": 0.5773, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.016064808458053, | |
| "grad_norm": 0.425603985786438, | |
| "learning_rate": 0.00022039072039072035, | |
| "loss": 0.5441, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.0171632568996292, | |
| "grad_norm": 0.6131730079650879, | |
| "learning_rate": 0.00022026862026862027, | |
| "loss": 0.856, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.0182617053412055, | |
| "grad_norm": 0.5472941398620605, | |
| "learning_rate": 0.00022014652014652012, | |
| "loss": 0.8228, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 1.0193601537827819, | |
| "grad_norm": 0.46728211641311646, | |
| "learning_rate": 0.00022002442002442, | |
| "loss": 0.7615, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.020458602224358, | |
| "grad_norm": 0.39919501543045044, | |
| "learning_rate": 0.0002199023199023199, | |
| "loss": 0.709, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 1.0215570506659344, | |
| "grad_norm": 0.564400315284729, | |
| "learning_rate": 0.00021978021978021975, | |
| "loss": 0.5941, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.0226554991075107, | |
| "grad_norm": 0.39073804020881653, | |
| "learning_rate": 0.00021965811965811964, | |
| "loss": 0.6386, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 1.0237539475490869, | |
| "grad_norm": 0.3725563585758209, | |
| "learning_rate": 0.00021953601953601952, | |
| "loss": 0.4766, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.0248523959906632, | |
| "grad_norm": 1.319197654724121, | |
| "learning_rate": 0.00021941391941391938, | |
| "loss": 0.8465, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 1.0259508444322394, | |
| "grad_norm": 0.5126785635948181, | |
| "learning_rate": 0.00021929181929181927, | |
| "loss": 0.5103, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.0270492928738157, | |
| "grad_norm": 0.5401897430419922, | |
| "learning_rate": 0.00021916971916971915, | |
| "loss": 0.5879, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.028147741315392, | |
| "grad_norm": 0.47014057636260986, | |
| "learning_rate": 0.000219047619047619, | |
| "loss": 0.658, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.0292461897569682, | |
| "grad_norm": 0.49227291345596313, | |
| "learning_rate": 0.0002189255189255189, | |
| "loss": 0.5271, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 1.0303446381985446, | |
| "grad_norm": 0.8186778426170349, | |
| "learning_rate": 0.00021880341880341878, | |
| "loss": 0.6491, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.0314430866401207, | |
| "grad_norm": 0.46345674991607666, | |
| "learning_rate": 0.00021868131868131864, | |
| "loss": 0.7935, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 1.032541535081697, | |
| "grad_norm": 1.7300915718078613, | |
| "learning_rate": 0.00021855921855921855, | |
| "loss": 0.516, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.0336399835232735, | |
| "grad_norm": 0.5100822448730469, | |
| "learning_rate": 0.00021843711843711844, | |
| "loss": 0.8286, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 1.0347384319648496, | |
| "grad_norm": 0.42278483510017395, | |
| "learning_rate": 0.0002183150183150183, | |
| "loss": 0.7312, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.035836880406426, | |
| "grad_norm": 0.42105185985565186, | |
| "learning_rate": 0.00021819291819291818, | |
| "loss": 0.5729, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 1.036935328848002, | |
| "grad_norm": 0.5117312669754028, | |
| "learning_rate": 0.00021807081807081807, | |
| "loss": 0.7688, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.0380337772895785, | |
| "grad_norm": 0.4982740879058838, | |
| "learning_rate": 0.00021794871794871793, | |
| "loss": 0.5746, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.0391322257311548, | |
| "grad_norm": 0.5181052684783936, | |
| "learning_rate": 0.0002178266178266178, | |
| "loss": 0.8446, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.040230674172731, | |
| "grad_norm": 5.104315757751465, | |
| "learning_rate": 0.0002177045177045177, | |
| "loss": 0.9641, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 1.0413291226143073, | |
| "grad_norm": 0.7384645938873291, | |
| "learning_rate": 0.00021758241758241756, | |
| "loss": 0.7168, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.0424275710558835, | |
| "grad_norm": 0.4367550313472748, | |
| "learning_rate": 0.00021746031746031744, | |
| "loss": 0.7139, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 1.0435260194974598, | |
| "grad_norm": 0.7332566380500793, | |
| "learning_rate": 0.00021733821733821733, | |
| "loss": 0.7082, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.0446244679390362, | |
| "grad_norm": 0.4191775918006897, | |
| "learning_rate": 0.00021721611721611719, | |
| "loss": 0.7986, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 1.0457229163806123, | |
| "grad_norm": 0.33929941058158875, | |
| "learning_rate": 0.00021709401709401707, | |
| "loss": 0.3784, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.0468213648221887, | |
| "grad_norm": 0.5255181789398193, | |
| "learning_rate": 0.00021697191697191693, | |
| "loss": 0.5842, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 1.047919813263765, | |
| "grad_norm": 0.5401780605316162, | |
| "learning_rate": 0.00021684981684981681, | |
| "loss": 0.7939, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.0490182617053412, | |
| "grad_norm": 0.34873855113983154, | |
| "learning_rate": 0.00021672771672771673, | |
| "loss": 0.7957, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.0501167101469175, | |
| "grad_norm": 0.33418160676956177, | |
| "learning_rate": 0.00021660561660561656, | |
| "loss": 0.6037, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.0512151585884937, | |
| "grad_norm": 0.3197249174118042, | |
| "learning_rate": 0.00021648351648351647, | |
| "loss": 0.5223, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 1.05231360703007, | |
| "grad_norm": 0.5962835550308228, | |
| "learning_rate": 0.00021636141636141636, | |
| "loss": 0.5213, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.0534120554716464, | |
| "grad_norm": 1.3891643285751343, | |
| "learning_rate": 0.00021623931623931622, | |
| "loss": 0.6781, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 1.0545105039132225, | |
| "grad_norm": 0.42117932438850403, | |
| "learning_rate": 0.0002161172161172161, | |
| "loss": 0.6363, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.055608952354799, | |
| "grad_norm": 0.4514491558074951, | |
| "learning_rate": 0.00021599511599511599, | |
| "loss": 0.6904, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 1.056707400796375, | |
| "grad_norm": 0.4863387644290924, | |
| "learning_rate": 0.00021587301587301584, | |
| "loss": 0.6595, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.0578058492379514, | |
| "grad_norm": 0.6178450584411621, | |
| "learning_rate": 0.00021575091575091573, | |
| "loss": 0.8412, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 1.0589042976795278, | |
| "grad_norm": 0.3728642761707306, | |
| "learning_rate": 0.00021562881562881562, | |
| "loss": 0.629, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.060002746121104, | |
| "grad_norm": 0.7554892301559448, | |
| "learning_rate": 0.00021550671550671547, | |
| "loss": 0.5804, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.0611011945626803, | |
| "grad_norm": 0.550298273563385, | |
| "learning_rate": 0.00021538461538461536, | |
| "loss": 0.476, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.0621996430042564, | |
| "grad_norm": 0.4082244336605072, | |
| "learning_rate": 0.00021526251526251524, | |
| "loss": 0.4001, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 1.0632980914458328, | |
| "grad_norm": 1.2327499389648438, | |
| "learning_rate": 0.0002151404151404151, | |
| "loss": 0.4583, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.0643965398874091, | |
| "grad_norm": 0.860550045967102, | |
| "learning_rate": 0.000215018315018315, | |
| "loss": 0.6415, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 1.0654949883289853, | |
| "grad_norm": 0.558860182762146, | |
| "learning_rate": 0.0002148962148962149, | |
| "loss": 0.6215, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.0665934367705616, | |
| "grad_norm": 0.7794890403747559, | |
| "learning_rate": 0.00021477411477411476, | |
| "loss": 0.5094, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 1.0676918852121378, | |
| "grad_norm": 0.48574942350387573, | |
| "learning_rate": 0.00021465201465201465, | |
| "loss": 0.7385, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.0687903336537141, | |
| "grad_norm": 0.4496791660785675, | |
| "learning_rate": 0.00021452991452991453, | |
| "loss": 0.5036, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 1.0698887820952905, | |
| "grad_norm": 0.5360952615737915, | |
| "learning_rate": 0.0002144078144078144, | |
| "loss": 0.6825, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.0709872305368666, | |
| "grad_norm": 0.5783904194831848, | |
| "learning_rate": 0.00021428571428571427, | |
| "loss": 0.6736, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.072085678978443, | |
| "grad_norm": 2.290815830230713, | |
| "learning_rate": 0.00021416361416361416, | |
| "loss": 0.696, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.0731841274200193, | |
| "grad_norm": 1.3432899713516235, | |
| "learning_rate": 0.00021404151404151402, | |
| "loss": 0.5296, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 1.0742825758615955, | |
| "grad_norm": 0.5308722257614136, | |
| "learning_rate": 0.0002139194139194139, | |
| "loss": 0.6642, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.0753810243031718, | |
| "grad_norm": 0.7245768904685974, | |
| "learning_rate": 0.00021379731379731376, | |
| "loss": 0.6811, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 1.076479472744748, | |
| "grad_norm": 0.3873349726200104, | |
| "learning_rate": 0.00021367521367521365, | |
| "loss": 0.8503, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.0775779211863243, | |
| "grad_norm": 0.5792405605316162, | |
| "learning_rate": 0.00021355311355311353, | |
| "loss": 0.4543, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 1.0786763696279005, | |
| "grad_norm": 0.6543241143226624, | |
| "learning_rate": 0.0002134310134310134, | |
| "loss": 0.7778, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.0797748180694768, | |
| "grad_norm": 0.5572071075439453, | |
| "learning_rate": 0.00021330891330891328, | |
| "loss": 0.8446, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 1.0808732665110532, | |
| "grad_norm": 0.5798014402389526, | |
| "learning_rate": 0.0002131868131868132, | |
| "loss": 0.7461, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.0819717149526293, | |
| "grad_norm": 0.8282085657119751, | |
| "learning_rate": 0.00021306471306471302, | |
| "loss": 0.612, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.0830701633942057, | |
| "grad_norm": 0.5782580971717834, | |
| "learning_rate": 0.00021294261294261293, | |
| "loss": 0.5506, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.084168611835782, | |
| "grad_norm": 0.3826775848865509, | |
| "learning_rate": 0.00021282051282051282, | |
| "loss": 0.7859, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 1.0852670602773582, | |
| "grad_norm": 0.534752368927002, | |
| "learning_rate": 0.00021269841269841268, | |
| "loss": 0.8835, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.0863655087189346, | |
| "grad_norm": 0.45931264758110046, | |
| "learning_rate": 0.00021257631257631256, | |
| "loss": 0.6694, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 1.0874639571605107, | |
| "grad_norm": 0.6106250286102295, | |
| "learning_rate": 0.00021245421245421245, | |
| "loss": 0.8274, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.088562405602087, | |
| "grad_norm": 0.3704061806201935, | |
| "learning_rate": 0.0002123321123321123, | |
| "loss": 0.7449, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 1.0896608540436634, | |
| "grad_norm": 0.3922840356826782, | |
| "learning_rate": 0.0002122100122100122, | |
| "loss": 0.5845, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.0907593024852396, | |
| "grad_norm": 0.48152726888656616, | |
| "learning_rate": 0.00021208791208791208, | |
| "loss": 0.6608, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 1.091857750926816, | |
| "grad_norm": 0.42257216572761536, | |
| "learning_rate": 0.00021196581196581194, | |
| "loss": 0.6379, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.092956199368392, | |
| "grad_norm": 0.4746345579624176, | |
| "learning_rate": 0.00021184371184371182, | |
| "loss": 0.6467, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.0940546478099684, | |
| "grad_norm": 0.3915644884109497, | |
| "learning_rate": 0.0002117216117216117, | |
| "loss": 0.9699, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.0951530962515448, | |
| "grad_norm": 0.5957880020141602, | |
| "learning_rate": 0.00021159951159951157, | |
| "loss": 0.6917, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 1.096251544693121, | |
| "grad_norm": 0.4327985942363739, | |
| "learning_rate": 0.00021147741147741145, | |
| "loss": 0.8091, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.0973499931346973, | |
| "grad_norm": 0.42600274085998535, | |
| "learning_rate": 0.00021135531135531136, | |
| "loss": 0.7685, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 1.0984484415762734, | |
| "grad_norm": 0.7165039777755737, | |
| "learning_rate": 0.0002112332112332112, | |
| "loss": 0.8646, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0995468900178498, | |
| "grad_norm": 0.447652131319046, | |
| "learning_rate": 0.0002111111111111111, | |
| "loss": 0.521, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 1.1006453384594261, | |
| "grad_norm": 0.3022591769695282, | |
| "learning_rate": 0.000210989010989011, | |
| "loss": 0.6099, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.1017437869010023, | |
| "grad_norm": 0.32764387130737305, | |
| "learning_rate": 0.00021086691086691085, | |
| "loss": 0.5624, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 1.1028422353425786, | |
| "grad_norm": 0.7301959991455078, | |
| "learning_rate": 0.00021074481074481074, | |
| "loss": 0.6091, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.1039406837841548, | |
| "grad_norm": 0.4734131097793579, | |
| "learning_rate": 0.0002106227106227106, | |
| "loss": 0.6849, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.1050391322257311, | |
| "grad_norm": 0.7214820384979248, | |
| "learning_rate": 0.00021050061050061048, | |
| "loss": 0.789, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.1061375806673075, | |
| "grad_norm": 0.31265702843666077, | |
| "learning_rate": 0.00021037851037851037, | |
| "loss": 0.5176, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 1.1072360291088836, | |
| "grad_norm": 0.5804157257080078, | |
| "learning_rate": 0.00021025641025641022, | |
| "loss": 1.0152, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.10833447755046, | |
| "grad_norm": 0.3624595105648041, | |
| "learning_rate": 0.0002101343101343101, | |
| "loss": 0.6843, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 1.1094329259920364, | |
| "grad_norm": 0.5099515318870544, | |
| "learning_rate": 0.00021001221001221, | |
| "loss": 0.5568, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.1105313744336125, | |
| "grad_norm": 0.46201249957084656, | |
| "learning_rate": 0.00020989010989010985, | |
| "loss": 0.5883, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 1.1116298228751889, | |
| "grad_norm": 0.4493483603000641, | |
| "learning_rate": 0.00020976800976800974, | |
| "loss": 0.8338, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.112728271316765, | |
| "grad_norm": 0.4771614968776703, | |
| "learning_rate": 0.00020964590964590963, | |
| "loss": 0.7251, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 1.1138267197583414, | |
| "grad_norm": 2.073347806930542, | |
| "learning_rate": 0.00020952380952380948, | |
| "loss": 0.8921, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.1149251681999177, | |
| "grad_norm": 0.435680091381073, | |
| "learning_rate": 0.0002094017094017094, | |
| "loss": 0.5444, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.1160236166414939, | |
| "grad_norm": 0.46824783086776733, | |
| "learning_rate": 0.00020927960927960928, | |
| "loss": 0.5591, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.1171220650830702, | |
| "grad_norm": 0.43938374519348145, | |
| "learning_rate": 0.00020915750915750914, | |
| "loss": 0.7476, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 1.1182205135246464, | |
| "grad_norm": 0.3620377779006958, | |
| "learning_rate": 0.00020903540903540903, | |
| "loss": 0.5763, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.1193189619662227, | |
| "grad_norm": 0.612406313419342, | |
| "learning_rate": 0.0002089133089133089, | |
| "loss": 0.706, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 1.120417410407799, | |
| "grad_norm": 0.5045173168182373, | |
| "learning_rate": 0.00020879120879120877, | |
| "loss": 0.6799, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.1215158588493752, | |
| "grad_norm": 0.4815331995487213, | |
| "learning_rate": 0.00020866910866910865, | |
| "loss": 0.8845, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 1.1226143072909516, | |
| "grad_norm": 0.3756159245967865, | |
| "learning_rate": 0.00020854700854700854, | |
| "loss": 0.5545, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.1237127557325277, | |
| "grad_norm": 0.3184347152709961, | |
| "learning_rate": 0.0002084249084249084, | |
| "loss": 0.5109, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 1.124811204174104, | |
| "grad_norm": 0.4000808298587799, | |
| "learning_rate": 0.00020830280830280828, | |
| "loss": 0.8363, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.1259096526156804, | |
| "grad_norm": 0.3930743336677551, | |
| "learning_rate": 0.00020818070818070817, | |
| "loss": 0.6183, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.1270081010572566, | |
| "grad_norm": 0.7536817789077759, | |
| "learning_rate": 0.00020805860805860803, | |
| "loss": 0.7511, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.128106549498833, | |
| "grad_norm": 0.5012079477310181, | |
| "learning_rate": 0.00020793650793650791, | |
| "loss": 0.6346, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 1.129204997940409, | |
| "grad_norm": 0.9914690852165222, | |
| "learning_rate": 0.00020781440781440783, | |
| "loss": 0.5827, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.1303034463819854, | |
| "grad_norm": 0.9096476435661316, | |
| "learning_rate": 0.00020769230769230766, | |
| "loss": 1.0235, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 1.1314018948235618, | |
| "grad_norm": 0.6668229699134827, | |
| "learning_rate": 0.00020757020757020757, | |
| "loss": 0.741, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.132500343265138, | |
| "grad_norm": 0.3232771158218384, | |
| "learning_rate": 0.0002074481074481074, | |
| "loss": 0.6206, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 1.1335987917067143, | |
| "grad_norm": 0.278003990650177, | |
| "learning_rate": 0.00020732600732600731, | |
| "loss": 0.5661, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.1346972401482907, | |
| "grad_norm": 1.481213927268982, | |
| "learning_rate": 0.0002072039072039072, | |
| "loss": 0.6422, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 1.1357956885898668, | |
| "grad_norm": 0.4688512682914734, | |
| "learning_rate": 0.00020708180708180706, | |
| "loss": 0.4163, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.1368941370314432, | |
| "grad_norm": 0.6438425779342651, | |
| "learning_rate": 0.00020695970695970694, | |
| "loss": 0.6241, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.1379925854730193, | |
| "grad_norm": 0.5013176798820496, | |
| "learning_rate": 0.00020683760683760683, | |
| "loss": 0.6273, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.1390910339145957, | |
| "grad_norm": 0.5178597569465637, | |
| "learning_rate": 0.0002067155067155067, | |
| "loss": 0.7489, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 1.1401894823561718, | |
| "grad_norm": 0.5804840922355652, | |
| "learning_rate": 0.00020659340659340657, | |
| "loss": 0.9142, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.1412879307977482, | |
| "grad_norm": 0.47613444924354553, | |
| "learning_rate": 0.00020647130647130646, | |
| "loss": 0.9531, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 1.1423863792393245, | |
| "grad_norm": 0.4835624694824219, | |
| "learning_rate": 0.00020634920634920632, | |
| "loss": 0.6349, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.1434848276809007, | |
| "grad_norm": 0.38351112604141235, | |
| "learning_rate": 0.0002062271062271062, | |
| "loss": 0.4726, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 1.144583276122477, | |
| "grad_norm": 0.5533854365348816, | |
| "learning_rate": 0.0002061050061050061, | |
| "loss": 0.5108, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.1456817245640534, | |
| "grad_norm": 0.4842824637889862, | |
| "learning_rate": 0.00020598290598290595, | |
| "loss": 0.6038, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 1.1467801730056295, | |
| "grad_norm": 0.552798330783844, | |
| "learning_rate": 0.00020586080586080583, | |
| "loss": 0.8056, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.1478786214472059, | |
| "grad_norm": 0.40466025471687317, | |
| "learning_rate": 0.00020573870573870574, | |
| "loss": 0.6234, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.148977069888782, | |
| "grad_norm": 0.6988784074783325, | |
| "learning_rate": 0.0002056166056166056, | |
| "loss": 0.7721, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.1500755183303584, | |
| "grad_norm": 0.4852863550186157, | |
| "learning_rate": 0.0002054945054945055, | |
| "loss": 0.6074, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 1.1511739667719347, | |
| "grad_norm": 0.4548696279525757, | |
| "learning_rate": 0.00020537240537240537, | |
| "loss": 0.5592, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.1522724152135109, | |
| "grad_norm": 0.9355410933494568, | |
| "learning_rate": 0.00020525030525030523, | |
| "loss": 0.8618, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 1.1533708636550872, | |
| "grad_norm": 0.5641398429870605, | |
| "learning_rate": 0.00020512820512820512, | |
| "loss": 0.704, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.1544693120966634, | |
| "grad_norm": 0.48187771439552307, | |
| "learning_rate": 0.000205006105006105, | |
| "loss": 0.6008, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 1.1555677605382397, | |
| "grad_norm": 0.41609904170036316, | |
| "learning_rate": 0.00020488400488400486, | |
| "loss": 0.8812, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.156666208979816, | |
| "grad_norm": 0.919477105140686, | |
| "learning_rate": 0.00020476190476190475, | |
| "loss": 0.6597, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 1.1577646574213922, | |
| "grad_norm": 0.5008611083030701, | |
| "learning_rate": 0.0002046398046398046, | |
| "loss": 0.6501, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.1588631058629686, | |
| "grad_norm": 0.39832696318626404, | |
| "learning_rate": 0.0002045177045177045, | |
| "loss": 0.6232, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.159961554304545, | |
| "grad_norm": 0.5290446281433105, | |
| "learning_rate": 0.00020439560439560438, | |
| "loss": 0.6123, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.161060002746121, | |
| "grad_norm": 0.40837669372558594, | |
| "learning_rate": 0.00020427350427350423, | |
| "loss": 0.4989, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 1.1621584511876974, | |
| "grad_norm": 0.43407055735588074, | |
| "learning_rate": 0.00020415140415140412, | |
| "loss": 0.6961, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.1632568996292736, | |
| "grad_norm": 0.7601787447929382, | |
| "learning_rate": 0.00020402930402930403, | |
| "loss": 0.9308, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 1.16435534807085, | |
| "grad_norm": 0.452628493309021, | |
| "learning_rate": 0.00020390720390720386, | |
| "loss": 0.6478, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.165453796512426, | |
| "grad_norm": 0.4524000287055969, | |
| "learning_rate": 0.00020378510378510378, | |
| "loss": 0.4499, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 1.1665522449540024, | |
| "grad_norm": 0.5971822142601013, | |
| "learning_rate": 0.00020366300366300366, | |
| "loss": 0.6402, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.1676506933955788, | |
| "grad_norm": 0.36858659982681274, | |
| "learning_rate": 0.00020354090354090352, | |
| "loss": 0.6511, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 1.168749141837155, | |
| "grad_norm": 0.47295433282852173, | |
| "learning_rate": 0.0002034188034188034, | |
| "loss": 0.5977, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.1698475902787313, | |
| "grad_norm": 0.4402971565723419, | |
| "learning_rate": 0.0002032967032967033, | |
| "loss": 0.4824, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.1709460387203077, | |
| "grad_norm": 0.3752620816230774, | |
| "learning_rate": 0.00020317460317460315, | |
| "loss": 0.6519, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.1720444871618838, | |
| "grad_norm": 0.45207279920578003, | |
| "learning_rate": 0.00020305250305250303, | |
| "loss": 0.6869, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 1.1731429356034602, | |
| "grad_norm": 0.4255804121494293, | |
| "learning_rate": 0.00020293040293040292, | |
| "loss": 0.7289, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.1742413840450363, | |
| "grad_norm": 0.48725178837776184, | |
| "learning_rate": 0.00020280830280830278, | |
| "loss": 0.5472, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 1.1753398324866127, | |
| "grad_norm": 0.37094470858573914, | |
| "learning_rate": 0.00020268620268620266, | |
| "loss": 0.558, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.176438280928189, | |
| "grad_norm": 0.4191375970840454, | |
| "learning_rate": 0.00020256410256410255, | |
| "loss": 0.6422, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 1.1775367293697652, | |
| "grad_norm": 0.4091531038284302, | |
| "learning_rate": 0.0002024420024420024, | |
| "loss": 0.6705, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.1786351778113415, | |
| "grad_norm": 0.4876718521118164, | |
| "learning_rate": 0.0002023199023199023, | |
| "loss": 0.8265, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 1.1797336262529177, | |
| "grad_norm": 0.43008798360824585, | |
| "learning_rate": 0.0002021978021978022, | |
| "loss": 0.5159, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.180832074694494, | |
| "grad_norm": 0.47896140813827515, | |
| "learning_rate": 0.00020207570207570204, | |
| "loss": 0.5455, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.1819305231360704, | |
| "grad_norm": 0.5313389301300049, | |
| "learning_rate": 0.00020195360195360195, | |
| "loss": 0.7628, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.1830289715776465, | |
| "grad_norm": 0.46337512135505676, | |
| "learning_rate": 0.00020183150183150184, | |
| "loss": 0.6661, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 1.1841274200192229, | |
| "grad_norm": 0.4304458498954773, | |
| "learning_rate": 0.0002017094017094017, | |
| "loss": 0.7019, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.185225868460799, | |
| "grad_norm": 0.638445258140564, | |
| "learning_rate": 0.00020158730158730158, | |
| "loss": 0.6972, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 1.1863243169023754, | |
| "grad_norm": 1.8217968940734863, | |
| "learning_rate": 0.00020146520146520144, | |
| "loss": 0.5217, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.1874227653439517, | |
| "grad_norm": 0.4996611773967743, | |
| "learning_rate": 0.00020134310134310132, | |
| "loss": 0.6767, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 1.1885212137855279, | |
| "grad_norm": 0.43705832958221436, | |
| "learning_rate": 0.0002012210012210012, | |
| "loss": 0.7364, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.1896196622271042, | |
| "grad_norm": 0.4148736596107483, | |
| "learning_rate": 0.00020109890109890107, | |
| "loss": 0.7544, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 1.1907181106686804, | |
| "grad_norm": 0.5772218108177185, | |
| "learning_rate": 0.00020097680097680095, | |
| "loss": 0.6349, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.1918165591102567, | |
| "grad_norm": 0.9127015471458435, | |
| "learning_rate": 0.00020085470085470084, | |
| "loss": 0.4772, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.192915007551833, | |
| "grad_norm": 0.46906840801239014, | |
| "learning_rate": 0.0002007326007326007, | |
| "loss": 0.6184, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.1940134559934092, | |
| "grad_norm": 0.38405168056488037, | |
| "learning_rate": 0.00020061050061050058, | |
| "loss": 0.5027, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 1.1951119044349856, | |
| "grad_norm": 0.6352836489677429, | |
| "learning_rate": 0.00020048840048840047, | |
| "loss": 0.6674, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.196210352876562, | |
| "grad_norm": 0.6750807762145996, | |
| "learning_rate": 0.00020036630036630033, | |
| "loss": 0.5707, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 1.197308801318138, | |
| "grad_norm": 0.5661985874176025, | |
| "learning_rate": 0.00020024420024420024, | |
| "loss": 0.8298, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.1984072497597145, | |
| "grad_norm": 0.6393309831619263, | |
| "learning_rate": 0.00020012210012210012, | |
| "loss": 0.7397, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 1.1995056982012906, | |
| "grad_norm": 0.5442856550216675, | |
| "learning_rate": 0.00019999999999999998, | |
| "loss": 0.7176, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.200604146642867, | |
| "grad_norm": 1.0100654363632202, | |
| "learning_rate": 0.00019987789987789987, | |
| "loss": 0.8052, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 1.201702595084443, | |
| "grad_norm": 0.3916209936141968, | |
| "learning_rate": 0.00019975579975579975, | |
| "loss": 0.5951, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.2028010435260195, | |
| "grad_norm": 0.3890608847141266, | |
| "learning_rate": 0.0001996336996336996, | |
| "loss": 0.8129, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.2038994919675958, | |
| "grad_norm": 0.4267507493495941, | |
| "learning_rate": 0.0001995115995115995, | |
| "loss": 0.8741, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.204997940409172, | |
| "grad_norm": 0.49055561423301697, | |
| "learning_rate": 0.00019938949938949938, | |
| "loss": 0.901, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 1.2060963888507483, | |
| "grad_norm": 0.6662428379058838, | |
| "learning_rate": 0.00019926739926739924, | |
| "loss": 0.4971, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.2071948372923247, | |
| "grad_norm": 0.4469052255153656, | |
| "learning_rate": 0.00019914529914529913, | |
| "loss": 0.6593, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 1.2082932857339008, | |
| "grad_norm": 0.5514255166053772, | |
| "learning_rate": 0.000199023199023199, | |
| "loss": 0.8033, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.2093917341754772, | |
| "grad_norm": 0.4838184714317322, | |
| "learning_rate": 0.00019890109890109887, | |
| "loss": 0.5533, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 1.2104901826170533, | |
| "grad_norm": 0.6061891913414001, | |
| "learning_rate": 0.00019877899877899876, | |
| "loss": 0.5837, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.2115886310586297, | |
| "grad_norm": 0.3387523889541626, | |
| "learning_rate": 0.00019865689865689867, | |
| "loss": 0.455, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 1.212687079500206, | |
| "grad_norm": 0.5204731225967407, | |
| "learning_rate": 0.0001985347985347985, | |
| "loss": 0.6869, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.2137855279417822, | |
| "grad_norm": 0.5747571587562561, | |
| "learning_rate": 0.0001984126984126984, | |
| "loss": 0.7208, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.2148839763833585, | |
| "grad_norm": 0.5382461547851562, | |
| "learning_rate": 0.00019829059829059824, | |
| "loss": 0.6035, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.2159824248249347, | |
| "grad_norm": 0.44335421919822693, | |
| "learning_rate": 0.00019816849816849816, | |
| "loss": 0.8563, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 1.217080873266511, | |
| "grad_norm": 0.3059934675693512, | |
| "learning_rate": 0.00019804639804639804, | |
| "loss": 0.6422, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.2181793217080874, | |
| "grad_norm": 0.4306177794933319, | |
| "learning_rate": 0.0001979242979242979, | |
| "loss": 0.5347, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 1.2192777701496635, | |
| "grad_norm": 0.5196095705032349, | |
| "learning_rate": 0.00019780219780219779, | |
| "loss": 0.5996, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.22037621859124, | |
| "grad_norm": 0.4814283549785614, | |
| "learning_rate": 0.00019768009768009767, | |
| "loss": 0.6782, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 1.2214746670328163, | |
| "grad_norm": 0.2287791222333908, | |
| "learning_rate": 0.00019755799755799753, | |
| "loss": 0.5908, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.2225731154743924, | |
| "grad_norm": 0.43044313788414, | |
| "learning_rate": 0.00019743589743589742, | |
| "loss": 0.6554, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 1.2236715639159688, | |
| "grad_norm": 0.390874445438385, | |
| "learning_rate": 0.0001973137973137973, | |
| "loss": 0.5777, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.224770012357545, | |
| "grad_norm": 0.5380458235740662, | |
| "learning_rate": 0.00019719169719169716, | |
| "loss": 0.467, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.2258684607991213, | |
| "grad_norm": 0.6176440119743347, | |
| "learning_rate": 0.00019706959706959704, | |
| "loss": 0.5625, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.2269669092406974, | |
| "grad_norm": 0.4321332275867462, | |
| "learning_rate": 0.00019694749694749693, | |
| "loss": 0.7262, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 1.2280653576822738, | |
| "grad_norm": 0.5679623484611511, | |
| "learning_rate": 0.0001968253968253968, | |
| "loss": 0.8216, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.2291638061238501, | |
| "grad_norm": 0.4741218686103821, | |
| "learning_rate": 0.00019670329670329667, | |
| "loss": 0.7164, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 1.2302622545654263, | |
| "grad_norm": 0.6570267677307129, | |
| "learning_rate": 0.00019658119658119659, | |
| "loss": 0.7606, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.2313607030070026, | |
| "grad_norm": 0.4256306290626526, | |
| "learning_rate": 0.00019645909645909644, | |
| "loss": 0.5137, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 1.232459151448579, | |
| "grad_norm": 0.4444984793663025, | |
| "learning_rate": 0.00019633699633699633, | |
| "loss": 0.8863, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.2335575998901551, | |
| "grad_norm": 0.458133339881897, | |
| "learning_rate": 0.00019621489621489622, | |
| "loss": 0.6445, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 1.2346560483317315, | |
| "grad_norm": 0.6087627410888672, | |
| "learning_rate": 0.00019609279609279607, | |
| "loss": 0.5625, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.2357544967733076, | |
| "grad_norm": 0.42782312631607056, | |
| "learning_rate": 0.00019597069597069596, | |
| "loss": 0.6321, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.236852945214884, | |
| "grad_norm": 0.49623987078666687, | |
| "learning_rate": 0.00019584859584859585, | |
| "loss": 0.6473, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.2379513936564603, | |
| "grad_norm": 0.5348198413848877, | |
| "learning_rate": 0.0001957264957264957, | |
| "loss": 0.6948, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 1.2390498420980365, | |
| "grad_norm": 0.44476062059402466, | |
| "learning_rate": 0.0001956043956043956, | |
| "loss": 0.5917, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.2401482905396128, | |
| "grad_norm": 0.5777286291122437, | |
| "learning_rate": 0.00019548229548229547, | |
| "loss": 0.7474, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 1.241246738981189, | |
| "grad_norm": 0.3132689893245697, | |
| "learning_rate": 0.00019536019536019533, | |
| "loss": 0.5827, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.2423451874227653, | |
| "grad_norm": 0.3898192346096039, | |
| "learning_rate": 0.00019523809523809522, | |
| "loss": 0.5469, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 1.2434436358643417, | |
| "grad_norm": 0.338693767786026, | |
| "learning_rate": 0.00019511599511599508, | |
| "loss": 0.704, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.2445420843059178, | |
| "grad_norm": 0.4276609718799591, | |
| "learning_rate": 0.00019499389499389496, | |
| "loss": 0.7269, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 1.2456405327474942, | |
| "grad_norm": 0.7320281863212585, | |
| "learning_rate": 0.00019487179487179487, | |
| "loss": 0.62, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.2467389811890706, | |
| "grad_norm": 0.4023820757865906, | |
| "learning_rate": 0.0001947496947496947, | |
| "loss": 0.4234, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.2478374296306467, | |
| "grad_norm": 0.3218212425708771, | |
| "learning_rate": 0.00019462759462759462, | |
| "loss": 0.5325, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.248935878072223, | |
| "grad_norm": 0.45131513476371765, | |
| "learning_rate": 0.0001945054945054945, | |
| "loss": 0.5667, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 1.2500343265137992, | |
| "grad_norm": 0.604475200176239, | |
| "learning_rate": 0.00019438339438339436, | |
| "loss": 0.9018, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.2511327749553756, | |
| "grad_norm": 0.46968311071395874, | |
| "learning_rate": 0.00019426129426129425, | |
| "loss": 0.7946, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 1.2522312233969517, | |
| "grad_norm": 0.3960346281528473, | |
| "learning_rate": 0.00019413919413919413, | |
| "loss": 0.7719, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.253329671838528, | |
| "grad_norm": 0.5146461129188538, | |
| "learning_rate": 0.000194017094017094, | |
| "loss": 0.8946, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 1.2544281202801044, | |
| "grad_norm": 0.6343802809715271, | |
| "learning_rate": 0.00019389499389499388, | |
| "loss": 0.7822, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.2555265687216806, | |
| "grad_norm": 0.4646434485912323, | |
| "learning_rate": 0.00019377289377289376, | |
| "loss": 0.6722, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 1.256625017163257, | |
| "grad_norm": 0.48127877712249756, | |
| "learning_rate": 0.00019365079365079362, | |
| "loss": 0.9059, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.2577234656048333, | |
| "grad_norm": 0.4040716290473938, | |
| "learning_rate": 0.0001935286935286935, | |
| "loss": 0.7288, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.2588219140464094, | |
| "grad_norm": 0.43992865085601807, | |
| "learning_rate": 0.0001934065934065934, | |
| "loss": 0.5804, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.2599203624879858, | |
| "grad_norm": 0.41578513383865356, | |
| "learning_rate": 0.00019328449328449325, | |
| "loss": 0.5459, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 1.261018810929562, | |
| "grad_norm": 0.40165719389915466, | |
| "learning_rate": 0.00019316239316239314, | |
| "loss": 0.6001, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.2621172593711383, | |
| "grad_norm": 0.43200212717056274, | |
| "learning_rate": 0.00019304029304029305, | |
| "loss": 0.8712, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 1.2632157078127144, | |
| "grad_norm": 0.3217264413833618, | |
| "learning_rate": 0.00019291819291819288, | |
| "loss": 0.6074, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.2643141562542908, | |
| "grad_norm": 0.3964528441429138, | |
| "learning_rate": 0.0001927960927960928, | |
| "loss": 0.6131, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 1.2654126046958671, | |
| "grad_norm": 0.5151070952415466, | |
| "learning_rate": 0.00019267399267399268, | |
| "loss": 0.6992, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.2665110531374433, | |
| "grad_norm": 0.5902129411697388, | |
| "learning_rate": 0.00019255189255189254, | |
| "loss": 0.7311, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 1.2676095015790196, | |
| "grad_norm": 0.5386108160018921, | |
| "learning_rate": 0.00019242979242979242, | |
| "loss": 0.6469, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.268707950020596, | |
| "grad_norm": 0.384093701839447, | |
| "learning_rate": 0.0001923076923076923, | |
| "loss": 0.7111, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.2698063984621721, | |
| "grad_norm": 0.34160250425338745, | |
| "learning_rate": 0.00019218559218559217, | |
| "loss": 0.5396, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.2709048469037485, | |
| "grad_norm": 0.6590912938117981, | |
| "learning_rate": 0.00019206349206349205, | |
| "loss": 1.1613, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 1.2720032953453249, | |
| "grad_norm": 0.6230842471122742, | |
| "learning_rate": 0.0001919413919413919, | |
| "loss": 0.7701, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.273101743786901, | |
| "grad_norm": 0.3881864547729492, | |
| "learning_rate": 0.0001918192918192918, | |
| "loss": 0.633, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 1.2742001922284774, | |
| "grad_norm": 0.4538264274597168, | |
| "learning_rate": 0.00019169719169719168, | |
| "loss": 0.451, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.2752986406700535, | |
| "grad_norm": 0.6188018321990967, | |
| "learning_rate": 0.00019157509157509154, | |
| "loss": 0.9563, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 1.2763970891116299, | |
| "grad_norm": 0.4172852039337158, | |
| "learning_rate": 0.00019145299145299142, | |
| "loss": 0.8284, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.277495537553206, | |
| "grad_norm": 0.338623583316803, | |
| "learning_rate": 0.0001913308913308913, | |
| "loss": 0.6745, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 1.2785939859947824, | |
| "grad_norm": 0.3960900902748108, | |
| "learning_rate": 0.00019120879120879117, | |
| "loss": 0.6508, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.2796924344363587, | |
| "grad_norm": 0.37232962250709534, | |
| "learning_rate": 0.00019108669108669108, | |
| "loss": 0.7347, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.2807908828779349, | |
| "grad_norm": 0.47092223167419434, | |
| "learning_rate": 0.00019096459096459097, | |
| "loss": 0.8251, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.2818893313195112, | |
| "grad_norm": 0.4647108316421509, | |
| "learning_rate": 0.00019084249084249082, | |
| "loss": 0.556, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 1.2829877797610876, | |
| "grad_norm": 0.5812810659408569, | |
| "learning_rate": 0.0001907203907203907, | |
| "loss": 0.6802, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.2840862282026637, | |
| "grad_norm": 0.3731052279472351, | |
| "learning_rate": 0.0001905982905982906, | |
| "loss": 0.6384, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 1.28518467664424, | |
| "grad_norm": 0.47995856404304504, | |
| "learning_rate": 0.00019047619047619045, | |
| "loss": 0.4914, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.2862831250858162, | |
| "grad_norm": 0.3223705589771271, | |
| "learning_rate": 0.00019035409035409034, | |
| "loss": 0.6676, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 1.2873815735273926, | |
| "grad_norm": 0.5643377304077148, | |
| "learning_rate": 0.00019023199023199023, | |
| "loss": 0.8224, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.2884800219689687, | |
| "grad_norm": 0.48324450850486755, | |
| "learning_rate": 0.00019010989010989008, | |
| "loss": 0.8005, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 1.289578470410545, | |
| "grad_norm": 0.40516728162765503, | |
| "learning_rate": 0.00018998778998778997, | |
| "loss": 0.5463, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.2906769188521214, | |
| "grad_norm": 0.45521625876426697, | |
| "learning_rate": 0.00018986568986568985, | |
| "loss": 0.7562, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.2917753672936976, | |
| "grad_norm": 0.38747909665107727, | |
| "learning_rate": 0.0001897435897435897, | |
| "loss": 0.5074, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.292873815735274, | |
| "grad_norm": 0.39688000082969666, | |
| "learning_rate": 0.0001896214896214896, | |
| "loss": 0.3551, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 1.2939722641768503, | |
| "grad_norm": 0.6891604065895081, | |
| "learning_rate": 0.0001894993894993895, | |
| "loss": 0.601, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.2950707126184264, | |
| "grad_norm": 0.5177300572395325, | |
| "learning_rate": 0.00018937728937728934, | |
| "loss": 0.5188, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 1.2961691610600028, | |
| "grad_norm": 0.3166979253292084, | |
| "learning_rate": 0.00018925518925518926, | |
| "loss": 0.8411, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.2972676095015792, | |
| "grad_norm": 0.6637437343597412, | |
| "learning_rate": 0.00018913308913308914, | |
| "loss": 0.7256, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 1.2983660579431553, | |
| "grad_norm": 0.424932599067688, | |
| "learning_rate": 0.000189010989010989, | |
| "loss": 0.783, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.2994645063847314, | |
| "grad_norm": 0.47751033306121826, | |
| "learning_rate": 0.00018888888888888888, | |
| "loss": 0.7039, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 1.3005629548263078, | |
| "grad_norm": 0.4332704544067383, | |
| "learning_rate": 0.00018876678876678874, | |
| "loss": 0.4797, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.3016614032678842, | |
| "grad_norm": 0.439431756734848, | |
| "learning_rate": 0.00018864468864468863, | |
| "loss": 0.6256, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.3027598517094603, | |
| "grad_norm": 0.4334176480770111, | |
| "learning_rate": 0.00018852258852258851, | |
| "loss": 0.5583, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.3038583001510367, | |
| "grad_norm": 0.42080724239349365, | |
| "learning_rate": 0.00018840048840048837, | |
| "loss": 0.461, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 1.304956748592613, | |
| "grad_norm": 0.41007399559020996, | |
| "learning_rate": 0.00018827838827838826, | |
| "loss": 0.4746, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.3060551970341892, | |
| "grad_norm": 0.3763822019100189, | |
| "learning_rate": 0.00018815628815628814, | |
| "loss": 0.5352, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 1.3071536454757655, | |
| "grad_norm": 0.5557730197906494, | |
| "learning_rate": 0.000188034188034188, | |
| "loss": 0.5404, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.3082520939173419, | |
| "grad_norm": 0.43677788972854614, | |
| "learning_rate": 0.0001879120879120879, | |
| "loss": 0.7111, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 1.309350542358918, | |
| "grad_norm": 0.6084219217300415, | |
| "learning_rate": 0.00018778998778998777, | |
| "loss": 0.7524, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.3104489908004944, | |
| "grad_norm": 0.7219144701957703, | |
| "learning_rate": 0.00018766788766788763, | |
| "loss": 0.6182, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 1.3115474392420705, | |
| "grad_norm": 0.5280331969261169, | |
| "learning_rate": 0.00018754578754578752, | |
| "loss": 0.8023, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.3126458876836469, | |
| "grad_norm": 0.42130032181739807, | |
| "learning_rate": 0.00018742368742368743, | |
| "loss": 0.5673, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.313744336125223, | |
| "grad_norm": 0.6063292026519775, | |
| "learning_rate": 0.0001873015873015873, | |
| "loss": 0.6438, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.3148427845667994, | |
| "grad_norm": 0.4073690176010132, | |
| "learning_rate": 0.00018717948717948717, | |
| "loss": 0.7099, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 1.3159412330083757, | |
| "grad_norm": 0.5419113636016846, | |
| "learning_rate": 0.00018705738705738706, | |
| "loss": 0.6451, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.3170396814499519, | |
| "grad_norm": 0.4489867091178894, | |
| "learning_rate": 0.00018693528693528692, | |
| "loss": 0.7522, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 1.3181381298915282, | |
| "grad_norm": 0.3536837697029114, | |
| "learning_rate": 0.0001868131868131868, | |
| "loss": 0.6201, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.3192365783331046, | |
| "grad_norm": 0.42462313175201416, | |
| "learning_rate": 0.0001866910866910867, | |
| "loss": 0.4804, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 1.3203350267746807, | |
| "grad_norm": 0.612319827079773, | |
| "learning_rate": 0.00018656898656898655, | |
| "loss": 0.8546, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.321433475216257, | |
| "grad_norm": 0.5242000222206116, | |
| "learning_rate": 0.00018644688644688643, | |
| "loss": 0.7577, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 1.3225319236578332, | |
| "grad_norm": 0.5688628554344177, | |
| "learning_rate": 0.00018632478632478632, | |
| "loss": 0.6645, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.3236303720994096, | |
| "grad_norm": 0.3695731461048126, | |
| "learning_rate": 0.00018620268620268618, | |
| "loss": 0.4979, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.3247288205409857, | |
| "grad_norm": 0.44525593519210815, | |
| "learning_rate": 0.00018608058608058606, | |
| "loss": 0.807, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.325827268982562, | |
| "grad_norm": 0.37627971172332764, | |
| "learning_rate": 0.00018595848595848595, | |
| "loss": 0.6584, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 1.3269257174241385, | |
| "grad_norm": 0.39727315306663513, | |
| "learning_rate": 0.0001858363858363858, | |
| "loss": 0.5565, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.3280241658657146, | |
| "grad_norm": 0.4151424169540405, | |
| "learning_rate": 0.00018571428571428572, | |
| "loss": 0.81, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 1.329122614307291, | |
| "grad_norm": 0.37529075145721436, | |
| "learning_rate": 0.00018559218559218555, | |
| "loss": 0.6188, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.3302210627488673, | |
| "grad_norm": 0.43061408400535583, | |
| "learning_rate": 0.00018547008547008546, | |
| "loss": 0.814, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 1.3313195111904434, | |
| "grad_norm": 0.437511682510376, | |
| "learning_rate": 0.00018534798534798535, | |
| "loss": 0.55, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.3324179596320198, | |
| "grad_norm": 0.5172685980796814, | |
| "learning_rate": 0.0001852258852258852, | |
| "loss": 0.6551, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 1.3335164080735962, | |
| "grad_norm": 0.3292716443538666, | |
| "learning_rate": 0.0001851037851037851, | |
| "loss": 0.5108, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.3346148565151723, | |
| "grad_norm": 0.7129474878311157, | |
| "learning_rate": 0.00018498168498168498, | |
| "loss": 0.7197, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 1.3357133049567487, | |
| "grad_norm": 0.46317145228385925, | |
| "learning_rate": 0.00018485958485958483, | |
| "loss": 0.6553, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.3368117533983248, | |
| "grad_norm": 0.5539398789405823, | |
| "learning_rate": 0.00018473748473748472, | |
| "loss": 0.7057, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 1.3379102018399012, | |
| "grad_norm": 0.40555253624916077, | |
| "learning_rate": 0.0001846153846153846, | |
| "loss": 0.5976, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.3390086502814773, | |
| "grad_norm": 0.462704062461853, | |
| "learning_rate": 0.00018449328449328446, | |
| "loss": 0.7018, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 1.3401070987230537, | |
| "grad_norm": 0.407287061214447, | |
| "learning_rate": 0.00018437118437118435, | |
| "loss": 0.4726, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.34120554716463, | |
| "grad_norm": 0.3654995858669281, | |
| "learning_rate": 0.00018424908424908423, | |
| "loss": 0.5811, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 1.3423039956062062, | |
| "grad_norm": 0.46455878019332886, | |
| "learning_rate": 0.0001841269841269841, | |
| "loss": 0.8998, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.3434024440477825, | |
| "grad_norm": 0.47929346561431885, | |
| "learning_rate": 0.00018400488400488398, | |
| "loss": 0.7348, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 1.344500892489359, | |
| "grad_norm": 0.7128652930259705, | |
| "learning_rate": 0.0001838827838827839, | |
| "loss": 1.2647, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.345599340930935, | |
| "grad_norm": 0.3956572413444519, | |
| "learning_rate": 0.00018376068376068372, | |
| "loss": 0.6985, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.3466977893725114, | |
| "grad_norm": 0.5585309863090515, | |
| "learning_rate": 0.00018363858363858364, | |
| "loss": 1.0086, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.3477962378140875, | |
| "grad_norm": 1.5960838794708252, | |
| "learning_rate": 0.00018351648351648352, | |
| "loss": 0.644, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 1.3488946862556639, | |
| "grad_norm": 0.6499342322349548, | |
| "learning_rate": 0.00018339438339438338, | |
| "loss": 0.7698, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.34999313469724, | |
| "grad_norm": 0.42246925830841064, | |
| "learning_rate": 0.00018327228327228326, | |
| "loss": 0.5614, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 1.3510915831388164, | |
| "grad_norm": 0.42192572355270386, | |
| "learning_rate": 0.00018315018315018315, | |
| "loss": 0.7726, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.3521900315803927, | |
| "grad_norm": 0.6409221887588501, | |
| "learning_rate": 0.000183028083028083, | |
| "loss": 0.5928, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 1.3532884800219689, | |
| "grad_norm": 1.328852653503418, | |
| "learning_rate": 0.0001829059829059829, | |
| "loss": 0.7861, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.3543869284635452, | |
| "grad_norm": 0.4519331753253937, | |
| "learning_rate": 0.00018278388278388275, | |
| "loss": 0.5938, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 1.3554853769051216, | |
| "grad_norm": 0.3942720592021942, | |
| "learning_rate": 0.00018266178266178264, | |
| "loss": 0.4781, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.3565838253466977, | |
| "grad_norm": 0.5066869258880615, | |
| "learning_rate": 0.00018253968253968252, | |
| "loss": 0.8069, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 1.357682273788274, | |
| "grad_norm": 0.37002792954444885, | |
| "learning_rate": 0.00018241758241758238, | |
| "loss": 0.5737, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.3587807222298505, | |
| "grad_norm": 0.3738810122013092, | |
| "learning_rate": 0.00018229548229548227, | |
| "loss": 0.5169, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 1.3598791706714266, | |
| "grad_norm": 0.44956260919570923, | |
| "learning_rate": 0.00018217338217338215, | |
| "loss": 0.5614, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.3609776191130027, | |
| "grad_norm": 0.34839004278182983, | |
| "learning_rate": 0.000182051282051282, | |
| "loss": 0.5783, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 1.362076067554579, | |
| "grad_norm": 0.30152127146720886, | |
| "learning_rate": 0.00018192918192918192, | |
| "loss": 0.4321, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.3631745159961555, | |
| "grad_norm": 0.6672345399856567, | |
| "learning_rate": 0.0001818070818070818, | |
| "loss": 0.6073, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 1.3642729644377316, | |
| "grad_norm": 0.45652687549591064, | |
| "learning_rate": 0.00018168498168498167, | |
| "loss": 0.6193, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.365371412879308, | |
| "grad_norm": 0.6392306089401245, | |
| "learning_rate": 0.00018156288156288155, | |
| "loss": 0.8388, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 1.3664698613208843, | |
| "grad_norm": 0.5510252714157104, | |
| "learning_rate": 0.00018144078144078144, | |
| "loss": 0.6512, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.3675683097624605, | |
| "grad_norm": 0.38780227303504944, | |
| "learning_rate": 0.0001813186813186813, | |
| "loss": 0.6835, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 1.3686667582040368, | |
| "grad_norm": 0.47472965717315674, | |
| "learning_rate": 0.00018119658119658118, | |
| "loss": 0.6625, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.3697652066456132, | |
| "grad_norm": 0.3599228262901306, | |
| "learning_rate": 0.00018107448107448107, | |
| "loss": 0.5063, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 1.3708636550871893, | |
| "grad_norm": 0.3284567892551422, | |
| "learning_rate": 0.00018095238095238093, | |
| "loss": 0.7679, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.3719621035287657, | |
| "grad_norm": 0.5258575081825256, | |
| "learning_rate": 0.0001808302808302808, | |
| "loss": 0.6213, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 1.3730605519703418, | |
| "grad_norm": 0.3211069405078888, | |
| "learning_rate": 0.0001807081807081807, | |
| "loss": 0.5306, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.3741590004119182, | |
| "grad_norm": 0.6325588822364807, | |
| "learning_rate": 0.00018058608058608056, | |
| "loss": 0.8104, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 1.3752574488534943, | |
| "grad_norm": 0.4994303584098816, | |
| "learning_rate": 0.00018046398046398044, | |
| "loss": 0.6464, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.3763558972950707, | |
| "grad_norm": 0.3013019263744354, | |
| "learning_rate": 0.00018034188034188035, | |
| "loss": 0.4749, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 1.377454345736647, | |
| "grad_norm": 1.0342131853103638, | |
| "learning_rate": 0.00018021978021978018, | |
| "loss": 0.7995, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.3785527941782232, | |
| "grad_norm": 0.40213823318481445, | |
| "learning_rate": 0.0001800976800976801, | |
| "loss": 0.8791, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 1.3796512426197995, | |
| "grad_norm": 0.37126532196998596, | |
| "learning_rate": 0.00017997557997557998, | |
| "loss": 0.551, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.380749691061376, | |
| "grad_norm": 0.3417685031890869, | |
| "learning_rate": 0.00017985347985347984, | |
| "loss": 0.583, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 1.381848139502952, | |
| "grad_norm": 0.33571329712867737, | |
| "learning_rate": 0.00017973137973137973, | |
| "loss": 0.4927, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.3829465879445284, | |
| "grad_norm": 0.5128073692321777, | |
| "learning_rate": 0.00017960927960927959, | |
| "loss": 0.5903, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 1.3840450363861048, | |
| "grad_norm": 0.5345245599746704, | |
| "learning_rate": 0.00017948717948717947, | |
| "loss": 0.5828, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.385143484827681, | |
| "grad_norm": 0.312639981508255, | |
| "learning_rate": 0.00017936507936507936, | |
| "loss": 0.6905, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 1.386241933269257, | |
| "grad_norm": 0.4795394837856293, | |
| "learning_rate": 0.00017924297924297921, | |
| "loss": 0.6193, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.3873403817108334, | |
| "grad_norm": 0.39672231674194336, | |
| "learning_rate": 0.0001791208791208791, | |
| "loss": 0.7833, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 1.3884388301524098, | |
| "grad_norm": 0.46752655506134033, | |
| "learning_rate": 0.00017899877899877899, | |
| "loss": 0.6385, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.389537278593986, | |
| "grad_norm": 0.5376736521720886, | |
| "learning_rate": 0.00017887667887667884, | |
| "loss": 0.6362, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 1.3906357270355623, | |
| "grad_norm": 0.5675904750823975, | |
| "learning_rate": 0.00017875457875457873, | |
| "loss": 0.7975, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.3917341754771386, | |
| "grad_norm": 0.5429015755653381, | |
| "learning_rate": 0.00017863247863247861, | |
| "loss": 0.5415, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 1.3928326239187148, | |
| "grad_norm": 0.3714626729488373, | |
| "learning_rate": 0.00017851037851037847, | |
| "loss": 0.7104, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.3939310723602911, | |
| "grad_norm": 0.7549324035644531, | |
| "learning_rate": 0.00017838827838827836, | |
| "loss": 0.698, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 1.3950295208018675, | |
| "grad_norm": 0.36867257952690125, | |
| "learning_rate": 0.00017826617826617827, | |
| "loss": 0.6019, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.3961279692434436, | |
| "grad_norm": 0.42439624667167664, | |
| "learning_rate": 0.00017814407814407813, | |
| "loss": 0.4626, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 1.39722641768502, | |
| "grad_norm": 0.4768877923488617, | |
| "learning_rate": 0.00017802197802197802, | |
| "loss": 0.671, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.3983248661265961, | |
| "grad_norm": 0.3415908217430115, | |
| "learning_rate": 0.0001778998778998779, | |
| "loss": 0.5904, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 1.3994233145681725, | |
| "grad_norm": 0.5370535850524902, | |
| "learning_rate": 0.00017777777777777776, | |
| "loss": 0.578, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.4005217630097486, | |
| "grad_norm": 0.61114901304245, | |
| "learning_rate": 0.00017765567765567764, | |
| "loss": 0.6498, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 1.401620211451325, | |
| "grad_norm": 0.3491772711277008, | |
| "learning_rate": 0.00017753357753357753, | |
| "loss": 0.6057, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.4027186598929013, | |
| "grad_norm": 0.4992705285549164, | |
| "learning_rate": 0.0001774114774114774, | |
| "loss": 0.8541, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 1.4038171083344775, | |
| "grad_norm": 0.5476379990577698, | |
| "learning_rate": 0.00017728937728937727, | |
| "loss": 0.5608, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.4049155567760538, | |
| "grad_norm": 0.6107895374298096, | |
| "learning_rate": 0.00017716727716727716, | |
| "loss": 0.7437, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 1.4060140052176302, | |
| "grad_norm": 0.510809600353241, | |
| "learning_rate": 0.00017704517704517702, | |
| "loss": 0.6569, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.4071124536592063, | |
| "grad_norm": 0.5050077438354492, | |
| "learning_rate": 0.0001769230769230769, | |
| "loss": 0.6566, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 1.4082109021007827, | |
| "grad_norm": 0.44812703132629395, | |
| "learning_rate": 0.0001768009768009768, | |
| "loss": 0.6557, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.4093093505423588, | |
| "grad_norm": 0.5216537714004517, | |
| "learning_rate": 0.00017667887667887665, | |
| "loss": 0.7311, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 1.4104077989839352, | |
| "grad_norm": 0.5608856081962585, | |
| "learning_rate": 0.00017655677655677656, | |
| "loss": 0.9001, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.4115062474255113, | |
| "grad_norm": 0.47205066680908203, | |
| "learning_rate": 0.0001764346764346764, | |
| "loss": 0.5214, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 1.4126046958670877, | |
| "grad_norm": 0.4073629081249237, | |
| "learning_rate": 0.0001763125763125763, | |
| "loss": 0.483, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.413703144308664, | |
| "grad_norm": 0.42381593585014343, | |
| "learning_rate": 0.0001761904761904762, | |
| "loss": 0.4895, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 1.4148015927502402, | |
| "grad_norm": 0.629356861114502, | |
| "learning_rate": 0.00017606837606837605, | |
| "loss": 0.4639, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.4159000411918166, | |
| "grad_norm": 0.3123486340045929, | |
| "learning_rate": 0.00017594627594627593, | |
| "loss": 0.4575, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 1.416998489633393, | |
| "grad_norm": 0.4163682460784912, | |
| "learning_rate": 0.00017582417582417582, | |
| "loss": 0.7511, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.418096938074969, | |
| "grad_norm": 0.5697455406188965, | |
| "learning_rate": 0.00017570207570207568, | |
| "loss": 0.5977, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 1.4191953865165454, | |
| "grad_norm": 0.39232510328292847, | |
| "learning_rate": 0.00017557997557997556, | |
| "loss": 0.6133, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.4202938349581218, | |
| "grad_norm": 0.5452993512153625, | |
| "learning_rate": 0.00017545787545787545, | |
| "loss": 0.6596, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 1.421392283399698, | |
| "grad_norm": 0.39080601930618286, | |
| "learning_rate": 0.0001753357753357753, | |
| "loss": 0.7422, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.4224907318412743, | |
| "grad_norm": 0.6513398289680481, | |
| "learning_rate": 0.0001752136752136752, | |
| "loss": 0.5277, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 1.4235891802828504, | |
| "grad_norm": 0.4627130329608917, | |
| "learning_rate": 0.00017509157509157508, | |
| "loss": 0.6296, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.4246876287244268, | |
| "grad_norm": 0.499700129032135, | |
| "learning_rate": 0.00017496947496947494, | |
| "loss": 0.689, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 1.425786077166003, | |
| "grad_norm": 0.4668709635734558, | |
| "learning_rate": 0.00017484737484737482, | |
| "loss": 0.784, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.4268845256075793, | |
| "grad_norm": 0.6378145217895508, | |
| "learning_rate": 0.00017472527472527473, | |
| "loss": 0.5077, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 1.4279829740491556, | |
| "grad_norm": 0.6320174336433411, | |
| "learning_rate": 0.00017460317460317457, | |
| "loss": 1.061, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.4290814224907318, | |
| "grad_norm": 0.48719078302383423, | |
| "learning_rate": 0.00017448107448107448, | |
| "loss": 0.7181, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 1.4301798709323081, | |
| "grad_norm": 0.5345287919044495, | |
| "learning_rate": 0.00017435897435897436, | |
| "loss": 0.5599, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.4312783193738845, | |
| "grad_norm": 0.567857563495636, | |
| "learning_rate": 0.00017423687423687422, | |
| "loss": 0.6294, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 1.4323767678154606, | |
| "grad_norm": 0.5715040564537048, | |
| "learning_rate": 0.0001741147741147741, | |
| "loss": 0.5326, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.433475216257037, | |
| "grad_norm": 0.40048834681510925, | |
| "learning_rate": 0.000173992673992674, | |
| "loss": 0.687, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 1.4345736646986131, | |
| "grad_norm": 0.4964540898799896, | |
| "learning_rate": 0.00017387057387057385, | |
| "loss": 0.6149, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.4356721131401895, | |
| "grad_norm": 0.5018569231033325, | |
| "learning_rate": 0.00017374847374847374, | |
| "loss": 0.4224, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 1.4367705615817656, | |
| "grad_norm": 0.6026094555854797, | |
| "learning_rate": 0.00017362637362637362, | |
| "loss": 0.8934, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.437869010023342, | |
| "grad_norm": 0.33409950137138367, | |
| "learning_rate": 0.00017350427350427348, | |
| "loss": 0.6725, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 1.4389674584649184, | |
| "grad_norm": 0.43982234597206116, | |
| "learning_rate": 0.00017338217338217337, | |
| "loss": 0.9203, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.4400659069064945, | |
| "grad_norm": 0.843877911567688, | |
| "learning_rate": 0.00017326007326007322, | |
| "loss": 0.6028, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 1.4411643553480709, | |
| "grad_norm": 0.35148733854293823, | |
| "learning_rate": 0.0001731379731379731, | |
| "loss": 0.7503, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.4422628037896472, | |
| "grad_norm": 0.4561845362186432, | |
| "learning_rate": 0.000173015873015873, | |
| "loss": 0.6577, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 1.4433612522312234, | |
| "grad_norm": 0.47295713424682617, | |
| "learning_rate": 0.00017289377289377285, | |
| "loss": 0.8013, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.4444597006727997, | |
| "grad_norm": 0.46340033411979675, | |
| "learning_rate": 0.00017277167277167277, | |
| "loss": 0.73, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 1.445558149114376, | |
| "grad_norm": 0.49221453070640564, | |
| "learning_rate": 0.00017264957264957265, | |
| "loss": 0.6735, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.4466565975559522, | |
| "grad_norm": 0.36250925064086914, | |
| "learning_rate": 0.0001725274725274725, | |
| "loss": 0.7463, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 1.4477550459975284, | |
| "grad_norm": 0.3832615911960602, | |
| "learning_rate": 0.0001724053724053724, | |
| "loss": 0.7295, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.4488534944391047, | |
| "grad_norm": 0.7413591742515564, | |
| "learning_rate": 0.00017228327228327228, | |
| "loss": 0.7627, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 1.449951942880681, | |
| "grad_norm": 0.45626765489578247, | |
| "learning_rate": 0.00017216117216117214, | |
| "loss": 0.727, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.4510503913222572, | |
| "grad_norm": 0.3024120330810547, | |
| "learning_rate": 0.00017203907203907202, | |
| "loss": 0.3986, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 1.4521488397638336, | |
| "grad_norm": 0.31635284423828125, | |
| "learning_rate": 0.0001719169719169719, | |
| "loss": 0.3469, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.45324728820541, | |
| "grad_norm": 0.36893391609191895, | |
| "learning_rate": 0.00017179487179487177, | |
| "loss": 0.7017, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 1.454345736646986, | |
| "grad_norm": 0.4804024398326874, | |
| "learning_rate": 0.00017167277167277165, | |
| "loss": 0.8811, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.4554441850885624, | |
| "grad_norm": 0.4446522295475006, | |
| "learning_rate": 0.00017155067155067154, | |
| "loss": 0.8027, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 1.4565426335301388, | |
| "grad_norm": 0.27936413884162903, | |
| "learning_rate": 0.0001714285714285714, | |
| "loss": 0.3846, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.457641081971715, | |
| "grad_norm": 0.3312259316444397, | |
| "learning_rate": 0.00017130647130647128, | |
| "loss": 0.4852, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 1.4587395304132913, | |
| "grad_norm": 0.4751642644405365, | |
| "learning_rate": 0.0001711843711843712, | |
| "loss": 0.7337, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.4598379788548674, | |
| "grad_norm": 0.5365067720413208, | |
| "learning_rate": 0.00017106227106227103, | |
| "loss": 0.8052, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 1.4609364272964438, | |
| "grad_norm": 0.5944942831993103, | |
| "learning_rate": 0.00017094017094017094, | |
| "loss": 0.7673, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.46203487573802, | |
| "grad_norm": 0.48244431614875793, | |
| "learning_rate": 0.00017081807081807083, | |
| "loss": 0.855, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 1.4631333241795963, | |
| "grad_norm": 0.32348135113716125, | |
| "learning_rate": 0.00017069597069597068, | |
| "loss": 0.5133, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.4642317726211727, | |
| "grad_norm": 0.6455866694450378, | |
| "learning_rate": 0.00017057387057387057, | |
| "loss": 0.6825, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 1.4653302210627488, | |
| "grad_norm": 0.3937522768974304, | |
| "learning_rate": 0.00017045177045177045, | |
| "loss": 0.6335, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.4664286695043252, | |
| "grad_norm": 0.33579352498054504, | |
| "learning_rate": 0.0001703296703296703, | |
| "loss": 0.4711, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 1.4675271179459015, | |
| "grad_norm": 0.5055533647537231, | |
| "learning_rate": 0.0001702075702075702, | |
| "loss": 0.6512, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.4686255663874777, | |
| "grad_norm": 0.40702182054519653, | |
| "learning_rate": 0.00017008547008547006, | |
| "loss": 0.8833, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 1.469724014829054, | |
| "grad_norm": 0.3574135899543762, | |
| "learning_rate": 0.00016996336996336994, | |
| "loss": 0.7127, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.4708224632706302, | |
| "grad_norm": 0.45641472935676575, | |
| "learning_rate": 0.00016984126984126983, | |
| "loss": 0.7258, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 1.4719209117122065, | |
| "grad_norm": 1.5012352466583252, | |
| "learning_rate": 0.0001697191697191697, | |
| "loss": 0.8065, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.4730193601537827, | |
| "grad_norm": 0.5025885701179504, | |
| "learning_rate": 0.00016959706959706957, | |
| "loss": 0.9377, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 1.474117808595359, | |
| "grad_norm": 0.2942202687263489, | |
| "learning_rate": 0.00016947496947496946, | |
| "loss": 0.5693, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.4752162570369354, | |
| "grad_norm": 0.48770126700401306, | |
| "learning_rate": 0.00016935286935286932, | |
| "loss": 0.5483, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 1.4763147054785115, | |
| "grad_norm": 0.3853349983692169, | |
| "learning_rate": 0.0001692307692307692, | |
| "loss": 0.5787, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.4774131539200879, | |
| "grad_norm": 0.3593169152736664, | |
| "learning_rate": 0.00016910866910866911, | |
| "loss": 0.6426, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 1.4785116023616642, | |
| "grad_norm": 0.5932713150978088, | |
| "learning_rate": 0.00016898656898656897, | |
| "loss": 0.7543, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.4796100508032404, | |
| "grad_norm": 0.43406638503074646, | |
| "learning_rate": 0.00016886446886446886, | |
| "loss": 0.7868, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 1.4807084992448167, | |
| "grad_norm": 0.38596048951148987, | |
| "learning_rate": 0.00016874236874236874, | |
| "loss": 0.49, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.481806947686393, | |
| "grad_norm": 0.42844533920288086, | |
| "learning_rate": 0.0001686202686202686, | |
| "loss": 0.6485, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 1.4829053961279692, | |
| "grad_norm": 0.5165280103683472, | |
| "learning_rate": 0.0001684981684981685, | |
| "loss": 0.6924, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.4840038445695456, | |
| "grad_norm": 0.5717988610267639, | |
| "learning_rate": 0.00016837606837606837, | |
| "loss": 0.5624, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 1.4851022930111217, | |
| "grad_norm": 0.4384293556213379, | |
| "learning_rate": 0.00016825396825396823, | |
| "loss": 0.7895, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.486200741452698, | |
| "grad_norm": 0.5472243428230286, | |
| "learning_rate": 0.00016813186813186812, | |
| "loss": 0.8838, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 1.4872991898942742, | |
| "grad_norm": 0.3903232216835022, | |
| "learning_rate": 0.000168009768009768, | |
| "loss": 0.5452, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.4883976383358506, | |
| "grad_norm": 0.3799583613872528, | |
| "learning_rate": 0.00016788766788766786, | |
| "loss": 0.8931, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 1.489496086777427, | |
| "grad_norm": 0.4481349289417267, | |
| "learning_rate": 0.00016776556776556775, | |
| "loss": 0.5956, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.490594535219003, | |
| "grad_norm": 0.45875266194343567, | |
| "learning_rate": 0.00016764346764346763, | |
| "loss": 0.4729, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 1.4916929836605795, | |
| "grad_norm": 0.494112104177475, | |
| "learning_rate": 0.0001675213675213675, | |
| "loss": 0.6416, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.4927914321021558, | |
| "grad_norm": 0.3976772725582123, | |
| "learning_rate": 0.0001673992673992674, | |
| "loss": 0.6601, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 1.493889880543732, | |
| "grad_norm": 0.29009610414505005, | |
| "learning_rate": 0.0001672771672771673, | |
| "loss": 0.4261, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.4949883289853083, | |
| "grad_norm": 0.5540419816970825, | |
| "learning_rate": 0.00016715506715506715, | |
| "loss": 0.8206, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 1.4960867774268845, | |
| "grad_norm": 0.41308313608169556, | |
| "learning_rate": 0.00016703296703296703, | |
| "loss": 0.7862, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.4971852258684608, | |
| "grad_norm": 0.6565150618553162, | |
| "learning_rate": 0.0001669108669108669, | |
| "loss": 0.6963, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 1.498283674310037, | |
| "grad_norm": 0.4901321530342102, | |
| "learning_rate": 0.00016678876678876678, | |
| "loss": 0.7063, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.4993821227516133, | |
| "grad_norm": 0.4676086902618408, | |
| "learning_rate": 0.00016666666666666666, | |
| "loss": 0.5142, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 1.5004805711931897, | |
| "grad_norm": 0.4745628833770752, | |
| "learning_rate": 0.00016654456654456652, | |
| "loss": 0.7659, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.5015790196347658, | |
| "grad_norm": 0.42693057656288147, | |
| "learning_rate": 0.0001664224664224664, | |
| "loss": 0.9233, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 1.5026774680763422, | |
| "grad_norm": 0.4110391139984131, | |
| "learning_rate": 0.0001663003663003663, | |
| "loss": 0.5062, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.5037759165179185, | |
| "grad_norm": 0.3090996742248535, | |
| "learning_rate": 0.00016617826617826615, | |
| "loss": 0.4462, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 1.5048743649594947, | |
| "grad_norm": 0.42027410864830017, | |
| "learning_rate": 0.00016605616605616603, | |
| "loss": 0.8589, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.505972813401071, | |
| "grad_norm": 0.38396796584129333, | |
| "learning_rate": 0.00016593406593406592, | |
| "loss": 0.6609, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 1.5070712618426474, | |
| "grad_norm": 0.5236012935638428, | |
| "learning_rate": 0.00016581196581196578, | |
| "loss": 0.6506, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.5081697102842235, | |
| "grad_norm": 0.7232113480567932, | |
| "learning_rate": 0.00016568986568986566, | |
| "loss": 0.6689, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 1.5092681587257997, | |
| "grad_norm": 0.4777502417564392, | |
| "learning_rate": 0.00016556776556776558, | |
| "loss": 0.5701, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.510366607167376, | |
| "grad_norm": 0.39154767990112305, | |
| "learning_rate": 0.0001654456654456654, | |
| "loss": 0.4906, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 1.5114650556089524, | |
| "grad_norm": 0.469382107257843, | |
| "learning_rate": 0.00016532356532356532, | |
| "loss": 0.5768, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.5125635040505285, | |
| "grad_norm": 0.3485945761203766, | |
| "learning_rate": 0.0001652014652014652, | |
| "loss": 0.7814, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 1.513661952492105, | |
| "grad_norm": 0.4375949203968048, | |
| "learning_rate": 0.00016507936507936506, | |
| "loss": 0.6328, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.5147604009336813, | |
| "grad_norm": 0.47778064012527466, | |
| "learning_rate": 0.00016495726495726495, | |
| "loss": 0.635, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 1.5158588493752574, | |
| "grad_norm": 0.3515126705169678, | |
| "learning_rate": 0.00016483516483516484, | |
| "loss": 0.7014, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.5169572978168337, | |
| "grad_norm": 0.3710018992424011, | |
| "learning_rate": 0.0001647130647130647, | |
| "loss": 0.7903, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 1.51805574625841, | |
| "grad_norm": 0.37630394101142883, | |
| "learning_rate": 0.00016459096459096458, | |
| "loss": 0.5446, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.5191541946999862, | |
| "grad_norm": 0.4312807321548462, | |
| "learning_rate": 0.00016446886446886446, | |
| "loss": 0.6101, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 1.5202526431415624, | |
| "grad_norm": 0.399384468793869, | |
| "learning_rate": 0.00016434676434676432, | |
| "loss": 0.5734, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.521351091583139, | |
| "grad_norm": 0.41233471035957336, | |
| "learning_rate": 0.0001642246642246642, | |
| "loss": 0.6525, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 1.522449540024715, | |
| "grad_norm": 0.5215228199958801, | |
| "learning_rate": 0.0001641025641025641, | |
| "loss": 0.4804, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.5235479884662912, | |
| "grad_norm": 0.42069393396377563, | |
| "learning_rate": 0.00016398046398046395, | |
| "loss": 0.5517, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 1.5246464369078676, | |
| "grad_norm": 1.7902978658676147, | |
| "learning_rate": 0.00016385836385836384, | |
| "loss": 0.6295, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.525744885349444, | |
| "grad_norm": 0.7353507280349731, | |
| "learning_rate": 0.0001637362637362637, | |
| "loss": 1.0585, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 1.52684333379102, | |
| "grad_norm": 0.45992404222488403, | |
| "learning_rate": 0.0001636141636141636, | |
| "loss": 0.7671, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.5279417822325965, | |
| "grad_norm": 0.3927334249019623, | |
| "learning_rate": 0.0001634920634920635, | |
| "loss": 0.7479, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 1.5290402306741728, | |
| "grad_norm": 0.32833003997802734, | |
| "learning_rate": 0.00016336996336996335, | |
| "loss": 0.5774, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.530138679115749, | |
| "grad_norm": 0.4306529462337494, | |
| "learning_rate": 0.00016324786324786324, | |
| "loss": 0.6317, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 1.5312371275573253, | |
| "grad_norm": 0.5411052703857422, | |
| "learning_rate": 0.00016312576312576312, | |
| "loss": 0.6637, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.5323355759989017, | |
| "grad_norm": 0.633800745010376, | |
| "learning_rate": 0.00016300366300366298, | |
| "loss": 0.7145, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 1.5334340244404778, | |
| "grad_norm": 0.6986578702926636, | |
| "learning_rate": 0.00016288156288156287, | |
| "loss": 0.7194, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.534532472882054, | |
| "grad_norm": 0.5223686695098877, | |
| "learning_rate": 0.00016275946275946275, | |
| "loss": 0.7849, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 1.5356309213236303, | |
| "grad_norm": 0.5342483520507812, | |
| "learning_rate": 0.0001626373626373626, | |
| "loss": 0.8885, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.5367293697652067, | |
| "grad_norm": 0.5467656850814819, | |
| "learning_rate": 0.0001625152625152625, | |
| "loss": 0.6265, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 1.5378278182067828, | |
| "grad_norm": 0.4483658969402313, | |
| "learning_rate": 0.00016239316239316238, | |
| "loss": 0.7133, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.5389262666483592, | |
| "grad_norm": 0.5714216232299805, | |
| "learning_rate": 0.00016227106227106224, | |
| "loss": 0.5212, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 1.5400247150899355, | |
| "grad_norm": 0.5487145781517029, | |
| "learning_rate": 0.00016214896214896213, | |
| "loss": 0.6276, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.5411231635315117, | |
| "grad_norm": 0.3687078654766083, | |
| "learning_rate": 0.00016202686202686204, | |
| "loss": 0.7512, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 1.542221611973088, | |
| "grad_norm": 0.3596762418746948, | |
| "learning_rate": 0.00016190476190476187, | |
| "loss": 0.7192, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.5433200604146644, | |
| "grad_norm": 0.4092305898666382, | |
| "learning_rate": 0.00016178266178266178, | |
| "loss": 0.7339, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 1.5444185088562405, | |
| "grad_norm": 0.4018193483352661, | |
| "learning_rate": 0.00016166056166056167, | |
| "loss": 0.7213, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.5455169572978167, | |
| "grad_norm": 0.4993208646774292, | |
| "learning_rate": 0.00016153846153846153, | |
| "loss": 0.6362, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 1.5466154057393933, | |
| "grad_norm": 0.3958855867385864, | |
| "learning_rate": 0.0001614163614163614, | |
| "loss": 0.8482, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.5477138541809694, | |
| "grad_norm": 0.32689765095710754, | |
| "learning_rate": 0.0001612942612942613, | |
| "loss": 0.6583, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 1.5488123026225455, | |
| "grad_norm": 0.48947611451148987, | |
| "learning_rate": 0.00016117216117216116, | |
| "loss": 0.6707, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.549910751064122, | |
| "grad_norm": 0.3446139395236969, | |
| "learning_rate": 0.00016105006105006104, | |
| "loss": 0.8914, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 1.5510091995056983, | |
| "grad_norm": 0.585746705532074, | |
| "learning_rate": 0.0001609279609279609, | |
| "loss": 0.5413, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.5521076479472744, | |
| "grad_norm": 0.6561328172683716, | |
| "learning_rate": 0.00016080586080586079, | |
| "loss": 0.3728, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 1.5532060963888508, | |
| "grad_norm": 0.47158828377723694, | |
| "learning_rate": 0.00016068376068376067, | |
| "loss": 0.6525, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.5543045448304271, | |
| "grad_norm": 0.3676914572715759, | |
| "learning_rate": 0.00016056166056166053, | |
| "loss": 0.7395, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 1.5554029932720033, | |
| "grad_norm": 0.608076810836792, | |
| "learning_rate": 0.00016043956043956041, | |
| "loss": 0.5289, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.5565014417135794, | |
| "grad_norm": 0.44940462708473206, | |
| "learning_rate": 0.0001603174603174603, | |
| "loss": 0.6282, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 1.557599890155156, | |
| "grad_norm": 0.48062869906425476, | |
| "learning_rate": 0.00016019536019536016, | |
| "loss": 0.7438, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.5586983385967321, | |
| "grad_norm": 0.43834635615348816, | |
| "learning_rate": 0.00016007326007326004, | |
| "loss": 0.4248, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 1.5597967870383083, | |
| "grad_norm": 0.5203731060028076, | |
| "learning_rate": 0.00015995115995115996, | |
| "loss": 0.91, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.5608952354798846, | |
| "grad_norm": 0.5766960978507996, | |
| "learning_rate": 0.00015982905982905981, | |
| "loss": 0.7211, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 1.561993683921461, | |
| "grad_norm": 0.3048666715621948, | |
| "learning_rate": 0.0001597069597069597, | |
| "loss": 0.5618, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.5630921323630371, | |
| "grad_norm": 0.3916679322719574, | |
| "learning_rate": 0.00015958485958485959, | |
| "loss": 0.6954, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 1.5641905808046135, | |
| "grad_norm": 0.6336612105369568, | |
| "learning_rate": 0.00015946275946275944, | |
| "loss": 0.6368, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.5652890292461898, | |
| "grad_norm": 0.8314816355705261, | |
| "learning_rate": 0.00015934065934065933, | |
| "loss": 0.7633, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 1.566387477687766, | |
| "grad_norm": 0.46973487734794617, | |
| "learning_rate": 0.00015921855921855922, | |
| "loss": 0.6915, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.5674859261293423, | |
| "grad_norm": 0.48737633228302, | |
| "learning_rate": 0.00015909645909645907, | |
| "loss": 0.5346, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 1.5685843745709187, | |
| "grad_norm": 0.548876941204071, | |
| "learning_rate": 0.00015897435897435896, | |
| "loss": 1.0449, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.5696828230124948, | |
| "grad_norm": 0.5039654970169067, | |
| "learning_rate": 0.00015885225885225884, | |
| "loss": 0.9953, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 1.570781271454071, | |
| "grad_norm": 0.7233378887176514, | |
| "learning_rate": 0.0001587301587301587, | |
| "loss": 0.7068, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.5718797198956473, | |
| "grad_norm": 0.5767638683319092, | |
| "learning_rate": 0.0001586080586080586, | |
| "loss": 0.8055, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 1.5729781683372237, | |
| "grad_norm": 0.34450021386146545, | |
| "learning_rate": 0.00015848595848595847, | |
| "loss": 0.726, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.5740766167787998, | |
| "grad_norm": 0.8474962711334229, | |
| "learning_rate": 0.00015836385836385833, | |
| "loss": 0.6974, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 1.5751750652203762, | |
| "grad_norm": 1.565746545791626, | |
| "learning_rate": 0.00015824175824175824, | |
| "loss": 0.7766, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.5762735136619526, | |
| "grad_norm": 0.4393616020679474, | |
| "learning_rate": 0.00015811965811965813, | |
| "loss": 0.6071, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 1.5773719621035287, | |
| "grad_norm": 0.5209214091300964, | |
| "learning_rate": 0.000157997557997558, | |
| "loss": 0.7546, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.578470410545105, | |
| "grad_norm": 0.6069398522377014, | |
| "learning_rate": 0.00015787545787545787, | |
| "loss": 0.7322, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 1.5795688589866814, | |
| "grad_norm": 0.6168296337127686, | |
| "learning_rate": 0.00015775335775335773, | |
| "loss": 0.5169, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.5806673074282576, | |
| "grad_norm": 0.25368016958236694, | |
| "learning_rate": 0.00015763125763125762, | |
| "loss": 0.4838, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 1.5817657558698337, | |
| "grad_norm": 0.4165039360523224, | |
| "learning_rate": 0.0001575091575091575, | |
| "loss": 1.0135, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.5828642043114103, | |
| "grad_norm": 0.4596197307109833, | |
| "learning_rate": 0.00015738705738705736, | |
| "loss": 0.5545, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 1.5839626527529864, | |
| "grad_norm": 0.5077592730522156, | |
| "learning_rate": 0.00015726495726495725, | |
| "loss": 0.7754, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.5850611011945626, | |
| "grad_norm": 0.5041285157203674, | |
| "learning_rate": 0.00015714285714285713, | |
| "loss": 0.8384, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 1.586159549636139, | |
| "grad_norm": 0.40924420952796936, | |
| "learning_rate": 0.000157020757020757, | |
| "loss": 0.5511, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.5872579980777153, | |
| "grad_norm": 0.4800551235675812, | |
| "learning_rate": 0.00015689865689865688, | |
| "loss": 0.6154, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 1.5883564465192914, | |
| "grad_norm": 0.433174729347229, | |
| "learning_rate": 0.00015677655677655676, | |
| "loss": 0.6158, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.5894548949608678, | |
| "grad_norm": 0.29649895429611206, | |
| "learning_rate": 0.00015665445665445662, | |
| "loss": 0.5729, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 1.5905533434024441, | |
| "grad_norm": 0.3815969228744507, | |
| "learning_rate": 0.0001565323565323565, | |
| "loss": 0.6748, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.5916517918440203, | |
| "grad_norm": 0.4933919608592987, | |
| "learning_rate": 0.00015641025641025642, | |
| "loss": 0.7683, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 1.5927502402855966, | |
| "grad_norm": 0.5053071975708008, | |
| "learning_rate": 0.00015628815628815625, | |
| "loss": 0.6779, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.593848688727173, | |
| "grad_norm": 0.3900013566017151, | |
| "learning_rate": 0.00015616605616605616, | |
| "loss": 0.6326, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 1.5949471371687491, | |
| "grad_norm": 0.5823982357978821, | |
| "learning_rate": 0.00015604395604395605, | |
| "loss": 0.6104, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.5960455856103253, | |
| "grad_norm": 0.5277792811393738, | |
| "learning_rate": 0.0001559218559218559, | |
| "loss": 0.6647, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 1.5971440340519016, | |
| "grad_norm": 0.32926440238952637, | |
| "learning_rate": 0.0001557997557997558, | |
| "loss": 0.6064, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.598242482493478, | |
| "grad_norm": 0.7350378036499023, | |
| "learning_rate": 0.00015567765567765568, | |
| "loss": 0.7951, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 1.5993409309350541, | |
| "grad_norm": 0.4125807285308838, | |
| "learning_rate": 0.00015555555555555554, | |
| "loss": 0.7761, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.6004393793766305, | |
| "grad_norm": 0.49707722663879395, | |
| "learning_rate": 0.00015543345543345542, | |
| "loss": 0.7299, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 1.6015378278182069, | |
| "grad_norm": 0.3240358829498291, | |
| "learning_rate": 0.0001553113553113553, | |
| "loss": 0.4832, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.602636276259783, | |
| "grad_norm": 0.44430434703826904, | |
| "learning_rate": 0.00015518925518925517, | |
| "loss": 0.5968, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 1.6037347247013594, | |
| "grad_norm": 0.3702992796897888, | |
| "learning_rate": 0.00015506715506715505, | |
| "loss": 0.7177, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.6048331731429357, | |
| "grad_norm": 0.5001052618026733, | |
| "learning_rate": 0.00015494505494505494, | |
| "loss": 0.7448, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 1.6059316215845119, | |
| "grad_norm": 0.45969969034194946, | |
| "learning_rate": 0.0001548229548229548, | |
| "loss": 0.8292, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.607030070026088, | |
| "grad_norm": 0.46075674891471863, | |
| "learning_rate": 0.00015470085470085468, | |
| "loss": 0.5624, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 1.6081285184676646, | |
| "grad_norm": 2.077080488204956, | |
| "learning_rate": 0.00015457875457875454, | |
| "loss": 0.6643, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.6092269669092407, | |
| "grad_norm": 0.46008172631263733, | |
| "learning_rate": 0.00015445665445665445, | |
| "loss": 0.6329, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 1.6103254153508169, | |
| "grad_norm": 0.5016405582427979, | |
| "learning_rate": 0.00015433455433455434, | |
| "loss": 0.7692, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.6114238637923932, | |
| "grad_norm": 0.46292269229888916, | |
| "learning_rate": 0.0001542124542124542, | |
| "loss": 0.6485, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 1.6125223122339696, | |
| "grad_norm": 0.4498538672924042, | |
| "learning_rate": 0.00015409035409035408, | |
| "loss": 0.598, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.6136207606755457, | |
| "grad_norm": 0.3537295162677765, | |
| "learning_rate": 0.00015396825396825397, | |
| "loss": 0.6356, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 1.614719209117122, | |
| "grad_norm": 0.9966747164726257, | |
| "learning_rate": 0.00015384615384615382, | |
| "loss": 0.6627, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.6158176575586984, | |
| "grad_norm": 0.9386951327323914, | |
| "learning_rate": 0.0001537240537240537, | |
| "loss": 0.8148, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 1.6169161060002746, | |
| "grad_norm": 0.3452979028224945, | |
| "learning_rate": 0.0001536019536019536, | |
| "loss": 0.5778, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.618014554441851, | |
| "grad_norm": 0.3443523049354553, | |
| "learning_rate": 0.00015347985347985345, | |
| "loss": 0.9228, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 1.6191130028834273, | |
| "grad_norm": 0.5345872044563293, | |
| "learning_rate": 0.00015335775335775334, | |
| "loss": 0.4682, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.6202114513250034, | |
| "grad_norm": 0.35112351179122925, | |
| "learning_rate": 0.00015323565323565322, | |
| "loss": 0.5482, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 1.6213098997665796, | |
| "grad_norm": 0.39090535044670105, | |
| "learning_rate": 0.00015311355311355308, | |
| "loss": 0.825, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.622408348208156, | |
| "grad_norm": 1.1684538125991821, | |
| "learning_rate": 0.00015299145299145297, | |
| "loss": 0.6561, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 1.6235067966497323, | |
| "grad_norm": 0.4006233513355255, | |
| "learning_rate": 0.00015286935286935288, | |
| "loss": 0.3647, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.6246052450913084, | |
| "grad_norm": 0.30577126145362854, | |
| "learning_rate": 0.0001527472527472527, | |
| "loss": 0.4934, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 1.6257036935328848, | |
| "grad_norm": 0.39927995204925537, | |
| "learning_rate": 0.00015262515262515263, | |
| "loss": 0.6028, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.6268021419744612, | |
| "grad_norm": 0.49143150448799133, | |
| "learning_rate": 0.0001525030525030525, | |
| "loss": 0.4595, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 1.6279005904160373, | |
| "grad_norm": 0.8603225946426392, | |
| "learning_rate": 0.00015238095238095237, | |
| "loss": 0.8617, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.6289990388576137, | |
| "grad_norm": 0.534269392490387, | |
| "learning_rate": 0.00015225885225885225, | |
| "loss": 0.6648, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 1.63009748729919, | |
| "grad_norm": 0.4987354278564453, | |
| "learning_rate": 0.00015213675213675214, | |
| "loss": 0.5908, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.6311959357407662, | |
| "grad_norm": 0.5739774107933044, | |
| "learning_rate": 0.000152014652014652, | |
| "loss": 0.7652, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 1.6322943841823423, | |
| "grad_norm": 0.5343801975250244, | |
| "learning_rate": 0.00015189255189255188, | |
| "loss": 0.6864, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.6333928326239189, | |
| "grad_norm": 0.45683905482292175, | |
| "learning_rate": 0.00015177045177045177, | |
| "loss": 0.7179, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 1.634491281065495, | |
| "grad_norm": 0.5020450949668884, | |
| "learning_rate": 0.00015164835164835163, | |
| "loss": 0.4356, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.6355897295070712, | |
| "grad_norm": 0.3870914876461029, | |
| "learning_rate": 0.0001515262515262515, | |
| "loss": 0.692, | |
| "step": 1489 | |
| }, | |
| { | |
| "epoch": 1.6366881779486475, | |
| "grad_norm": 0.5256255269050598, | |
| "learning_rate": 0.00015140415140415137, | |
| "loss": 0.7184, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.6377866263902239, | |
| "grad_norm": 0.27588197588920593, | |
| "learning_rate": 0.00015128205128205126, | |
| "loss": 0.6928, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 1.6388850748318, | |
| "grad_norm": 0.43336692452430725, | |
| "learning_rate": 0.00015115995115995114, | |
| "loss": 0.7357, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.6399835232733764, | |
| "grad_norm": 0.7952486872673035, | |
| "learning_rate": 0.000151037851037851, | |
| "loss": 0.5536, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 1.6410819717149527, | |
| "grad_norm": 3.8659090995788574, | |
| "learning_rate": 0.00015091575091575089, | |
| "loss": 0.6409, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.6421804201565289, | |
| "grad_norm": 0.3824027478694916, | |
| "learning_rate": 0.0001507936507936508, | |
| "loss": 0.5988, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 1.643278868598105, | |
| "grad_norm": 0.45106491446495056, | |
| "learning_rate": 0.00015067155067155066, | |
| "loss": 0.7568, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.6443773170396816, | |
| "grad_norm": 0.719417154788971, | |
| "learning_rate": 0.00015054945054945054, | |
| "loss": 0.8191, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 1.6454757654812577, | |
| "grad_norm": 0.4702167212963104, | |
| "learning_rate": 0.00015042735042735043, | |
| "loss": 0.6761, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.6465742139228339, | |
| "grad_norm": 0.49441996216773987, | |
| "learning_rate": 0.0001503052503052503, | |
| "loss": 0.7323, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 1.6476726623644102, | |
| "grad_norm": 0.623470664024353, | |
| "learning_rate": 0.00015018315018315017, | |
| "loss": 0.8384, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.6487711108059866, | |
| "grad_norm": 0.5583334565162659, | |
| "learning_rate": 0.00015006105006105006, | |
| "loss": 0.8238, | |
| "step": 1501 | |
| }, | |
| { | |
| "epoch": 1.6498695592475627, | |
| "grad_norm": 0.4803924560546875, | |
| "learning_rate": 0.00014993894993894994, | |
| "loss": 0.5322, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.650968007689139, | |
| "grad_norm": 0.709605872631073, | |
| "learning_rate": 0.0001498168498168498, | |
| "loss": 0.8254, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 1.6520664561307155, | |
| "grad_norm": 0.48047375679016113, | |
| "learning_rate": 0.0001496947496947497, | |
| "loss": 0.5263, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.6531649045722916, | |
| "grad_norm": 0.41796261072158813, | |
| "learning_rate": 0.00014957264957264957, | |
| "loss": 0.5803, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 1.654263353013868, | |
| "grad_norm": 0.7576707601547241, | |
| "learning_rate": 0.00014945054945054943, | |
| "loss": 0.545, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.6553618014554443, | |
| "grad_norm": 0.4668630063533783, | |
| "learning_rate": 0.00014932844932844932, | |
| "loss": 0.6213, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 1.6564602498970205, | |
| "grad_norm": 0.9730806350708008, | |
| "learning_rate": 0.00014920634920634917, | |
| "loss": 0.5415, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.6575586983385966, | |
| "grad_norm": 0.39670151472091675, | |
| "learning_rate": 0.0001490842490842491, | |
| "loss": 0.7931, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 1.658657146780173, | |
| "grad_norm": 0.6003556847572327, | |
| "learning_rate": 0.00014896214896214895, | |
| "loss": 0.7494, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.6597555952217493, | |
| "grad_norm": 0.4335152506828308, | |
| "learning_rate": 0.00014884004884004883, | |
| "loss": 0.7003, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 1.6608540436633255, | |
| "grad_norm": 0.34025630354881287, | |
| "learning_rate": 0.00014871794871794872, | |
| "loss": 0.9012, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.6619524921049018, | |
| "grad_norm": 0.403934508562088, | |
| "learning_rate": 0.00014859584859584858, | |
| "loss": 0.717, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 1.6630509405464782, | |
| "grad_norm": 0.45691147446632385, | |
| "learning_rate": 0.00014847374847374846, | |
| "loss": 0.4833, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.6641493889880543, | |
| "grad_norm": 0.42266151309013367, | |
| "learning_rate": 0.00014835164835164835, | |
| "loss": 0.5892, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 1.6652478374296307, | |
| "grad_norm": 0.392337441444397, | |
| "learning_rate": 0.0001482295482295482, | |
| "loss": 0.7748, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.666346285871207, | |
| "grad_norm": 0.352081298828125, | |
| "learning_rate": 0.0001481074481074481, | |
| "loss": 0.6018, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 1.6674447343127832, | |
| "grad_norm": 0.46293389797210693, | |
| "learning_rate": 0.00014798534798534798, | |
| "loss": 0.4696, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.6685431827543593, | |
| "grad_norm": 0.6427372097969055, | |
| "learning_rate": 0.00014786324786324786, | |
| "loss": 0.7279, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 1.669641631195936, | |
| "grad_norm": 0.500382125377655, | |
| "learning_rate": 0.00014774114774114772, | |
| "loss": 0.7395, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.670740079637512, | |
| "grad_norm": 0.4410606920719147, | |
| "learning_rate": 0.0001476190476190476, | |
| "loss": 0.501, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 1.6718385280790882, | |
| "grad_norm": 0.5587645769119263, | |
| "learning_rate": 0.0001474969474969475, | |
| "loss": 0.8655, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.6729369765206645, | |
| "grad_norm": 0.4312286376953125, | |
| "learning_rate": 0.00014737484737484735, | |
| "loss": 0.9578, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 1.674035424962241, | |
| "grad_norm": 0.48694175481796265, | |
| "learning_rate": 0.00014725274725274723, | |
| "loss": 0.6806, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.675133873403817, | |
| "grad_norm": 0.39892563223838806, | |
| "learning_rate": 0.00014713064713064712, | |
| "loss": 0.598, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 1.6762323218453934, | |
| "grad_norm": 0.4714735150337219, | |
| "learning_rate": 0.000147008547008547, | |
| "loss": 0.9637, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.6773307702869698, | |
| "grad_norm": 0.8308823108673096, | |
| "learning_rate": 0.00014688644688644686, | |
| "loss": 0.7886, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 1.678429218728546, | |
| "grad_norm": 0.5142358541488647, | |
| "learning_rate": 0.00014676434676434675, | |
| "loss": 0.8028, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.6795276671701223, | |
| "grad_norm": 0.4001234471797943, | |
| "learning_rate": 0.00014664224664224663, | |
| "loss": 0.59, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 1.6806261156116986, | |
| "grad_norm": 0.4112735688686371, | |
| "learning_rate": 0.0001465201465201465, | |
| "loss": 0.6523, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.6817245640532748, | |
| "grad_norm": 0.4391016960144043, | |
| "learning_rate": 0.0001463980463980464, | |
| "loss": 0.7372, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 1.682823012494851, | |
| "grad_norm": 0.7199782133102417, | |
| "learning_rate": 0.00014627594627594626, | |
| "loss": 0.8493, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.6839214609364273, | |
| "grad_norm": 0.42379269003868103, | |
| "learning_rate": 0.00014615384615384615, | |
| "loss": 0.6609, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 1.6850199093780036, | |
| "grad_norm": 0.41174909472465515, | |
| "learning_rate": 0.000146031746031746, | |
| "loss": 0.7021, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.6861183578195797, | |
| "grad_norm": 0.4856640100479126, | |
| "learning_rate": 0.0001459096459096459, | |
| "loss": 0.6055, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 1.687216806261156, | |
| "grad_norm": 0.5789656043052673, | |
| "learning_rate": 0.00014578754578754578, | |
| "loss": 0.7003, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.6883152547027325, | |
| "grad_norm": 0.5711427330970764, | |
| "learning_rate": 0.00014566544566544564, | |
| "loss": 0.5762, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 1.6894137031443086, | |
| "grad_norm": 0.3285518288612366, | |
| "learning_rate": 0.00014554334554334552, | |
| "loss": 0.6232, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.690512151585885, | |
| "grad_norm": 0.48425230383872986, | |
| "learning_rate": 0.0001454212454212454, | |
| "loss": 0.5515, | |
| "step": 1539 | |
| }, | |
| { | |
| "epoch": 1.6916106000274613, | |
| "grad_norm": 0.573079526424408, | |
| "learning_rate": 0.0001452991452991453, | |
| "loss": 0.7776, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.6927090484690375, | |
| "grad_norm": 0.49084943532943726, | |
| "learning_rate": 0.00014517704517704518, | |
| "loss": 0.6504, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 1.6938074969106136, | |
| "grad_norm": 0.46472617983818054, | |
| "learning_rate": 0.00014505494505494504, | |
| "loss": 0.6971, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.6949059453521902, | |
| "grad_norm": 0.4890255033969879, | |
| "learning_rate": 0.00014493284493284492, | |
| "loss": 0.9292, | |
| "step": 1543 | |
| }, | |
| { | |
| "epoch": 1.6960043937937663, | |
| "grad_norm": 0.42868301272392273, | |
| "learning_rate": 0.0001448107448107448, | |
| "loss": 0.6024, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.6971028422353425, | |
| "grad_norm": 0.5118973255157471, | |
| "learning_rate": 0.00014468864468864467, | |
| "loss": 0.7598, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 1.6982012906769188, | |
| "grad_norm": 0.40809181332588196, | |
| "learning_rate": 0.00014456654456654455, | |
| "loss": 0.5157, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.6992997391184952, | |
| "grad_norm": 0.5236404538154602, | |
| "learning_rate": 0.0001444444444444444, | |
| "loss": 0.84, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 1.7003981875600713, | |
| "grad_norm": 0.5712966322898865, | |
| "learning_rate": 0.00014432234432234432, | |
| "loss": 0.7208, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.7014966360016477, | |
| "grad_norm": 0.2910475730895996, | |
| "learning_rate": 0.00014420024420024418, | |
| "loss": 0.4998, | |
| "step": 1549 | |
| }, | |
| { | |
| "epoch": 1.702595084443224, | |
| "grad_norm": 0.5326736569404602, | |
| "learning_rate": 0.00014407814407814407, | |
| "loss": 0.5492, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.7036935328848002, | |
| "grad_norm": 0.5454451441764832, | |
| "learning_rate": 0.00014395604395604395, | |
| "loss": 0.9016, | |
| "step": 1551 | |
| }, | |
| { | |
| "epoch": 1.7047919813263763, | |
| "grad_norm": 0.45031625032424927, | |
| "learning_rate": 0.0001438339438339438, | |
| "loss": 0.671, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.705890429767953, | |
| "grad_norm": 0.5496229529380798, | |
| "learning_rate": 0.0001437118437118437, | |
| "loss": 0.6333, | |
| "step": 1553 | |
| }, | |
| { | |
| "epoch": 1.706988878209529, | |
| "grad_norm": 0.4200669825077057, | |
| "learning_rate": 0.00014358974358974358, | |
| "loss": 0.6158, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.7080873266511052, | |
| "grad_norm": 0.7623536586761475, | |
| "learning_rate": 0.00014346764346764347, | |
| "loss": 0.686, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 1.7091857750926815, | |
| "grad_norm": 0.3363445997238159, | |
| "learning_rate": 0.00014334554334554333, | |
| "loss": 0.305, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.710284223534258, | |
| "grad_norm": 0.5042807459831238, | |
| "learning_rate": 0.0001432234432234432, | |
| "loss": 0.72, | |
| "step": 1557 | |
| }, | |
| { | |
| "epoch": 1.711382671975834, | |
| "grad_norm": 0.5264353156089783, | |
| "learning_rate": 0.0001431013431013431, | |
| "loss": 0.6778, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.7124811204174104, | |
| "grad_norm": 0.48960715532302856, | |
| "learning_rate": 0.00014297924297924296, | |
| "loss": 0.4935, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 1.7135795688589868, | |
| "grad_norm": 0.4308861792087555, | |
| "learning_rate": 0.00014285714285714284, | |
| "loss": 0.6527, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.714678017300563, | |
| "grad_norm": 0.42890703678131104, | |
| "learning_rate": 0.00014273504273504273, | |
| "loss": 0.4846, | |
| "step": 1561 | |
| }, | |
| { | |
| "epoch": 1.7157764657421393, | |
| "grad_norm": 0.5222750902175903, | |
| "learning_rate": 0.0001426129426129426, | |
| "loss": 0.764, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.7168749141837156, | |
| "grad_norm": 0.49664998054504395, | |
| "learning_rate": 0.00014249084249084247, | |
| "loss": 0.5728, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 1.7179733626252918, | |
| "grad_norm": 0.3131520748138428, | |
| "learning_rate": 0.00014236874236874236, | |
| "loss": 0.5089, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.719071811066868, | |
| "grad_norm": 0.5098987221717834, | |
| "learning_rate": 0.00014224664224664224, | |
| "loss": 0.781, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 1.7201702595084445, | |
| "grad_norm": 0.4040893316268921, | |
| "learning_rate": 0.0001421245421245421, | |
| "loss": 0.7358, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.7212687079500206, | |
| "grad_norm": 0.3601396679878235, | |
| "learning_rate": 0.00014200244200244198, | |
| "loss": 0.5531, | |
| "step": 1567 | |
| }, | |
| { | |
| "epoch": 1.7223671563915968, | |
| "grad_norm": 0.6634377837181091, | |
| "learning_rate": 0.00014188034188034187, | |
| "loss": 0.6548, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.7234656048331731, | |
| "grad_norm": 0.35935553908348083, | |
| "learning_rate": 0.00014175824175824173, | |
| "loss": 0.5653, | |
| "step": 1569 | |
| }, | |
| { | |
| "epoch": 1.7245640532747495, | |
| "grad_norm": 0.4607802927494049, | |
| "learning_rate": 0.00014163614163614164, | |
| "loss": 0.9111, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.7256625017163256, | |
| "grad_norm": 1.0116467475891113, | |
| "learning_rate": 0.0001415140415140415, | |
| "loss": 0.9226, | |
| "step": 1571 | |
| }, | |
| { | |
| "epoch": 1.726760950157902, | |
| "grad_norm": 0.9484761953353882, | |
| "learning_rate": 0.00014139194139194139, | |
| "loss": 0.7536, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.7278593985994783, | |
| "grad_norm": 0.3684981167316437, | |
| "learning_rate": 0.00014126984126984124, | |
| "loss": 0.5013, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 1.7289578470410545, | |
| "grad_norm": 0.40037083625793457, | |
| "learning_rate": 0.00014114774114774113, | |
| "loss": 0.8069, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.7300562954826306, | |
| "grad_norm": 0.42828282713890076, | |
| "learning_rate": 0.00014102564102564101, | |
| "loss": 0.5586, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 1.7311547439242072, | |
| "grad_norm": 0.3461548686027527, | |
| "learning_rate": 0.00014090354090354087, | |
| "loss": 0.6045, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.7322531923657833, | |
| "grad_norm": 0.622982919216156, | |
| "learning_rate": 0.00014078144078144079, | |
| "loss": 0.8943, | |
| "step": 1577 | |
| }, | |
| { | |
| "epoch": 1.7333516408073595, | |
| "grad_norm": 0.3318479359149933, | |
| "learning_rate": 0.00014065934065934064, | |
| "loss": 0.4058, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.7344500892489358, | |
| "grad_norm": 0.5178685188293457, | |
| "learning_rate": 0.00014053724053724053, | |
| "loss": 0.5839, | |
| "step": 1579 | |
| }, | |
| { | |
| "epoch": 1.7355485376905122, | |
| "grad_norm": 0.44273868203163147, | |
| "learning_rate": 0.00014041514041514042, | |
| "loss": 0.5394, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.7366469861320883, | |
| "grad_norm": 0.60169517993927, | |
| "learning_rate": 0.00014029304029304027, | |
| "loss": 0.6753, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 1.7377454345736647, | |
| "grad_norm": 0.7691718339920044, | |
| "learning_rate": 0.00014017094017094016, | |
| "loss": 0.9618, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.738843883015241, | |
| "grad_norm": 0.3900390565395355, | |
| "learning_rate": 0.00014004884004884004, | |
| "loss": 0.5809, | |
| "step": 1583 | |
| }, | |
| { | |
| "epoch": 1.7399423314568172, | |
| "grad_norm": 0.6272429823875427, | |
| "learning_rate": 0.00013992673992673993, | |
| "loss": 0.8579, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.7410407798983936, | |
| "grad_norm": 0.30017220973968506, | |
| "learning_rate": 0.0001398046398046398, | |
| "loss": 0.5335, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 1.74213922833997, | |
| "grad_norm": 0.4937066435813904, | |
| "learning_rate": 0.00013968253968253967, | |
| "loss": 0.7941, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.743237676781546, | |
| "grad_norm": 0.47317594289779663, | |
| "learning_rate": 0.00013956043956043956, | |
| "loss": 0.6013, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 1.7443361252231222, | |
| "grad_norm": 1.9155733585357666, | |
| "learning_rate": 0.00013943833943833942, | |
| "loss": 0.6708, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.7454345736646986, | |
| "grad_norm": 0.3844835162162781, | |
| "learning_rate": 0.0001393162393162393, | |
| "loss": 0.7176, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 1.746533022106275, | |
| "grad_norm": 0.42810145020484924, | |
| "learning_rate": 0.0001391941391941392, | |
| "loss": 0.9255, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.747631470547851, | |
| "grad_norm": 3.846015691757202, | |
| "learning_rate": 0.00013907203907203905, | |
| "loss": 0.6202, | |
| "step": 1591 | |
| }, | |
| { | |
| "epoch": 1.7487299189894274, | |
| "grad_norm": 0.42783257365226746, | |
| "learning_rate": 0.00013894993894993893, | |
| "loss": 0.7451, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.7498283674310038, | |
| "grad_norm": 0.5237023234367371, | |
| "learning_rate": 0.00013882783882783882, | |
| "loss": 0.7961, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 1.75092681587258, | |
| "grad_norm": 2.5639729499816895, | |
| "learning_rate": 0.0001387057387057387, | |
| "loss": 0.7026, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.7520252643141563, | |
| "grad_norm": 0.5686498284339905, | |
| "learning_rate": 0.00013858363858363856, | |
| "loss": 0.4916, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 1.7531237127557326, | |
| "grad_norm": 0.561611533164978, | |
| "learning_rate": 0.00013846153846153845, | |
| "loss": 0.772, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.7542221611973088, | |
| "grad_norm": 0.6220077872276306, | |
| "learning_rate": 0.00013833943833943833, | |
| "loss": 0.5694, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 1.755320609638885, | |
| "grad_norm": 0.6902570724487305, | |
| "learning_rate": 0.0001382173382173382, | |
| "loss": 0.7963, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.7564190580804615, | |
| "grad_norm": 2.0417702198028564, | |
| "learning_rate": 0.00013809523809523808, | |
| "loss": 0.6721, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 1.7575175065220376, | |
| "grad_norm": 0.36764901876449585, | |
| "learning_rate": 0.00013797313797313796, | |
| "loss": 0.5714, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.7586159549636138, | |
| "grad_norm": 0.6679022908210754, | |
| "learning_rate": 0.00013785103785103785, | |
| "loss": 0.7025, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 1.7597144034051901, | |
| "grad_norm": 0.5749796628952026, | |
| "learning_rate": 0.0001377289377289377, | |
| "loss": 0.7381, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.7608128518467665, | |
| "grad_norm": 0.9285687208175659, | |
| "learning_rate": 0.0001376068376068376, | |
| "loss": 0.6, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 1.7619113002883426, | |
| "grad_norm": 0.8209772706031799, | |
| "learning_rate": 0.00013748473748473748, | |
| "loss": 0.5701, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.763009748729919, | |
| "grad_norm": 0.7823337912559509, | |
| "learning_rate": 0.00013736263736263734, | |
| "loss": 0.6695, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 1.7641081971714954, | |
| "grad_norm": 0.4885605275630951, | |
| "learning_rate": 0.00013724053724053725, | |
| "loss": 0.6487, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.7652066456130715, | |
| "grad_norm": 0.36517488956451416, | |
| "learning_rate": 0.0001371184371184371, | |
| "loss": 0.5798, | |
| "step": 1607 | |
| }, | |
| { | |
| "epoch": 1.7663050940546479, | |
| "grad_norm": 0.49961966276168823, | |
| "learning_rate": 0.000136996336996337, | |
| "loss": 0.4373, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.7674035424962242, | |
| "grad_norm": 0.495263010263443, | |
| "learning_rate": 0.00013687423687423688, | |
| "loss": 0.5868, | |
| "step": 1609 | |
| }, | |
| { | |
| "epoch": 1.7685019909378004, | |
| "grad_norm": 0.7384648323059082, | |
| "learning_rate": 0.00013675213675213674, | |
| "loss": 0.4957, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.7696004393793765, | |
| "grad_norm": 0.465440034866333, | |
| "learning_rate": 0.00013663003663003662, | |
| "loss": 0.7424, | |
| "step": 1611 | |
| }, | |
| { | |
| "epoch": 1.7706988878209529, | |
| "grad_norm": 0.68381667137146, | |
| "learning_rate": 0.00013650793650793648, | |
| "loss": 1.0421, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.7717973362625292, | |
| "grad_norm": 4.455906867980957, | |
| "learning_rate": 0.00013638583638583637, | |
| "loss": 0.6626, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 1.7728957847041054, | |
| "grad_norm": 0.6165801286697388, | |
| "learning_rate": 0.00013626373626373625, | |
| "loss": 0.6072, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.7739942331456817, | |
| "grad_norm": 0.8296604156494141, | |
| "learning_rate": 0.00013614163614163614, | |
| "loss": 0.6507, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 1.775092681587258, | |
| "grad_norm": 0.4678190350532532, | |
| "learning_rate": 0.00013601953601953602, | |
| "loss": 0.8466, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.7761911300288342, | |
| "grad_norm": 1.2141482830047607, | |
| "learning_rate": 0.00013589743589743588, | |
| "loss": 0.513, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 1.7772895784704106, | |
| "grad_norm": 0.4522024691104889, | |
| "learning_rate": 0.00013577533577533577, | |
| "loss": 0.7571, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.778388026911987, | |
| "grad_norm": 2.0903220176696777, | |
| "learning_rate": 0.00013565323565323565, | |
| "loss": 0.7359, | |
| "step": 1619 | |
| }, | |
| { | |
| "epoch": 1.779486475353563, | |
| "grad_norm": 0.5292307734489441, | |
| "learning_rate": 0.0001355311355311355, | |
| "loss": 0.6526, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.7805849237951392, | |
| "grad_norm": 0.5047786235809326, | |
| "learning_rate": 0.0001354090354090354, | |
| "loss": 0.7056, | |
| "step": 1621 | |
| }, | |
| { | |
| "epoch": 1.7816833722367158, | |
| "grad_norm": 0.4102507531642914, | |
| "learning_rate": 0.00013528693528693528, | |
| "loss": 0.8673, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.782781820678292, | |
| "grad_norm": 0.471556693315506, | |
| "learning_rate": 0.00013516483516483517, | |
| "loss": 0.9424, | |
| "step": 1623 | |
| }, | |
| { | |
| "epoch": 1.783880269119868, | |
| "grad_norm": 0.6595687866210938, | |
| "learning_rate": 0.00013504273504273502, | |
| "loss": 0.661, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.7849787175614444, | |
| "grad_norm": 0.6221860647201538, | |
| "learning_rate": 0.0001349206349206349, | |
| "loss": 0.5457, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 1.7860771660030208, | |
| "grad_norm": 0.9256211519241333, | |
| "learning_rate": 0.0001347985347985348, | |
| "loss": 0.9216, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.787175614444597, | |
| "grad_norm": 0.31376492977142334, | |
| "learning_rate": 0.00013467643467643465, | |
| "loss": 0.7071, | |
| "step": 1627 | |
| }, | |
| { | |
| "epoch": 1.7882740628861733, | |
| "grad_norm": 0.5313776135444641, | |
| "learning_rate": 0.00013455433455433454, | |
| "loss": 0.8111, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.7893725113277497, | |
| "grad_norm": 0.8203330636024475, | |
| "learning_rate": 0.00013443223443223442, | |
| "loss": 0.5301, | |
| "step": 1629 | |
| }, | |
| { | |
| "epoch": 1.7904709597693258, | |
| "grad_norm": 0.42774948477745056, | |
| "learning_rate": 0.0001343101343101343, | |
| "loss": 0.8359, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.791569408210902, | |
| "grad_norm": 0.8165685534477234, | |
| "learning_rate": 0.00013418803418803417, | |
| "loss": 0.4894, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 1.7926678566524785, | |
| "grad_norm": 0.5739139318466187, | |
| "learning_rate": 0.00013406593406593405, | |
| "loss": 0.7009, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.7937663050940547, | |
| "grad_norm": 0.5102986097335815, | |
| "learning_rate": 0.00013394383394383394, | |
| "loss": 0.7174, | |
| "step": 1633 | |
| }, | |
| { | |
| "epoch": 1.7948647535356308, | |
| "grad_norm": 1.1377652883529663, | |
| "learning_rate": 0.0001338217338217338, | |
| "loss": 0.79, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.7959632019772072, | |
| "grad_norm": 0.44272491335868835, | |
| "learning_rate": 0.00013369963369963368, | |
| "loss": 0.6761, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 1.7970616504187835, | |
| "grad_norm": 0.5084714889526367, | |
| "learning_rate": 0.00013357753357753357, | |
| "loss": 0.6848, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 1.7981600988603597, | |
| "grad_norm": 0.752017080783844, | |
| "learning_rate": 0.00013345543345543345, | |
| "loss": 0.6107, | |
| "step": 1637 | |
| }, | |
| { | |
| "epoch": 1.799258547301936, | |
| "grad_norm": 0.4430617690086365, | |
| "learning_rate": 0.0001333333333333333, | |
| "loss": 0.7639, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 1.8003569957435124, | |
| "grad_norm": 0.8098049759864807, | |
| "learning_rate": 0.0001332112332112332, | |
| "loss": 0.8172, | |
| "step": 1639 | |
| }, | |
| { | |
| "epoch": 1.8014554441850885, | |
| "grad_norm": 0.6817697286605835, | |
| "learning_rate": 0.00013308913308913308, | |
| "loss": 0.8274, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.8025538926266649, | |
| "grad_norm": 0.5132669806480408, | |
| "learning_rate": 0.00013296703296703294, | |
| "loss": 0.6269, | |
| "step": 1641 | |
| }, | |
| { | |
| "epoch": 1.8036523410682412, | |
| "grad_norm": 0.8487284183502197, | |
| "learning_rate": 0.00013284493284493283, | |
| "loss": 0.6734, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 1.8047507895098174, | |
| "grad_norm": 0.7084116339683533, | |
| "learning_rate": 0.0001327228327228327, | |
| "loss": 0.703, | |
| "step": 1643 | |
| }, | |
| { | |
| "epoch": 1.8058492379513935, | |
| "grad_norm": 0.39045432209968567, | |
| "learning_rate": 0.00013260073260073257, | |
| "loss": 0.5466, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 1.8069476863929699, | |
| "grad_norm": 0.4408475160598755, | |
| "learning_rate": 0.00013247863247863248, | |
| "loss": 0.4998, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 1.8080461348345462, | |
| "grad_norm": 0.41640380024909973, | |
| "learning_rate": 0.00013235653235653234, | |
| "loss": 0.49, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 1.8091445832761224, | |
| "grad_norm": 0.6760729551315308, | |
| "learning_rate": 0.00013223443223443223, | |
| "loss": 0.4537, | |
| "step": 1647 | |
| }, | |
| { | |
| "epoch": 1.8102430317176987, | |
| "grad_norm": 0.42953255772590637, | |
| "learning_rate": 0.0001321123321123321, | |
| "loss": 0.489, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 1.811341480159275, | |
| "grad_norm": 0.3260825574398041, | |
| "learning_rate": 0.00013199023199023197, | |
| "loss": 0.6633, | |
| "step": 1649 | |
| }, | |
| { | |
| "epoch": 1.8124399286008512, | |
| "grad_norm": 0.7073171138763428, | |
| "learning_rate": 0.00013186813186813186, | |
| "loss": 0.4953, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.8135383770424276, | |
| "grad_norm": 0.36153069138526917, | |
| "learning_rate": 0.00013174603174603172, | |
| "loss": 0.7641, | |
| "step": 1651 | |
| }, | |
| { | |
| "epoch": 1.814636825484004, | |
| "grad_norm": 0.4233636260032654, | |
| "learning_rate": 0.00013162393162393163, | |
| "loss": 0.7119, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 1.81573527392558, | |
| "grad_norm": 0.5262153148651123, | |
| "learning_rate": 0.0001315018315018315, | |
| "loss": 0.4516, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 1.8168337223671562, | |
| "grad_norm": 0.5263295769691467, | |
| "learning_rate": 0.00013137973137973137, | |
| "loss": 0.7786, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 1.8179321708087328, | |
| "grad_norm": 0.3681116998195648, | |
| "learning_rate": 0.00013125763125763126, | |
| "loss": 0.5295, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 1.819030619250309, | |
| "grad_norm": 0.5075433254241943, | |
| "learning_rate": 0.00013113553113553112, | |
| "loss": 0.6017, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 1.820129067691885, | |
| "grad_norm": 0.2960616946220398, | |
| "learning_rate": 0.000131013431013431, | |
| "loss": 0.4951, | |
| "step": 1657 | |
| }, | |
| { | |
| "epoch": 1.8212275161334615, | |
| "grad_norm": 0.4010205864906311, | |
| "learning_rate": 0.0001308913308913309, | |
| "loss": 0.8916, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 1.8223259645750378, | |
| "grad_norm": 0.9112391471862793, | |
| "learning_rate": 0.00013076923076923077, | |
| "loss": 0.4978, | |
| "step": 1659 | |
| }, | |
| { | |
| "epoch": 1.823424413016614, | |
| "grad_norm": 0.7214633226394653, | |
| "learning_rate": 0.00013064713064713063, | |
| "loss": 0.791, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.8245228614581903, | |
| "grad_norm": 0.4174933433532715, | |
| "learning_rate": 0.00013052503052503052, | |
| "loss": 0.4099, | |
| "step": 1661 | |
| }, | |
| { | |
| "epoch": 1.8256213098997667, | |
| "grad_norm": 0.4622137248516083, | |
| "learning_rate": 0.0001304029304029304, | |
| "loss": 1.1726, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 1.8267197583413428, | |
| "grad_norm": 0.5991957783699036, | |
| "learning_rate": 0.00013028083028083026, | |
| "loss": 0.6713, | |
| "step": 1663 | |
| }, | |
| { | |
| "epoch": 1.8278182067829192, | |
| "grad_norm": 0.43959730863571167, | |
| "learning_rate": 0.00013015873015873015, | |
| "loss": 0.5676, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 1.8289166552244955, | |
| "grad_norm": 0.6271671056747437, | |
| "learning_rate": 0.00013003663003663003, | |
| "loss": 0.7399, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 1.8300151036660717, | |
| "grad_norm": 0.6412084102630615, | |
| "learning_rate": 0.0001299145299145299, | |
| "loss": 0.7585, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 1.8311135521076478, | |
| "grad_norm": 0.4066605269908905, | |
| "learning_rate": 0.00012979242979242977, | |
| "loss": 0.5756, | |
| "step": 1667 | |
| }, | |
| { | |
| "epoch": 1.8322120005492242, | |
| "grad_norm": 0.3568172752857208, | |
| "learning_rate": 0.00012967032967032966, | |
| "loss": 0.968, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 1.8333104489908005, | |
| "grad_norm": 0.5061100721359253, | |
| "learning_rate": 0.00012954822954822955, | |
| "loss": 0.5089, | |
| "step": 1669 | |
| }, | |
| { | |
| "epoch": 1.8344088974323767, | |
| "grad_norm": 3.013622522354126, | |
| "learning_rate": 0.0001294261294261294, | |
| "loss": 0.5101, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.835507345873953, | |
| "grad_norm": 0.40078219771385193, | |
| "learning_rate": 0.0001293040293040293, | |
| "loss": 0.5602, | |
| "step": 1671 | |
| }, | |
| { | |
| "epoch": 1.8366057943155294, | |
| "grad_norm": 0.4108009338378906, | |
| "learning_rate": 0.00012918192918192918, | |
| "loss": 0.6338, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 1.8377042427571055, | |
| "grad_norm": 0.5452212691307068, | |
| "learning_rate": 0.00012905982905982903, | |
| "loss": 0.5358, | |
| "step": 1673 | |
| }, | |
| { | |
| "epoch": 1.838802691198682, | |
| "grad_norm": 0.4694603979587555, | |
| "learning_rate": 0.00012893772893772895, | |
| "loss": 0.7031, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 1.8399011396402583, | |
| "grad_norm": 0.3787671625614166, | |
| "learning_rate": 0.0001288156288156288, | |
| "loss": 0.5667, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 1.8409995880818344, | |
| "grad_norm": 0.4842737317085266, | |
| "learning_rate": 0.0001286935286935287, | |
| "loss": 0.5082, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 1.8420980365234105, | |
| "grad_norm": 0.7690992951393127, | |
| "learning_rate": 0.00012857142857142855, | |
| "loss": 0.706, | |
| "step": 1677 | |
| }, | |
| { | |
| "epoch": 1.8431964849649871, | |
| "grad_norm": 1.0891668796539307, | |
| "learning_rate": 0.00012844932844932843, | |
| "loss": 0.7162, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 1.8442949334065633, | |
| "grad_norm": 0.4118032157421112, | |
| "learning_rate": 0.00012832722832722832, | |
| "loss": 0.7019, | |
| "step": 1679 | |
| }, | |
| { | |
| "epoch": 1.8453933818481394, | |
| "grad_norm": 0.513157308101654, | |
| "learning_rate": 0.00012820512820512818, | |
| "loss": 0.4359, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.8464918302897158, | |
| "grad_norm": 1.3229504823684692, | |
| "learning_rate": 0.0001280830280830281, | |
| "loss": 0.5555, | |
| "step": 1681 | |
| }, | |
| { | |
| "epoch": 1.8475902787312921, | |
| "grad_norm": 0.6301699876785278, | |
| "learning_rate": 0.00012796092796092795, | |
| "loss": 0.5211, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 1.8486887271728683, | |
| "grad_norm": 0.6125632524490356, | |
| "learning_rate": 0.00012783882783882783, | |
| "loss": 0.6287, | |
| "step": 1683 | |
| }, | |
| { | |
| "epoch": 1.8497871756144446, | |
| "grad_norm": 1.806593418121338, | |
| "learning_rate": 0.00012771672771672772, | |
| "loss": 0.5794, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 1.850885624056021, | |
| "grad_norm": 1.2972358465194702, | |
| "learning_rate": 0.00012759462759462758, | |
| "loss": 0.9205, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 1.8519840724975971, | |
| "grad_norm": 1.0519033670425415, | |
| "learning_rate": 0.00012747252747252746, | |
| "loss": 0.7103, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 1.8530825209391735, | |
| "grad_norm": 1.6489734649658203, | |
| "learning_rate": 0.00012735042735042735, | |
| "loss": 0.7585, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 1.8541809693807498, | |
| "grad_norm": 0.7229527235031128, | |
| "learning_rate": 0.0001272283272283272, | |
| "loss": 0.8109, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 1.855279417822326, | |
| "grad_norm": 0.35257261991500854, | |
| "learning_rate": 0.0001271062271062271, | |
| "loss": 0.8014, | |
| "step": 1689 | |
| }, | |
| { | |
| "epoch": 1.856377866263902, | |
| "grad_norm": 0.4653327167034149, | |
| "learning_rate": 0.00012698412698412698, | |
| "loss": 0.6404, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.8574763147054785, | |
| "grad_norm": 0.5230842232704163, | |
| "learning_rate": 0.00012686202686202686, | |
| "loss": 0.7413, | |
| "step": 1691 | |
| }, | |
| { | |
| "epoch": 1.8585747631470548, | |
| "grad_norm": 0.42130210995674133, | |
| "learning_rate": 0.00012673992673992672, | |
| "loss": 0.7283, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 1.859673211588631, | |
| "grad_norm": 1.4667960405349731, | |
| "learning_rate": 0.0001266178266178266, | |
| "loss": 0.5656, | |
| "step": 1693 | |
| }, | |
| { | |
| "epoch": 1.8607716600302073, | |
| "grad_norm": 0.4077359139919281, | |
| "learning_rate": 0.0001264957264957265, | |
| "loss": 0.5891, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 1.8618701084717837, | |
| "grad_norm": 0.503654956817627, | |
| "learning_rate": 0.00012637362637362635, | |
| "loss": 0.5912, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 1.8629685569133598, | |
| "grad_norm": 1.6315315961837769, | |
| "learning_rate": 0.00012625152625152624, | |
| "loss": 0.5588, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 1.8640670053549362, | |
| "grad_norm": 0.783920407295227, | |
| "learning_rate": 0.00012612942612942612, | |
| "loss": 0.6585, | |
| "step": 1697 | |
| }, | |
| { | |
| "epoch": 1.8651654537965126, | |
| "grad_norm": 0.7186728715896606, | |
| "learning_rate": 0.000126007326007326, | |
| "loss": 0.9174, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 1.8662639022380887, | |
| "grad_norm": 0.8784156441688538, | |
| "learning_rate": 0.00012588522588522587, | |
| "loss": 0.5835, | |
| "step": 1699 | |
| }, | |
| { | |
| "epoch": 1.8673623506796648, | |
| "grad_norm": 0.7090787887573242, | |
| "learning_rate": 0.00012576312576312575, | |
| "loss": 0.7555, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.8684607991212414, | |
| "grad_norm": 0.5508129596710205, | |
| "learning_rate": 0.00012564102564102564, | |
| "loss": 0.6168, | |
| "step": 1701 | |
| }, | |
| { | |
| "epoch": 1.8695592475628175, | |
| "grad_norm": 0.40403681993484497, | |
| "learning_rate": 0.0001255189255189255, | |
| "loss": 0.4528, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 1.8706576960043937, | |
| "grad_norm": 0.9553635716438293, | |
| "learning_rate": 0.00012539682539682538, | |
| "loss": 0.654, | |
| "step": 1703 | |
| }, | |
| { | |
| "epoch": 1.87175614444597, | |
| "grad_norm": 1.0610092878341675, | |
| "learning_rate": 0.00012527472527472527, | |
| "loss": 0.6115, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 1.8728545928875464, | |
| "grad_norm": 0.32898634672164917, | |
| "learning_rate": 0.00012515262515262515, | |
| "loss": 0.5651, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 1.8739530413291225, | |
| "grad_norm": 0.4018780589103699, | |
| "learning_rate": 0.000125030525030525, | |
| "loss": 0.5919, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 1.875051489770699, | |
| "grad_norm": 1.6521873474121094, | |
| "learning_rate": 0.0001249084249084249, | |
| "loss": 0.7137, | |
| "step": 1707 | |
| }, | |
| { | |
| "epoch": 1.8761499382122753, | |
| "grad_norm": 0.5515930652618408, | |
| "learning_rate": 0.00012478632478632478, | |
| "loss": 0.4471, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 1.8772483866538514, | |
| "grad_norm": 0.4156915545463562, | |
| "learning_rate": 0.00012466422466422464, | |
| "loss": 0.6575, | |
| "step": 1709 | |
| }, | |
| { | |
| "epoch": 1.8783468350954275, | |
| "grad_norm": 0.41263312101364136, | |
| "learning_rate": 0.00012454212454212453, | |
| "loss": 0.542, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.8794452835370041, | |
| "grad_norm": 1.0169517993927002, | |
| "learning_rate": 0.0001244200244200244, | |
| "loss": 1.1631, | |
| "step": 1711 | |
| }, | |
| { | |
| "epoch": 1.8805437319785803, | |
| "grad_norm": 0.49169981479644775, | |
| "learning_rate": 0.0001242979242979243, | |
| "loss": 0.6707, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 1.8816421804201564, | |
| "grad_norm": 0.44801297783851624, | |
| "learning_rate": 0.00012417582417582416, | |
| "loss": 1.0036, | |
| "step": 1713 | |
| }, | |
| { | |
| "epoch": 1.8827406288617328, | |
| "grad_norm": 0.47181040048599243, | |
| "learning_rate": 0.00012405372405372404, | |
| "loss": 0.6693, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 1.8838390773033091, | |
| "grad_norm": 0.39900457859039307, | |
| "learning_rate": 0.00012393162393162393, | |
| "loss": 0.6421, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 1.8849375257448853, | |
| "grad_norm": 1.1160179376602173, | |
| "learning_rate": 0.00012380952380952378, | |
| "loss": 0.6599, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 1.8860359741864616, | |
| "grad_norm": 0.6951555609703064, | |
| "learning_rate": 0.00012368742368742367, | |
| "loss": 0.743, | |
| "step": 1717 | |
| }, | |
| { | |
| "epoch": 1.887134422628038, | |
| "grad_norm": 0.5381472706794739, | |
| "learning_rate": 0.00012356532356532356, | |
| "loss": 0.5051, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 1.8882328710696141, | |
| "grad_norm": 0.48717793822288513, | |
| "learning_rate": 0.00012344322344322341, | |
| "loss": 0.7015, | |
| "step": 1719 | |
| }, | |
| { | |
| "epoch": 1.8893313195111905, | |
| "grad_norm": 0.3720596432685852, | |
| "learning_rate": 0.00012332112332112333, | |
| "loss": 0.6743, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.8904297679527668, | |
| "grad_norm": 1.1850451231002808, | |
| "learning_rate": 0.00012319902319902318, | |
| "loss": 0.6132, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 1.891528216394343, | |
| "grad_norm": 0.4546525180339813, | |
| "learning_rate": 0.00012307692307692307, | |
| "loss": 0.5465, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 1.8926266648359191, | |
| "grad_norm": 0.41415080428123474, | |
| "learning_rate": 0.00012295482295482296, | |
| "loss": 0.7259, | |
| "step": 1723 | |
| }, | |
| { | |
| "epoch": 1.8937251132774955, | |
| "grad_norm": 0.44278842210769653, | |
| "learning_rate": 0.00012283272283272281, | |
| "loss": 0.7244, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 1.8948235617190718, | |
| "grad_norm": 0.3887364864349365, | |
| "learning_rate": 0.0001227106227106227, | |
| "loss": 0.7124, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 1.895922010160648, | |
| "grad_norm": 0.5405781269073486, | |
| "learning_rate": 0.00012258852258852256, | |
| "loss": 0.5153, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 1.8970204586022243, | |
| "grad_norm": 0.3530559837818146, | |
| "learning_rate": 0.00012246642246642247, | |
| "loss": 0.5429, | |
| "step": 1727 | |
| }, | |
| { | |
| "epoch": 1.8981189070438007, | |
| "grad_norm": 0.523621678352356, | |
| "learning_rate": 0.00012234432234432233, | |
| "loss": 0.5645, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 1.8992173554853768, | |
| "grad_norm": 0.3893704116344452, | |
| "learning_rate": 0.00012222222222222221, | |
| "loss": 0.6419, | |
| "step": 1729 | |
| }, | |
| { | |
| "epoch": 1.9003158039269532, | |
| "grad_norm": 0.7010704278945923, | |
| "learning_rate": 0.0001221001221001221, | |
| "loss": 0.5202, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.9014142523685296, | |
| "grad_norm": 0.45551490783691406, | |
| "learning_rate": 0.00012197802197802197, | |
| "loss": 0.8492, | |
| "step": 1731 | |
| }, | |
| { | |
| "epoch": 1.9025127008101057, | |
| "grad_norm": 1.0112484693527222, | |
| "learning_rate": 0.00012185592185592184, | |
| "loss": 0.8602, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 1.9036111492516818, | |
| "grad_norm": 0.4509601294994354, | |
| "learning_rate": 0.00012173382173382173, | |
| "loss": 0.6138, | |
| "step": 1733 | |
| }, | |
| { | |
| "epoch": 1.9047095976932584, | |
| "grad_norm": 0.4303388297557831, | |
| "learning_rate": 0.0001216117216117216, | |
| "loss": 0.4748, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 1.9058080461348346, | |
| "grad_norm": 0.4452000558376312, | |
| "learning_rate": 0.00012148962148962147, | |
| "loss": 0.5869, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 1.9069064945764107, | |
| "grad_norm": 0.5915077924728394, | |
| "learning_rate": 0.00012136752136752136, | |
| "loss": 0.8057, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 1.908004943017987, | |
| "grad_norm": 0.38761547207832336, | |
| "learning_rate": 0.00012124542124542123, | |
| "loss": 0.5772, | |
| "step": 1737 | |
| }, | |
| { | |
| "epoch": 1.9091033914595634, | |
| "grad_norm": 0.517752468585968, | |
| "learning_rate": 0.00012112332112332112, | |
| "loss": 0.7865, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 1.9102018399011396, | |
| "grad_norm": 0.5325546860694885, | |
| "learning_rate": 0.00012100122100122099, | |
| "loss": 0.5934, | |
| "step": 1739 | |
| }, | |
| { | |
| "epoch": 1.911300288342716, | |
| "grad_norm": 0.3930620551109314, | |
| "learning_rate": 0.00012087912087912087, | |
| "loss": 0.5974, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.9123987367842923, | |
| "grad_norm": 1.1001818180084229, | |
| "learning_rate": 0.00012075702075702075, | |
| "loss": 0.6524, | |
| "step": 1741 | |
| }, | |
| { | |
| "epoch": 1.9134971852258684, | |
| "grad_norm": 0.3690165877342224, | |
| "learning_rate": 0.00012063492063492062, | |
| "loss": 0.36, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 1.9145956336674448, | |
| "grad_norm": 0.4403206408023834, | |
| "learning_rate": 0.0001205128205128205, | |
| "loss": 0.5737, | |
| "step": 1743 | |
| }, | |
| { | |
| "epoch": 1.9156940821090211, | |
| "grad_norm": 0.651498019695282, | |
| "learning_rate": 0.00012039072039072037, | |
| "loss": 0.657, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 1.9167925305505973, | |
| "grad_norm": 0.6880660057067871, | |
| "learning_rate": 0.00012026862026862025, | |
| "loss": 0.6891, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 1.9178909789921734, | |
| "grad_norm": 0.4968664348125458, | |
| "learning_rate": 0.00012014652014652015, | |
| "loss": 0.841, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 1.9189894274337498, | |
| "grad_norm": 0.4392407536506653, | |
| "learning_rate": 0.00012002442002442002, | |
| "loss": 0.7096, | |
| "step": 1747 | |
| }, | |
| { | |
| "epoch": 1.9200878758753261, | |
| "grad_norm": 0.41028741002082825, | |
| "learning_rate": 0.00011990231990231989, | |
| "loss": 0.5838, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 1.9211863243169023, | |
| "grad_norm": 0.7928158640861511, | |
| "learning_rate": 0.00011978021978021978, | |
| "loss": 0.6633, | |
| "step": 1749 | |
| }, | |
| { | |
| "epoch": 1.9222847727584786, | |
| "grad_norm": 0.4970681071281433, | |
| "learning_rate": 0.00011965811965811965, | |
| "loss": 0.7764, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.923383221200055, | |
| "grad_norm": 0.49581378698349, | |
| "learning_rate": 0.00011953601953601952, | |
| "loss": 0.7204, | |
| "step": 1751 | |
| }, | |
| { | |
| "epoch": 1.9244816696416311, | |
| "grad_norm": 1.309241771697998, | |
| "learning_rate": 0.00011941391941391939, | |
| "loss": 0.5859, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 1.9255801180832075, | |
| "grad_norm": 0.4651016592979431, | |
| "learning_rate": 0.00011929181929181929, | |
| "loss": 0.6425, | |
| "step": 1753 | |
| }, | |
| { | |
| "epoch": 1.9266785665247839, | |
| "grad_norm": 0.5377634167671204, | |
| "learning_rate": 0.00011916971916971916, | |
| "loss": 0.8244, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 1.92777701496636, | |
| "grad_norm": 0.6809287667274475, | |
| "learning_rate": 0.00011904761904761903, | |
| "loss": 0.5711, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 1.9288754634079361, | |
| "grad_norm": 0.650701105594635, | |
| "learning_rate": 0.00011892551892551892, | |
| "loss": 0.8341, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 1.9299739118495127, | |
| "grad_norm": 1.1710751056671143, | |
| "learning_rate": 0.00011880341880341879, | |
| "loss": 0.8093, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 1.9310723602910889, | |
| "grad_norm": 0.4244484603404999, | |
| "learning_rate": 0.00011868131868131866, | |
| "loss": 0.5556, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 1.932170808732665, | |
| "grad_norm": 0.43999040126800537, | |
| "learning_rate": 0.00011855921855921855, | |
| "loss": 0.4582, | |
| "step": 1759 | |
| }, | |
| { | |
| "epoch": 1.9332692571742414, | |
| "grad_norm": 0.4197145700454712, | |
| "learning_rate": 0.00011843711843711843, | |
| "loss": 0.6475, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.9343677056158177, | |
| "grad_norm": 0.36619749665260315, | |
| "learning_rate": 0.0001183150183150183, | |
| "loss": 0.5804, | |
| "step": 1761 | |
| }, | |
| { | |
| "epoch": 1.9354661540573939, | |
| "grad_norm": 1.7230706214904785, | |
| "learning_rate": 0.00011819291819291819, | |
| "loss": 0.7064, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 1.9365646024989702, | |
| "grad_norm": 0.7621874213218689, | |
| "learning_rate": 0.00011807081807081806, | |
| "loss": 0.6766, | |
| "step": 1763 | |
| }, | |
| { | |
| "epoch": 1.9376630509405466, | |
| "grad_norm": 0.5920525789260864, | |
| "learning_rate": 0.00011794871794871794, | |
| "loss": 0.7092, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 1.9387614993821227, | |
| "grad_norm": 1.5368432998657227, | |
| "learning_rate": 0.00011782661782661781, | |
| "loss": 0.3366, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 1.9398599478236989, | |
| "grad_norm": 0.43197643756866455, | |
| "learning_rate": 0.00011770451770451769, | |
| "loss": 0.6158, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 1.9409583962652754, | |
| "grad_norm": 0.4623143970966339, | |
| "learning_rate": 0.00011758241758241756, | |
| "loss": 0.6574, | |
| "step": 1767 | |
| }, | |
| { | |
| "epoch": 1.9420568447068516, | |
| "grad_norm": 0.40638601779937744, | |
| "learning_rate": 0.00011746031746031744, | |
| "loss": 0.4385, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 1.9431552931484277, | |
| "grad_norm": 0.5941652655601501, | |
| "learning_rate": 0.00011733821733821734, | |
| "loss": 0.8634, | |
| "step": 1769 | |
| }, | |
| { | |
| "epoch": 1.944253741590004, | |
| "grad_norm": 0.9646288156509399, | |
| "learning_rate": 0.00011721611721611721, | |
| "loss": 0.7107, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.9453521900315804, | |
| "grad_norm": 1.6859776973724365, | |
| "learning_rate": 0.00011709401709401708, | |
| "loss": 0.5544, | |
| "step": 1771 | |
| }, | |
| { | |
| "epoch": 1.9464506384731566, | |
| "grad_norm": 0.4034999907016754, | |
| "learning_rate": 0.00011697191697191697, | |
| "loss": 0.559, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 1.947549086914733, | |
| "grad_norm": 0.3644643723964691, | |
| "learning_rate": 0.00011684981684981684, | |
| "loss": 0.535, | |
| "step": 1773 | |
| }, | |
| { | |
| "epoch": 1.9486475353563093, | |
| "grad_norm": 0.5826202034950256, | |
| "learning_rate": 0.00011672771672771671, | |
| "loss": 0.6405, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 1.9497459837978854, | |
| "grad_norm": 0.5501505136489868, | |
| "learning_rate": 0.00011660561660561661, | |
| "loss": 0.5702, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 1.9508444322394618, | |
| "grad_norm": 0.7928853631019592, | |
| "learning_rate": 0.00011648351648351648, | |
| "loss": 0.666, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 1.9519428806810382, | |
| "grad_norm": 0.8168489933013916, | |
| "learning_rate": 0.00011636141636141635, | |
| "loss": 0.4451, | |
| "step": 1777 | |
| }, | |
| { | |
| "epoch": 1.9530413291226143, | |
| "grad_norm": 0.3752410113811493, | |
| "learning_rate": 0.00011623931623931622, | |
| "loss": 0.6552, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 1.9541397775641904, | |
| "grad_norm": 0.9020218849182129, | |
| "learning_rate": 0.00011611721611721611, | |
| "loss": 0.5994, | |
| "step": 1779 | |
| }, | |
| { | |
| "epoch": 1.9552382260057668, | |
| "grad_norm": 0.7668479084968567, | |
| "learning_rate": 0.00011599511599511598, | |
| "loss": 0.5007, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.9563366744473432, | |
| "grad_norm": 0.5034022331237793, | |
| "learning_rate": 0.00011587301587301585, | |
| "loss": 0.5211, | |
| "step": 1781 | |
| }, | |
| { | |
| "epoch": 1.9574351228889193, | |
| "grad_norm": 1.0153850317001343, | |
| "learning_rate": 0.00011575091575091575, | |
| "loss": 0.5953, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 1.9585335713304957, | |
| "grad_norm": 0.40088045597076416, | |
| "learning_rate": 0.00011562881562881562, | |
| "loss": 0.568, | |
| "step": 1783 | |
| }, | |
| { | |
| "epoch": 1.959632019772072, | |
| "grad_norm": 1.4017099142074585, | |
| "learning_rate": 0.0001155067155067155, | |
| "loss": 0.7058, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 1.9607304682136482, | |
| "grad_norm": 0.6009597778320312, | |
| "learning_rate": 0.00011538461538461538, | |
| "loss": 0.6239, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 1.9618289166552245, | |
| "grad_norm": 0.5155071020126343, | |
| "learning_rate": 0.00011526251526251525, | |
| "loss": 0.6089, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 1.9629273650968009, | |
| "grad_norm": 0.4248057007789612, | |
| "learning_rate": 0.00011514041514041513, | |
| "loss": 0.6481, | |
| "step": 1787 | |
| }, | |
| { | |
| "epoch": 1.964025813538377, | |
| "grad_norm": 0.6521177887916565, | |
| "learning_rate": 0.00011501831501831501, | |
| "loss": 0.6598, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 1.9651242619799532, | |
| "grad_norm": 0.44697993993759155, | |
| "learning_rate": 0.00011489621489621488, | |
| "loss": 0.8944, | |
| "step": 1789 | |
| }, | |
| { | |
| "epoch": 1.9662227104215297, | |
| "grad_norm": 0.41537097096443176, | |
| "learning_rate": 0.00011477411477411476, | |
| "loss": 0.5304, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.9673211588631059, | |
| "grad_norm": 0.48793885111808777, | |
| "learning_rate": 0.00011465201465201464, | |
| "loss": 0.7262, | |
| "step": 1791 | |
| }, | |
| { | |
| "epoch": 1.968419607304682, | |
| "grad_norm": 0.8768893480300903, | |
| "learning_rate": 0.00011452991452991453, | |
| "loss": 0.6748, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 1.9695180557462584, | |
| "grad_norm": 0.39224761724472046, | |
| "learning_rate": 0.0001144078144078144, | |
| "loss": 0.5503, | |
| "step": 1793 | |
| }, | |
| { | |
| "epoch": 1.9706165041878347, | |
| "grad_norm": 0.5617446899414062, | |
| "learning_rate": 0.00011428571428571427, | |
| "loss": 0.7329, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 1.9717149526294109, | |
| "grad_norm": 0.3787171542644501, | |
| "learning_rate": 0.00011416361416361416, | |
| "loss": 0.545, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 1.9728134010709872, | |
| "grad_norm": 1.5167701244354248, | |
| "learning_rate": 0.00011404151404151403, | |
| "loss": 0.492, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 1.9739118495125636, | |
| "grad_norm": 0.6436883807182312, | |
| "learning_rate": 0.0001139194139194139, | |
| "loss": 0.5644, | |
| "step": 1797 | |
| }, | |
| { | |
| "epoch": 1.9750102979541397, | |
| "grad_norm": 0.7104658484458923, | |
| "learning_rate": 0.0001137973137973138, | |
| "loss": 0.7485, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 1.976108746395716, | |
| "grad_norm": 0.7996894717216492, | |
| "learning_rate": 0.00011367521367521367, | |
| "loss": 0.6918, | |
| "step": 1799 | |
| }, | |
| { | |
| "epoch": 1.9772071948372925, | |
| "grad_norm": 0.6419106721878052, | |
| "learning_rate": 0.00011355311355311354, | |
| "loss": 0.5945, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.9783056432788686, | |
| "grad_norm": 0.5158131718635559, | |
| "learning_rate": 0.00011343101343101343, | |
| "loss": 0.6685, | |
| "step": 1801 | |
| }, | |
| { | |
| "epoch": 1.9794040917204447, | |
| "grad_norm": 1.0825144052505493, | |
| "learning_rate": 0.0001133089133089133, | |
| "loss": 0.6774, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 1.980502540162021, | |
| "grad_norm": 0.3999088704586029, | |
| "learning_rate": 0.00011318681318681317, | |
| "loss": 0.632, | |
| "step": 1803 | |
| }, | |
| { | |
| "epoch": 1.9816009886035975, | |
| "grad_norm": 0.8866069316864014, | |
| "learning_rate": 0.00011306471306471304, | |
| "loss": 0.6541, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 1.9826994370451736, | |
| "grad_norm": 0.3858928978443146, | |
| "learning_rate": 0.00011294261294261294, | |
| "loss": 0.6608, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 1.98379788548675, | |
| "grad_norm": 0.513117790222168, | |
| "learning_rate": 0.00011282051282051281, | |
| "loss": 0.7598, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 1.9848963339283263, | |
| "grad_norm": 0.3166581392288208, | |
| "learning_rate": 0.00011269841269841269, | |
| "loss": 0.781, | |
| "step": 1807 | |
| }, | |
| { | |
| "epoch": 1.9859947823699025, | |
| "grad_norm": 0.3982362151145935, | |
| "learning_rate": 0.00011257631257631257, | |
| "loss": 0.873, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 1.9870932308114788, | |
| "grad_norm": 0.3784008026123047, | |
| "learning_rate": 0.00011245421245421244, | |
| "loss": 0.7286, | |
| "step": 1809 | |
| }, | |
| { | |
| "epoch": 1.9881916792530552, | |
| "grad_norm": 0.7578315138816833, | |
| "learning_rate": 0.00011233211233211232, | |
| "loss": 0.5958, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.9892901276946313, | |
| "grad_norm": 0.8509061932563782, | |
| "learning_rate": 0.0001122100122100122, | |
| "loss": 0.557, | |
| "step": 1811 | |
| }, | |
| { | |
| "epoch": 1.9903885761362075, | |
| "grad_norm": 0.5107323527336121, | |
| "learning_rate": 0.00011208791208791207, | |
| "loss": 0.6994, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 1.991487024577784, | |
| "grad_norm": 0.5421388149261475, | |
| "learning_rate": 0.00011196581196581196, | |
| "loss": 0.8839, | |
| "step": 1813 | |
| }, | |
| { | |
| "epoch": 1.9925854730193602, | |
| "grad_norm": 0.7442356944084167, | |
| "learning_rate": 0.00011184371184371184, | |
| "loss": 0.6676, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 1.9936839214609363, | |
| "grad_norm": 0.34132111072540283, | |
| "learning_rate": 0.00011172161172161172, | |
| "loss": 0.5714, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 1.9947823699025127, | |
| "grad_norm": 0.3995620906352997, | |
| "learning_rate": 0.00011159951159951159, | |
| "loss": 0.4811, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 1.995880818344089, | |
| "grad_norm": 0.5613861083984375, | |
| "learning_rate": 0.00011147741147741146, | |
| "loss": 0.7495, | |
| "step": 1817 | |
| }, | |
| { | |
| "epoch": 1.9969792667856652, | |
| "grad_norm": 0.4366309642791748, | |
| "learning_rate": 0.00011135531135531135, | |
| "loss": 0.6512, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 1.9980777152272415, | |
| "grad_norm": 0.889916718006134, | |
| "learning_rate": 0.00011123321123321122, | |
| "loss": 0.5544, | |
| "step": 1819 | |
| }, | |
| { | |
| "epoch": 1.999176163668818, | |
| "grad_norm": 0.512112021446228, | |
| "learning_rate": 0.00011111111111111109, | |
| "loss": 1.136, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.000274612110394, | |
| "grad_norm": 0.5241844654083252, | |
| "learning_rate": 0.00011098901098901099, | |
| "loss": 0.5898, | |
| "step": 1821 | |
| }, | |
| { | |
| "epoch": 2.00137306055197, | |
| "grad_norm": 0.38159477710723877, | |
| "learning_rate": 0.00011086691086691086, | |
| "loss": 0.5523, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.0024715089935468, | |
| "grad_norm": 1.0415009260177612, | |
| "learning_rate": 0.00011074481074481073, | |
| "loss": 0.6963, | |
| "step": 1823 | |
| }, | |
| { | |
| "epoch": 2.003569957435123, | |
| "grad_norm": 0.5349957942962646, | |
| "learning_rate": 0.00011062271062271062, | |
| "loss": 0.4422, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.004668405876699, | |
| "grad_norm": 0.4512043297290802, | |
| "learning_rate": 0.00011050061050061049, | |
| "loss": 0.5467, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 2.0057668543182756, | |
| "grad_norm": 0.8268045783042908, | |
| "learning_rate": 0.00011037851037851036, | |
| "loss": 0.6931, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.0068653027598518, | |
| "grad_norm": 0.47922319173812866, | |
| "learning_rate": 0.00011025641025641026, | |
| "loss": 0.707, | |
| "step": 1827 | |
| }, | |
| { | |
| "epoch": 2.007963751201428, | |
| "grad_norm": 1.352858304977417, | |
| "learning_rate": 0.00011013431013431013, | |
| "loss": 0.5658, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.0090621996430045, | |
| "grad_norm": 0.6304643154144287, | |
| "learning_rate": 0.00011001221001221, | |
| "loss": 0.6526, | |
| "step": 1829 | |
| }, | |
| { | |
| "epoch": 2.0101606480845806, | |
| "grad_norm": 0.3759060502052307, | |
| "learning_rate": 0.00010989010989010988, | |
| "loss": 0.627, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.0112590965261568, | |
| "grad_norm": 0.5676531195640564, | |
| "learning_rate": 0.00010976800976800976, | |
| "loss": 0.7568, | |
| "step": 1831 | |
| }, | |
| { | |
| "epoch": 2.012357544967733, | |
| "grad_norm": 0.7481321692466736, | |
| "learning_rate": 0.00010964590964590963, | |
| "loss": 0.7304, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.0134559934093095, | |
| "grad_norm": 1.0350905656814575, | |
| "learning_rate": 0.0001095238095238095, | |
| "loss": 0.7414, | |
| "step": 1833 | |
| }, | |
| { | |
| "epoch": 2.0145544418508856, | |
| "grad_norm": 0.7817292809486389, | |
| "learning_rate": 0.00010940170940170939, | |
| "loss": 0.7742, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.0156528902924618, | |
| "grad_norm": 0.44659602642059326, | |
| "learning_rate": 0.00010927960927960928, | |
| "loss": 0.7872, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 2.0167513387340383, | |
| "grad_norm": 0.46931198239326477, | |
| "learning_rate": 0.00010915750915750915, | |
| "loss": 0.5596, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.0178497871756145, | |
| "grad_norm": 0.34634560346603394, | |
| "learning_rate": 0.00010903540903540903, | |
| "loss": 0.6861, | |
| "step": 1837 | |
| }, | |
| { | |
| "epoch": 2.0189482356171906, | |
| "grad_norm": 0.36579200625419617, | |
| "learning_rate": 0.0001089133089133089, | |
| "loss": 0.6586, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.020046684058767, | |
| "grad_norm": 0.9167144894599915, | |
| "learning_rate": 0.00010879120879120878, | |
| "loss": 0.7125, | |
| "step": 1839 | |
| }, | |
| { | |
| "epoch": 2.0211451325003433, | |
| "grad_norm": 0.4107789993286133, | |
| "learning_rate": 0.00010866910866910866, | |
| "loss": 0.6089, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.0222435809419195, | |
| "grad_norm": 1.0845204591751099, | |
| "learning_rate": 0.00010854700854700854, | |
| "loss": 0.499, | |
| "step": 1841 | |
| }, | |
| { | |
| "epoch": 2.0233420293834956, | |
| "grad_norm": 0.382376492023468, | |
| "learning_rate": 0.00010842490842490841, | |
| "loss": 0.5505, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.024440477825072, | |
| "grad_norm": 0.38339781761169434, | |
| "learning_rate": 0.00010830280830280828, | |
| "loss": 0.4593, | |
| "step": 1843 | |
| }, | |
| { | |
| "epoch": 2.0255389262666483, | |
| "grad_norm": 0.45328769087791443, | |
| "learning_rate": 0.00010818070818070818, | |
| "loss": 0.8437, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.0266373747082245, | |
| "grad_norm": 0.3051920533180237, | |
| "learning_rate": 0.00010805860805860805, | |
| "loss": 0.6096, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 2.027735823149801, | |
| "grad_norm": 0.4249560236930847, | |
| "learning_rate": 0.00010793650793650792, | |
| "loss": 0.6441, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.028834271591377, | |
| "grad_norm": 0.6639708280563354, | |
| "learning_rate": 0.00010781440781440781, | |
| "loss": 0.716, | |
| "step": 1847 | |
| }, | |
| { | |
| "epoch": 2.0299327200329533, | |
| "grad_norm": 0.4324635863304138, | |
| "learning_rate": 0.00010769230769230768, | |
| "loss": 0.5288, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.03103116847453, | |
| "grad_norm": 0.46487629413604736, | |
| "learning_rate": 0.00010757020757020755, | |
| "loss": 0.4908, | |
| "step": 1849 | |
| }, | |
| { | |
| "epoch": 2.032129616916106, | |
| "grad_norm": 0.5104641318321228, | |
| "learning_rate": 0.00010744810744810745, | |
| "loss": 0.6367, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.033228065357682, | |
| "grad_norm": 0.4010922312736511, | |
| "learning_rate": 0.00010732600732600732, | |
| "loss": 0.4266, | |
| "step": 1851 | |
| }, | |
| { | |
| "epoch": 2.0343265137992583, | |
| "grad_norm": 0.6835510730743408, | |
| "learning_rate": 0.0001072039072039072, | |
| "loss": 1.0077, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.035424962240835, | |
| "grad_norm": 0.7012602686882019, | |
| "learning_rate": 0.00010708180708180708, | |
| "loss": 0.7656, | |
| "step": 1853 | |
| }, | |
| { | |
| "epoch": 2.036523410682411, | |
| "grad_norm": 0.8202001452445984, | |
| "learning_rate": 0.00010695970695970695, | |
| "loss": 0.9796, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.037621859123987, | |
| "grad_norm": 0.37708353996276855, | |
| "learning_rate": 0.00010683760683760682, | |
| "loss": 0.3664, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 2.0387203075655638, | |
| "grad_norm": 0.34818801283836365, | |
| "learning_rate": 0.0001067155067155067, | |
| "loss": 0.5365, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.03981875600714, | |
| "grad_norm": 0.46427440643310547, | |
| "learning_rate": 0.0001065934065934066, | |
| "loss": 0.7503, | |
| "step": 1857 | |
| }, | |
| { | |
| "epoch": 2.040917204448716, | |
| "grad_norm": 0.4782754182815552, | |
| "learning_rate": 0.00010647130647130647, | |
| "loss": 0.9247, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.0420156528902926, | |
| "grad_norm": 0.6814667582511902, | |
| "learning_rate": 0.00010634920634920634, | |
| "loss": 0.5365, | |
| "step": 1859 | |
| }, | |
| { | |
| "epoch": 2.0431141013318688, | |
| "grad_norm": 0.4782056510448456, | |
| "learning_rate": 0.00010622710622710622, | |
| "loss": 0.7444, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.044212549773445, | |
| "grad_norm": 0.768439769744873, | |
| "learning_rate": 0.0001061050061050061, | |
| "loss": 0.6386, | |
| "step": 1861 | |
| }, | |
| { | |
| "epoch": 2.0453109982150215, | |
| "grad_norm": 0.9991740584373474, | |
| "learning_rate": 0.00010598290598290597, | |
| "loss": 0.4762, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.0464094466565976, | |
| "grad_norm": 0.4244922995567322, | |
| "learning_rate": 0.00010586080586080585, | |
| "loss": 0.4469, | |
| "step": 1863 | |
| }, | |
| { | |
| "epoch": 2.0475078950981738, | |
| "grad_norm": 0.4085465371608734, | |
| "learning_rate": 0.00010573870573870573, | |
| "loss": 0.7215, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.04860634353975, | |
| "grad_norm": 1.3068008422851562, | |
| "learning_rate": 0.0001056166056166056, | |
| "loss": 0.7781, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 2.0497047919813265, | |
| "grad_norm": 0.3995974659919739, | |
| "learning_rate": 0.0001054945054945055, | |
| "loss": 0.6114, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.0508032404229026, | |
| "grad_norm": 0.47944560647010803, | |
| "learning_rate": 0.00010537240537240537, | |
| "loss": 0.7355, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 2.0519016888644788, | |
| "grad_norm": 1.6718720197677612, | |
| "learning_rate": 0.00010525030525030524, | |
| "loss": 0.5987, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.0530001373060554, | |
| "grad_norm": 0.46015220880508423, | |
| "learning_rate": 0.00010512820512820511, | |
| "loss": 0.481, | |
| "step": 1869 | |
| }, | |
| { | |
| "epoch": 2.0540985857476315, | |
| "grad_norm": 0.4863795042037964, | |
| "learning_rate": 0.000105006105006105, | |
| "loss": 0.5877, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.0551970341892076, | |
| "grad_norm": 0.9190402030944824, | |
| "learning_rate": 0.00010488400488400487, | |
| "loss": 0.7941, | |
| "step": 1871 | |
| }, | |
| { | |
| "epoch": 2.056295482630784, | |
| "grad_norm": 0.6056554317474365, | |
| "learning_rate": 0.00010476190476190474, | |
| "loss": 0.5455, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.0573939310723603, | |
| "grad_norm": 0.7070736289024353, | |
| "learning_rate": 0.00010463980463980464, | |
| "loss": 0.6112, | |
| "step": 1873 | |
| }, | |
| { | |
| "epoch": 2.0584923795139365, | |
| "grad_norm": 0.5415268540382385, | |
| "learning_rate": 0.00010451770451770451, | |
| "loss": 0.7141, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.0595908279555126, | |
| "grad_norm": 0.45696091651916504, | |
| "learning_rate": 0.00010439560439560438, | |
| "loss": 0.7825, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 2.060689276397089, | |
| "grad_norm": 0.5728979706764221, | |
| "learning_rate": 0.00010427350427350427, | |
| "loss": 0.5869, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.0617877248386653, | |
| "grad_norm": 0.5910143852233887, | |
| "learning_rate": 0.00010415140415140414, | |
| "loss": 0.728, | |
| "step": 1877 | |
| }, | |
| { | |
| "epoch": 2.0628861732802415, | |
| "grad_norm": 0.530915379524231, | |
| "learning_rate": 0.00010402930402930401, | |
| "loss": 0.6459, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.063984621721818, | |
| "grad_norm": 0.36358964443206787, | |
| "learning_rate": 0.00010390720390720391, | |
| "loss": 0.7536, | |
| "step": 1879 | |
| }, | |
| { | |
| "epoch": 2.065083070163394, | |
| "grad_norm": 2.7523410320281982, | |
| "learning_rate": 0.00010378510378510379, | |
| "loss": 0.6347, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.0661815186049703, | |
| "grad_norm": 0.6842527389526367, | |
| "learning_rate": 0.00010366300366300366, | |
| "loss": 0.4943, | |
| "step": 1881 | |
| }, | |
| { | |
| "epoch": 2.067279967046547, | |
| "grad_norm": 0.5830293297767639, | |
| "learning_rate": 0.00010354090354090353, | |
| "loss": 0.5855, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.068378415488123, | |
| "grad_norm": 0.981920599937439, | |
| "learning_rate": 0.00010341880341880341, | |
| "loss": 0.4425, | |
| "step": 1883 | |
| }, | |
| { | |
| "epoch": 2.069476863929699, | |
| "grad_norm": 2.0826029777526855, | |
| "learning_rate": 0.00010329670329670329, | |
| "loss": 0.5399, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.0705753123712753, | |
| "grad_norm": 0.4648442268371582, | |
| "learning_rate": 0.00010317460317460316, | |
| "loss": 0.6203, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 2.071673760812852, | |
| "grad_norm": 0.5086346864700317, | |
| "learning_rate": 0.00010305250305250304, | |
| "loss": 0.6091, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.072772209254428, | |
| "grad_norm": 0.40404266119003296, | |
| "learning_rate": 0.00010293040293040292, | |
| "loss": 0.5013, | |
| "step": 1887 | |
| }, | |
| { | |
| "epoch": 2.073870657696004, | |
| "grad_norm": 2.0507569313049316, | |
| "learning_rate": 0.0001028083028083028, | |
| "loss": 0.7822, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.074969106137581, | |
| "grad_norm": 0.9318211078643799, | |
| "learning_rate": 0.00010268620268620269, | |
| "loss": 0.6638, | |
| "step": 1889 | |
| }, | |
| { | |
| "epoch": 2.076067554579157, | |
| "grad_norm": 0.7601054310798645, | |
| "learning_rate": 0.00010256410256410256, | |
| "loss": 0.6085, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.077166003020733, | |
| "grad_norm": 1.1299306154251099, | |
| "learning_rate": 0.00010244200244200243, | |
| "loss": 0.682, | |
| "step": 1891 | |
| }, | |
| { | |
| "epoch": 2.0782644514623096, | |
| "grad_norm": 0.5009475350379944, | |
| "learning_rate": 0.0001023199023199023, | |
| "loss": 0.7229, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.079362899903886, | |
| "grad_norm": 0.3432561159133911, | |
| "learning_rate": 0.00010219780219780219, | |
| "loss": 0.5991, | |
| "step": 1893 | |
| }, | |
| { | |
| "epoch": 2.080461348345462, | |
| "grad_norm": 0.5224031805992126, | |
| "learning_rate": 0.00010207570207570206, | |
| "loss": 0.3687, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.0815597967870385, | |
| "grad_norm": 0.4849548935890198, | |
| "learning_rate": 0.00010195360195360193, | |
| "loss": 0.507, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 2.0826582452286146, | |
| "grad_norm": 0.6093185544013977, | |
| "learning_rate": 0.00010183150183150183, | |
| "loss": 0.7019, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.083756693670191, | |
| "grad_norm": 0.7408457398414612, | |
| "learning_rate": 0.0001017094017094017, | |
| "loss": 0.6331, | |
| "step": 1897 | |
| }, | |
| { | |
| "epoch": 2.084855142111767, | |
| "grad_norm": 0.67701655626297, | |
| "learning_rate": 0.00010158730158730157, | |
| "loss": 0.6685, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.0859535905533435, | |
| "grad_norm": 0.2880030870437622, | |
| "learning_rate": 0.00010146520146520146, | |
| "loss": 0.4043, | |
| "step": 1899 | |
| }, | |
| { | |
| "epoch": 2.0870520389949196, | |
| "grad_norm": 0.45890796184539795, | |
| "learning_rate": 0.00010134310134310133, | |
| "loss": 0.3695, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.088150487436496, | |
| "grad_norm": 0.7898344397544861, | |
| "learning_rate": 0.0001012210012210012, | |
| "loss": 0.7875, | |
| "step": 1901 | |
| }, | |
| { | |
| "epoch": 2.0892489358780724, | |
| "grad_norm": 0.5648753046989441, | |
| "learning_rate": 0.0001010989010989011, | |
| "loss": 0.6058, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.0903473843196485, | |
| "grad_norm": 0.7880465984344482, | |
| "learning_rate": 0.00010097680097680098, | |
| "loss": 0.6403, | |
| "step": 1903 | |
| }, | |
| { | |
| "epoch": 2.0914458327612246, | |
| "grad_norm": 0.4169737696647644, | |
| "learning_rate": 0.00010085470085470085, | |
| "loss": 0.71, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.0925442812028012, | |
| "grad_norm": 0.33653560280799866, | |
| "learning_rate": 0.00010073260073260072, | |
| "loss": 0.6278, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 2.0936427296443774, | |
| "grad_norm": 0.6861558556556702, | |
| "learning_rate": 0.0001006105006105006, | |
| "loss": 0.8463, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.0947411780859535, | |
| "grad_norm": 0.29407018423080444, | |
| "learning_rate": 0.00010048840048840048, | |
| "loss": 0.5644, | |
| "step": 1907 | |
| }, | |
| { | |
| "epoch": 2.09583962652753, | |
| "grad_norm": 0.673083484172821, | |
| "learning_rate": 0.00010036630036630035, | |
| "loss": 0.8353, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.0969380749691062, | |
| "grad_norm": 0.429061621427536, | |
| "learning_rate": 0.00010024420024420023, | |
| "loss": 0.6381, | |
| "step": 1909 | |
| }, | |
| { | |
| "epoch": 2.0980365234106824, | |
| "grad_norm": 0.5113368630409241, | |
| "learning_rate": 0.00010012210012210012, | |
| "loss": 0.7603, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.0991349718522585, | |
| "grad_norm": 0.9005820751190186, | |
| "learning_rate": 9.999999999999999e-05, | |
| "loss": 0.6331, | |
| "step": 1911 | |
| }, | |
| { | |
| "epoch": 2.100233420293835, | |
| "grad_norm": 0.489851176738739, | |
| "learning_rate": 9.987789987789988e-05, | |
| "loss": 0.8564, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.1013318687354112, | |
| "grad_norm": 0.42647236585617065, | |
| "learning_rate": 9.975579975579975e-05, | |
| "loss": 0.5496, | |
| "step": 1913 | |
| }, | |
| { | |
| "epoch": 2.1024303171769874, | |
| "grad_norm": 0.9061693549156189, | |
| "learning_rate": 9.963369963369962e-05, | |
| "loss": 0.4478, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.103528765618564, | |
| "grad_norm": 0.4721933901309967, | |
| "learning_rate": 9.95115995115995e-05, | |
| "loss": 0.6066, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 2.10462721406014, | |
| "grad_norm": 0.7265921831130981, | |
| "learning_rate": 9.938949938949938e-05, | |
| "loss": 0.7195, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.1057256625017162, | |
| "grad_norm": 0.4521386921405792, | |
| "learning_rate": 9.926739926739925e-05, | |
| "loss": 0.6476, | |
| "step": 1917 | |
| }, | |
| { | |
| "epoch": 2.106824110943293, | |
| "grad_norm": 0.42982912063598633, | |
| "learning_rate": 9.914529914529912e-05, | |
| "loss": 0.535, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.107922559384869, | |
| "grad_norm": 0.4758259952068329, | |
| "learning_rate": 9.902319902319902e-05, | |
| "loss": 0.8106, | |
| "step": 1919 | |
| }, | |
| { | |
| "epoch": 2.109021007826445, | |
| "grad_norm": 0.69195157289505, | |
| "learning_rate": 9.890109890109889e-05, | |
| "loss": 0.6643, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.110119456268021, | |
| "grad_norm": 0.8207395672798157, | |
| "learning_rate": 9.877899877899876e-05, | |
| "loss": 0.7535, | |
| "step": 1921 | |
| }, | |
| { | |
| "epoch": 2.111217904709598, | |
| "grad_norm": 1.4245035648345947, | |
| "learning_rate": 9.865689865689865e-05, | |
| "loss": 0.6721, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.112316353151174, | |
| "grad_norm": 0.5496362447738647, | |
| "learning_rate": 9.853479853479852e-05, | |
| "loss": 0.5367, | |
| "step": 1923 | |
| }, | |
| { | |
| "epoch": 2.11341480159275, | |
| "grad_norm": 0.5466665625572205, | |
| "learning_rate": 9.84126984126984e-05, | |
| "loss": 0.6083, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.1145132500343267, | |
| "grad_norm": 0.7750464677810669, | |
| "learning_rate": 9.829059829059829e-05, | |
| "loss": 0.663, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 2.115611698475903, | |
| "grad_norm": 0.4978208541870117, | |
| "learning_rate": 9.816849816849817e-05, | |
| "loss": 0.6334, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.116710146917479, | |
| "grad_norm": 0.6415550708770752, | |
| "learning_rate": 9.804639804639804e-05, | |
| "loss": 0.6477, | |
| "step": 1927 | |
| }, | |
| { | |
| "epoch": 2.1178085953590555, | |
| "grad_norm": 0.644123911857605, | |
| "learning_rate": 9.792429792429792e-05, | |
| "loss": 0.668, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.1189070438006317, | |
| "grad_norm": 0.39706236124038696, | |
| "learning_rate": 9.78021978021978e-05, | |
| "loss": 0.5875, | |
| "step": 1929 | |
| }, | |
| { | |
| "epoch": 2.120005492242208, | |
| "grad_norm": 1.3733233213424683, | |
| "learning_rate": 9.768009768009767e-05, | |
| "loss": 0.6023, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.121103940683784, | |
| "grad_norm": 0.48839983344078064, | |
| "learning_rate": 9.755799755799754e-05, | |
| "loss": 0.5693, | |
| "step": 1931 | |
| }, | |
| { | |
| "epoch": 2.1222023891253605, | |
| "grad_norm": 0.3107692301273346, | |
| "learning_rate": 9.743589743589744e-05, | |
| "loss": 0.5822, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.1233008375669367, | |
| "grad_norm": 0.3988654911518097, | |
| "learning_rate": 9.731379731379731e-05, | |
| "loss": 0.5989, | |
| "step": 1933 | |
| }, | |
| { | |
| "epoch": 2.124399286008513, | |
| "grad_norm": 1.1887754201889038, | |
| "learning_rate": 9.719169719169718e-05, | |
| "loss": 0.6382, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.1254977344500894, | |
| "grad_norm": 0.43282651901245117, | |
| "learning_rate": 9.706959706959707e-05, | |
| "loss": 0.5649, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 2.1265961828916655, | |
| "grad_norm": 0.39243975281715393, | |
| "learning_rate": 9.694749694749694e-05, | |
| "loss": 0.7005, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.1276946313332417, | |
| "grad_norm": 0.7401454448699951, | |
| "learning_rate": 9.682539682539681e-05, | |
| "loss": 1.0632, | |
| "step": 1937 | |
| }, | |
| { | |
| "epoch": 2.1287930797748182, | |
| "grad_norm": 0.6976983547210693, | |
| "learning_rate": 9.67032967032967e-05, | |
| "loss": 0.562, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.1298915282163944, | |
| "grad_norm": 0.9784336686134338, | |
| "learning_rate": 9.658119658119657e-05, | |
| "loss": 0.8115, | |
| "step": 1939 | |
| }, | |
| { | |
| "epoch": 2.1309899766579705, | |
| "grad_norm": 0.5289125442504883, | |
| "learning_rate": 9.645909645909644e-05, | |
| "loss": 0.6161, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.132088425099547, | |
| "grad_norm": 1.414559006690979, | |
| "learning_rate": 9.633699633699634e-05, | |
| "loss": 0.7115, | |
| "step": 1941 | |
| }, | |
| { | |
| "epoch": 2.1331868735411232, | |
| "grad_norm": 0.5444177389144897, | |
| "learning_rate": 9.621489621489621e-05, | |
| "loss": 0.6211, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.1342853219826994, | |
| "grad_norm": 0.637030839920044, | |
| "learning_rate": 9.609279609279608e-05, | |
| "loss": 0.8747, | |
| "step": 1943 | |
| }, | |
| { | |
| "epoch": 2.1353837704242755, | |
| "grad_norm": 0.5926198363304138, | |
| "learning_rate": 9.597069597069595e-05, | |
| "loss": 0.8673, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.136482218865852, | |
| "grad_norm": 0.3638801872730255, | |
| "learning_rate": 9.584859584859584e-05, | |
| "loss": 0.4698, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 2.1375806673074282, | |
| "grad_norm": 0.5823031067848206, | |
| "learning_rate": 9.572649572649571e-05, | |
| "loss": 0.6988, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.1386791157490044, | |
| "grad_norm": 0.44348934292793274, | |
| "learning_rate": 9.560439560439558e-05, | |
| "loss": 0.6667, | |
| "step": 1947 | |
| }, | |
| { | |
| "epoch": 2.139777564190581, | |
| "grad_norm": 3.177112579345703, | |
| "learning_rate": 9.548229548229548e-05, | |
| "loss": 0.8738, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.140876012632157, | |
| "grad_norm": 1.3834997415542603, | |
| "learning_rate": 9.536019536019536e-05, | |
| "loss": 0.528, | |
| "step": 1949 | |
| }, | |
| { | |
| "epoch": 2.1419744610737332, | |
| "grad_norm": 0.5514722466468811, | |
| "learning_rate": 9.523809523809523e-05, | |
| "loss": 0.5058, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.14307290951531, | |
| "grad_norm": 0.8795000314712524, | |
| "learning_rate": 9.511599511599511e-05, | |
| "loss": 0.6368, | |
| "step": 1951 | |
| }, | |
| { | |
| "epoch": 2.144171357956886, | |
| "grad_norm": 1.0043178796768188, | |
| "learning_rate": 9.499389499389498e-05, | |
| "loss": 0.5701, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.145269806398462, | |
| "grad_norm": 1.8537780046463013, | |
| "learning_rate": 9.487179487179486e-05, | |
| "loss": 0.6978, | |
| "step": 1953 | |
| }, | |
| { | |
| "epoch": 2.1463682548400387, | |
| "grad_norm": 0.5239475965499878, | |
| "learning_rate": 9.474969474969476e-05, | |
| "loss": 0.7093, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.147466703281615, | |
| "grad_norm": 0.7944377064704895, | |
| "learning_rate": 9.462759462759463e-05, | |
| "loss": 0.7625, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 2.148565151723191, | |
| "grad_norm": 0.7356003522872925, | |
| "learning_rate": 9.45054945054945e-05, | |
| "loss": 0.6845, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.149663600164767, | |
| "grad_norm": 1.3590694665908813, | |
| "learning_rate": 9.438339438339437e-05, | |
| "loss": 0.6964, | |
| "step": 1957 | |
| }, | |
| { | |
| "epoch": 2.1507620486063437, | |
| "grad_norm": 0.40889453887939453, | |
| "learning_rate": 9.426129426129426e-05, | |
| "loss": 0.6643, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.15186049704792, | |
| "grad_norm": 0.6347643136978149, | |
| "learning_rate": 9.413919413919413e-05, | |
| "loss": 1.0002, | |
| "step": 1959 | |
| }, | |
| { | |
| "epoch": 2.152958945489496, | |
| "grad_norm": 0.3661377429962158, | |
| "learning_rate": 9.4017094017094e-05, | |
| "loss": 0.5084, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.1540573939310725, | |
| "grad_norm": 0.8262574672698975, | |
| "learning_rate": 9.389499389499389e-05, | |
| "loss": 0.5658, | |
| "step": 1961 | |
| }, | |
| { | |
| "epoch": 2.1551558423726487, | |
| "grad_norm": 0.6054818034172058, | |
| "learning_rate": 9.377289377289376e-05, | |
| "loss": 0.6349, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.156254290814225, | |
| "grad_norm": 0.3696078658103943, | |
| "learning_rate": 9.365079365079364e-05, | |
| "loss": 0.5746, | |
| "step": 1963 | |
| }, | |
| { | |
| "epoch": 2.157352739255801, | |
| "grad_norm": 0.7613049745559692, | |
| "learning_rate": 9.352869352869353e-05, | |
| "loss": 0.5204, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.1584511876973775, | |
| "grad_norm": 0.6841816306114197, | |
| "learning_rate": 9.34065934065934e-05, | |
| "loss": 0.813, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 2.1595496361389537, | |
| "grad_norm": 0.902998685836792, | |
| "learning_rate": 9.328449328449327e-05, | |
| "loss": 0.6288, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.16064808458053, | |
| "grad_norm": 0.5367470979690552, | |
| "learning_rate": 9.316239316239316e-05, | |
| "loss": 0.6689, | |
| "step": 1967 | |
| }, | |
| { | |
| "epoch": 2.1617465330221064, | |
| "grad_norm": 0.9443572163581848, | |
| "learning_rate": 9.304029304029303e-05, | |
| "loss": 0.6864, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.1628449814636825, | |
| "grad_norm": 0.42191457748413086, | |
| "learning_rate": 9.29181929181929e-05, | |
| "loss": 0.6509, | |
| "step": 1969 | |
| }, | |
| { | |
| "epoch": 2.1639434299052587, | |
| "grad_norm": 0.6019404530525208, | |
| "learning_rate": 9.279609279609277e-05, | |
| "loss": 0.5252, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.1650418783468353, | |
| "grad_norm": 1.9933907985687256, | |
| "learning_rate": 9.267399267399267e-05, | |
| "loss": 0.6042, | |
| "step": 1971 | |
| }, | |
| { | |
| "epoch": 2.1661403267884114, | |
| "grad_norm": 0.33075836300849915, | |
| "learning_rate": 9.255189255189255e-05, | |
| "loss": 0.579, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.1672387752299875, | |
| "grad_norm": 0.37899547815322876, | |
| "learning_rate": 9.242979242979242e-05, | |
| "loss": 0.5006, | |
| "step": 1973 | |
| }, | |
| { | |
| "epoch": 2.168337223671564, | |
| "grad_norm": 0.6482734680175781, | |
| "learning_rate": 9.23076923076923e-05, | |
| "loss": 0.4844, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.1694356721131403, | |
| "grad_norm": 0.47632062435150146, | |
| "learning_rate": 9.218559218559217e-05, | |
| "loss": 0.5844, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 2.1705341205547164, | |
| "grad_norm": 0.3402813971042633, | |
| "learning_rate": 9.206349206349205e-05, | |
| "loss": 0.6397, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.1716325689962925, | |
| "grad_norm": 0.47405871748924255, | |
| "learning_rate": 9.194139194139195e-05, | |
| "loss": 0.6436, | |
| "step": 1977 | |
| }, | |
| { | |
| "epoch": 2.172731017437869, | |
| "grad_norm": 0.5474234223365784, | |
| "learning_rate": 9.181929181929182e-05, | |
| "loss": 0.5758, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.1738294658794453, | |
| "grad_norm": 0.5423378348350525, | |
| "learning_rate": 9.169719169719169e-05, | |
| "loss": 0.5882, | |
| "step": 1979 | |
| }, | |
| { | |
| "epoch": 2.1749279143210214, | |
| "grad_norm": 0.32848963141441345, | |
| "learning_rate": 9.157509157509158e-05, | |
| "loss": 0.5828, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.176026362762598, | |
| "grad_norm": 0.6646802425384521, | |
| "learning_rate": 9.145299145299145e-05, | |
| "loss": 0.551, | |
| "step": 1981 | |
| }, | |
| { | |
| "epoch": 2.177124811204174, | |
| "grad_norm": 0.4560980200767517, | |
| "learning_rate": 9.133089133089132e-05, | |
| "loss": 0.705, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.1782232596457503, | |
| "grad_norm": 0.4531053304672241, | |
| "learning_rate": 9.120879120879119e-05, | |
| "loss": 0.7471, | |
| "step": 1983 | |
| }, | |
| { | |
| "epoch": 2.179321708087327, | |
| "grad_norm": 0.5881507992744446, | |
| "learning_rate": 9.108669108669108e-05, | |
| "loss": 0.7559, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.180420156528903, | |
| "grad_norm": 0.41462886333465576, | |
| "learning_rate": 9.096459096459096e-05, | |
| "loss": 0.5674, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 2.181518604970479, | |
| "grad_norm": 0.46718108654022217, | |
| "learning_rate": 9.084249084249083e-05, | |
| "loss": 0.7149, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.1826170534120557, | |
| "grad_norm": 0.49290111660957336, | |
| "learning_rate": 9.072039072039072e-05, | |
| "loss": 0.5641, | |
| "step": 1987 | |
| }, | |
| { | |
| "epoch": 2.183715501853632, | |
| "grad_norm": 0.398296594619751, | |
| "learning_rate": 9.059829059829059e-05, | |
| "loss": 0.5177, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 2.184813950295208, | |
| "grad_norm": 0.8241115212440491, | |
| "learning_rate": 9.047619047619046e-05, | |
| "loss": 0.7864, | |
| "step": 1989 | |
| }, | |
| { | |
| "epoch": 2.185912398736784, | |
| "grad_norm": 1.1335865259170532, | |
| "learning_rate": 9.035409035409035e-05, | |
| "loss": 0.6167, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.1870108471783607, | |
| "grad_norm": 0.4479789435863495, | |
| "learning_rate": 9.023199023199022e-05, | |
| "loss": 0.6365, | |
| "step": 1991 | |
| }, | |
| { | |
| "epoch": 2.188109295619937, | |
| "grad_norm": 0.4892582297325134, | |
| "learning_rate": 9.010989010989009e-05, | |
| "loss": 0.6283, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 2.189207744061513, | |
| "grad_norm": 0.8397974371910095, | |
| "learning_rate": 8.998778998778999e-05, | |
| "loss": 0.7123, | |
| "step": 1993 | |
| }, | |
| { | |
| "epoch": 2.1903061925030896, | |
| "grad_norm": 0.5295377969741821, | |
| "learning_rate": 8.986568986568986e-05, | |
| "loss": 0.4033, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 2.1914046409446657, | |
| "grad_norm": 0.464832067489624, | |
| "learning_rate": 8.974358974358974e-05, | |
| "loss": 0.8228, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 2.192503089386242, | |
| "grad_norm": 0.381369024515152, | |
| "learning_rate": 8.962148962148961e-05, | |
| "loss": 0.6267, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 2.193601537827818, | |
| "grad_norm": 0.7176710963249207, | |
| "learning_rate": 8.949938949938949e-05, | |
| "loss": 0.7008, | |
| "step": 1997 | |
| }, | |
| { | |
| "epoch": 2.1946999862693946, | |
| "grad_norm": 2.569753885269165, | |
| "learning_rate": 8.937728937728936e-05, | |
| "loss": 0.6899, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 2.1957984347109707, | |
| "grad_norm": 0.5020056962966919, | |
| "learning_rate": 8.925518925518924e-05, | |
| "loss": 0.527, | |
| "step": 1999 | |
| }, | |
| { | |
| "epoch": 2.196896883152547, | |
| "grad_norm": 1.7054524421691895, | |
| "learning_rate": 8.913308913308914e-05, | |
| "loss": 0.5455, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.1979953315941234, | |
| "grad_norm": 0.5037225484848022, | |
| "learning_rate": 8.901098901098901e-05, | |
| "loss": 0.7445, | |
| "step": 2001 | |
| }, | |
| { | |
| "epoch": 2.1990937800356996, | |
| "grad_norm": 0.8109555840492249, | |
| "learning_rate": 8.888888888888888e-05, | |
| "loss": 0.624, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 2.2001922284772757, | |
| "grad_norm": 0.47120043635368347, | |
| "learning_rate": 8.876678876678877e-05, | |
| "loss": 0.6858, | |
| "step": 2003 | |
| }, | |
| { | |
| "epoch": 2.2012906769188523, | |
| "grad_norm": 0.6166191101074219, | |
| "learning_rate": 8.864468864468864e-05, | |
| "loss": 0.4528, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 2.2023891253604284, | |
| "grad_norm": 0.4999128580093384, | |
| "learning_rate": 8.852258852258851e-05, | |
| "loss": 0.712, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 2.2034875738020046, | |
| "grad_norm": 1.1858354806900024, | |
| "learning_rate": 8.84004884004884e-05, | |
| "loss": 0.7647, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 2.204586022243581, | |
| "grad_norm": 0.4223528206348419, | |
| "learning_rate": 8.827838827838828e-05, | |
| "loss": 0.6553, | |
| "step": 2007 | |
| }, | |
| { | |
| "epoch": 2.2056844706851573, | |
| "grad_norm": 0.41678956151008606, | |
| "learning_rate": 8.815628815628815e-05, | |
| "loss": 0.6033, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 2.2067829191267334, | |
| "grad_norm": 0.5812666416168213, | |
| "learning_rate": 8.803418803418802e-05, | |
| "loss": 0.6016, | |
| "step": 2009 | |
| }, | |
| { | |
| "epoch": 2.2078813675683095, | |
| "grad_norm": 0.5553560256958008, | |
| "learning_rate": 8.791208791208791e-05, | |
| "loss": 0.7621, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.208979816009886, | |
| "grad_norm": 0.6392796635627747, | |
| "learning_rate": 8.778998778998778e-05, | |
| "loss": 0.567, | |
| "step": 2011 | |
| }, | |
| { | |
| "epoch": 2.2100782644514623, | |
| "grad_norm": 1.0086902379989624, | |
| "learning_rate": 8.766788766788765e-05, | |
| "loss": 0.9432, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 2.2111767128930384, | |
| "grad_norm": 1.3578602075576782, | |
| "learning_rate": 8.754578754578754e-05, | |
| "loss": 0.5107, | |
| "step": 2013 | |
| }, | |
| { | |
| "epoch": 2.212275161334615, | |
| "grad_norm": 0.5530524849891663, | |
| "learning_rate": 8.742368742368741e-05, | |
| "loss": 0.6078, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 2.213373609776191, | |
| "grad_norm": 0.3795104920864105, | |
| "learning_rate": 8.730158730158728e-05, | |
| "loss": 0.4889, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 2.2144720582177673, | |
| "grad_norm": 0.40977227687835693, | |
| "learning_rate": 8.717948717948718e-05, | |
| "loss": 0.6295, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 2.215570506659344, | |
| "grad_norm": 0.4882934093475342, | |
| "learning_rate": 8.705738705738705e-05, | |
| "loss": 0.7219, | |
| "step": 2017 | |
| }, | |
| { | |
| "epoch": 2.21666895510092, | |
| "grad_norm": 0.7966530919075012, | |
| "learning_rate": 8.693528693528693e-05, | |
| "loss": 0.5342, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 2.217767403542496, | |
| "grad_norm": 0.6992311477661133, | |
| "learning_rate": 8.681318681318681e-05, | |
| "loss": 0.5932, | |
| "step": 2019 | |
| }, | |
| { | |
| "epoch": 2.2188658519840727, | |
| "grad_norm": 0.396427720785141, | |
| "learning_rate": 8.669108669108668e-05, | |
| "loss": 0.5838, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.219964300425649, | |
| "grad_norm": 0.5625690817832947, | |
| "learning_rate": 8.656898656898655e-05, | |
| "loss": 0.7605, | |
| "step": 2021 | |
| }, | |
| { | |
| "epoch": 2.221062748867225, | |
| "grad_norm": 0.6052583456039429, | |
| "learning_rate": 8.644688644688643e-05, | |
| "loss": 0.6572, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 2.222161197308801, | |
| "grad_norm": 0.7201973795890808, | |
| "learning_rate": 8.632478632478633e-05, | |
| "loss": 0.4924, | |
| "step": 2023 | |
| }, | |
| { | |
| "epoch": 2.2232596457503777, | |
| "grad_norm": 0.4222647249698639, | |
| "learning_rate": 8.62026862026862e-05, | |
| "loss": 0.7764, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 2.224358094191954, | |
| "grad_norm": 0.5168121457099915, | |
| "learning_rate": 8.608058608058607e-05, | |
| "loss": 0.5766, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 2.22545654263353, | |
| "grad_norm": 0.886203408241272, | |
| "learning_rate": 8.595848595848596e-05, | |
| "loss": 0.3804, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 2.2265549910751066, | |
| "grad_norm": 1.7365875244140625, | |
| "learning_rate": 8.583638583638583e-05, | |
| "loss": 0.6583, | |
| "step": 2027 | |
| }, | |
| { | |
| "epoch": 2.2276534395166827, | |
| "grad_norm": 0.44519639015197754, | |
| "learning_rate": 8.57142857142857e-05, | |
| "loss": 0.7322, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 2.228751887958259, | |
| "grad_norm": 0.4888206422328949, | |
| "learning_rate": 8.55921855921856e-05, | |
| "loss": 0.6645, | |
| "step": 2029 | |
| }, | |
| { | |
| "epoch": 2.2298503363998354, | |
| "grad_norm": 0.598225474357605, | |
| "learning_rate": 8.547008547008547e-05, | |
| "loss": 0.7903, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.2309487848414116, | |
| "grad_norm": 0.8521910905838013, | |
| "learning_rate": 8.534798534798534e-05, | |
| "loss": 0.8573, | |
| "step": 2031 | |
| }, | |
| { | |
| "epoch": 2.2320472332829877, | |
| "grad_norm": 1.6346311569213867, | |
| "learning_rate": 8.522588522588523e-05, | |
| "loss": 0.5653, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 2.233145681724564, | |
| "grad_norm": 0.6574315428733826, | |
| "learning_rate": 8.51037851037851e-05, | |
| "loss": 0.5289, | |
| "step": 2033 | |
| }, | |
| { | |
| "epoch": 2.2342441301661404, | |
| "grad_norm": 0.3821216821670532, | |
| "learning_rate": 8.498168498168497e-05, | |
| "loss": 0.4627, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 2.2353425786077166, | |
| "grad_norm": 0.28965023159980774, | |
| "learning_rate": 8.485958485958484e-05, | |
| "loss": 0.3696, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 2.2364410270492927, | |
| "grad_norm": 0.8256242275238037, | |
| "learning_rate": 8.473748473748473e-05, | |
| "loss": 0.6305, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 2.2375394754908693, | |
| "grad_norm": 0.8374451398849487, | |
| "learning_rate": 8.46153846153846e-05, | |
| "loss": 0.5038, | |
| "step": 2037 | |
| }, | |
| { | |
| "epoch": 2.2386379239324454, | |
| "grad_norm": 0.5931464433670044, | |
| "learning_rate": 8.449328449328449e-05, | |
| "loss": 0.6928, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 2.2397363723740216, | |
| "grad_norm": 0.5120035409927368, | |
| "learning_rate": 8.437118437118437e-05, | |
| "loss": 0.6004, | |
| "step": 2039 | |
| }, | |
| { | |
| "epoch": 2.240834820815598, | |
| "grad_norm": 0.6345282196998596, | |
| "learning_rate": 8.424908424908424e-05, | |
| "loss": 0.866, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.2419332692571743, | |
| "grad_norm": 0.5632284283638, | |
| "learning_rate": 8.412698412698412e-05, | |
| "loss": 0.406, | |
| "step": 2041 | |
| }, | |
| { | |
| "epoch": 2.2430317176987504, | |
| "grad_norm": 0.4784685969352722, | |
| "learning_rate": 8.4004884004884e-05, | |
| "loss": 0.4732, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 2.2441301661403266, | |
| "grad_norm": 0.47678086161613464, | |
| "learning_rate": 8.388278388278387e-05, | |
| "loss": 0.502, | |
| "step": 2043 | |
| }, | |
| { | |
| "epoch": 2.245228614581903, | |
| "grad_norm": 0.6543307304382324, | |
| "learning_rate": 8.376068376068374e-05, | |
| "loss": 0.7183, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 2.2463270630234793, | |
| "grad_norm": 0.6147063374519348, | |
| "learning_rate": 8.363858363858364e-05, | |
| "loss": 0.618, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 2.2474255114650554, | |
| "grad_norm": 0.5867168307304382, | |
| "learning_rate": 8.351648351648352e-05, | |
| "loss": 0.7749, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 2.248523959906632, | |
| "grad_norm": 1.164838433265686, | |
| "learning_rate": 8.339438339438339e-05, | |
| "loss": 0.6261, | |
| "step": 2047 | |
| }, | |
| { | |
| "epoch": 2.249622408348208, | |
| "grad_norm": 0.6695102453231812, | |
| "learning_rate": 8.327228327228326e-05, | |
| "loss": 0.6172, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 2.2507208567897843, | |
| "grad_norm": 0.43873751163482666, | |
| "learning_rate": 8.315018315018315e-05, | |
| "loss": 0.7032, | |
| "step": 2049 | |
| }, | |
| { | |
| "epoch": 2.251819305231361, | |
| "grad_norm": 0.439897745847702, | |
| "learning_rate": 8.302808302808302e-05, | |
| "loss": 0.7744, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.252917753672937, | |
| "grad_norm": 0.6671053767204285, | |
| "learning_rate": 8.290598290598289e-05, | |
| "loss": 0.6877, | |
| "step": 2051 | |
| }, | |
| { | |
| "epoch": 2.254016202114513, | |
| "grad_norm": 0.37354105710983276, | |
| "learning_rate": 8.278388278388279e-05, | |
| "loss": 0.5653, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 2.2551146505560897, | |
| "grad_norm": 0.5615684390068054, | |
| "learning_rate": 8.266178266178266e-05, | |
| "loss": 0.5961, | |
| "step": 2053 | |
| }, | |
| { | |
| "epoch": 2.256213098997666, | |
| "grad_norm": 2.0932323932647705, | |
| "learning_rate": 8.253968253968253e-05, | |
| "loss": 0.6139, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 2.257311547439242, | |
| "grad_norm": 0.5486952066421509, | |
| "learning_rate": 8.241758241758242e-05, | |
| "loss": 0.7816, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 2.258409995880818, | |
| "grad_norm": 0.7377699017524719, | |
| "learning_rate": 8.229548229548229e-05, | |
| "loss": 0.5036, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 2.2595084443223947, | |
| "grad_norm": 0.7057545781135559, | |
| "learning_rate": 8.217338217338216e-05, | |
| "loss": 0.5788, | |
| "step": 2057 | |
| }, | |
| { | |
| "epoch": 2.260606892763971, | |
| "grad_norm": 0.5388674736022949, | |
| "learning_rate": 8.205128205128205e-05, | |
| "loss": 0.7079, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 2.261705341205547, | |
| "grad_norm": 0.620943546295166, | |
| "learning_rate": 8.192918192918192e-05, | |
| "loss": 0.6223, | |
| "step": 2059 | |
| }, | |
| { | |
| "epoch": 2.2628037896471236, | |
| "grad_norm": 0.6159489154815674, | |
| "learning_rate": 8.18070818070818e-05, | |
| "loss": 0.7277, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.2639022380886997, | |
| "grad_norm": 0.5745131373405457, | |
| "learning_rate": 8.168498168498168e-05, | |
| "loss": 0.6356, | |
| "step": 2061 | |
| }, | |
| { | |
| "epoch": 2.265000686530276, | |
| "grad_norm": 0.4925720989704132, | |
| "learning_rate": 8.156288156288156e-05, | |
| "loss": 0.6342, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 2.2660991349718524, | |
| "grad_norm": 0.410692036151886, | |
| "learning_rate": 8.144078144078143e-05, | |
| "loss": 0.5903, | |
| "step": 2063 | |
| }, | |
| { | |
| "epoch": 2.2671975834134286, | |
| "grad_norm": 0.8246005177497864, | |
| "learning_rate": 8.13186813186813e-05, | |
| "loss": 0.4048, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 2.2682960318550047, | |
| "grad_norm": 0.5054492950439453, | |
| "learning_rate": 8.119658119658119e-05, | |
| "loss": 0.5797, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 2.2693944802965813, | |
| "grad_norm": 0.6249692440032959, | |
| "learning_rate": 8.107448107448106e-05, | |
| "loss": 0.5434, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 2.2704929287381574, | |
| "grad_norm": 0.5582659244537354, | |
| "learning_rate": 8.095238095238093e-05, | |
| "loss": 0.5925, | |
| "step": 2067 | |
| }, | |
| { | |
| "epoch": 2.2715913771797336, | |
| "grad_norm": 0.38472238183021545, | |
| "learning_rate": 8.083028083028083e-05, | |
| "loss": 0.7325, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 2.2726898256213097, | |
| "grad_norm": 0.4649077355861664, | |
| "learning_rate": 8.07081807081807e-05, | |
| "loss": 0.6244, | |
| "step": 2069 | |
| }, | |
| { | |
| "epoch": 2.2737882740628863, | |
| "grad_norm": 0.38582849502563477, | |
| "learning_rate": 8.058608058608058e-05, | |
| "loss": 0.7696, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.2748867225044624, | |
| "grad_norm": 0.4612105190753937, | |
| "learning_rate": 8.046398046398045e-05, | |
| "loss": 0.6453, | |
| "step": 2071 | |
| }, | |
| { | |
| "epoch": 2.2759851709460386, | |
| "grad_norm": 0.6572852730751038, | |
| "learning_rate": 8.034188034188034e-05, | |
| "loss": 0.7417, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 2.277083619387615, | |
| "grad_norm": 0.6322109699249268, | |
| "learning_rate": 8.021978021978021e-05, | |
| "loss": 0.2827, | |
| "step": 2073 | |
| }, | |
| { | |
| "epoch": 2.2781820678291913, | |
| "grad_norm": 1.2452771663665771, | |
| "learning_rate": 8.009768009768008e-05, | |
| "loss": 0.7441, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 2.2792805162707674, | |
| "grad_norm": 0.32154834270477295, | |
| "learning_rate": 7.997557997557998e-05, | |
| "loss": 0.4606, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 2.2803789647123436, | |
| "grad_norm": 1.0170034170150757, | |
| "learning_rate": 7.985347985347985e-05, | |
| "loss": 0.7003, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 2.28147741315392, | |
| "grad_norm": 0.7780435085296631, | |
| "learning_rate": 7.973137973137972e-05, | |
| "loss": 0.5847, | |
| "step": 2077 | |
| }, | |
| { | |
| "epoch": 2.2825758615954963, | |
| "grad_norm": 0.6422854661941528, | |
| "learning_rate": 7.960927960927961e-05, | |
| "loss": 0.6278, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 2.2836743100370724, | |
| "grad_norm": 0.5440393090248108, | |
| "learning_rate": 7.948717948717948e-05, | |
| "loss": 0.6313, | |
| "step": 2079 | |
| }, | |
| { | |
| "epoch": 2.284772758478649, | |
| "grad_norm": 0.5774940848350525, | |
| "learning_rate": 7.936507936507935e-05, | |
| "loss": 0.7504, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.285871206920225, | |
| "grad_norm": 0.44180789589881897, | |
| "learning_rate": 7.924297924297924e-05, | |
| "loss": 0.5806, | |
| "step": 2081 | |
| }, | |
| { | |
| "epoch": 2.2869696553618013, | |
| "grad_norm": 0.8452728390693665, | |
| "learning_rate": 7.912087912087912e-05, | |
| "loss": 0.5753, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 2.288068103803378, | |
| "grad_norm": 0.40172943472862244, | |
| "learning_rate": 7.8998778998779e-05, | |
| "loss": 0.5565, | |
| "step": 2083 | |
| }, | |
| { | |
| "epoch": 2.289166552244954, | |
| "grad_norm": 0.3919180929660797, | |
| "learning_rate": 7.887667887667887e-05, | |
| "loss": 0.4951, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 2.29026500068653, | |
| "grad_norm": 1.0796260833740234, | |
| "learning_rate": 7.875457875457875e-05, | |
| "loss": 0.733, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 2.2913634491281067, | |
| "grad_norm": 0.5640047788619995, | |
| "learning_rate": 7.863247863247862e-05, | |
| "loss": 0.4625, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 2.292461897569683, | |
| "grad_norm": 0.8736083507537842, | |
| "learning_rate": 7.85103785103785e-05, | |
| "loss": 0.5532, | |
| "step": 2087 | |
| }, | |
| { | |
| "epoch": 2.293560346011259, | |
| "grad_norm": 0.5358221530914307, | |
| "learning_rate": 7.838827838827838e-05, | |
| "loss": 0.6397, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 2.294658794452835, | |
| "grad_norm": 5.207391262054443, | |
| "learning_rate": 7.826617826617825e-05, | |
| "loss": 0.6402, | |
| "step": 2089 | |
| }, | |
| { | |
| "epoch": 2.2957572428944117, | |
| "grad_norm": 0.4122523069381714, | |
| "learning_rate": 7.814407814407813e-05, | |
| "loss": 0.474, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.296855691335988, | |
| "grad_norm": 2.8296186923980713, | |
| "learning_rate": 7.802197802197802e-05, | |
| "loss": 0.5197, | |
| "step": 2091 | |
| }, | |
| { | |
| "epoch": 2.297954139777564, | |
| "grad_norm": 0.6898410320281982, | |
| "learning_rate": 7.78998778998779e-05, | |
| "loss": 0.782, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 2.2990525882191406, | |
| "grad_norm": 0.37363025546073914, | |
| "learning_rate": 7.777777777777777e-05, | |
| "loss": 0.5824, | |
| "step": 2093 | |
| }, | |
| { | |
| "epoch": 2.3001510366607167, | |
| "grad_norm": 0.5120764374732971, | |
| "learning_rate": 7.765567765567765e-05, | |
| "loss": 0.7326, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 2.301249485102293, | |
| "grad_norm": 0.6517985463142395, | |
| "learning_rate": 7.753357753357753e-05, | |
| "loss": 0.6274, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 2.3023479335438695, | |
| "grad_norm": 0.8033846020698547, | |
| "learning_rate": 7.74114774114774e-05, | |
| "loss": 0.7093, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 2.3034463819854456, | |
| "grad_norm": 0.896397590637207, | |
| "learning_rate": 7.728937728937727e-05, | |
| "loss": 0.6685, | |
| "step": 2097 | |
| }, | |
| { | |
| "epoch": 2.3045448304270217, | |
| "grad_norm": 0.4606597423553467, | |
| "learning_rate": 7.716727716727717e-05, | |
| "loss": 0.5821, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 2.3056432788685983, | |
| "grad_norm": 0.9286845922470093, | |
| "learning_rate": 7.704517704517704e-05, | |
| "loss": 0.7537, | |
| "step": 2099 | |
| }, | |
| { | |
| "epoch": 2.3067417273101745, | |
| "grad_norm": 0.6514043211936951, | |
| "learning_rate": 7.692307692307691e-05, | |
| "loss": 0.5644, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.3078401757517506, | |
| "grad_norm": 0.4881083369255066, | |
| "learning_rate": 7.68009768009768e-05, | |
| "loss": 0.5348, | |
| "step": 2101 | |
| }, | |
| { | |
| "epoch": 2.3089386241933267, | |
| "grad_norm": 2.688716173171997, | |
| "learning_rate": 7.667887667887667e-05, | |
| "loss": 0.6732, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 2.3100370726349033, | |
| "grad_norm": 0.4597708582878113, | |
| "learning_rate": 7.655677655677654e-05, | |
| "loss": 0.6166, | |
| "step": 2103 | |
| }, | |
| { | |
| "epoch": 2.3111355210764795, | |
| "grad_norm": 0.7629315853118896, | |
| "learning_rate": 7.643467643467644e-05, | |
| "loss": 0.4677, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 2.3122339695180556, | |
| "grad_norm": 0.7282788753509521, | |
| "learning_rate": 7.631257631257631e-05, | |
| "loss": 0.6841, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 2.313332417959632, | |
| "grad_norm": 0.5421862006187439, | |
| "learning_rate": 7.619047619047618e-05, | |
| "loss": 0.7274, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 2.3144308664012083, | |
| "grad_norm": 0.7396867871284485, | |
| "learning_rate": 7.606837606837607e-05, | |
| "loss": 0.6546, | |
| "step": 2107 | |
| }, | |
| { | |
| "epoch": 2.3155293148427845, | |
| "grad_norm": 0.34731313586235046, | |
| "learning_rate": 7.594627594627594e-05, | |
| "loss": 0.72, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 2.3166277632843606, | |
| "grad_norm": 1.1024978160858154, | |
| "learning_rate": 7.582417582417581e-05, | |
| "loss": 0.7304, | |
| "step": 2109 | |
| }, | |
| { | |
| "epoch": 2.317726211725937, | |
| "grad_norm": 0.5866183638572693, | |
| "learning_rate": 7.570207570207569e-05, | |
| "loss": 0.4912, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.3188246601675133, | |
| "grad_norm": 0.8068836331367493, | |
| "learning_rate": 7.557997557997557e-05, | |
| "loss": 0.5342, | |
| "step": 2111 | |
| }, | |
| { | |
| "epoch": 2.31992310860909, | |
| "grad_norm": 0.6417646408081055, | |
| "learning_rate": 7.545787545787544e-05, | |
| "loss": 0.7642, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 2.321021557050666, | |
| "grad_norm": 0.4545808434486389, | |
| "learning_rate": 7.533577533577533e-05, | |
| "loss": 0.5681, | |
| "step": 2113 | |
| }, | |
| { | |
| "epoch": 2.322120005492242, | |
| "grad_norm": 0.3567211329936981, | |
| "learning_rate": 7.521367521367521e-05, | |
| "loss": 0.6368, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 2.3232184539338183, | |
| "grad_norm": 0.5747010707855225, | |
| "learning_rate": 7.509157509157509e-05, | |
| "loss": 0.5848, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 2.324316902375395, | |
| "grad_norm": 0.46303555369377136, | |
| "learning_rate": 7.496947496947497e-05, | |
| "loss": 0.6577, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 2.325415350816971, | |
| "grad_norm": 0.5343080759048462, | |
| "learning_rate": 7.484737484737484e-05, | |
| "loss": 0.8531, | |
| "step": 2117 | |
| }, | |
| { | |
| "epoch": 2.326513799258547, | |
| "grad_norm": 0.9027140736579895, | |
| "learning_rate": 7.472527472527472e-05, | |
| "loss": 0.6271, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 2.3276122477001238, | |
| "grad_norm": 0.6390063166618347, | |
| "learning_rate": 7.460317460317459e-05, | |
| "loss": 0.5669, | |
| "step": 2119 | |
| }, | |
| { | |
| "epoch": 2.3287106961417, | |
| "grad_norm": 0.4965013563632965, | |
| "learning_rate": 7.448107448107447e-05, | |
| "loss": 0.6362, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.329809144583276, | |
| "grad_norm": 0.49252766370773315, | |
| "learning_rate": 7.435897435897436e-05, | |
| "loss": 0.6703, | |
| "step": 2121 | |
| }, | |
| { | |
| "epoch": 2.330907593024852, | |
| "grad_norm": 0.7043023705482483, | |
| "learning_rate": 7.423687423687423e-05, | |
| "loss": 0.7114, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 2.3320060414664288, | |
| "grad_norm": 0.4373185634613037, | |
| "learning_rate": 7.41147741147741e-05, | |
| "loss": 0.5656, | |
| "step": 2123 | |
| }, | |
| { | |
| "epoch": 2.333104489908005, | |
| "grad_norm": 1.0036537647247314, | |
| "learning_rate": 7.399267399267399e-05, | |
| "loss": 0.6652, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 2.334202938349581, | |
| "grad_norm": 2.06589937210083, | |
| "learning_rate": 7.387057387057386e-05, | |
| "loss": 0.6502, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 2.3353013867911576, | |
| "grad_norm": 1.1616554260253906, | |
| "learning_rate": 7.374847374847375e-05, | |
| "loss": 0.7288, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 2.3363998352327338, | |
| "grad_norm": 0.4532950520515442, | |
| "learning_rate": 7.362637362637362e-05, | |
| "loss": 0.7696, | |
| "step": 2127 | |
| }, | |
| { | |
| "epoch": 2.33749828367431, | |
| "grad_norm": 1.0143449306488037, | |
| "learning_rate": 7.35042735042735e-05, | |
| "loss": 1.0185, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 2.3385967321158865, | |
| "grad_norm": 2.2059850692749023, | |
| "learning_rate": 7.338217338217337e-05, | |
| "loss": 0.6267, | |
| "step": 2129 | |
| }, | |
| { | |
| "epoch": 2.3396951805574626, | |
| "grad_norm": 0.4883456826210022, | |
| "learning_rate": 7.326007326007325e-05, | |
| "loss": 0.6081, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.3407936289990388, | |
| "grad_norm": 0.42373138666152954, | |
| "learning_rate": 7.313797313797313e-05, | |
| "loss": 0.6204, | |
| "step": 2131 | |
| }, | |
| { | |
| "epoch": 2.3418920774406153, | |
| "grad_norm": 0.43958979845046997, | |
| "learning_rate": 7.3015873015873e-05, | |
| "loss": 0.7608, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 2.3429905258821915, | |
| "grad_norm": 0.4493010342121124, | |
| "learning_rate": 7.289377289377289e-05, | |
| "loss": 0.5985, | |
| "step": 2133 | |
| }, | |
| { | |
| "epoch": 2.3440889743237676, | |
| "grad_norm": 0.38533085584640503, | |
| "learning_rate": 7.277167277167276e-05, | |
| "loss": 0.445, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 2.3451874227653438, | |
| "grad_norm": 0.37900710105895996, | |
| "learning_rate": 7.264957264957265e-05, | |
| "loss": 0.8466, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 2.3462858712069203, | |
| "grad_norm": 1.7598285675048828, | |
| "learning_rate": 7.252747252747252e-05, | |
| "loss": 0.6881, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 2.3473843196484965, | |
| "grad_norm": 0.5551338791847229, | |
| "learning_rate": 7.24053724053724e-05, | |
| "loss": 0.5908, | |
| "step": 2137 | |
| }, | |
| { | |
| "epoch": 2.3484827680900726, | |
| "grad_norm": 0.42995861172676086, | |
| "learning_rate": 7.228327228327228e-05, | |
| "loss": 0.689, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 2.349581216531649, | |
| "grad_norm": 0.6428760290145874, | |
| "learning_rate": 7.216117216117216e-05, | |
| "loss": 0.5879, | |
| "step": 2139 | |
| }, | |
| { | |
| "epoch": 2.3506796649732253, | |
| "grad_norm": 0.6199445724487305, | |
| "learning_rate": 7.203907203907203e-05, | |
| "loss": 0.5275, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.3517781134148015, | |
| "grad_norm": 0.4687311053276062, | |
| "learning_rate": 7.19169719169719e-05, | |
| "loss": 0.7046, | |
| "step": 2141 | |
| }, | |
| { | |
| "epoch": 2.352876561856378, | |
| "grad_norm": 0.47645121812820435, | |
| "learning_rate": 7.179487179487179e-05, | |
| "loss": 0.4787, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 2.353975010297954, | |
| "grad_norm": 1.3774843215942383, | |
| "learning_rate": 7.167277167277166e-05, | |
| "loss": 0.565, | |
| "step": 2143 | |
| }, | |
| { | |
| "epoch": 2.3550734587395303, | |
| "grad_norm": 0.9585548043251038, | |
| "learning_rate": 7.155067155067155e-05, | |
| "loss": 0.7496, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 2.356171907181107, | |
| "grad_norm": 0.9073938131332397, | |
| "learning_rate": 7.142857142857142e-05, | |
| "loss": 0.6785, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 2.357270355622683, | |
| "grad_norm": 1.4543087482452393, | |
| "learning_rate": 7.13064713064713e-05, | |
| "loss": 0.4827, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 2.358368804064259, | |
| "grad_norm": 0.49685895442962646, | |
| "learning_rate": 7.118437118437118e-05, | |
| "loss": 0.5624, | |
| "step": 2147 | |
| }, | |
| { | |
| "epoch": 2.3594672525058353, | |
| "grad_norm": 0.3820716142654419, | |
| "learning_rate": 7.106227106227105e-05, | |
| "loss": 0.5326, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 2.360565700947412, | |
| "grad_norm": 0.6018278002738953, | |
| "learning_rate": 7.094017094017094e-05, | |
| "loss": 0.7372, | |
| "step": 2149 | |
| }, | |
| { | |
| "epoch": 2.361664149388988, | |
| "grad_norm": 0.49245381355285645, | |
| "learning_rate": 7.081807081807082e-05, | |
| "loss": 0.714, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.362762597830564, | |
| "grad_norm": 0.5913417339324951, | |
| "learning_rate": 7.069597069597069e-05, | |
| "loss": 0.6395, | |
| "step": 2151 | |
| }, | |
| { | |
| "epoch": 2.3638610462721408, | |
| "grad_norm": 0.3142958879470825, | |
| "learning_rate": 7.057387057387056e-05, | |
| "loss": 0.4363, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 2.364959494713717, | |
| "grad_norm": 0.44251006841659546, | |
| "learning_rate": 7.045177045177044e-05, | |
| "loss": 0.5751, | |
| "step": 2153 | |
| }, | |
| { | |
| "epoch": 2.366057943155293, | |
| "grad_norm": 0.7642143964767456, | |
| "learning_rate": 7.032967032967032e-05, | |
| "loss": 0.9707, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 2.367156391596869, | |
| "grad_norm": 0.3676380217075348, | |
| "learning_rate": 7.020757020757021e-05, | |
| "loss": 0.6142, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 2.3682548400384458, | |
| "grad_norm": 0.43112027645111084, | |
| "learning_rate": 7.008547008547008e-05, | |
| "loss": 0.6194, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 2.369353288480022, | |
| "grad_norm": 0.5463792681694031, | |
| "learning_rate": 6.996336996336996e-05, | |
| "loss": 0.5478, | |
| "step": 2157 | |
| }, | |
| { | |
| "epoch": 2.370451736921598, | |
| "grad_norm": 0.5498053431510925, | |
| "learning_rate": 6.984126984126984e-05, | |
| "loss": 0.8373, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 2.3715501853631746, | |
| "grad_norm": 0.5144299268722534, | |
| "learning_rate": 6.971916971916971e-05, | |
| "loss": 0.7033, | |
| "step": 2159 | |
| }, | |
| { | |
| "epoch": 2.3726486338047508, | |
| "grad_norm": 0.4049033522605896, | |
| "learning_rate": 6.95970695970696e-05, | |
| "loss": 0.6257, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.373747082246327, | |
| "grad_norm": 0.8007866740226746, | |
| "learning_rate": 6.947496947496947e-05, | |
| "loss": 1.1859, | |
| "step": 2161 | |
| }, | |
| { | |
| "epoch": 2.3748455306879035, | |
| "grad_norm": 0.6302816867828369, | |
| "learning_rate": 6.935286935286935e-05, | |
| "loss": 0.4972, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 2.3759439791294796, | |
| "grad_norm": 0.4181542694568634, | |
| "learning_rate": 6.923076923076922e-05, | |
| "loss": 0.5543, | |
| "step": 2163 | |
| }, | |
| { | |
| "epoch": 2.3770424275710558, | |
| "grad_norm": 0.45409703254699707, | |
| "learning_rate": 6.91086691086691e-05, | |
| "loss": 0.6237, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 2.3781408760126324, | |
| "grad_norm": 0.5172666907310486, | |
| "learning_rate": 6.898656898656898e-05, | |
| "loss": 0.5798, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 2.3792393244542085, | |
| "grad_norm": 0.7849127054214478, | |
| "learning_rate": 6.886446886446885e-05, | |
| "loss": 0.8282, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 2.3803377728957846, | |
| "grad_norm": 0.4041041135787964, | |
| "learning_rate": 6.874236874236874e-05, | |
| "loss": 0.5046, | |
| "step": 2167 | |
| }, | |
| { | |
| "epoch": 2.3814362213373608, | |
| "grad_norm": 0.35880064964294434, | |
| "learning_rate": 6.862026862026862e-05, | |
| "loss": 0.4096, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 2.3825346697789374, | |
| "grad_norm": 0.5949457883834839, | |
| "learning_rate": 6.84981684981685e-05, | |
| "loss": 0.6666, | |
| "step": 2169 | |
| }, | |
| { | |
| "epoch": 2.3836331182205135, | |
| "grad_norm": 0.6332186460494995, | |
| "learning_rate": 6.837606837606837e-05, | |
| "loss": 0.9715, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.3847315666620896, | |
| "grad_norm": 0.3173432946205139, | |
| "learning_rate": 6.825396825396824e-05, | |
| "loss": 0.6792, | |
| "step": 2171 | |
| }, | |
| { | |
| "epoch": 2.385830015103666, | |
| "grad_norm": 0.7556782364845276, | |
| "learning_rate": 6.813186813186813e-05, | |
| "loss": 0.7267, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 2.3869284635452424, | |
| "grad_norm": 0.43191683292388916, | |
| "learning_rate": 6.800976800976801e-05, | |
| "loss": 0.5841, | |
| "step": 2173 | |
| }, | |
| { | |
| "epoch": 2.3880269119868185, | |
| "grad_norm": 0.4010660946369171, | |
| "learning_rate": 6.788766788766788e-05, | |
| "loss": 0.7491, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 2.389125360428395, | |
| "grad_norm": 0.6889204382896423, | |
| "learning_rate": 6.776556776556775e-05, | |
| "loss": 0.4539, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 2.390223808869971, | |
| "grad_norm": 0.4509136974811554, | |
| "learning_rate": 6.764346764346764e-05, | |
| "loss": 0.7066, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 2.3913222573115474, | |
| "grad_norm": 0.4313298463821411, | |
| "learning_rate": 6.752136752136751e-05, | |
| "loss": 0.6292, | |
| "step": 2177 | |
| }, | |
| { | |
| "epoch": 2.392420705753124, | |
| "grad_norm": 0.7713265419006348, | |
| "learning_rate": 6.73992673992674e-05, | |
| "loss": 0.8392, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 2.3935191541947, | |
| "grad_norm": 0.5283428430557251, | |
| "learning_rate": 6.727716727716727e-05, | |
| "loss": 0.6912, | |
| "step": 2179 | |
| }, | |
| { | |
| "epoch": 2.394617602636276, | |
| "grad_norm": 0.40429314970970154, | |
| "learning_rate": 6.715506715506716e-05, | |
| "loss": 0.4335, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.3957160510778523, | |
| "grad_norm": 0.6888754367828369, | |
| "learning_rate": 6.703296703296703e-05, | |
| "loss": 0.6276, | |
| "step": 2181 | |
| }, | |
| { | |
| "epoch": 2.396814499519429, | |
| "grad_norm": 0.5595026612281799, | |
| "learning_rate": 6.69108669108669e-05, | |
| "loss": 0.7806, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 2.397912947961005, | |
| "grad_norm": 0.32394587993621826, | |
| "learning_rate": 6.678876678876678e-05, | |
| "loss": 0.5531, | |
| "step": 2183 | |
| }, | |
| { | |
| "epoch": 2.399011396402581, | |
| "grad_norm": 0.5909039974212646, | |
| "learning_rate": 6.666666666666666e-05, | |
| "loss": 0.4932, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 2.400109844844158, | |
| "grad_norm": 0.4148501455783844, | |
| "learning_rate": 6.654456654456654e-05, | |
| "loss": 0.5637, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 2.401208293285734, | |
| "grad_norm": 0.558403491973877, | |
| "learning_rate": 6.642246642246641e-05, | |
| "loss": 0.5733, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 2.40230674172731, | |
| "grad_norm": 0.5171149373054504, | |
| "learning_rate": 6.630036630036629e-05, | |
| "loss": 0.6931, | |
| "step": 2187 | |
| }, | |
| { | |
| "epoch": 2.403405190168886, | |
| "grad_norm": 0.44966164231300354, | |
| "learning_rate": 6.617826617826617e-05, | |
| "loss": 0.5061, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 2.404503638610463, | |
| "grad_norm": 0.45499417185783386, | |
| "learning_rate": 6.605616605616606e-05, | |
| "loss": 0.3726, | |
| "step": 2189 | |
| }, | |
| { | |
| "epoch": 2.405602087052039, | |
| "grad_norm": 0.5790139436721802, | |
| "learning_rate": 6.593406593406593e-05, | |
| "loss": 0.6647, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.4067005354936155, | |
| "grad_norm": 0.5948793292045593, | |
| "learning_rate": 6.581196581196581e-05, | |
| "loss": 0.765, | |
| "step": 2191 | |
| }, | |
| { | |
| "epoch": 2.4077989839351917, | |
| "grad_norm": 0.5925643444061279, | |
| "learning_rate": 6.568986568986569e-05, | |
| "loss": 0.889, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 2.408897432376768, | |
| "grad_norm": 0.5776219964027405, | |
| "learning_rate": 6.556776556776556e-05, | |
| "loss": 0.5506, | |
| "step": 2193 | |
| }, | |
| { | |
| "epoch": 2.409995880818344, | |
| "grad_norm": 0.44397997856140137, | |
| "learning_rate": 6.544566544566544e-05, | |
| "loss": 0.5372, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 2.4110943292599205, | |
| "grad_norm": 0.45733606815338135, | |
| "learning_rate": 6.532356532356532e-05, | |
| "loss": 0.7207, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 2.4121927777014966, | |
| "grad_norm": 0.38223645091056824, | |
| "learning_rate": 6.52014652014652e-05, | |
| "loss": 0.5888, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 2.413291226143073, | |
| "grad_norm": 0.3642580211162567, | |
| "learning_rate": 6.507936507936507e-05, | |
| "loss": 0.5687, | |
| "step": 2197 | |
| }, | |
| { | |
| "epoch": 2.4143896745846494, | |
| "grad_norm": 0.42435723543167114, | |
| "learning_rate": 6.495726495726494e-05, | |
| "loss": 0.6056, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 2.4154881230262255, | |
| "grad_norm": 0.4998740255832672, | |
| "learning_rate": 6.483516483516483e-05, | |
| "loss": 0.6813, | |
| "step": 2199 | |
| }, | |
| { | |
| "epoch": 2.4165865714678016, | |
| "grad_norm": 0.47158849239349365, | |
| "learning_rate": 6.47130647130647e-05, | |
| "loss": 0.5585, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.417685019909378, | |
| "grad_norm": 0.4780612289905548, | |
| "learning_rate": 6.459096459096459e-05, | |
| "loss": 0.4941, | |
| "step": 2201 | |
| }, | |
| { | |
| "epoch": 2.4187834683509544, | |
| "grad_norm": 0.5073630809783936, | |
| "learning_rate": 6.446886446886447e-05, | |
| "loss": 0.4549, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 2.4198819167925305, | |
| "grad_norm": 0.4311310052871704, | |
| "learning_rate": 6.434676434676435e-05, | |
| "loss": 0.4419, | |
| "step": 2203 | |
| }, | |
| { | |
| "epoch": 2.4209803652341066, | |
| "grad_norm": 0.3557896316051483, | |
| "learning_rate": 6.422466422466422e-05, | |
| "loss": 0.6973, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 2.4220788136756832, | |
| "grad_norm": 0.6171516180038452, | |
| "learning_rate": 6.410256410256409e-05, | |
| "loss": 0.7554, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 2.4231772621172594, | |
| "grad_norm": 0.4687957465648651, | |
| "learning_rate": 6.398046398046397e-05, | |
| "loss": 0.7429, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 2.4242757105588355, | |
| "grad_norm": 0.8685696125030518, | |
| "learning_rate": 6.385836385836386e-05, | |
| "loss": 0.5896, | |
| "step": 2207 | |
| }, | |
| { | |
| "epoch": 2.425374159000412, | |
| "grad_norm": 0.39599040150642395, | |
| "learning_rate": 6.373626373626373e-05, | |
| "loss": 0.4744, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 2.4264726074419882, | |
| "grad_norm": 0.9079630970954895, | |
| "learning_rate": 6.36141636141636e-05, | |
| "loss": 0.6067, | |
| "step": 2209 | |
| }, | |
| { | |
| "epoch": 2.4275710558835644, | |
| "grad_norm": 0.5051462054252625, | |
| "learning_rate": 6.349206349206349e-05, | |
| "loss": 0.7314, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.428669504325141, | |
| "grad_norm": 0.4899844825267792, | |
| "learning_rate": 6.336996336996336e-05, | |
| "loss": 0.7086, | |
| "step": 2211 | |
| }, | |
| { | |
| "epoch": 2.429767952766717, | |
| "grad_norm": 0.5135432481765747, | |
| "learning_rate": 6.324786324786325e-05, | |
| "loss": 0.5261, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 2.4308664012082932, | |
| "grad_norm": 0.6025048494338989, | |
| "learning_rate": 6.312576312576312e-05, | |
| "loss": 0.5276, | |
| "step": 2213 | |
| }, | |
| { | |
| "epoch": 2.4319648496498694, | |
| "grad_norm": 0.6931442022323608, | |
| "learning_rate": 6.3003663003663e-05, | |
| "loss": 0.6535, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 2.433063298091446, | |
| "grad_norm": 0.695106565952301, | |
| "learning_rate": 6.288156288156288e-05, | |
| "loss": 0.9183, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 2.434161746533022, | |
| "grad_norm": 0.450100302696228, | |
| "learning_rate": 6.275946275946275e-05, | |
| "loss": 0.5049, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 2.4352601949745982, | |
| "grad_norm": 0.5539785623550415, | |
| "learning_rate": 6.263736263736263e-05, | |
| "loss": 0.5735, | |
| "step": 2217 | |
| }, | |
| { | |
| "epoch": 2.436358643416175, | |
| "grad_norm": 0.5560977458953857, | |
| "learning_rate": 6.25152625152625e-05, | |
| "loss": 0.7364, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 2.437457091857751, | |
| "grad_norm": 0.740195095539093, | |
| "learning_rate": 6.239316239316239e-05, | |
| "loss": 0.7839, | |
| "step": 2219 | |
| }, | |
| { | |
| "epoch": 2.438555540299327, | |
| "grad_norm": 0.9324271082878113, | |
| "learning_rate": 6.227106227106226e-05, | |
| "loss": 0.6365, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.4396539887409037, | |
| "grad_norm": 0.5540104508399963, | |
| "learning_rate": 6.214896214896215e-05, | |
| "loss": 0.6586, | |
| "step": 2221 | |
| }, | |
| { | |
| "epoch": 2.44075243718248, | |
| "grad_norm": 0.5028054714202881, | |
| "learning_rate": 6.202686202686202e-05, | |
| "loss": 0.4422, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 2.441850885624056, | |
| "grad_norm": 0.7052125930786133, | |
| "learning_rate": 6.190476190476189e-05, | |
| "loss": 0.7248, | |
| "step": 2223 | |
| }, | |
| { | |
| "epoch": 2.4429493340656325, | |
| "grad_norm": 0.6705207824707031, | |
| "learning_rate": 6.178266178266178e-05, | |
| "loss": 0.81, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 2.4440477825072087, | |
| "grad_norm": 0.7996514439582825, | |
| "learning_rate": 6.166056166056166e-05, | |
| "loss": 0.382, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 2.445146230948785, | |
| "grad_norm": 1.5169689655303955, | |
| "learning_rate": 6.153846153846154e-05, | |
| "loss": 0.7373, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 2.446244679390361, | |
| "grad_norm": 0.8039339780807495, | |
| "learning_rate": 6.141636141636141e-05, | |
| "loss": 0.8609, | |
| "step": 2227 | |
| }, | |
| { | |
| "epoch": 2.4473431278319375, | |
| "grad_norm": 0.6489125490188599, | |
| "learning_rate": 6.129426129426128e-05, | |
| "loss": 0.6309, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 2.4484415762735137, | |
| "grad_norm": 0.533184826374054, | |
| "learning_rate": 6.117216117216116e-05, | |
| "loss": 0.5166, | |
| "step": 2229 | |
| }, | |
| { | |
| "epoch": 2.44954002471509, | |
| "grad_norm": 0.5699225068092346, | |
| "learning_rate": 6.105006105006105e-05, | |
| "loss": 0.7276, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.4506384731566664, | |
| "grad_norm": 0.5552012324333191, | |
| "learning_rate": 6.092796092796092e-05, | |
| "loss": 0.636, | |
| "step": 2231 | |
| }, | |
| { | |
| "epoch": 2.4517369215982425, | |
| "grad_norm": 0.4785599112510681, | |
| "learning_rate": 6.08058608058608e-05, | |
| "loss": 0.6362, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 2.4528353700398187, | |
| "grad_norm": 0.740872859954834, | |
| "learning_rate": 6.068376068376068e-05, | |
| "loss": 0.5603, | |
| "step": 2233 | |
| }, | |
| { | |
| "epoch": 2.453933818481395, | |
| "grad_norm": 0.5217441916465759, | |
| "learning_rate": 6.056166056166056e-05, | |
| "loss": 0.6306, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 2.4550322669229714, | |
| "grad_norm": 0.446481853723526, | |
| "learning_rate": 6.043956043956044e-05, | |
| "loss": 0.8156, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 2.4561307153645475, | |
| "grad_norm": 0.6527410745620728, | |
| "learning_rate": 6.031746031746031e-05, | |
| "loss": 0.7057, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 2.4572291638061237, | |
| "grad_norm": 0.6801958680152893, | |
| "learning_rate": 6.019536019536019e-05, | |
| "loss": 0.7718, | |
| "step": 2237 | |
| }, | |
| { | |
| "epoch": 2.4583276122477002, | |
| "grad_norm": 1.0723007917404175, | |
| "learning_rate": 6.007326007326007e-05, | |
| "loss": 0.5552, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 2.4594260606892764, | |
| "grad_norm": 0.4058208763599396, | |
| "learning_rate": 5.9951159951159945e-05, | |
| "loss": 0.5035, | |
| "step": 2239 | |
| }, | |
| { | |
| "epoch": 2.4605245091308525, | |
| "grad_norm": 0.5384330153465271, | |
| "learning_rate": 5.9829059829059824e-05, | |
| "loss": 0.5059, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.461622957572429, | |
| "grad_norm": 0.7797716856002808, | |
| "learning_rate": 5.9706959706959696e-05, | |
| "loss": 0.5613, | |
| "step": 2241 | |
| }, | |
| { | |
| "epoch": 2.4627214060140052, | |
| "grad_norm": 2.9689226150512695, | |
| "learning_rate": 5.958485958485958e-05, | |
| "loss": 0.6219, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 2.4638198544555814, | |
| "grad_norm": 0.47863152623176575, | |
| "learning_rate": 5.946275946275946e-05, | |
| "loss": 0.5498, | |
| "step": 2243 | |
| }, | |
| { | |
| "epoch": 2.464918302897158, | |
| "grad_norm": 0.49707144498825073, | |
| "learning_rate": 5.934065934065933e-05, | |
| "loss": 0.775, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 2.466016751338734, | |
| "grad_norm": 0.3437495529651642, | |
| "learning_rate": 5.921855921855922e-05, | |
| "loss": 0.4592, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 2.4671151997803102, | |
| "grad_norm": 0.7298309206962585, | |
| "learning_rate": 5.9096459096459096e-05, | |
| "loss": 0.5374, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 2.4682136482218864, | |
| "grad_norm": 0.6666691303253174, | |
| "learning_rate": 5.897435897435897e-05, | |
| "loss": 0.424, | |
| "step": 2247 | |
| }, | |
| { | |
| "epoch": 2.469312096663463, | |
| "grad_norm": 0.5841661691665649, | |
| "learning_rate": 5.8852258852258847e-05, | |
| "loss": 0.5316, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 2.470410545105039, | |
| "grad_norm": 0.4921081066131592, | |
| "learning_rate": 5.873015873015872e-05, | |
| "loss": 0.6901, | |
| "step": 2249 | |
| }, | |
| { | |
| "epoch": 2.4715089935466152, | |
| "grad_norm": 0.4779987633228302, | |
| "learning_rate": 5.8608058608058604e-05, | |
| "loss": 0.8976, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.472607441988192, | |
| "grad_norm": 0.43142780661582947, | |
| "learning_rate": 5.848595848595848e-05, | |
| "loss": 0.4915, | |
| "step": 2251 | |
| }, | |
| { | |
| "epoch": 2.473705890429768, | |
| "grad_norm": 1.132870078086853, | |
| "learning_rate": 5.8363858363858355e-05, | |
| "loss": 0.6633, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 2.474804338871344, | |
| "grad_norm": 0.5674893856048584, | |
| "learning_rate": 5.824175824175824e-05, | |
| "loss": 0.5023, | |
| "step": 2253 | |
| }, | |
| { | |
| "epoch": 2.4759027873129207, | |
| "grad_norm": 0.42495957016944885, | |
| "learning_rate": 5.811965811965811e-05, | |
| "loss": 0.6544, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 2.477001235754497, | |
| "grad_norm": 0.8031434416770935, | |
| "learning_rate": 5.799755799755799e-05, | |
| "loss": 0.892, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 2.478099684196073, | |
| "grad_norm": 0.7715115547180176, | |
| "learning_rate": 5.7875457875457876e-05, | |
| "loss": 0.5659, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 2.4791981326376495, | |
| "grad_norm": 0.6882114410400391, | |
| "learning_rate": 5.775335775335775e-05, | |
| "loss": 0.5154, | |
| "step": 2257 | |
| }, | |
| { | |
| "epoch": 2.4802965810792257, | |
| "grad_norm": 0.4994114935398102, | |
| "learning_rate": 5.763125763125763e-05, | |
| "loss": 0.6001, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 2.481395029520802, | |
| "grad_norm": 0.45008450746536255, | |
| "learning_rate": 5.7509157509157506e-05, | |
| "loss": 0.7076, | |
| "step": 2259 | |
| }, | |
| { | |
| "epoch": 2.482493477962378, | |
| "grad_norm": 0.654270350933075, | |
| "learning_rate": 5.738705738705738e-05, | |
| "loss": 0.5809, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.4835919264039545, | |
| "grad_norm": 0.6344896554946899, | |
| "learning_rate": 5.726495726495726e-05, | |
| "loss": 0.6059, | |
| "step": 2261 | |
| }, | |
| { | |
| "epoch": 2.4846903748455307, | |
| "grad_norm": 0.44090238213539124, | |
| "learning_rate": 5.7142857142857135e-05, | |
| "loss": 0.7953, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 2.485788823287107, | |
| "grad_norm": 0.47564128041267395, | |
| "learning_rate": 5.7020757020757014e-05, | |
| "loss": 0.5062, | |
| "step": 2263 | |
| }, | |
| { | |
| "epoch": 2.4868872717286834, | |
| "grad_norm": 0.3644583225250244, | |
| "learning_rate": 5.68986568986569e-05, | |
| "loss": 0.6417, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 2.4879857201702595, | |
| "grad_norm": 0.5264548659324646, | |
| "learning_rate": 5.677655677655677e-05, | |
| "loss": 0.5971, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 2.4890841686118357, | |
| "grad_norm": 0.7300589680671692, | |
| "learning_rate": 5.665445665445665e-05, | |
| "loss": 0.6249, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 2.490182617053412, | |
| "grad_norm": 0.9016311764717102, | |
| "learning_rate": 5.653235653235652e-05, | |
| "loss": 0.5761, | |
| "step": 2267 | |
| }, | |
| { | |
| "epoch": 2.4912810654949884, | |
| "grad_norm": 0.7480237483978271, | |
| "learning_rate": 5.641025641025641e-05, | |
| "loss": 0.4026, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 2.4923795139365645, | |
| "grad_norm": 0.5738864541053772, | |
| "learning_rate": 5.6288156288156286e-05, | |
| "loss": 0.8657, | |
| "step": 2269 | |
| }, | |
| { | |
| "epoch": 2.493477962378141, | |
| "grad_norm": 0.7320820093154907, | |
| "learning_rate": 5.616605616605616e-05, | |
| "loss": 0.7341, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.4945764108197173, | |
| "grad_norm": 0.7029497623443604, | |
| "learning_rate": 5.6043956043956037e-05, | |
| "loss": 0.7597, | |
| "step": 2271 | |
| }, | |
| { | |
| "epoch": 2.4956748592612934, | |
| "grad_norm": 0.5160001516342163, | |
| "learning_rate": 5.592185592185592e-05, | |
| "loss": 0.6488, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 2.4967733077028695, | |
| "grad_norm": 0.5425933003425598, | |
| "learning_rate": 5.5799755799755794e-05, | |
| "loss": 0.7102, | |
| "step": 2273 | |
| }, | |
| { | |
| "epoch": 2.497871756144446, | |
| "grad_norm": 0.5881295204162598, | |
| "learning_rate": 5.567765567765567e-05, | |
| "loss": 0.8123, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 2.4989702045860223, | |
| "grad_norm": 0.6021397113800049, | |
| "learning_rate": 5.5555555555555545e-05, | |
| "loss": 0.8887, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 2.5000686530275984, | |
| "grad_norm": 0.4754411578178406, | |
| "learning_rate": 5.543345543345543e-05, | |
| "loss": 0.8162, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 2.501167101469175, | |
| "grad_norm": 0.46976983547210693, | |
| "learning_rate": 5.531135531135531e-05, | |
| "loss": 0.4177, | |
| "step": 2277 | |
| }, | |
| { | |
| "epoch": 2.502265549910751, | |
| "grad_norm": 0.4946482181549072, | |
| "learning_rate": 5.518925518925518e-05, | |
| "loss": 0.6997, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 2.5033639983523273, | |
| "grad_norm": 0.49166280031204224, | |
| "learning_rate": 5.5067155067155066e-05, | |
| "loss": 0.6436, | |
| "step": 2279 | |
| }, | |
| { | |
| "epoch": 2.5044624467939034, | |
| "grad_norm": 0.40157628059387207, | |
| "learning_rate": 5.494505494505494e-05, | |
| "loss": 0.6998, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.50556089523548, | |
| "grad_norm": 0.4139937162399292, | |
| "learning_rate": 5.482295482295482e-05, | |
| "loss": 0.4021, | |
| "step": 2281 | |
| }, | |
| { | |
| "epoch": 2.506659343677056, | |
| "grad_norm": 3.6814892292022705, | |
| "learning_rate": 5.4700854700854696e-05, | |
| "loss": 0.6402, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 2.5077577921186327, | |
| "grad_norm": 0.3136257529258728, | |
| "learning_rate": 5.4578754578754574e-05, | |
| "loss": 0.5364, | |
| "step": 2283 | |
| }, | |
| { | |
| "epoch": 2.508856240560209, | |
| "grad_norm": 0.42901432514190674, | |
| "learning_rate": 5.445665445665445e-05, | |
| "loss": 0.6838, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 2.509954689001785, | |
| "grad_norm": 0.8462406992912292, | |
| "learning_rate": 5.433455433455433e-05, | |
| "loss": 0.4232, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 2.511053137443361, | |
| "grad_norm": 1.244150996208191, | |
| "learning_rate": 5.4212454212454204e-05, | |
| "loss": 0.6192, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 2.5121515858849373, | |
| "grad_norm": 0.834296703338623, | |
| "learning_rate": 5.409035409035409e-05, | |
| "loss": 0.548, | |
| "step": 2287 | |
| }, | |
| { | |
| "epoch": 2.513250034326514, | |
| "grad_norm": 0.4279276430606842, | |
| "learning_rate": 5.396825396825396e-05, | |
| "loss": 0.7549, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 2.51434848276809, | |
| "grad_norm": 0.5770757794380188, | |
| "learning_rate": 5.384615384615384e-05, | |
| "loss": 0.6156, | |
| "step": 2289 | |
| }, | |
| { | |
| "epoch": 2.5154469312096666, | |
| "grad_norm": 0.41763821244239807, | |
| "learning_rate": 5.3724053724053725e-05, | |
| "loss": 0.5019, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.5165453796512427, | |
| "grad_norm": 0.5212944746017456, | |
| "learning_rate": 5.36019536019536e-05, | |
| "loss": 0.6132, | |
| "step": 2291 | |
| }, | |
| { | |
| "epoch": 2.517643828092819, | |
| "grad_norm": 0.44493457674980164, | |
| "learning_rate": 5.3479853479853476e-05, | |
| "loss": 0.4162, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 2.518742276534395, | |
| "grad_norm": 0.46922022104263306, | |
| "learning_rate": 5.335775335775335e-05, | |
| "loss": 0.4624, | |
| "step": 2293 | |
| }, | |
| { | |
| "epoch": 2.5198407249759716, | |
| "grad_norm": 0.41906213760375977, | |
| "learning_rate": 5.3235653235653233e-05, | |
| "loss": 0.612, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 2.5209391734175477, | |
| "grad_norm": 0.620276153087616, | |
| "learning_rate": 5.311355311355311e-05, | |
| "loss": 0.6322, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 2.522037621859124, | |
| "grad_norm": 0.6597051620483398, | |
| "learning_rate": 5.2991452991452984e-05, | |
| "loss": 0.7659, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 2.5231360703007004, | |
| "grad_norm": 4.377660274505615, | |
| "learning_rate": 5.286935286935286e-05, | |
| "loss": 0.8294, | |
| "step": 2297 | |
| }, | |
| { | |
| "epoch": 2.5242345187422766, | |
| "grad_norm": 0.6086331009864807, | |
| "learning_rate": 5.274725274725275e-05, | |
| "loss": 0.5164, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 2.5253329671838527, | |
| "grad_norm": 0.5100352168083191, | |
| "learning_rate": 5.262515262515262e-05, | |
| "loss": 0.6319, | |
| "step": 2299 | |
| }, | |
| { | |
| "epoch": 2.526431415625429, | |
| "grad_norm": 0.6642487049102783, | |
| "learning_rate": 5.25030525030525e-05, | |
| "loss": 0.533, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.5275298640670054, | |
| "grad_norm": 0.5834927558898926, | |
| "learning_rate": 5.238095238095237e-05, | |
| "loss": 0.5669, | |
| "step": 2301 | |
| }, | |
| { | |
| "epoch": 2.5286283125085816, | |
| "grad_norm": 0.530815064907074, | |
| "learning_rate": 5.2258852258852256e-05, | |
| "loss": 0.6189, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 2.529726760950158, | |
| "grad_norm": 0.6275864243507385, | |
| "learning_rate": 5.2136752136752135e-05, | |
| "loss": 0.8403, | |
| "step": 2303 | |
| }, | |
| { | |
| "epoch": 2.5308252093917343, | |
| "grad_norm": 0.5878366827964783, | |
| "learning_rate": 5.201465201465201e-05, | |
| "loss": 0.6176, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 2.5319236578333104, | |
| "grad_norm": 0.37410980463027954, | |
| "learning_rate": 5.189255189255189e-05, | |
| "loss": 0.6337, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 2.5330221062748866, | |
| "grad_norm": 0.43912917375564575, | |
| "learning_rate": 5.1770451770451764e-05, | |
| "loss": 0.5348, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 2.534120554716463, | |
| "grad_norm": 1.4737471342086792, | |
| "learning_rate": 5.164835164835164e-05, | |
| "loss": 0.4862, | |
| "step": 2307 | |
| }, | |
| { | |
| "epoch": 2.5352190031580393, | |
| "grad_norm": 0.3978705108165741, | |
| "learning_rate": 5.152625152625152e-05, | |
| "loss": 0.7929, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 2.5363174515996154, | |
| "grad_norm": 0.3852058947086334, | |
| "learning_rate": 5.14041514041514e-05, | |
| "loss": 0.5895, | |
| "step": 2309 | |
| }, | |
| { | |
| "epoch": 2.537415900041192, | |
| "grad_norm": 17.968448638916016, | |
| "learning_rate": 5.128205128205128e-05, | |
| "loss": 0.4661, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.538514348482768, | |
| "grad_norm": 0.9369175434112549, | |
| "learning_rate": 5.115995115995115e-05, | |
| "loss": 0.5957, | |
| "step": 2311 | |
| }, | |
| { | |
| "epoch": 2.5396127969243443, | |
| "grad_norm": 0.612750768661499, | |
| "learning_rate": 5.103785103785103e-05, | |
| "loss": 0.6786, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 2.5407112453659204, | |
| "grad_norm": 0.588512659072876, | |
| "learning_rate": 5.0915750915750915e-05, | |
| "loss": 1.0482, | |
| "step": 2313 | |
| }, | |
| { | |
| "epoch": 2.541809693807497, | |
| "grad_norm": 0.4964143633842468, | |
| "learning_rate": 5.079365079365079e-05, | |
| "loss": 0.5673, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 2.542908142249073, | |
| "grad_norm": 0.5807982683181763, | |
| "learning_rate": 5.0671550671550666e-05, | |
| "loss": 0.5493, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 2.5440065906906497, | |
| "grad_norm": 0.5131386518478394, | |
| "learning_rate": 5.054945054945055e-05, | |
| "loss": 0.5947, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 2.545105039132226, | |
| "grad_norm": 0.4521124064922333, | |
| "learning_rate": 5.0427350427350424e-05, | |
| "loss": 0.5554, | |
| "step": 2317 | |
| }, | |
| { | |
| "epoch": 2.546203487573802, | |
| "grad_norm": 0.9441378712654114, | |
| "learning_rate": 5.03052503052503e-05, | |
| "loss": 0.6991, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 2.547301936015378, | |
| "grad_norm": 0.6353013515472412, | |
| "learning_rate": 5.0183150183150174e-05, | |
| "loss": 0.5308, | |
| "step": 2319 | |
| }, | |
| { | |
| "epoch": 2.5484003844569547, | |
| "grad_norm": 0.5940631628036499, | |
| "learning_rate": 5.006105006105006e-05, | |
| "loss": 0.6536, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.549498832898531, | |
| "grad_norm": 0.5457591414451599, | |
| "learning_rate": 4.993894993894994e-05, | |
| "loss": 0.6927, | |
| "step": 2321 | |
| }, | |
| { | |
| "epoch": 2.550597281340107, | |
| "grad_norm": 0.6265937685966492, | |
| "learning_rate": 4.981684981684981e-05, | |
| "loss": 0.6341, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 2.5516957297816836, | |
| "grad_norm": 0.5842925310134888, | |
| "learning_rate": 4.969474969474969e-05, | |
| "loss": 0.4583, | |
| "step": 2323 | |
| }, | |
| { | |
| "epoch": 2.5527941782232597, | |
| "grad_norm": 0.5363351106643677, | |
| "learning_rate": 4.957264957264956e-05, | |
| "loss": 0.6882, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 2.553892626664836, | |
| "grad_norm": 0.3677682876586914, | |
| "learning_rate": 4.9450549450549446e-05, | |
| "loss": 0.5671, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 2.554991075106412, | |
| "grad_norm": 1.222985863685608, | |
| "learning_rate": 4.9328449328449325e-05, | |
| "loss": 0.4936, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 2.5560895235479886, | |
| "grad_norm": 1.187898874282837, | |
| "learning_rate": 4.92063492063492e-05, | |
| "loss": 0.4893, | |
| "step": 2327 | |
| }, | |
| { | |
| "epoch": 2.5571879719895647, | |
| "grad_norm": 0.38843801617622375, | |
| "learning_rate": 4.908424908424908e-05, | |
| "loss": 0.6512, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 2.558286420431141, | |
| "grad_norm": 0.9550036191940308, | |
| "learning_rate": 4.896214896214896e-05, | |
| "loss": 0.6055, | |
| "step": 2329 | |
| }, | |
| { | |
| "epoch": 2.5593848688727174, | |
| "grad_norm": 0.80762779712677, | |
| "learning_rate": 4.884004884004883e-05, | |
| "loss": 0.8852, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.5604833173142936, | |
| "grad_norm": 0.7496643662452698, | |
| "learning_rate": 4.871794871794872e-05, | |
| "loss": 0.6535, | |
| "step": 2331 | |
| }, | |
| { | |
| "epoch": 2.5615817657558697, | |
| "grad_norm": 0.5532578825950623, | |
| "learning_rate": 4.859584859584859e-05, | |
| "loss": 0.6336, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 2.562680214197446, | |
| "grad_norm": 0.4058012366294861, | |
| "learning_rate": 4.847374847374847e-05, | |
| "loss": 0.6529, | |
| "step": 2333 | |
| }, | |
| { | |
| "epoch": 2.5637786626390224, | |
| "grad_norm": 3.1913115978240967, | |
| "learning_rate": 4.835164835164835e-05, | |
| "loss": 0.548, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 2.5648771110805986, | |
| "grad_norm": 0.47375988960266113, | |
| "learning_rate": 4.822954822954822e-05, | |
| "loss": 0.7567, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 2.565975559522175, | |
| "grad_norm": 0.5287726521492004, | |
| "learning_rate": 4.8107448107448106e-05, | |
| "loss": 0.6009, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 2.5670740079637513, | |
| "grad_norm": 0.43966931104660034, | |
| "learning_rate": 4.798534798534798e-05, | |
| "loss": 0.5538, | |
| "step": 2337 | |
| }, | |
| { | |
| "epoch": 2.5681724564053274, | |
| "grad_norm": 0.6683239340782166, | |
| "learning_rate": 4.7863247863247856e-05, | |
| "loss": 0.3999, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 2.5692709048469036, | |
| "grad_norm": 0.5260687470436096, | |
| "learning_rate": 4.774114774114774e-05, | |
| "loss": 0.7212, | |
| "step": 2339 | |
| }, | |
| { | |
| "epoch": 2.57036935328848, | |
| "grad_norm": 1.086850881576538, | |
| "learning_rate": 4.7619047619047614e-05, | |
| "loss": 0.7439, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.5714678017300563, | |
| "grad_norm": 0.9744517207145691, | |
| "learning_rate": 4.749694749694749e-05, | |
| "loss": 0.5625, | |
| "step": 2341 | |
| }, | |
| { | |
| "epoch": 2.5725662501716324, | |
| "grad_norm": 0.6829352974891663, | |
| "learning_rate": 4.737484737484738e-05, | |
| "loss": 0.5241, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 2.573664698613209, | |
| "grad_norm": 0.9441612958908081, | |
| "learning_rate": 4.725274725274725e-05, | |
| "loss": 0.8815, | |
| "step": 2343 | |
| }, | |
| { | |
| "epoch": 2.574763147054785, | |
| "grad_norm": 0.9406607151031494, | |
| "learning_rate": 4.713064713064713e-05, | |
| "loss": 0.7176, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 2.5758615954963613, | |
| "grad_norm": 0.6601364016532898, | |
| "learning_rate": 4.7008547008547e-05, | |
| "loss": 0.7713, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 2.5769600439379374, | |
| "grad_norm": 2.5189599990844727, | |
| "learning_rate": 4.688644688644688e-05, | |
| "loss": 0.5572, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 2.578058492379514, | |
| "grad_norm": 0.7295210957527161, | |
| "learning_rate": 4.6764346764346765e-05, | |
| "loss": 0.4431, | |
| "step": 2347 | |
| }, | |
| { | |
| "epoch": 2.57915694082109, | |
| "grad_norm": 0.5053385496139526, | |
| "learning_rate": 4.6642246642246637e-05, | |
| "loss": 0.4881, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 2.5802553892626667, | |
| "grad_norm": 0.6556063890457153, | |
| "learning_rate": 4.6520146520146515e-05, | |
| "loss": 0.5168, | |
| "step": 2349 | |
| }, | |
| { | |
| "epoch": 2.581353837704243, | |
| "grad_norm": 0.37052014470100403, | |
| "learning_rate": 4.639804639804639e-05, | |
| "loss": 0.3954, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.582452286145819, | |
| "grad_norm": 0.5975561738014221, | |
| "learning_rate": 4.627594627594627e-05, | |
| "loss": 0.5714, | |
| "step": 2351 | |
| }, | |
| { | |
| "epoch": 2.583550734587395, | |
| "grad_norm": 0.7273014187812805, | |
| "learning_rate": 4.615384615384615e-05, | |
| "loss": 0.7287, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 2.5846491830289717, | |
| "grad_norm": 0.566586971282959, | |
| "learning_rate": 4.603174603174602e-05, | |
| "loss": 0.5589, | |
| "step": 2353 | |
| }, | |
| { | |
| "epoch": 2.585747631470548, | |
| "grad_norm": 0.5846517086029053, | |
| "learning_rate": 4.590964590964591e-05, | |
| "loss": 0.5061, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 2.586846079912124, | |
| "grad_norm": 0.7470859885215759, | |
| "learning_rate": 4.578754578754579e-05, | |
| "loss": 0.5433, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 2.5879445283537006, | |
| "grad_norm": 0.5419175624847412, | |
| "learning_rate": 4.566544566544566e-05, | |
| "loss": 0.5502, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 2.5890429767952767, | |
| "grad_norm": 1.507851004600525, | |
| "learning_rate": 4.554334554334554e-05, | |
| "loss": 0.7399, | |
| "step": 2357 | |
| }, | |
| { | |
| "epoch": 2.590141425236853, | |
| "grad_norm": 1.4420006275177002, | |
| "learning_rate": 4.542124542124542e-05, | |
| "loss": 0.4233, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 2.591239873678429, | |
| "grad_norm": 0.6471789479255676, | |
| "learning_rate": 4.5299145299145296e-05, | |
| "loss": 0.4052, | |
| "step": 2359 | |
| }, | |
| { | |
| "epoch": 2.5923383221200056, | |
| "grad_norm": 0.5886567831039429, | |
| "learning_rate": 4.5177045177045174e-05, | |
| "loss": 0.7197, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.5934367705615817, | |
| "grad_norm": 0.843024492263794, | |
| "learning_rate": 4.5054945054945046e-05, | |
| "loss": 0.7636, | |
| "step": 2361 | |
| }, | |
| { | |
| "epoch": 2.5945352190031583, | |
| "grad_norm": 0.8689064979553223, | |
| "learning_rate": 4.493284493284493e-05, | |
| "loss": 0.6694, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 2.5956336674447344, | |
| "grad_norm": 0.5112485289573669, | |
| "learning_rate": 4.4810744810744804e-05, | |
| "loss": 0.5338, | |
| "step": 2363 | |
| }, | |
| { | |
| "epoch": 2.5967321158863106, | |
| "grad_norm": 0.4828614294528961, | |
| "learning_rate": 4.468864468864468e-05, | |
| "loss": 0.8519, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 2.5978305643278867, | |
| "grad_norm": 0.5644575357437134, | |
| "learning_rate": 4.456654456654457e-05, | |
| "loss": 0.5605, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 2.598929012769463, | |
| "grad_norm": 0.7749584913253784, | |
| "learning_rate": 4.444444444444444e-05, | |
| "loss": 0.6697, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 2.6000274612110394, | |
| "grad_norm": 0.9038271307945251, | |
| "learning_rate": 4.432234432234432e-05, | |
| "loss": 0.7242, | |
| "step": 2367 | |
| }, | |
| { | |
| "epoch": 2.6011259096526156, | |
| "grad_norm": 0.5102944374084473, | |
| "learning_rate": 4.42002442002442e-05, | |
| "loss": 0.5841, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 2.602224358094192, | |
| "grad_norm": 0.5072823762893677, | |
| "learning_rate": 4.4078144078144076e-05, | |
| "loss": 0.4927, | |
| "step": 2369 | |
| }, | |
| { | |
| "epoch": 2.6033228065357683, | |
| "grad_norm": 0.3654184341430664, | |
| "learning_rate": 4.3956043956043955e-05, | |
| "loss": 0.6449, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.6044212549773444, | |
| "grad_norm": 1.7309939861297607, | |
| "learning_rate": 4.3833943833943827e-05, | |
| "loss": 0.6979, | |
| "step": 2371 | |
| }, | |
| { | |
| "epoch": 2.6055197034189206, | |
| "grad_norm": 0.7982075214385986, | |
| "learning_rate": 4.3711843711843705e-05, | |
| "loss": 0.6589, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 2.606618151860497, | |
| "grad_norm": 0.6989462375640869, | |
| "learning_rate": 4.358974358974359e-05, | |
| "loss": 0.7104, | |
| "step": 2373 | |
| }, | |
| { | |
| "epoch": 2.6077166003020733, | |
| "grad_norm": 0.7331676483154297, | |
| "learning_rate": 4.346764346764346e-05, | |
| "loss": 0.7565, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 2.6088150487436494, | |
| "grad_norm": 1.0566400289535522, | |
| "learning_rate": 4.334554334554334e-05, | |
| "loss": 0.6967, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 2.609913497185226, | |
| "grad_norm": 0.5988017320632935, | |
| "learning_rate": 4.322344322344321e-05, | |
| "loss": 0.7871, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 2.611011945626802, | |
| "grad_norm": 0.4248102307319641, | |
| "learning_rate": 4.31013431013431e-05, | |
| "loss": 0.6891, | |
| "step": 2377 | |
| }, | |
| { | |
| "epoch": 2.6121103940683783, | |
| "grad_norm": 1.9839611053466797, | |
| "learning_rate": 4.297924297924298e-05, | |
| "loss": 0.6647, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 2.6132088425099544, | |
| "grad_norm": 0.4382665455341339, | |
| "learning_rate": 4.285714285714285e-05, | |
| "loss": 0.5969, | |
| "step": 2379 | |
| }, | |
| { | |
| "epoch": 2.614307290951531, | |
| "grad_norm": 1.1918715238571167, | |
| "learning_rate": 4.2735042735042735e-05, | |
| "loss": 0.7788, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.615405739393107, | |
| "grad_norm": 0.38117820024490356, | |
| "learning_rate": 4.2612942612942614e-05, | |
| "loss": 0.4967, | |
| "step": 2381 | |
| }, | |
| { | |
| "epoch": 2.6165041878346837, | |
| "grad_norm": 0.6454489827156067, | |
| "learning_rate": 4.2490842490842486e-05, | |
| "loss": 0.7724, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 2.61760263627626, | |
| "grad_norm": 1.0696319341659546, | |
| "learning_rate": 4.2368742368742364e-05, | |
| "loss": 0.5292, | |
| "step": 2383 | |
| }, | |
| { | |
| "epoch": 2.618701084717836, | |
| "grad_norm": 0.5887579321861267, | |
| "learning_rate": 4.224664224664224e-05, | |
| "loss": 0.5317, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 2.619799533159412, | |
| "grad_norm": 0.557188093662262, | |
| "learning_rate": 4.212454212454212e-05, | |
| "loss": 0.7172, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 2.6208979816009887, | |
| "grad_norm": 0.5122195482254028, | |
| "learning_rate": 4.2002442002442e-05, | |
| "loss": 0.6398, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 2.621996430042565, | |
| "grad_norm": 0.520722508430481, | |
| "learning_rate": 4.188034188034187e-05, | |
| "loss": 0.3984, | |
| "step": 2387 | |
| }, | |
| { | |
| "epoch": 2.623094878484141, | |
| "grad_norm": 1.2077422142028809, | |
| "learning_rate": 4.175824175824176e-05, | |
| "loss": 0.6686, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 2.6241933269257176, | |
| "grad_norm": 1.1437829732894897, | |
| "learning_rate": 4.163614163614163e-05, | |
| "loss": 0.6653, | |
| "step": 2389 | |
| }, | |
| { | |
| "epoch": 2.6252917753672937, | |
| "grad_norm": 0.6157158017158508, | |
| "learning_rate": 4.151404151404151e-05, | |
| "loss": 0.7074, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.62639022380887, | |
| "grad_norm": 1.8944931030273438, | |
| "learning_rate": 4.1391941391941394e-05, | |
| "loss": 0.5991, | |
| "step": 2391 | |
| }, | |
| { | |
| "epoch": 2.627488672250446, | |
| "grad_norm": 0.6598528623580933, | |
| "learning_rate": 4.1269841269841266e-05, | |
| "loss": 0.6051, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 2.6285871206920226, | |
| "grad_norm": 0.9341129660606384, | |
| "learning_rate": 4.1147741147741145e-05, | |
| "loss": 0.3795, | |
| "step": 2393 | |
| }, | |
| { | |
| "epoch": 2.6296855691335987, | |
| "grad_norm": 0.4246079921722412, | |
| "learning_rate": 4.1025641025641023e-05, | |
| "loss": 0.4603, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 2.6307840175751753, | |
| "grad_norm": 0.6639881134033203, | |
| "learning_rate": 4.09035409035409e-05, | |
| "loss": 0.5862, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 2.6318824660167515, | |
| "grad_norm": 1.297917366027832, | |
| "learning_rate": 4.078144078144078e-05, | |
| "loss": 0.6175, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 2.6329809144583276, | |
| "grad_norm": 0.7880698442459106, | |
| "learning_rate": 4.065934065934065e-05, | |
| "loss": 0.7034, | |
| "step": 2397 | |
| }, | |
| { | |
| "epoch": 2.6340793628999037, | |
| "grad_norm": 0.6197066903114319, | |
| "learning_rate": 4.053724053724053e-05, | |
| "loss": 0.659, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 2.6351778113414803, | |
| "grad_norm": 0.7560408711433411, | |
| "learning_rate": 4.041514041514042e-05, | |
| "loss": 0.5543, | |
| "step": 2399 | |
| }, | |
| { | |
| "epoch": 2.6362762597830565, | |
| "grad_norm": 2.2571635246276855, | |
| "learning_rate": 4.029304029304029e-05, | |
| "loss": 0.712, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.6373747082246326, | |
| "grad_norm": 0.8119613528251648, | |
| "learning_rate": 4.017094017094017e-05, | |
| "loss": 0.6407, | |
| "step": 2401 | |
| }, | |
| { | |
| "epoch": 2.638473156666209, | |
| "grad_norm": 3.9773592948913574, | |
| "learning_rate": 4.004884004884004e-05, | |
| "loss": 0.6434, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 2.6395716051077853, | |
| "grad_norm": 1.2648125886917114, | |
| "learning_rate": 3.9926739926739925e-05, | |
| "loss": 0.689, | |
| "step": 2403 | |
| }, | |
| { | |
| "epoch": 2.6406700535493615, | |
| "grad_norm": 0.7015364170074463, | |
| "learning_rate": 3.9804639804639804e-05, | |
| "loss": 0.4175, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 2.6417685019909376, | |
| "grad_norm": 0.941303551197052, | |
| "learning_rate": 3.9682539682539676e-05, | |
| "loss": 0.4126, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 2.642866950432514, | |
| "grad_norm": 0.7533726096153259, | |
| "learning_rate": 3.956043956043956e-05, | |
| "loss": 0.7401, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 2.6439653988740903, | |
| "grad_norm": 0.5480525493621826, | |
| "learning_rate": 3.943833943833943e-05, | |
| "loss": 0.5567, | |
| "step": 2407 | |
| }, | |
| { | |
| "epoch": 2.6450638473156665, | |
| "grad_norm": 0.6171422004699707, | |
| "learning_rate": 3.931623931623931e-05, | |
| "loss": 0.721, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 2.646162295757243, | |
| "grad_norm": 0.6719728708267212, | |
| "learning_rate": 3.919413919413919e-05, | |
| "loss": 0.5015, | |
| "step": 2409 | |
| }, | |
| { | |
| "epoch": 2.647260744198819, | |
| "grad_norm": 1.8106555938720703, | |
| "learning_rate": 3.907203907203906e-05, | |
| "loss": 0.6954, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.6483591926403953, | |
| "grad_norm": 0.42534878849983215, | |
| "learning_rate": 3.894993894993895e-05, | |
| "loss": 0.5241, | |
| "step": 2411 | |
| }, | |
| { | |
| "epoch": 2.6494576410819715, | |
| "grad_norm": 0.8733202219009399, | |
| "learning_rate": 3.882783882783883e-05, | |
| "loss": 0.4485, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 2.650556089523548, | |
| "grad_norm": 0.9050257802009583, | |
| "learning_rate": 3.87057387057387e-05, | |
| "loss": 0.6202, | |
| "step": 2413 | |
| }, | |
| { | |
| "epoch": 2.651654537965124, | |
| "grad_norm": 0.650347888469696, | |
| "learning_rate": 3.8583638583638584e-05, | |
| "loss": 0.621, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 2.6527529864067008, | |
| "grad_norm": 6.092042446136475, | |
| "learning_rate": 3.8461538461538456e-05, | |
| "loss": 0.5143, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 2.653851434848277, | |
| "grad_norm": 0.7801241874694824, | |
| "learning_rate": 3.8339438339438335e-05, | |
| "loss": 0.5424, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 2.654949883289853, | |
| "grad_norm": 0.5492686629295349, | |
| "learning_rate": 3.821733821733822e-05, | |
| "loss": 0.642, | |
| "step": 2417 | |
| }, | |
| { | |
| "epoch": 2.656048331731429, | |
| "grad_norm": 0.4257514774799347, | |
| "learning_rate": 3.809523809523809e-05, | |
| "loss": 0.8273, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 2.6571467801730058, | |
| "grad_norm": 1.0180964469909668, | |
| "learning_rate": 3.797313797313797e-05, | |
| "loss": 0.6962, | |
| "step": 2419 | |
| }, | |
| { | |
| "epoch": 2.658245228614582, | |
| "grad_norm": 0.3844882547855377, | |
| "learning_rate": 3.785103785103784e-05, | |
| "loss": 0.7315, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.659343677056158, | |
| "grad_norm": 0.46182385087013245, | |
| "learning_rate": 3.772893772893772e-05, | |
| "loss": 0.3889, | |
| "step": 2421 | |
| }, | |
| { | |
| "epoch": 2.6604421254977346, | |
| "grad_norm": 0.562627375125885, | |
| "learning_rate": 3.760683760683761e-05, | |
| "loss": 0.6415, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 2.6615405739393108, | |
| "grad_norm": 0.3234645128250122, | |
| "learning_rate": 3.7484737484737486e-05, | |
| "loss": 0.4819, | |
| "step": 2423 | |
| }, | |
| { | |
| "epoch": 2.662639022380887, | |
| "grad_norm": 0.6804205775260925, | |
| "learning_rate": 3.736263736263736e-05, | |
| "loss": 0.4248, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 2.663737470822463, | |
| "grad_norm": 0.5543864369392395, | |
| "learning_rate": 3.7240537240537236e-05, | |
| "loss": 0.5259, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 2.6648359192640396, | |
| "grad_norm": 0.8411497473716736, | |
| "learning_rate": 3.7118437118437115e-05, | |
| "loss": 0.5448, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 2.6659343677056158, | |
| "grad_norm": 0.4386245608329773, | |
| "learning_rate": 3.6996336996336994e-05, | |
| "loss": 0.9601, | |
| "step": 2427 | |
| }, | |
| { | |
| "epoch": 2.6670328161471923, | |
| "grad_norm": 0.773210346698761, | |
| "learning_rate": 3.687423687423687e-05, | |
| "loss": 0.8601, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 2.6681312645887685, | |
| "grad_norm": 0.4636232852935791, | |
| "learning_rate": 3.675213675213675e-05, | |
| "loss": 0.6322, | |
| "step": 2429 | |
| }, | |
| { | |
| "epoch": 2.6692297130303446, | |
| "grad_norm": 1.6318496465682983, | |
| "learning_rate": 3.663003663003662e-05, | |
| "loss": 0.4402, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.6703281614719208, | |
| "grad_norm": 0.5299782156944275, | |
| "learning_rate": 3.65079365079365e-05, | |
| "loss": 0.5622, | |
| "step": 2431 | |
| }, | |
| { | |
| "epoch": 2.6714266099134973, | |
| "grad_norm": 1.1223825216293335, | |
| "learning_rate": 3.638583638583638e-05, | |
| "loss": 0.5994, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 2.6725250583550735, | |
| "grad_norm": 1.8495402336120605, | |
| "learning_rate": 3.626373626373626e-05, | |
| "loss": 0.669, | |
| "step": 2433 | |
| }, | |
| { | |
| "epoch": 2.6736235067966496, | |
| "grad_norm": 0.4963383972644806, | |
| "learning_rate": 3.614163614163614e-05, | |
| "loss": 0.5412, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 2.674721955238226, | |
| "grad_norm": 0.5644822716712952, | |
| "learning_rate": 3.601953601953602e-05, | |
| "loss": 0.5768, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 2.6758204036798023, | |
| "grad_norm": 0.5272318720817566, | |
| "learning_rate": 3.5897435897435896e-05, | |
| "loss": 0.5909, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 2.6769188521213785, | |
| "grad_norm": 0.29838863015174866, | |
| "learning_rate": 3.5775335775335774e-05, | |
| "loss": 0.5625, | |
| "step": 2437 | |
| }, | |
| { | |
| "epoch": 2.6780173005629546, | |
| "grad_norm": 0.5375344157218933, | |
| "learning_rate": 3.565323565323565e-05, | |
| "loss": 0.5932, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 2.679115749004531, | |
| "grad_norm": 0.7850833535194397, | |
| "learning_rate": 3.5531135531135525e-05, | |
| "loss": 0.6706, | |
| "step": 2439 | |
| }, | |
| { | |
| "epoch": 2.6802141974461073, | |
| "grad_norm": 0.5286651253700256, | |
| "learning_rate": 3.540903540903541e-05, | |
| "loss": 0.6865, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.681312645887684, | |
| "grad_norm": 0.9832364320755005, | |
| "learning_rate": 3.528693528693528e-05, | |
| "loss": 0.7941, | |
| "step": 2441 | |
| }, | |
| { | |
| "epoch": 2.68241109432926, | |
| "grad_norm": 0.4431805908679962, | |
| "learning_rate": 3.516483516483516e-05, | |
| "loss": 0.4706, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 2.683509542770836, | |
| "grad_norm": 1.7264482975006104, | |
| "learning_rate": 3.504273504273504e-05, | |
| "loss": 0.6308, | |
| "step": 2443 | |
| }, | |
| { | |
| "epoch": 2.6846079912124123, | |
| "grad_norm": 0.6196084022521973, | |
| "learning_rate": 3.492063492063492e-05, | |
| "loss": 1.0233, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 2.6857064396539885, | |
| "grad_norm": 0.855876088142395, | |
| "learning_rate": 3.47985347985348e-05, | |
| "loss": 0.5522, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 2.686804888095565, | |
| "grad_norm": 0.45323798060417175, | |
| "learning_rate": 3.4676434676434676e-05, | |
| "loss": 0.6232, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 2.687903336537141, | |
| "grad_norm": 0.577273964881897, | |
| "learning_rate": 3.455433455433455e-05, | |
| "loss": 0.5051, | |
| "step": 2447 | |
| }, | |
| { | |
| "epoch": 2.689001784978718, | |
| "grad_norm": 0.4999620020389557, | |
| "learning_rate": 3.4432234432234427e-05, | |
| "loss": 0.4881, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 2.690100233420294, | |
| "grad_norm": 0.5028046369552612, | |
| "learning_rate": 3.431013431013431e-05, | |
| "loss": 0.6575, | |
| "step": 2449 | |
| }, | |
| { | |
| "epoch": 2.69119868186187, | |
| "grad_norm": 2.122028350830078, | |
| "learning_rate": 3.4188034188034184e-05, | |
| "loss": 0.7226, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.692297130303446, | |
| "grad_norm": 0.4979703426361084, | |
| "learning_rate": 3.406593406593406e-05, | |
| "loss": 0.5768, | |
| "step": 2451 | |
| }, | |
| { | |
| "epoch": 2.693395578745023, | |
| "grad_norm": 0.9270527958869934, | |
| "learning_rate": 3.394383394383394e-05, | |
| "loss": 0.6464, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 2.694494027186599, | |
| "grad_norm": 1.0739809274673462, | |
| "learning_rate": 3.382173382173382e-05, | |
| "loss": 0.753, | |
| "step": 2453 | |
| }, | |
| { | |
| "epoch": 2.695592475628175, | |
| "grad_norm": 0.6039335131645203, | |
| "learning_rate": 3.36996336996337e-05, | |
| "loss": 0.7909, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 2.6966909240697516, | |
| "grad_norm": 0.49040424823760986, | |
| "learning_rate": 3.357753357753358e-05, | |
| "loss": 0.6112, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 2.6977893725113278, | |
| "grad_norm": 0.6890440583229065, | |
| "learning_rate": 3.345543345543345e-05, | |
| "loss": 0.6849, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 2.698887820952904, | |
| "grad_norm": 0.7819212675094604, | |
| "learning_rate": 3.333333333333333e-05, | |
| "loss": 0.6797, | |
| "step": 2457 | |
| }, | |
| { | |
| "epoch": 2.69998626939448, | |
| "grad_norm": 1.0147050619125366, | |
| "learning_rate": 3.321123321123321e-05, | |
| "loss": 0.6867, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 2.7010847178360566, | |
| "grad_norm": 1.3562036752700806, | |
| "learning_rate": 3.3089133089133086e-05, | |
| "loss": 0.7811, | |
| "step": 2459 | |
| }, | |
| { | |
| "epoch": 2.7021831662776328, | |
| "grad_norm": 0.5813838839530945, | |
| "learning_rate": 3.2967032967032964e-05, | |
| "loss": 0.5405, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.7032816147192094, | |
| "grad_norm": 0.6152640581130981, | |
| "learning_rate": 3.284493284493284e-05, | |
| "loss": 0.425, | |
| "step": 2461 | |
| }, | |
| { | |
| "epoch": 2.7043800631607855, | |
| "grad_norm": 1.1984590291976929, | |
| "learning_rate": 3.272283272283272e-05, | |
| "loss": 0.592, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 2.7054785116023616, | |
| "grad_norm": 0.48487693071365356, | |
| "learning_rate": 3.26007326007326e-05, | |
| "loss": 0.5223, | |
| "step": 2463 | |
| }, | |
| { | |
| "epoch": 2.7065769600439378, | |
| "grad_norm": 0.47191065549850464, | |
| "learning_rate": 3.247863247863247e-05, | |
| "loss": 0.6479, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 2.7076754084855144, | |
| "grad_norm": 1.3167297840118408, | |
| "learning_rate": 3.235653235653235e-05, | |
| "loss": 0.4552, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 2.7087738569270905, | |
| "grad_norm": 1.3219714164733887, | |
| "learning_rate": 3.2234432234432237e-05, | |
| "loss": 0.5839, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 2.7098723053686666, | |
| "grad_norm": 0.8047394752502441, | |
| "learning_rate": 3.211233211233211e-05, | |
| "loss": 0.795, | |
| "step": 2467 | |
| }, | |
| { | |
| "epoch": 2.710970753810243, | |
| "grad_norm": 0.6053475737571716, | |
| "learning_rate": 3.199023199023199e-05, | |
| "loss": 0.743, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 2.7120692022518194, | |
| "grad_norm": 0.4619985818862915, | |
| "learning_rate": 3.1868131868131866e-05, | |
| "loss": 0.642, | |
| "step": 2469 | |
| }, | |
| { | |
| "epoch": 2.7131676506933955, | |
| "grad_norm": 0.8241426944732666, | |
| "learning_rate": 3.1746031746031745e-05, | |
| "loss": 0.521, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.7142660991349716, | |
| "grad_norm": 0.4344565272331238, | |
| "learning_rate": 3.162393162393162e-05, | |
| "loss": 0.4615, | |
| "step": 2471 | |
| }, | |
| { | |
| "epoch": 2.715364547576548, | |
| "grad_norm": 0.9640605449676514, | |
| "learning_rate": 3.15018315018315e-05, | |
| "loss": 0.4735, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 2.7164629960181244, | |
| "grad_norm": 0.49423810839653015, | |
| "learning_rate": 3.1379731379731374e-05, | |
| "loss": 0.7547, | |
| "step": 2473 | |
| }, | |
| { | |
| "epoch": 2.717561444459701, | |
| "grad_norm": 0.7234408855438232, | |
| "learning_rate": 3.125763125763125e-05, | |
| "loss": 0.464, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 2.718659892901277, | |
| "grad_norm": 0.542647123336792, | |
| "learning_rate": 3.113553113553113e-05, | |
| "loss": 0.5563, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 2.719758341342853, | |
| "grad_norm": 0.555722177028656, | |
| "learning_rate": 3.101343101343101e-05, | |
| "loss": 0.6899, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 2.7208567897844294, | |
| "grad_norm": 0.6171600222587585, | |
| "learning_rate": 3.089133089133089e-05, | |
| "loss": 0.6088, | |
| "step": 2477 | |
| }, | |
| { | |
| "epoch": 2.7219552382260055, | |
| "grad_norm": 0.9118738770484924, | |
| "learning_rate": 3.076923076923077e-05, | |
| "loss": 0.7778, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 2.723053686667582, | |
| "grad_norm": 0.6610655784606934, | |
| "learning_rate": 3.064713064713064e-05, | |
| "loss": 0.6935, | |
| "step": 2479 | |
| }, | |
| { | |
| "epoch": 2.724152135109158, | |
| "grad_norm": 0.6729289889335632, | |
| "learning_rate": 3.0525030525030525e-05, | |
| "loss": 0.792, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.725250583550735, | |
| "grad_norm": 0.4955647587776184, | |
| "learning_rate": 3.04029304029304e-05, | |
| "loss": 0.6746, | |
| "step": 2481 | |
| }, | |
| { | |
| "epoch": 2.726349031992311, | |
| "grad_norm": 0.42975953221321106, | |
| "learning_rate": 3.028083028083028e-05, | |
| "loss": 0.5318, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 2.727447480433887, | |
| "grad_norm": 0.3555055856704712, | |
| "learning_rate": 3.0158730158730154e-05, | |
| "loss": 0.6377, | |
| "step": 2483 | |
| }, | |
| { | |
| "epoch": 2.728545928875463, | |
| "grad_norm": 3.138209342956543, | |
| "learning_rate": 3.0036630036630036e-05, | |
| "loss": 0.6296, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 2.72964437731704, | |
| "grad_norm": 0.5710242390632629, | |
| "learning_rate": 2.9914529914529912e-05, | |
| "loss": 0.8987, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 2.730742825758616, | |
| "grad_norm": 0.5200769305229187, | |
| "learning_rate": 2.979242979242979e-05, | |
| "loss": 0.5154, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 2.731841274200192, | |
| "grad_norm": 0.797572910785675, | |
| "learning_rate": 2.9670329670329666e-05, | |
| "loss": 0.8039, | |
| "step": 2487 | |
| }, | |
| { | |
| "epoch": 2.7329397226417687, | |
| "grad_norm": 0.4667447805404663, | |
| "learning_rate": 2.9548229548229548e-05, | |
| "loss": 0.586, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 2.734038171083345, | |
| "grad_norm": 0.5500869154930115, | |
| "learning_rate": 2.9426129426129423e-05, | |
| "loss": 0.7007, | |
| "step": 2489 | |
| }, | |
| { | |
| "epoch": 2.735136619524921, | |
| "grad_norm": 0.5311625003814697, | |
| "learning_rate": 2.9304029304029302e-05, | |
| "loss": 0.4257, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.736235067966497, | |
| "grad_norm": 0.6474941968917847, | |
| "learning_rate": 2.9181929181929177e-05, | |
| "loss": 0.4747, | |
| "step": 2491 | |
| }, | |
| { | |
| "epoch": 2.7373335164080737, | |
| "grad_norm": 1.1186646223068237, | |
| "learning_rate": 2.9059829059829056e-05, | |
| "loss": 0.8177, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 2.73843196484965, | |
| "grad_norm": 2.455371379852295, | |
| "learning_rate": 2.8937728937728938e-05, | |
| "loss": 0.6535, | |
| "step": 2493 | |
| }, | |
| { | |
| "epoch": 2.7395304132912264, | |
| "grad_norm": 0.5033484101295471, | |
| "learning_rate": 2.8815628815628813e-05, | |
| "loss": 0.525, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 2.7406288617328025, | |
| "grad_norm": 0.5826357007026672, | |
| "learning_rate": 2.869352869352869e-05, | |
| "loss": 0.476, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 2.7417273101743787, | |
| "grad_norm": 0.5875104665756226, | |
| "learning_rate": 2.8571428571428567e-05, | |
| "loss": 0.6903, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 2.742825758615955, | |
| "grad_norm": 0.6006028056144714, | |
| "learning_rate": 2.844932844932845e-05, | |
| "loss": 0.8522, | |
| "step": 2497 | |
| }, | |
| { | |
| "epoch": 2.7439242070575314, | |
| "grad_norm": 0.5605003833770752, | |
| "learning_rate": 2.8327228327228325e-05, | |
| "loss": 0.5312, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 2.7450226554991075, | |
| "grad_norm": 0.7641153931617737, | |
| "learning_rate": 2.8205128205128204e-05, | |
| "loss": 0.6841, | |
| "step": 2499 | |
| }, | |
| { | |
| "epoch": 2.7461211039406836, | |
| "grad_norm": 0.5523414015769958, | |
| "learning_rate": 2.808302808302808e-05, | |
| "loss": 0.6582, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.7472195523822602, | |
| "grad_norm": 0.40714672207832336, | |
| "learning_rate": 2.796092796092796e-05, | |
| "loss": 0.7493, | |
| "step": 2501 | |
| }, | |
| { | |
| "epoch": 2.7483180008238364, | |
| "grad_norm": 0.6960926651954651, | |
| "learning_rate": 2.7838827838827836e-05, | |
| "loss": 0.7104, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 2.7494164492654125, | |
| "grad_norm": 0.42409783601760864, | |
| "learning_rate": 2.7716727716727715e-05, | |
| "loss": 0.5643, | |
| "step": 2503 | |
| }, | |
| { | |
| "epoch": 2.7505148977069886, | |
| "grad_norm": 0.5174455046653748, | |
| "learning_rate": 2.759462759462759e-05, | |
| "loss": 0.4545, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 2.7516133461485652, | |
| "grad_norm": 0.6353528499603271, | |
| "learning_rate": 2.747252747252747e-05, | |
| "loss": 0.5068, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 2.7527117945901414, | |
| "grad_norm": 0.46814125776290894, | |
| "learning_rate": 2.7350427350427348e-05, | |
| "loss": 0.7979, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 2.753810243031718, | |
| "grad_norm": 0.7229417562484741, | |
| "learning_rate": 2.7228327228327227e-05, | |
| "loss": 0.6212, | |
| "step": 2507 | |
| }, | |
| { | |
| "epoch": 2.754908691473294, | |
| "grad_norm": 1.2155603170394897, | |
| "learning_rate": 2.7106227106227102e-05, | |
| "loss": 0.8444, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 2.7560071399148702, | |
| "grad_norm": 0.462703138589859, | |
| "learning_rate": 2.698412698412698e-05, | |
| "loss": 0.8263, | |
| "step": 2509 | |
| }, | |
| { | |
| "epoch": 2.7571055883564464, | |
| "grad_norm": 0.9474642872810364, | |
| "learning_rate": 2.6862026862026863e-05, | |
| "loss": 0.7586, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.758204036798023, | |
| "grad_norm": 4.502622127532959, | |
| "learning_rate": 2.6739926739926738e-05, | |
| "loss": 0.5806, | |
| "step": 2511 | |
| }, | |
| { | |
| "epoch": 2.759302485239599, | |
| "grad_norm": 1.1251213550567627, | |
| "learning_rate": 2.6617826617826617e-05, | |
| "loss": 0.6333, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 2.7604009336811752, | |
| "grad_norm": 0.7035579681396484, | |
| "learning_rate": 2.6495726495726492e-05, | |
| "loss": 0.4739, | |
| "step": 2513 | |
| }, | |
| { | |
| "epoch": 2.761499382122752, | |
| "grad_norm": 0.5279493927955627, | |
| "learning_rate": 2.6373626373626374e-05, | |
| "loss": 0.597, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 2.762597830564328, | |
| "grad_norm": 0.5512554049491882, | |
| "learning_rate": 2.625152625152625e-05, | |
| "loss": 0.6471, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 2.763696279005904, | |
| "grad_norm": 0.857778012752533, | |
| "learning_rate": 2.6129426129426128e-05, | |
| "loss": 0.6172, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 2.7647947274474802, | |
| "grad_norm": 0.5348466634750366, | |
| "learning_rate": 2.6007326007326004e-05, | |
| "loss": 0.8074, | |
| "step": 2517 | |
| }, | |
| { | |
| "epoch": 2.765893175889057, | |
| "grad_norm": 0.5413629412651062, | |
| "learning_rate": 2.5885225885225882e-05, | |
| "loss": 0.3879, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 2.766991624330633, | |
| "grad_norm": 0.569411039352417, | |
| "learning_rate": 2.576312576312576e-05, | |
| "loss": 0.4392, | |
| "step": 2519 | |
| }, | |
| { | |
| "epoch": 2.7680900727722095, | |
| "grad_norm": 0.5127429962158203, | |
| "learning_rate": 2.564102564102564e-05, | |
| "loss": 0.6566, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.7691885212137857, | |
| "grad_norm": 0.7328614592552185, | |
| "learning_rate": 2.5518925518925515e-05, | |
| "loss": 0.6801, | |
| "step": 2521 | |
| }, | |
| { | |
| "epoch": 2.770286969655362, | |
| "grad_norm": 0.615686297416687, | |
| "learning_rate": 2.5396825396825394e-05, | |
| "loss": 0.6366, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 2.771385418096938, | |
| "grad_norm": 0.5250161290168762, | |
| "learning_rate": 2.5274725274725276e-05, | |
| "loss": 0.5737, | |
| "step": 2523 | |
| }, | |
| { | |
| "epoch": 2.772483866538514, | |
| "grad_norm": 0.6708832383155823, | |
| "learning_rate": 2.515262515262515e-05, | |
| "loss": 0.6681, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 2.7735823149800907, | |
| "grad_norm": 0.6120278835296631, | |
| "learning_rate": 2.503052503052503e-05, | |
| "loss": 0.4964, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 2.774680763421667, | |
| "grad_norm": 0.7024976015090942, | |
| "learning_rate": 2.4908424908424905e-05, | |
| "loss": 0.7984, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 2.7757792118632434, | |
| "grad_norm": 7.281716823577881, | |
| "learning_rate": 2.478632478632478e-05, | |
| "loss": 0.7191, | |
| "step": 2527 | |
| }, | |
| { | |
| "epoch": 2.7768776603048195, | |
| "grad_norm": 0.7347024083137512, | |
| "learning_rate": 2.4664224664224663e-05, | |
| "loss": 0.8684, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 2.7779761087463957, | |
| "grad_norm": 1.1338274478912354, | |
| "learning_rate": 2.454212454212454e-05, | |
| "loss": 0.5936, | |
| "step": 2529 | |
| }, | |
| { | |
| "epoch": 2.779074557187972, | |
| "grad_norm": 0.4176536202430725, | |
| "learning_rate": 2.4420024420024417e-05, | |
| "loss": 0.445, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.7801730056295484, | |
| "grad_norm": 0.9390072822570801, | |
| "learning_rate": 2.4297924297924295e-05, | |
| "loss": 0.5821, | |
| "step": 2531 | |
| }, | |
| { | |
| "epoch": 2.7812714540711245, | |
| "grad_norm": 1.1045840978622437, | |
| "learning_rate": 2.4175824175824174e-05, | |
| "loss": 0.7372, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 2.7823699025127007, | |
| "grad_norm": 0.5568689703941345, | |
| "learning_rate": 2.4053724053724053e-05, | |
| "loss": 0.5005, | |
| "step": 2533 | |
| }, | |
| { | |
| "epoch": 2.7834683509542772, | |
| "grad_norm": 0.2747582793235779, | |
| "learning_rate": 2.3931623931623928e-05, | |
| "loss": 0.5778, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 2.7845667993958534, | |
| "grad_norm": 1.4027804136276245, | |
| "learning_rate": 2.3809523809523807e-05, | |
| "loss": 0.5368, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 2.7856652478374295, | |
| "grad_norm": 0.7523220777511597, | |
| "learning_rate": 2.368742368742369e-05, | |
| "loss": 0.58, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 2.7867636962790057, | |
| "grad_norm": 0.33777353167533875, | |
| "learning_rate": 2.3565323565323564e-05, | |
| "loss": 0.5269, | |
| "step": 2537 | |
| }, | |
| { | |
| "epoch": 2.7878621447205822, | |
| "grad_norm": 0.5818787217140198, | |
| "learning_rate": 2.344322344322344e-05, | |
| "loss": 0.4459, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 2.7889605931621584, | |
| "grad_norm": 0.36858034133911133, | |
| "learning_rate": 2.3321123321123318e-05, | |
| "loss": 0.712, | |
| "step": 2539 | |
| }, | |
| { | |
| "epoch": 2.790059041603735, | |
| "grad_norm": 0.5299241542816162, | |
| "learning_rate": 2.3199023199023194e-05, | |
| "loss": 0.6086, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.791157490045311, | |
| "grad_norm": 2.432325601577759, | |
| "learning_rate": 2.3076923076923076e-05, | |
| "loss": 1.0386, | |
| "step": 2541 | |
| }, | |
| { | |
| "epoch": 2.7922559384868872, | |
| "grad_norm": 0.746638834476471, | |
| "learning_rate": 2.2954822954822954e-05, | |
| "loss": 0.7372, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 2.7933543869284634, | |
| "grad_norm": 0.6017647981643677, | |
| "learning_rate": 2.283272283272283e-05, | |
| "loss": 0.9134, | |
| "step": 2543 | |
| }, | |
| { | |
| "epoch": 2.79445283537004, | |
| "grad_norm": 0.7385385036468506, | |
| "learning_rate": 2.271062271062271e-05, | |
| "loss": 0.6827, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 2.795551283811616, | |
| "grad_norm": 0.6607246994972229, | |
| "learning_rate": 2.2588522588522587e-05, | |
| "loss": 0.6333, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 2.7966497322531922, | |
| "grad_norm": 0.40185117721557617, | |
| "learning_rate": 2.2466422466422466e-05, | |
| "loss": 0.6589, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 2.797748180694769, | |
| "grad_norm": 0.48225662112236023, | |
| "learning_rate": 2.234432234432234e-05, | |
| "loss": 0.6571, | |
| "step": 2547 | |
| }, | |
| { | |
| "epoch": 2.798846629136345, | |
| "grad_norm": 0.8996065855026245, | |
| "learning_rate": 2.222222222222222e-05, | |
| "loss": 0.7518, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 2.799945077577921, | |
| "grad_norm": 0.7139112949371338, | |
| "learning_rate": 2.21001221001221e-05, | |
| "loss": 0.6517, | |
| "step": 2549 | |
| }, | |
| { | |
| "epoch": 2.8010435260194972, | |
| "grad_norm": 0.5433416366577148, | |
| "learning_rate": 2.1978021978021977e-05, | |
| "loss": 0.3799, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.802141974461074, | |
| "grad_norm": 0.3883088231086731, | |
| "learning_rate": 2.1855921855921853e-05, | |
| "loss": 0.9269, | |
| "step": 2551 | |
| }, | |
| { | |
| "epoch": 2.80324042290265, | |
| "grad_norm": 0.5275357961654663, | |
| "learning_rate": 2.173382173382173e-05, | |
| "loss": 0.6606, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 2.8043388713442265, | |
| "grad_norm": 0.4666341543197632, | |
| "learning_rate": 2.1611721611721607e-05, | |
| "loss": 0.6982, | |
| "step": 2553 | |
| }, | |
| { | |
| "epoch": 2.8054373197858027, | |
| "grad_norm": 0.9221529364585876, | |
| "learning_rate": 2.148962148962149e-05, | |
| "loss": 0.4769, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 2.806535768227379, | |
| "grad_norm": 0.7469640374183655, | |
| "learning_rate": 2.1367521367521368e-05, | |
| "loss": 0.6985, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 2.807634216668955, | |
| "grad_norm": 0.6858775615692139, | |
| "learning_rate": 2.1245421245421243e-05, | |
| "loss": 0.4511, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 2.808732665110531, | |
| "grad_norm": 1.266801357269287, | |
| "learning_rate": 2.112332112332112e-05, | |
| "loss": 0.421, | |
| "step": 2557 | |
| }, | |
| { | |
| "epoch": 2.8098311135521077, | |
| "grad_norm": 0.5506262183189392, | |
| "learning_rate": 2.1001221001221e-05, | |
| "loss": 0.6082, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 2.810929561993684, | |
| "grad_norm": 0.5359029173851013, | |
| "learning_rate": 2.087912087912088e-05, | |
| "loss": 0.8111, | |
| "step": 2559 | |
| }, | |
| { | |
| "epoch": 2.8120280104352604, | |
| "grad_norm": 0.6969206929206848, | |
| "learning_rate": 2.0757020757020754e-05, | |
| "loss": 0.8331, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.8131264588768365, | |
| "grad_norm": 0.6040379405021667, | |
| "learning_rate": 2.0634920634920633e-05, | |
| "loss": 0.575, | |
| "step": 2561 | |
| }, | |
| { | |
| "epoch": 2.8142249073184127, | |
| "grad_norm": 1.3847273588180542, | |
| "learning_rate": 2.0512820512820512e-05, | |
| "loss": 0.5442, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 2.815323355759989, | |
| "grad_norm": 0.8050490617752075, | |
| "learning_rate": 2.039072039072039e-05, | |
| "loss": 0.6267, | |
| "step": 2563 | |
| }, | |
| { | |
| "epoch": 2.8164218042015654, | |
| "grad_norm": 0.5663136839866638, | |
| "learning_rate": 2.0268620268620266e-05, | |
| "loss": 0.5246, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 2.8175202526431415, | |
| "grad_norm": 0.3316130042076111, | |
| "learning_rate": 2.0146520146520144e-05, | |
| "loss": 0.5175, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 2.8186187010847177, | |
| "grad_norm": 0.4782855808734894, | |
| "learning_rate": 2.002442002442002e-05, | |
| "loss": 0.5111, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 2.8197171495262943, | |
| "grad_norm": 0.44766396284103394, | |
| "learning_rate": 1.9902319902319902e-05, | |
| "loss": 0.5825, | |
| "step": 2567 | |
| }, | |
| { | |
| "epoch": 2.8208155979678704, | |
| "grad_norm": 0.6830618977546692, | |
| "learning_rate": 1.978021978021978e-05, | |
| "loss": 0.5685, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 2.8219140464094465, | |
| "grad_norm": 0.5860748887062073, | |
| "learning_rate": 1.9658119658119656e-05, | |
| "loss": 0.7557, | |
| "step": 2569 | |
| }, | |
| { | |
| "epoch": 2.8230124948510227, | |
| "grad_norm": 0.49533459544181824, | |
| "learning_rate": 1.953601953601953e-05, | |
| "loss": 0.7326, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.8241109432925993, | |
| "grad_norm": 0.4989941418170929, | |
| "learning_rate": 1.9413919413919413e-05, | |
| "loss": 0.5757, | |
| "step": 2571 | |
| }, | |
| { | |
| "epoch": 2.8252093917341754, | |
| "grad_norm": 0.4973461627960205, | |
| "learning_rate": 1.9291819291819292e-05, | |
| "loss": 0.5357, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 2.826307840175752, | |
| "grad_norm": 0.7442370057106018, | |
| "learning_rate": 1.9169719169719167e-05, | |
| "loss": 0.7283, | |
| "step": 2573 | |
| }, | |
| { | |
| "epoch": 2.827406288617328, | |
| "grad_norm": 1.3321865797042847, | |
| "learning_rate": 1.9047619047619046e-05, | |
| "loss": 0.5107, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 2.8285047370589043, | |
| "grad_norm": 0.47394871711730957, | |
| "learning_rate": 1.892551892551892e-05, | |
| "loss": 0.5495, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 2.8296031855004804, | |
| "grad_norm": 0.6102151274681091, | |
| "learning_rate": 1.8803418803418804e-05, | |
| "loss": 0.5983, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 2.830701633942057, | |
| "grad_norm": 0.4657471179962158, | |
| "learning_rate": 1.868131868131868e-05, | |
| "loss": 0.5937, | |
| "step": 2577 | |
| }, | |
| { | |
| "epoch": 2.831800082383633, | |
| "grad_norm": 0.41180238127708435, | |
| "learning_rate": 1.8559218559218558e-05, | |
| "loss": 0.7775, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 2.8328985308252093, | |
| "grad_norm": 3.5043845176696777, | |
| "learning_rate": 1.8437118437118436e-05, | |
| "loss": 0.5304, | |
| "step": 2579 | |
| }, | |
| { | |
| "epoch": 2.833996979266786, | |
| "grad_norm": 0.4502231776714325, | |
| "learning_rate": 1.831501831501831e-05, | |
| "loss": 0.6556, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.835095427708362, | |
| "grad_norm": 0.6165898442268372, | |
| "learning_rate": 1.819291819291819e-05, | |
| "loss": 0.8434, | |
| "step": 2581 | |
| }, | |
| { | |
| "epoch": 2.836193876149938, | |
| "grad_norm": 0.5112649202346802, | |
| "learning_rate": 1.807081807081807e-05, | |
| "loss": 0.7429, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 2.8372923245915143, | |
| "grad_norm": 0.4834790527820587, | |
| "learning_rate": 1.7948717948717948e-05, | |
| "loss": 0.5772, | |
| "step": 2583 | |
| }, | |
| { | |
| "epoch": 2.838390773033091, | |
| "grad_norm": 0.4251219630241394, | |
| "learning_rate": 1.7826617826617826e-05, | |
| "loss": 0.5192, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 2.839489221474667, | |
| "grad_norm": 0.7645363807678223, | |
| "learning_rate": 1.7704517704517705e-05, | |
| "loss": 0.6624, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 2.8405876699162436, | |
| "grad_norm": 0.5651314854621887, | |
| "learning_rate": 1.758241758241758e-05, | |
| "loss": 0.5829, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 2.8416861183578197, | |
| "grad_norm": 1.059164047241211, | |
| "learning_rate": 1.746031746031746e-05, | |
| "loss": 0.6688, | |
| "step": 2587 | |
| }, | |
| { | |
| "epoch": 2.842784566799396, | |
| "grad_norm": 2.2424001693725586, | |
| "learning_rate": 1.7338217338217338e-05, | |
| "loss": 0.4515, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 2.843883015240972, | |
| "grad_norm": 0.6211466789245605, | |
| "learning_rate": 1.7216117216117213e-05, | |
| "loss": 0.836, | |
| "step": 2589 | |
| }, | |
| { | |
| "epoch": 2.8449814636825486, | |
| "grad_norm": 0.4224345088005066, | |
| "learning_rate": 1.7094017094017092e-05, | |
| "loss": 0.536, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.8460799121241247, | |
| "grad_norm": 0.7985780239105225, | |
| "learning_rate": 1.697191697191697e-05, | |
| "loss": 0.7433, | |
| "step": 2591 | |
| }, | |
| { | |
| "epoch": 2.847178360565701, | |
| "grad_norm": 1.4033039808273315, | |
| "learning_rate": 1.684981684981685e-05, | |
| "loss": 0.7479, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 2.8482768090072774, | |
| "grad_norm": 1.1432255506515503, | |
| "learning_rate": 1.6727716727716725e-05, | |
| "loss": 0.652, | |
| "step": 2593 | |
| }, | |
| { | |
| "epoch": 2.8493752574488536, | |
| "grad_norm": 0.9324535727500916, | |
| "learning_rate": 1.6605616605616603e-05, | |
| "loss": 0.5225, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 2.8504737058904297, | |
| "grad_norm": 0.5573447942733765, | |
| "learning_rate": 1.6483516483516482e-05, | |
| "loss": 0.6649, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 2.851572154332006, | |
| "grad_norm": 0.6875207424163818, | |
| "learning_rate": 1.636141636141636e-05, | |
| "loss": 0.7334, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 2.8526706027735824, | |
| "grad_norm": 0.32099124789237976, | |
| "learning_rate": 1.6239316239316236e-05, | |
| "loss": 0.5732, | |
| "step": 2597 | |
| }, | |
| { | |
| "epoch": 2.8537690512151586, | |
| "grad_norm": 0.4142940938472748, | |
| "learning_rate": 1.6117216117216118e-05, | |
| "loss": 0.6605, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 2.8548674996567347, | |
| "grad_norm": 0.5377205610275269, | |
| "learning_rate": 1.5995115995115994e-05, | |
| "loss": 0.5556, | |
| "step": 2599 | |
| }, | |
| { | |
| "epoch": 2.8559659480983113, | |
| "grad_norm": 0.43509960174560547, | |
| "learning_rate": 1.5873015873015872e-05, | |
| "loss": 0.8321, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.8570643965398874, | |
| "grad_norm": 0.4376494586467743, | |
| "learning_rate": 1.575091575091575e-05, | |
| "loss": 0.6392, | |
| "step": 2601 | |
| }, | |
| { | |
| "epoch": 2.8581628449814636, | |
| "grad_norm": 0.507837176322937, | |
| "learning_rate": 1.5628815628815626e-05, | |
| "loss": 0.5326, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 2.8592612934230397, | |
| "grad_norm": 29.0502986907959, | |
| "learning_rate": 1.5506715506715505e-05, | |
| "loss": 0.5478, | |
| "step": 2603 | |
| }, | |
| { | |
| "epoch": 2.8603597418646163, | |
| "grad_norm": 0.6940420866012573, | |
| "learning_rate": 1.5384615384615384e-05, | |
| "loss": 1.3063, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 2.8614581903061924, | |
| "grad_norm": 0.7178813219070435, | |
| "learning_rate": 1.5262515262515263e-05, | |
| "loss": 0.7447, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 2.862556638747769, | |
| "grad_norm": 0.6209506392478943, | |
| "learning_rate": 1.514041514041514e-05, | |
| "loss": 0.5496, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 2.863655087189345, | |
| "grad_norm": 0.5526819825172424, | |
| "learning_rate": 1.5018315018315018e-05, | |
| "loss": 0.4224, | |
| "step": 2607 | |
| }, | |
| { | |
| "epoch": 2.8647535356309213, | |
| "grad_norm": 0.5056405663490295, | |
| "learning_rate": 1.4896214896214895e-05, | |
| "loss": 0.6248, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 2.8658519840724974, | |
| "grad_norm": 2.416952610015869, | |
| "learning_rate": 1.4774114774114774e-05, | |
| "loss": 0.7551, | |
| "step": 2609 | |
| }, | |
| { | |
| "epoch": 2.866950432514074, | |
| "grad_norm": 0.52223140001297, | |
| "learning_rate": 1.4652014652014651e-05, | |
| "loss": 1.1146, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.86804888095565, | |
| "grad_norm": 0.685767650604248, | |
| "learning_rate": 1.4529914529914528e-05, | |
| "loss": 0.715, | |
| "step": 2611 | |
| }, | |
| { | |
| "epoch": 2.8691473293972263, | |
| "grad_norm": 0.650374174118042, | |
| "learning_rate": 1.4407814407814407e-05, | |
| "loss": 0.8844, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 2.870245777838803, | |
| "grad_norm": 0.46946465969085693, | |
| "learning_rate": 1.4285714285714284e-05, | |
| "loss": 0.9545, | |
| "step": 2613 | |
| }, | |
| { | |
| "epoch": 2.871344226280379, | |
| "grad_norm": 0.5312052369117737, | |
| "learning_rate": 1.4163614163614162e-05, | |
| "loss": 0.5204, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 2.872442674721955, | |
| "grad_norm": 0.41921889781951904, | |
| "learning_rate": 1.404151404151404e-05, | |
| "loss": 0.4614, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 2.8735411231635313, | |
| "grad_norm": 0.513203501701355, | |
| "learning_rate": 1.3919413919413918e-05, | |
| "loss": 0.613, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 2.874639571605108, | |
| "grad_norm": 1.1020901203155518, | |
| "learning_rate": 1.3797313797313795e-05, | |
| "loss": 0.525, | |
| "step": 2617 | |
| }, | |
| { | |
| "epoch": 2.875738020046684, | |
| "grad_norm": 0.39301392436027527, | |
| "learning_rate": 1.3675213675213674e-05, | |
| "loss": 0.5799, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 2.8768364684882606, | |
| "grad_norm": 1.576910376548767, | |
| "learning_rate": 1.3553113553113551e-05, | |
| "loss": 0.6286, | |
| "step": 2619 | |
| }, | |
| { | |
| "epoch": 2.8779349169298367, | |
| "grad_norm": 0.36711424589157104, | |
| "learning_rate": 1.3431013431013431e-05, | |
| "loss": 0.7542, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.879033365371413, | |
| "grad_norm": 1.2777636051177979, | |
| "learning_rate": 1.3308913308913308e-05, | |
| "loss": 0.6269, | |
| "step": 2621 | |
| }, | |
| { | |
| "epoch": 2.880131813812989, | |
| "grad_norm": 0.5584180355072021, | |
| "learning_rate": 1.3186813186813187e-05, | |
| "loss": 0.5633, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 2.8812302622545656, | |
| "grad_norm": 1.2418673038482666, | |
| "learning_rate": 1.3064713064713064e-05, | |
| "loss": 0.537, | |
| "step": 2623 | |
| }, | |
| { | |
| "epoch": 2.8823287106961417, | |
| "grad_norm": 0.5850531458854675, | |
| "learning_rate": 1.2942612942612941e-05, | |
| "loss": 0.595, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 2.883427159137718, | |
| "grad_norm": 1.054592251777649, | |
| "learning_rate": 1.282051282051282e-05, | |
| "loss": 0.8308, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 2.8845256075792944, | |
| "grad_norm": 0.3231412470340729, | |
| "learning_rate": 1.2698412698412697e-05, | |
| "loss": 0.4044, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 2.8856240560208706, | |
| "grad_norm": 0.47942933440208435, | |
| "learning_rate": 1.2576312576312576e-05, | |
| "loss": 0.6299, | |
| "step": 2627 | |
| }, | |
| { | |
| "epoch": 2.8867225044624467, | |
| "grad_norm": 0.4884187579154968, | |
| "learning_rate": 1.2454212454212453e-05, | |
| "loss": 0.6606, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 2.887820952904023, | |
| "grad_norm": 0.6658734083175659, | |
| "learning_rate": 1.2332112332112331e-05, | |
| "loss": 0.642, | |
| "step": 2629 | |
| }, | |
| { | |
| "epoch": 2.8889194013455994, | |
| "grad_norm": 0.24990247189998627, | |
| "learning_rate": 1.2210012210012208e-05, | |
| "loss": 0.4041, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.8900178497871756, | |
| "grad_norm": 0.6446508169174194, | |
| "learning_rate": 1.2087912087912087e-05, | |
| "loss": 0.7126, | |
| "step": 2631 | |
| }, | |
| { | |
| "epoch": 2.891116298228752, | |
| "grad_norm": 0.7800988554954529, | |
| "learning_rate": 1.1965811965811964e-05, | |
| "loss": 0.6733, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 2.8922147466703283, | |
| "grad_norm": 0.5319482684135437, | |
| "learning_rate": 1.1843711843711844e-05, | |
| "loss": 0.6445, | |
| "step": 2633 | |
| }, | |
| { | |
| "epoch": 2.8933131951119044, | |
| "grad_norm": 0.6029678583145142, | |
| "learning_rate": 1.172161172161172e-05, | |
| "loss": 0.7642, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 2.8944116435534806, | |
| "grad_norm": 0.9029693007469177, | |
| "learning_rate": 1.1599511599511597e-05, | |
| "loss": 0.635, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 2.8955100919950567, | |
| "grad_norm": 0.6022691130638123, | |
| "learning_rate": 1.1477411477411477e-05, | |
| "loss": 0.5361, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 2.8966085404366333, | |
| "grad_norm": 0.6777801513671875, | |
| "learning_rate": 1.1355311355311354e-05, | |
| "loss": 0.5099, | |
| "step": 2637 | |
| }, | |
| { | |
| "epoch": 2.8977069888782094, | |
| "grad_norm": 0.4157528877258301, | |
| "learning_rate": 1.1233211233211233e-05, | |
| "loss": 0.5038, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 2.898805437319786, | |
| "grad_norm": 2.6101133823394775, | |
| "learning_rate": 1.111111111111111e-05, | |
| "loss": 0.6324, | |
| "step": 2639 | |
| }, | |
| { | |
| "epoch": 2.899903885761362, | |
| "grad_norm": 0.6885612607002258, | |
| "learning_rate": 1.0989010989010989e-05, | |
| "loss": 0.4931, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.9010023342029383, | |
| "grad_norm": 0.5510079264640808, | |
| "learning_rate": 1.0866910866910866e-05, | |
| "loss": 0.5088, | |
| "step": 2641 | |
| }, | |
| { | |
| "epoch": 2.9021007826445144, | |
| "grad_norm": 0.6099854111671448, | |
| "learning_rate": 1.0744810744810744e-05, | |
| "loss": 0.4647, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 2.903199231086091, | |
| "grad_norm": 0.4390881657600403, | |
| "learning_rate": 1.0622710622710621e-05, | |
| "loss": 0.6787, | |
| "step": 2643 | |
| }, | |
| { | |
| "epoch": 2.904297679527667, | |
| "grad_norm": 0.46238628029823303, | |
| "learning_rate": 1.05006105006105e-05, | |
| "loss": 0.5655, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 2.9053961279692433, | |
| "grad_norm": 0.479106605052948, | |
| "learning_rate": 1.0378510378510377e-05, | |
| "loss": 0.7833, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 2.90649457641082, | |
| "grad_norm": 0.4643683135509491, | |
| "learning_rate": 1.0256410256410256e-05, | |
| "loss": 0.4563, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 2.907593024852396, | |
| "grad_norm": 0.4173976480960846, | |
| "learning_rate": 1.0134310134310133e-05, | |
| "loss": 0.6614, | |
| "step": 2647 | |
| }, | |
| { | |
| "epoch": 2.908691473293972, | |
| "grad_norm": 0.7158990502357483, | |
| "learning_rate": 1.001221001221001e-05, | |
| "loss": 0.7342, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 2.9097899217355483, | |
| "grad_norm": 0.7276301980018616, | |
| "learning_rate": 9.89010989010989e-06, | |
| "loss": 0.6883, | |
| "step": 2649 | |
| }, | |
| { | |
| "epoch": 2.910888370177125, | |
| "grad_norm": 0.63588947057724, | |
| "learning_rate": 9.768009768009766e-06, | |
| "loss": 0.7533, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.911986818618701, | |
| "grad_norm": 1.8038127422332764, | |
| "learning_rate": 9.645909645909646e-06, | |
| "loss": 0.6238, | |
| "step": 2651 | |
| }, | |
| { | |
| "epoch": 2.9130852670602776, | |
| "grad_norm": 0.7289617657661438, | |
| "learning_rate": 9.523809523809523e-06, | |
| "loss": 0.4767, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 2.9141837155018537, | |
| "grad_norm": 0.3828502893447876, | |
| "learning_rate": 9.401709401709402e-06, | |
| "loss": 0.4812, | |
| "step": 2653 | |
| }, | |
| { | |
| "epoch": 2.91528216394343, | |
| "grad_norm": 0.5157826542854309, | |
| "learning_rate": 9.279609279609279e-06, | |
| "loss": 0.703, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 2.916380612385006, | |
| "grad_norm": 0.6833345890045166, | |
| "learning_rate": 9.157509157509156e-06, | |
| "loss": 0.7471, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 2.9174790608265826, | |
| "grad_norm": 1.0189886093139648, | |
| "learning_rate": 9.035409035409035e-06, | |
| "loss": 0.6065, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 2.9185775092681587, | |
| "grad_norm": 0.5197221040725708, | |
| "learning_rate": 8.913308913308913e-06, | |
| "loss": 0.5904, | |
| "step": 2657 | |
| }, | |
| { | |
| "epoch": 2.919675957709735, | |
| "grad_norm": 0.6265780925750732, | |
| "learning_rate": 8.79120879120879e-06, | |
| "loss": 0.5622, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 2.9207744061513115, | |
| "grad_norm": 0.5703533887863159, | |
| "learning_rate": 8.669108669108669e-06, | |
| "loss": 0.8005, | |
| "step": 2659 | |
| }, | |
| { | |
| "epoch": 2.9218728545928876, | |
| "grad_norm": 0.8656613230705261, | |
| "learning_rate": 8.547008547008546e-06, | |
| "loss": 0.4942, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.9229713030344637, | |
| "grad_norm": 0.6180423498153687, | |
| "learning_rate": 8.424908424908425e-06, | |
| "loss": 0.8163, | |
| "step": 2661 | |
| }, | |
| { | |
| "epoch": 2.92406975147604, | |
| "grad_norm": 0.7308143377304077, | |
| "learning_rate": 8.302808302808302e-06, | |
| "loss": 0.7639, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 2.9251681999176165, | |
| "grad_norm": 0.585617184638977, | |
| "learning_rate": 8.18070818070818e-06, | |
| "loss": 0.7614, | |
| "step": 2663 | |
| }, | |
| { | |
| "epoch": 2.9262666483591926, | |
| "grad_norm": 0.5277345776557922, | |
| "learning_rate": 8.058608058608059e-06, | |
| "loss": 0.6489, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 2.927365096800769, | |
| "grad_norm": 0.3540293574333191, | |
| "learning_rate": 7.936507936507936e-06, | |
| "loss": 0.4503, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 2.9284635452423453, | |
| "grad_norm": 0.554492175579071, | |
| "learning_rate": 7.814407814407813e-06, | |
| "loss": 0.5785, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 2.9295619936839215, | |
| "grad_norm": 0.5547875761985779, | |
| "learning_rate": 7.692307692307692e-06, | |
| "loss": 0.5763, | |
| "step": 2667 | |
| }, | |
| { | |
| "epoch": 2.9306604421254976, | |
| "grad_norm": 0.745947003364563, | |
| "learning_rate": 7.57020757020757e-06, | |
| "loss": 0.512, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 2.931758890567074, | |
| "grad_norm": 0.47691571712493896, | |
| "learning_rate": 7.448107448107448e-06, | |
| "loss": 0.7018, | |
| "step": 2669 | |
| }, | |
| { | |
| "epoch": 2.9328573390086503, | |
| "grad_norm": 0.9611607789993286, | |
| "learning_rate": 7.3260073260073255e-06, | |
| "loss": 0.7419, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.9339557874502264, | |
| "grad_norm": 0.5495268106460571, | |
| "learning_rate": 7.203907203907203e-06, | |
| "loss": 0.6096, | |
| "step": 2671 | |
| }, | |
| { | |
| "epoch": 2.935054235891803, | |
| "grad_norm": 0.8863226771354675, | |
| "learning_rate": 7.081807081807081e-06, | |
| "loss": 0.7149, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 2.936152684333379, | |
| "grad_norm": 0.4234665334224701, | |
| "learning_rate": 6.959706959706959e-06, | |
| "loss": 0.6913, | |
| "step": 2673 | |
| }, | |
| { | |
| "epoch": 2.9372511327749553, | |
| "grad_norm": 0.9667326211929321, | |
| "learning_rate": 6.837606837606837e-06, | |
| "loss": 0.4181, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 2.9383495812165314, | |
| "grad_norm": 0.543683648109436, | |
| "learning_rate": 6.715506715506716e-06, | |
| "loss": 0.6329, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 2.939448029658108, | |
| "grad_norm": 0.5083779692649841, | |
| "learning_rate": 6.5934065934065935e-06, | |
| "loss": 0.8742, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 2.940546478099684, | |
| "grad_norm": 0.7212001085281372, | |
| "learning_rate": 6.4713064713064706e-06, | |
| "loss": 0.6912, | |
| "step": 2677 | |
| }, | |
| { | |
| "epoch": 2.9416449265412603, | |
| "grad_norm": 0.9474835991859436, | |
| "learning_rate": 6.349206349206348e-06, | |
| "loss": 0.649, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 2.942743374982837, | |
| "grad_norm": 0.8142021298408508, | |
| "learning_rate": 6.227106227106226e-06, | |
| "loss": 0.6136, | |
| "step": 2679 | |
| }, | |
| { | |
| "epoch": 2.943841823424413, | |
| "grad_norm": 2.9018187522888184, | |
| "learning_rate": 6.105006105006104e-06, | |
| "loss": 0.7157, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.944940271865989, | |
| "grad_norm": 0.4023605287075043, | |
| "learning_rate": 5.982905982905982e-06, | |
| "loss": 0.5675, | |
| "step": 2681 | |
| }, | |
| { | |
| "epoch": 2.9460387203075653, | |
| "grad_norm": 0.3693840801715851, | |
| "learning_rate": 5.86080586080586e-06, | |
| "loss": 0.5982, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 2.947137168749142, | |
| "grad_norm": 0.4298234283924103, | |
| "learning_rate": 5.738705738705739e-06, | |
| "loss": 0.5379, | |
| "step": 2683 | |
| }, | |
| { | |
| "epoch": 2.948235617190718, | |
| "grad_norm": 0.6495395302772522, | |
| "learning_rate": 5.6166056166056165e-06, | |
| "loss": 0.5411, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 2.9493340656322946, | |
| "grad_norm": 0.44857510924339294, | |
| "learning_rate": 5.494505494505494e-06, | |
| "loss": 0.5154, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 2.9504325140738707, | |
| "grad_norm": 0.7485830187797546, | |
| "learning_rate": 5.372405372405372e-06, | |
| "loss": 0.6595, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 2.951530962515447, | |
| "grad_norm": 0.5141469836235046, | |
| "learning_rate": 5.25030525030525e-06, | |
| "loss": 0.6289, | |
| "step": 2687 | |
| }, | |
| { | |
| "epoch": 2.952629410957023, | |
| "grad_norm": 0.8847435712814331, | |
| "learning_rate": 5.128205128205128e-06, | |
| "loss": 0.6734, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 2.9537278593985996, | |
| "grad_norm": 0.570573091506958, | |
| "learning_rate": 5.006105006105005e-06, | |
| "loss": 0.7013, | |
| "step": 2689 | |
| }, | |
| { | |
| "epoch": 2.9548263078401757, | |
| "grad_norm": 0.4376991391181946, | |
| "learning_rate": 4.884004884004883e-06, | |
| "loss": 0.5918, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.955924756281752, | |
| "grad_norm": 0.5480318069458008, | |
| "learning_rate": 4.7619047619047615e-06, | |
| "loss": 0.6227, | |
| "step": 2691 | |
| }, | |
| { | |
| "epoch": 2.9570232047233285, | |
| "grad_norm": 0.5831297636032104, | |
| "learning_rate": 4.639804639804639e-06, | |
| "loss": 0.6264, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 2.9581216531649046, | |
| "grad_norm": 1.5778921842575073, | |
| "learning_rate": 4.517704517704517e-06, | |
| "loss": 0.6352, | |
| "step": 2693 | |
| }, | |
| { | |
| "epoch": 2.9592201016064807, | |
| "grad_norm": 0.9567496180534363, | |
| "learning_rate": 4.395604395604395e-06, | |
| "loss": 0.6067, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 2.960318550048057, | |
| "grad_norm": 0.5237869620323181, | |
| "learning_rate": 4.273504273504273e-06, | |
| "loss": 0.8241, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 2.9614169984896335, | |
| "grad_norm": 0.3452164828777313, | |
| "learning_rate": 4.151404151404151e-06, | |
| "loss": 0.5718, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 2.9625154469312096, | |
| "grad_norm": 0.42237767577171326, | |
| "learning_rate": 4.0293040293040296e-06, | |
| "loss": 0.5199, | |
| "step": 2697 | |
| }, | |
| { | |
| "epoch": 2.963613895372786, | |
| "grad_norm": 0.7035055756568909, | |
| "learning_rate": 3.907203907203907e-06, | |
| "loss": 0.7078, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 2.9647123438143623, | |
| "grad_norm": 0.39236482977867126, | |
| "learning_rate": 3.785103785103785e-06, | |
| "loss": 0.59, | |
| "step": 2699 | |
| }, | |
| { | |
| "epoch": 2.9658107922559385, | |
| "grad_norm": 1.1658680438995361, | |
| "learning_rate": 3.6630036630036627e-06, | |
| "loss": 0.53, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.9669092406975146, | |
| "grad_norm": 0.6797634363174438, | |
| "learning_rate": 3.5409035409035406e-06, | |
| "loss": 0.6763, | |
| "step": 2701 | |
| }, | |
| { | |
| "epoch": 2.968007689139091, | |
| "grad_norm": 1.0421425104141235, | |
| "learning_rate": 3.4188034188034185e-06, | |
| "loss": 0.4, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 2.9691061375806673, | |
| "grad_norm": 0.36937475204467773, | |
| "learning_rate": 3.2967032967032968e-06, | |
| "loss": 0.5401, | |
| "step": 2703 | |
| }, | |
| { | |
| "epoch": 2.9702045860222435, | |
| "grad_norm": 0.4324638843536377, | |
| "learning_rate": 3.174603174603174e-06, | |
| "loss": 0.5882, | |
| "step": 2704 | |
| }, | |
| { | |
| "epoch": 2.97130303446382, | |
| "grad_norm": 1.2700526714324951, | |
| "learning_rate": 3.052503052503052e-06, | |
| "loss": 0.613, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 2.972401482905396, | |
| "grad_norm": 0.5261131525039673, | |
| "learning_rate": 2.93040293040293e-06, | |
| "loss": 0.6279, | |
| "step": 2706 | |
| }, | |
| { | |
| "epoch": 2.9734999313469723, | |
| "grad_norm": 0.42924660444259644, | |
| "learning_rate": 2.8083028083028082e-06, | |
| "loss": 1.0058, | |
| "step": 2707 | |
| }, | |
| { | |
| "epoch": 2.9745983797885485, | |
| "grad_norm": 3.100399971008301, | |
| "learning_rate": 2.686202686202686e-06, | |
| "loss": 0.5209, | |
| "step": 2708 | |
| }, | |
| { | |
| "epoch": 2.975696828230125, | |
| "grad_norm": 0.3666403293609619, | |
| "learning_rate": 2.564102564102564e-06, | |
| "loss": 0.5231, | |
| "step": 2709 | |
| }, | |
| { | |
| "epoch": 2.976795276671701, | |
| "grad_norm": 1.1315009593963623, | |
| "learning_rate": 2.4420024420024414e-06, | |
| "loss": 0.4449, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.9778937251132778, | |
| "grad_norm": 0.3323412537574768, | |
| "learning_rate": 2.3199023199023197e-06, | |
| "loss": 0.4806, | |
| "step": 2711 | |
| }, | |
| { | |
| "epoch": 2.978992173554854, | |
| "grad_norm": 0.7348967790603638, | |
| "learning_rate": 2.1978021978021976e-06, | |
| "loss": 0.7521, | |
| "step": 2712 | |
| }, | |
| { | |
| "epoch": 2.98009062199643, | |
| "grad_norm": 1.018898606300354, | |
| "learning_rate": 2.0757020757020754e-06, | |
| "loss": 0.8468, | |
| "step": 2713 | |
| }, | |
| { | |
| "epoch": 2.981189070438006, | |
| "grad_norm": 0.46808505058288574, | |
| "learning_rate": 1.9536019536019533e-06, | |
| "loss": 0.6992, | |
| "step": 2714 | |
| }, | |
| { | |
| "epoch": 2.9822875188795823, | |
| "grad_norm": 0.5411276817321777, | |
| "learning_rate": 1.8315018315018314e-06, | |
| "loss": 0.5949, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 2.983385967321159, | |
| "grad_norm": 0.45061302185058594, | |
| "learning_rate": 1.7094017094017092e-06, | |
| "loss": 0.4617, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 2.984484415762735, | |
| "grad_norm": 0.44529294967651367, | |
| "learning_rate": 1.587301587301587e-06, | |
| "loss": 0.5811, | |
| "step": 2717 | |
| }, | |
| { | |
| "epoch": 2.9855828642043116, | |
| "grad_norm": 1.255299687385559, | |
| "learning_rate": 1.465201465201465e-06, | |
| "loss": 1.1899, | |
| "step": 2718 | |
| }, | |
| { | |
| "epoch": 2.9866813126458878, | |
| "grad_norm": 0.8325234651565552, | |
| "learning_rate": 1.343101343101343e-06, | |
| "loss": 0.6344, | |
| "step": 2719 | |
| }, | |
| { | |
| "epoch": 2.987779761087464, | |
| "grad_norm": 1.0692095756530762, | |
| "learning_rate": 1.2210012210012207e-06, | |
| "loss": 0.5136, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.98887820952904, | |
| "grad_norm": 0.4980855882167816, | |
| "learning_rate": 1.0989010989010988e-06, | |
| "loss": 0.6352, | |
| "step": 2721 | |
| }, | |
| { | |
| "epoch": 2.9899766579706166, | |
| "grad_norm": 0.8502411246299744, | |
| "learning_rate": 9.768009768009766e-07, | |
| "loss": 0.599, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 2.9910751064121928, | |
| "grad_norm": 0.4849570691585541, | |
| "learning_rate": 8.547008547008546e-07, | |
| "loss": 0.5862, | |
| "step": 2723 | |
| }, | |
| { | |
| "epoch": 2.992173554853769, | |
| "grad_norm": 0.5491626858711243, | |
| "learning_rate": 7.326007326007325e-07, | |
| "loss": 0.5634, | |
| "step": 2724 | |
| }, | |
| { | |
| "epoch": 2.9932720032953455, | |
| "grad_norm": 0.7289263606071472, | |
| "learning_rate": 6.105006105006104e-07, | |
| "loss": 0.6643, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 2.9943704517369216, | |
| "grad_norm": 1.5343972444534302, | |
| "learning_rate": 4.884004884004883e-07, | |
| "loss": 0.71, | |
| "step": 2726 | |
| }, | |
| { | |
| "epoch": 2.9954689001784978, | |
| "grad_norm": 0.5619814395904541, | |
| "learning_rate": 3.6630036630036624e-07, | |
| "loss": 0.721, | |
| "step": 2727 | |
| }, | |
| { | |
| "epoch": 2.996567348620074, | |
| "grad_norm": 0.500442624092102, | |
| "learning_rate": 2.4420024420024416e-07, | |
| "loss": 0.6571, | |
| "step": 2728 | |
| }, | |
| { | |
| "epoch": 2.9976657970616505, | |
| "grad_norm": 0.42292630672454834, | |
| "learning_rate": 1.2210012210012208e-07, | |
| "loss": 0.4772, | |
| "step": 2729 | |
| }, | |
| { | |
| "epoch": 2.9987642455032266, | |
| "grad_norm": 0.4350331425666809, | |
| "learning_rate": 0.0, | |
| "loss": 0.7493, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.9987642455032266, | |
| "step": 2730, | |
| "total_flos": 1.0372510312766669e+18, | |
| "train_loss": 0.674373844124022, | |
| "train_runtime": 11584.4184, | |
| "train_samples_per_second": 1.886, | |
| "train_steps_per_second": 0.236 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 2730, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0372510312766669e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |