diff --git "a/weights/checkpoint_epoch_10_metadata.json" "b/weights/checkpoint_epoch_10_metadata.json" --- "a/weights/checkpoint_epoch_10_metadata.json" +++ "b/weights/checkpoint_epoch_10_metadata.json" @@ -3,235 +3,225 @@ "optimizer_state_dict": { "state": { "0": { - "step": "tensor(6260.)", - "exp_avg": "tensor([[ 2.3530e-05, 6.0072e-05, -8.8154e-06, ..., 3.5271e-05,\n 2.2801e-05, 2.1883e-05],\n [-1.1921e-04, 1.7628e-04, -1.1531e-04, ..., 5.9485e-05,\n -1.4297e-04, 6.2732e-05],\n [ 6.6196e-05, -5.6226e-05, -8.6743e-05, ..., 7.3746e-05,\n -5.1870e-05, 1.2947e-05],\n ...,\n [-2.5849e-04, 1.6659e-04, -9.3175e-05, ..., -3.6065e-05,\n -8.8496e-05, 5.2400e-05],\n [-6.9186e-16, -1.0957e-15, 3.9209e-15, ..., -1.8864e-15,\n 1.1515e-16, -2.1864e-15],\n [-3.9295e-05, 4.6565e-05, 6.9427e-07, ..., -2.5127e-05,\n -4.9041e-05, 4.7725e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.1560e-08, 6.0060e-08, 7.5593e-09, ..., 1.7688e-08, 1.7216e-08,\n 4.4965e-09],\n [1.2321e-07, 9.4368e-08, 3.0819e-08, ..., 5.7368e-08, 2.9058e-08,\n 4.0093e-08],\n [7.2192e-08, 9.1778e-08, 2.6598e-08, ..., 2.6849e-08, 1.7405e-08,\n 2.1316e-08],\n ...,\n [1.8360e-07, 1.0476e-07, 2.0546e-08, ..., 2.4855e-08, 1.9352e-08,\n 1.3445e-08],\n [2.6992e-11, 9.1711e-11, 1.2411e-11, ..., 2.8353e-11, 2.5807e-11,\n 2.0060e-11],\n [1.2515e-07, 7.9013e-08, 1.2013e-08, ..., 1.6320e-08, 3.0070e-08,\n 1.3699e-08]], device='cuda:0')" + "step": "tensor(12520.)", + "exp_avg": "tensor([[ 1.6739e-05, -4.5606e-05, 6.5077e-06, ..., 2.0072e-05,\n 1.0489e-05, 1.5309e-05],\n [-5.8033e-05, -4.5931e-06, 1.6543e-05, ..., -3.2093e-06,\n 7.0874e-06, -1.1865e-05],\n [-3.9455e-06, -2.1452e-07, -1.0664e-06, ..., -1.2511e-06,\n 3.0180e-07, 4.5739e-07],\n ...,\n [ 2.0947e-05, -2.4873e-05, 3.2363e-05, ..., 4.5308e-06,\n -2.4054e-05, 4.9465e-06],\n [-4.7665e-06, -1.1796e-05, -4.0522e-05, ..., 3.3716e-05,\n -1.2486e-05, 8.4833e-06],\n [ 4.7208e-06, -2.1184e-05, 1.8751e-05, ..., -7.1920e-08,\n 1.4588e-05, -4.2959e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1258e-08, 1.2121e-08, 5.4884e-09, ..., 6.9905e-09, 5.4585e-09,\n 4.2942e-09],\n [9.2535e-09, 8.9640e-09, 1.0811e-08, ..., 7.6101e-09, 5.1284e-09,\n 4.8838e-09],\n [3.1808e-11, 2.8556e-11, 3.5860e-11, ..., 1.6663e-11, 2.3441e-11,\n 1.9822e-11],\n ...,\n [1.0021e-08, 7.3923e-09, 7.7857e-09, ..., 5.8398e-09, 5.7656e-09,\n 4.0426e-09],\n [1.2363e-08, 9.2526e-09, 8.9886e-09, ..., 8.5243e-09, 7.1149e-09,\n 5.4051e-09],\n [2.6955e-09, 3.7269e-09, 2.8274e-09, ..., 1.8437e-09, 1.9974e-09,\n 1.6057e-09]], device='cuda:0')" }, "1": { - "step": "tensor(6260.)", - "exp_avg": "tensor([-2.5383e-04, 3.3642e-04, 1.5263e-03, 2.0180e-04, -3.2217e-04,\n 9.9299e-04, -1.2958e-03, 1.7783e-03, -1.1433e-02, 1.3442e-03,\n -5.2038e-06, -1.2961e-03, 5.6052e-45, 4.6860e-04, 3.0243e-04,\n -1.5875e-03, -7.7546e-04, -7.4867e-04, 1.2261e-03, 5.6052e-45,\n -2.8617e-03, -1.2040e-03, 1.9934e-03, 4.4646e-04, 5.6052e-45,\n 1.4099e-03, -4.9186e-04, 2.8079e-03, 8.8708e-04, -1.7457e-03,\n 6.8585e-04, 7.7115e-04, -5.9056e-09, 5.6052e-45, -3.1902e-03,\n -2.6145e-04, 5.1512e-03, 3.0591e-03, 6.3891e-04, -3.4599e-03,\n -2.0975e-04, -9.9475e-04, 3.1483e-03, 6.3304e-03, -2.7553e-03,\n -1.6319e-04, 3.6616e-04, -8.7693e-04, -1.1744e-03, 1.3425e-03,\n -6.8330e-04, 5.6052e-45, -4.3270e-11, 5.6052e-45, -1.0810e-03,\n 7.9324e-04, 2.9458e-04, 1.1268e-03, -3.3478e-05, 1.2027e-03,\n -1.7015e-04, 2.4234e-03, 5.1939e-04, -2.7528e-04, 5.9288e-04,\n -2.0985e-03, -4.4793e-03, 5.6052e-45, -2.4158e-03, 1.2987e-03,\n 1.6490e-03, 5.6052e-45, 3.0649e-03, 1.9158e-03, -1.7971e-03,\n -3.5207e-03, 4.2541e-05, 6.6341e-04, 1.8764e-03, 7.9388e-04,\n 2.4894e-03, 2.6783e-03, -1.8094e-03, 2.0309e-04, 1.5313e-03,\n -2.3402e-03, -3.6859e-03, 1.0882e-03, 1.4556e-03, -2.3890e-03,\n 5.6052e-45, 1.7438e-03, 1.4836e-03, 1.1200e-03, 3.1667e-03,\n -3.7070e-03, -2.8607e-04, 6.8942e-04, -8.5061e-04, 3.8672e-04,\n -9.2338e-04, 1.1207e-03, 3.0808e-03, 2.3356e-03, 4.5637e-04,\n -1.5440e-03, 3.3261e-03, -3.2884e-03, 1.1839e-04, -4.6794e-05,\n -1.7103e-03, 3.3829e-03, 5.6052e-45, -2.5911e-03, -5.6360e-04,\n 8.8871e-04, -8.4471e-04, -2.2190e-03, -5.0496e-16, 1.7679e-07,\n 5.6052e-45, 8.2512e-04, 5.6052e-45, 6.8095e-04, 2.3092e-03,\n 1.2416e-03, -8.7029e-04, 5.6052e-45, 1.0723e-03, 3.2067e-03,\n -2.6833e-04, 1.1413e-03, 1.3956e-03, 2.5150e-20, -6.5868e-05,\n 1.1413e-03, 1.9837e-03, 2.2025e-03, 1.7465e-03, -4.4475e-04,\n -4.3164e-04, -3.9841e-03, 2.4628e-03, -7.6038e-04, -1.9682e-03,\n 1.0329e-04, -5.6052e-45, 1.1217e-03, -2.2650e-03, -5.6302e-04,\n -1.4785e-25, -4.4081e-04, 1.1927e-03, 5.6052e-45, 9.4334e-04,\n 1.8371e-04, -1.7026e-33, 5.0472e-04, 6.1390e-03, 2.0046e-04,\n -5.5753e-04, -2.2900e-03, 5.3853e-03, -2.5106e-03, -5.0420e-03,\n -2.5502e-03, -2.5170e-03, -1.1467e-03, -8.4865e-04, 5.6052e-45,\n 5.6052e-45, 3.2256e-04, 1.9479e-03, 5.1432e-04, -4.2804e-04,\n -8.0500e-05, 3.3792e-04, 8.9718e-04, 5.6052e-45, 8.0570e-05,\n -4.0074e-03, 1.0959e-03, -1.4137e-04, 5.6052e-45, -2.0903e-04,\n 9.3977e-04, -5.2416e-04, 1.8220e-04, -1.1301e-03, 5.6052e-45,\n 5.6052e-45, 1.0757e-04, 4.9479e-03, 3.5221e-03, -4.5186e-04,\n 5.0306e-04, 7.9772e-04, 1.1046e-03, 5.6052e-45, 1.8709e-03,\n 9.2141e-04, -3.1505e-04, 9.0721e-04, -4.0493e-04, 3.5744e-04,\n 5.6052e-45, 5.1685e-04, -1.5379e-03, -1.1911e-03, 6.4799e-04,\n 3.5037e-04, 2.0768e-03, -7.5834e-03, 6.5965e-04, -2.8627e-03,\n 1.7813e-03, 2.3761e-04, 3.7499e-04, 2.4069e-03, -4.7942e-05,\n 7.1553e-04, -3.7912e-03, 1.7914e-03, 3.6356e-03, 1.3194e-03,\n 1.4334e-03, 2.4092e-03, 3.0253e-03, 1.1649e-03, 1.1193e-04,\n -1.8358e-03, -1.8537e-03, 1.8078e-04, 5.6052e-45, 1.0969e-03,\n 3.5196e-04, 1.5360e-03, 2.0031e-03, -5.4725e-04, -5.0848e-04,\n 1.5853e-03, 1.6889e-03, 1.2233e-03, -3.2627e-03, 2.4035e-03,\n -2.7509e-03, -2.3404e-04, -4.1648e-04, 1.5942e-04, -1.3968e-03,\n 1.2093e-04, 1.9641e-03, 1.1517e-03, 1.1792e-03, -6.6849e-04,\n 2.5845e-03, 5.6052e-45, 1.7769e-03, -6.1465e-04, -2.4848e-04,\n 6.4684e-04, -2.0915e-04, -1.9209e-05, 2.1165e-03, -2.5082e-03,\n 2.8314e-03, 2.3913e-03, 5.6052e-45, -7.4337e-03, -1.7149e-03,\n 1.4748e-03, 3.2400e-04, -1.4173e-04, -6.7552e-04, -2.1635e-03,\n -7.3868e-14, -1.2644e-03, 5.6052e-45, 1.4047e-03, -8.3320e-04,\n 1.7829e-03, 1.1660e-03, 2.2050e-03, 1.1764e-03, 2.3199e-03,\n -4.0779e-03, 1.0116e-04, -4.2430e-03, 5.0496e-04, -2.0566e-04,\n -3.4104e-04, -2.9261e-04, -2.8332e-03, -1.8661e-03, 1.5172e-03,\n -8.6613e-04, -1.4609e-03, -3.4278e-04, 5.5266e-04, 8.5275e-04,\n -7.3712e-03, -8.1383e-04, -1.8512e-05, 2.9731e-04, -1.2587e-04,\n -1.7235e-03, 8.4856e-04, 5.6052e-45, 5.6052e-45, 1.0038e-03,\n 1.3149e-03, 1.1838e-03, -1.2740e-03, -1.4760e-03, -3.8575e-04,\n 1.2066e-04, 5.9129e-04, 2.3998e-03, -9.2707e-04, -2.2159e-03,\n -4.7793e-05, 1.9066e-03, -4.4703e-04, 1.6235e-03, 2.7080e-03,\n 2.2248e-03, -1.1790e-03, 5.8560e-04, -4.2412e-04, 1.9551e-03,\n 6.0337e-05, -1.9445e-03, 1.3213e-03, 2.9132e-04, 5.6052e-45,\n -7.4821e-04, 9.1699e-05, -1.6500e-03, -1.2714e-03, -3.9415e-04,\n 3.3128e-04, -1.3279e-03, -4.1022e-03, -7.1368e-04, 1.0617e-04,\n 4.2122e-04, 5.6052e-45, -7.3137e-04, 2.8261e-03, -3.8412e-03,\n -1.6172e-03, 9.4337e-04, -2.5811e-04, 1.7815e-03, 4.3357e-04,\n 2.1772e-03, 1.0317e-04, -7.5788e-04, -6.6014e-04, 1.8676e-03,\n 3.8791e-04, 1.2253e-03, -6.2575e-03, -1.4788e-03, 2.7072e-05,\n 1.7084e-03, -6.1967e-04, 1.3204e-03, -3.1039e-03, 3.0302e-04,\n -2.6108e-03, 1.0986e-03, -5.3722e-04, -1.7633e-03, 1.0695e-03,\n -3.9537e-03, 1.9419e-03, 4.6936e-04, 5.6052e-45, -1.9232e-03,\n 1.6557e-03, 5.6052e-45, -7.4832e-04, 3.0119e-03, 1.8134e-03,\n 2.8102e-03, -6.7810e-04, 1.2902e-03, 2.5185e-03, 2.7113e-03,\n -1.6999e-03, -1.7654e-03, -1.6577e-04, -9.4647e-21, 5.6052e-45,\n -6.3248e-03, 1.7675e-03, 2.5238e-03, -1.6636e-04, 5.1199e-04,\n -3.7028e-03, 3.2892e-03, -5.0988e-04, -1.0067e-03, -4.8039e-03,\n -2.1621e-03, -7.8040e-04, 5.6052e-45, 5.0562e-05, 9.8408e-04,\n 5.6052e-45, -1.0536e-03, 5.6052e-45, -1.3911e-03, 2.2791e-03,\n 1.2400e-03, 1.3171e-03, 5.6052e-45, -2.2337e-04, 5.3265e-04,\n -2.5126e-04, 1.7054e-03, 2.0940e-03, 2.6929e-03, -7.9512e-04,\n 7.4613e-04, -3.6072e-04, -2.4193e-04, -7.2536e-05, 5.6052e-45,\n 2.5617e-03, -4.0028e-03, 7.1800e-04, 5.6052e-45, -2.7940e-03,\n 1.3879e-03, -5.3645e-03, -1.9681e-03, 2.3033e-04, -2.1055e-04,\n -2.3001e-03, 1.5731e-04, -4.4659e-03, 3.5292e-03, -2.4825e-03,\n -5.9628e-04, 1.6698e-03, 5.6052e-45, -1.1510e-03, -1.0174e-04,\n 5.6052e-45, 9.2394e-05, -9.2492e-04, -1.2869e-04, 7.0905e-04,\n 4.5798e-08, 1.8736e-04, -7.6005e-05, 4.3942e-04, 5.6052e-45,\n 2.7952e-04, 3.7757e-04, 5.6052e-45, 3.8123e-04, 5.3931e-05,\n 5.0965e-04, 1.6549e-03, 5.6052e-45, 2.1060e-03, -4.1522e-05,\n 3.7485e-03, 1.2029e-04, -4.6988e-04, -3.2823e-05, -1.5356e-03,\n 3.4967e-04, 5.6052e-45, -5.2579e-04, 5.6052e-45, 1.6180e-03,\n 8.4434e-04, -4.9545e-04, 1.7854e-03, 2.0302e-03, 2.8658e-03,\n 4.2917e-03, -3.1647e-04, -1.3547e-05, -1.8413e-03, -3.0740e-04,\n 1.9323e-37, -8.3822e-04, -1.3524e-04, -1.4803e-03, 1.4194e-04,\n 2.0769e-04, -1.1327e-03, 4.8733e-04, 6.5384e-04, -1.3887e-03,\n 1.6279e-03, 5.6052e-45, 5.1626e-04, -1.5877e-03, -5.7097e-04,\n 2.4501e-03, -6.7106e-04, 7.8480e-04, -7.5487e-04, 5.3939e-04,\n -2.7195e-03, -1.4107e-03, -5.9300e-04, -4.2834e-03, 5.6052e-45,\n 5.6052e-45, -7.5068e-04, -6.3335e-03, -9.7074e-05, 1.1434e-03,\n 5.2953e-04, 2.5766e-05, -1.9734e-04, 2.5388e-03, -2.8762e-03,\n -1.9364e-04, -2.7211e-03, 1.4126e-03, 2.6122e-04, -1.1918e-03,\n 2.9070e-04, -2.0184e-03, -1.2838e-03, -2.9494e-05, 5.6052e-45,\n -1.4761e-03, -2.0301e-03, 5.6052e-45, 5.6052e-45, -7.1633e-04,\n -3.4456e-03, -1.5447e-03, 9.4841e-04, 3.8709e-04, 6.8828e-06,\n 2.0253e-03, 4.5288e-04, 2.9476e-03, -8.0906e-05, -9.8687e-05,\n 3.9675e-04, 2.5121e-03, 8.8842e-04, 5.3956e-05, -5.2579e-04,\n -2.4194e-03, -2.5995e-03, -2.7839e-04, 2.0765e-04, 2.2041e-03,\n -1.0488e-03, -8.5193e-04, 1.0936e-03, -2.3788e-04, 3.6597e-04,\n -7.3021e-04, 4.5324e-04, 5.6987e-04, 3.6895e-03, 1.2378e-03,\n 2.7245e-03, -7.9741e-04, 1.0225e-03, 1.9997e-04, 1.8216e-04,\n -5.6052e-45, 5.6052e-45, -1.7986e-03, 3.7832e-04, 9.0102e-04,\n 2.8945e-04, 5.6052e-45, -1.4635e-04, -1.5616e-03, -2.1742e-03,\n -1.7092e-04, 2.0810e-04, 6.1069e-04, 2.3253e-03, -3.7953e-03,\n -2.9309e-04, 7.9889e-04, 8.2072e-04, -3.5315e-03, 7.0650e-04,\n 2.3199e-04, -1.5255e-04, -3.0804e-03, 3.8992e-03, -3.2597e-03,\n 6.2782e-03, 4.7445e-04, -7.1991e-04, -2.8219e-03, -5.2184e-03,\n 5.4292e-04, -1.7206e-03, 1.5001e-03, -3.7600e-03, -3.9258e-03,\n 2.1162e-03, -1.5392e-03, -3.8298e-03, 5.8741e-04, 1.2812e-03,\n 1.4966e-03, -2.1805e-04, 4.4091e-04, -3.9576e-04, -8.4735e-10,\n 5.4539e-08, 2.2878e-03, 8.9630e-04, 1.0187e-03, 2.0335e-03,\n -2.6973e-03, -4.0402e-03, 6.2057e-04, 8.3841e-04, -5.2095e-04,\n -1.0639e-03, -3.3994e-39, -5.1481e-04, -2.1795e-03, -2.6319e-04,\n 6.3650e-04, 2.4263e-04, -2.0230e-03, -6.0368e-14, -8.9237e-04],\n device='cuda:0')", - "exp_avg_sq": "tensor([1.3311e-05, 6.0514e-05, 2.5175e-05, 1.9146e-05, 3.9522e-06, 5.0957e-06,\n 3.5278e-05, 3.3902e-05, 3.4233e-05, 3.4695e-05, 3.0516e-05, 6.2106e-05,\n 4.3085e-08, 2.9803e-05, 3.7183e-05, 3.6746e-05, 5.5893e-05, 3.8376e-05,\n 2.1617e-05, 1.1218e-08, 3.4465e-05, 2.2396e-05, 2.3250e-05, 3.4491e-05,\n 4.0947e-08, 4.7140e-05, 3.3724e-05, 4.7018e-05, 4.1878e-05, 4.0964e-05,\n 4.4174e-05, 3.7067e-05, 8.2439e-08, 7.2318e-09, 3.7203e-05, 3.0538e-05,\n 3.8977e-05, 2.1839e-05, 2.3140e-05, 3.7936e-05, 3.7581e-05, 2.1455e-05,\n 3.4589e-05, 3.2321e-05, 2.9276e-05, 4.0218e-05, 3.2253e-05, 5.6126e-05,\n 6.1230e-05, 2.7371e-05, 3.9125e-05, 4.2957e-09, 6.9704e-09, 6.5234e-09,\n 4.7881e-05, 7.3133e-05, 2.9743e-05, 2.7767e-05, 1.0309e-07, 4.5017e-05,\n 3.3996e-05, 3.2703e-05, 6.4470e-05, 1.8737e-05, 3.8428e-05, 3.9311e-05,\n 3.6721e-05, 7.9843e-09, 5.2418e-05, 3.5935e-05, 1.1966e-05, 3.2615e-08,\n 4.1343e-05, 2.9839e-05, 1.2738e-05, 4.1247e-05, 4.5111e-05, 4.7932e-05,\n 2.6883e-05, 4.5188e-05, 7.1613e-05, 3.1745e-05, 4.3055e-05, 2.8623e-05,\n 3.4108e-05, 3.4707e-05, 3.4663e-05, 2.0292e-05, 3.8552e-05, 3.8108e-05,\n 5.0915e-08, 4.5768e-05, 3.0824e-05, 2.5799e-05, 5.6976e-05, 1.4823e-05,\n 3.9359e-05, 3.3999e-05, 3.1020e-05, 2.4178e-05, 2.4476e-05, 1.0092e-05,\n 4.7268e-05, 5.1863e-05, 8.2968e-05, 2.5773e-05, 3.1980e-05, 5.1106e-05,\n 1.4126e-05, 2.1015e-05, 5.1987e-05, 5.2175e-05, 3.4424e-09, 2.5601e-05,\n 8.7473e-06, 2.5030e-05, 3.5607e-05, 6.0366e-05, 1.5349e-09, 2.9979e-08,\n 5.5832e-09, 3.3000e-05, 4.4458e-09, 6.6693e-05, 3.4208e-05, 4.5696e-05,\n 4.5871e-05, 8.3632e-10, 1.6641e-05, 4.3601e-05, 3.2062e-05, 3.6073e-05,\n 4.0439e-05, 1.4920e-09, 4.0968e-05, 3.5414e-05, 2.3472e-05, 2.9006e-05,\n 3.0424e-05, 2.8554e-05, 6.2339e-05, 3.9619e-05, 4.7221e-05, 3.4816e-05,\n 4.0517e-05, 4.1525e-05, 6.5837e-08, 4.0609e-05, 3.5331e-05, 3.2016e-05,\n 5.6504e-08, 2.1272e-05, 3.0968e-05, 3.0297e-08, 2.9838e-05, 4.3890e-05,\n 6.0013e-08, 5.1566e-05, 3.0873e-05, 3.7370e-05, 2.6625e-05, 3.4743e-05,\n 9.3730e-05, 3.8688e-05, 3.9094e-05, 2.8193e-05, 6.0957e-05, 2.1070e-05,\n 3.7245e-05, 7.1297e-09, 1.0987e-08, 3.4593e-05, 5.3563e-05, 2.8054e-05,\n 3.3369e-05, 4.8340e-05, 1.6512e-05, 4.0573e-05, 1.9940e-09, 1.9736e-05,\n 3.7234e-05, 4.8279e-05, 8.4764e-06, 1.4672e-08, 3.7023e-05, 3.7642e-05,\n 5.2603e-05, 2.3431e-05, 3.1872e-05, 4.2605e-09, 4.8814e-08, 3.3933e-05,\n 1.9507e-05, 4.3989e-05, 3.1003e-05, 3.6994e-05, 2.2514e-05, 4.2640e-05,\n 7.6941e-09, 3.2019e-05, 2.6379e-05, 1.6421e-05, 5.7783e-05, 1.9437e-05,\n 7.3734e-05, 3.7950e-10, 2.9028e-05, 3.5288e-05, 2.7534e-05, 3.5252e-05,\n 1.7869e-05, 3.9220e-05, 4.5721e-05, 3.7415e-05, 2.4657e-05, 2.9123e-05,\n 2.4406e-05, 4.4917e-05, 3.8253e-05, 2.3384e-05, 2.2344e-05, 4.0144e-05,\n 2.5456e-05, 3.1103e-05, 3.4081e-05, 3.6113e-05, 4.9859e-05, 5.3670e-05,\n 2.3129e-05, 4.6888e-05, 4.0657e-05, 7.0371e-05, 4.0257e-05, 9.4332e-09,\n 4.1213e-05, 1.8357e-05, 3.0556e-05, 2.2143e-05, 7.4990e-05, 3.8493e-05,\n 2.8854e-05, 3.9848e-05, 3.2491e-05, 3.2045e-05, 5.0824e-05, 2.2425e-05,\n 2.0981e-05, 4.5693e-05, 2.3878e-05, 2.8133e-05, 3.6585e-05, 3.1763e-05,\n 2.5954e-05, 3.0128e-05, 3.7588e-05, 2.9525e-05, 4.0645e-08, 2.7552e-05,\n 2.1957e-05, 1.2320e-05, 3.4997e-05, 5.9168e-05, 4.2474e-05, 3.6013e-05,\n 3.3367e-05, 3.2273e-05, 3.1601e-05, 6.0249e-09, 4.8408e-05, 3.7184e-05,\n 2.3909e-05, 5.3186e-05, 2.9116e-05, 2.2981e-05, 4.6294e-05, 1.0514e-08,\n 3.6242e-05, 2.9711e-08, 2.3919e-05, 1.4452e-05, 3.4708e-05, 2.7793e-05,\n 5.1990e-05, 4.0867e-05, 3.6544e-05, 4.3248e-05, 2.1820e-06, 5.2025e-05,\n 4.7962e-05, 2.4138e-05, 8.4048e-06, 3.5386e-05, 4.6387e-05, 3.7644e-05,\n 3.3176e-05, 5.5716e-05, 6.0354e-05, 3.7498e-05, 3.2842e-05, 7.3572e-05,\n 4.3849e-05, 8.3857e-06, 6.7912e-06, 2.9934e-05, 3.5644e-05, 1.0878e-05,\n 4.0054e-05, 1.2506e-09, 2.4638e-08, 3.6757e-05, 5.2168e-05, 2.2669e-05,\n 3.2675e-05, 4.3797e-05, 4.5221e-05, 3.2376e-05, 4.0366e-05, 2.6117e-05,\n 2.7149e-05, 4.7122e-05, 4.9004e-05, 5.7672e-05, 4.9505e-05, 2.2842e-05,\n 1.4050e-05, 3.4761e-05, 3.2627e-05, 1.0293e-05, 2.7988e-05, 4.0666e-05,\n 9.9559e-06, 2.3909e-05, 4.1833e-05, 4.9941e-05, 2.9393e-12, 3.5011e-05,\n 2.3016e-05, 5.2086e-05, 2.3590e-05, 3.3328e-05, 5.8258e-06, 3.7053e-05,\n 2.7632e-05, 3.0490e-05, 3.1777e-05, 4.7832e-05, 4.3014e-08, 9.9737e-06,\n 3.9429e-05, 3.9532e-05, 2.9914e-05, 3.9563e-05, 2.8559e-05, 3.0181e-05,\n 5.6046e-05, 3.5828e-05, 3.7762e-05, 3.1532e-05, 4.3956e-05, 8.2534e-06,\n 1.8275e-05, 2.1712e-05, 4.7149e-05, 5.6648e-05, 4.3619e-05, 8.8223e-05,\n 1.4317e-05, 4.4178e-05, 5.2889e-05, 3.8753e-05, 4.6259e-05, 2.4645e-05,\n 2.8898e-05, 9.9445e-06, 3.2438e-05, 5.5087e-05, 6.8958e-05, 1.1871e-05,\n 1.8726e-08, 3.7359e-05, 2.4413e-05, 1.0217e-07, 2.8345e-05, 6.0592e-05,\n 3.5349e-05, 2.8253e-05, 3.4053e-05, 4.2070e-05, 2.2226e-05, 2.1119e-05,\n 5.2166e-05, 4.3497e-05, 2.8234e-05, 1.0155e-08, 3.3695e-08, 3.9174e-05,\n 3.7581e-05, 3.4287e-05, 5.7549e-05, 2.9426e-05, 3.6184e-05, 2.3307e-05,\n 1.4121e-05, 2.6148e-05, 3.2520e-05, 5.3005e-05, 2.4563e-05, 1.2409e-08,\n 1.1708e-05, 9.9340e-06, 3.5747e-09, 1.1808e-05, 6.2365e-09, 4.6425e-05,\n 4.2321e-05, 1.5619e-05, 3.1568e-05, 1.6837e-08, 1.8509e-05, 4.2679e-05,\n 2.6337e-05, 3.9077e-05, 4.6447e-05, 4.8305e-05, 3.2076e-05, 3.9195e-06,\n 3.1133e-05, 6.6710e-06, 2.0082e-05, 8.4633e-09, 2.3589e-05, 4.4472e-05,\n 3.3544e-05, 1.0815e-08, 2.2287e-05, 3.0834e-05, 4.8556e-05, 3.9168e-05,\n 3.3910e-05, 2.5545e-05, 4.6118e-05, 1.7317e-05, 3.7420e-05, 5.2016e-05,\n 2.8716e-05, 8.2903e-06, 3.4994e-05, 3.6504e-09, 2.9451e-05, 2.3958e-05,\n 1.2111e-07, 2.2642e-05, 2.8133e-05, 3.7288e-06, 3.0414e-05, 7.8381e-09,\n 2.0593e-05, 3.9261e-06, 3.1326e-05, 1.6350e-07, 3.0301e-05, 5.4665e-05,\n 1.0630e-09, 2.7470e-05, 6.1610e-06, 3.7335e-05, 2.5991e-05, 8.6819e-09,\n 4.5253e-05, 2.2189e-05, 6.1497e-05, 4.5398e-05, 2.5775e-05, 1.7709e-05,\n 2.2717e-05, 3.0640e-05, 6.0346e-08, 4.4240e-05, 1.9160e-08, 9.2665e-06,\n 7.2145e-06, 8.2439e-06, 3.6468e-05, 3.7881e-05, 6.7687e-05, 2.4483e-05,\n 4.3888e-05, 2.8398e-05, 4.2333e-05, 3.1223e-05, 8.0390e-09, 3.6482e-05,\n 3.5413e-05, 6.6510e-05, 6.6403e-06, 2.0803e-05, 2.7950e-05, 2.1849e-05,\n 3.8547e-05, 1.7279e-05, 3.1985e-05, 5.9652e-08, 1.9927e-05, 4.6623e-05,\n 3.2985e-05, 5.9695e-05, 3.7871e-05, 3.1045e-05, 4.2322e-05, 3.6158e-05,\n 3.5571e-05, 7.4985e-05, 2.9321e-05, 3.8378e-05, 4.4773e-08, 4.9414e-08,\n 4.3266e-05, 8.0830e-05, 4.8934e-05, 2.9087e-05, 3.6011e-05, 3.4274e-05,\n 3.1817e-05, 4.7425e-05, 3.2986e-05, 6.2725e-06, 3.1714e-05, 3.8764e-05,\n 3.0417e-05, 2.6466e-05, 7.6673e-07, 2.8387e-05, 3.7635e-05, 2.5055e-05,\n 4.4984e-08, 2.7384e-05, 3.6660e-05, 3.6440e-09, 1.0964e-08, 3.4977e-05,\n 4.1029e-05, 4.4366e-05, 2.2350e-05, 4.1401e-05, 1.9588e-05, 3.9129e-05,\n 3.1102e-05, 1.9863e-05, 2.2185e-05, 3.7681e-05, 3.7233e-05, 2.8558e-05,\n 4.7910e-05, 4.8012e-05, 4.0460e-05, 2.4428e-05, 3.2610e-05, 1.0697e-04,\n 2.8344e-05, 3.2518e-05, 1.8103e-05, 3.1356e-05, 3.8430e-05, 6.8956e-05,\n 6.5067e-06, 2.7966e-05, 4.8813e-05, 2.2977e-05, 4.3701e-05, 2.9500e-05,\n 4.1277e-05, 2.8475e-05, 3.4002e-05, 4.7319e-05, 5.5738e-06, 5.7155e-09,\n 6.5416e-08, 2.9182e-05, 3.1456e-05, 3.8036e-05, 3.2897e-05, 1.2451e-07,\n 3.0031e-05, 3.4883e-05, 2.0542e-05, 3.6318e-05, 2.6387e-05, 2.4917e-05,\n 4.9457e-05, 4.2343e-05, 3.3598e-05, 5.8827e-05, 4.3868e-05, 5.4305e-05,\n 3.0545e-05, 4.1284e-05, 3.5085e-05, 4.0758e-05, 4.8390e-05, 6.1708e-05,\n 4.9915e-05, 2.1822e-05, 3.0854e-05, 3.5410e-05, 3.8892e-05, 2.5923e-05,\n 2.9530e-05, 3.5008e-05, 6.7068e-05, 5.9785e-05, 4.2225e-05, 4.4093e-05,\n 3.9467e-05, 1.6882e-05, 2.0927e-05, 3.6647e-05, 3.2884e-05, 3.6112e-05,\n 1.8672e-05, 3.0325e-09, 8.1847e-08, 6.6168e-06, 3.8777e-05, 3.4095e-05,\n 3.4811e-05, 3.6944e-05, 3.4062e-05, 2.6564e-05, 2.5683e-05, 5.3070e-05,\n 2.5196e-05, 2.0969e-08, 5.9801e-06, 4.5888e-05, 3.0643e-05, 5.4534e-05,\n 3.7676e-05, 3.2464e-05, 1.9150e-08, 2.4995e-05], device='cuda:0')" + "step": "tensor(12520.)", + "exp_avg": "tensor([ 6.4066e-04, -5.0628e-04, -9.4757e-05, ..., -1.3069e-04,\n 8.1707e-04, -5.6003e-04], device='cuda:0')", + "exp_avg_sq": "tensor([1.3089e-05, 1.2634e-05, 4.7040e-08, ..., 1.1755e-05, 1.4330e-05,\n 4.6614e-06], device='cuda:0')" }, "2": { - "step": "tensor(6260.)", - "exp_avg": "tensor([[ 7.9436e-06, -7.0702e-05, -4.0870e-07, ..., -3.0017e-05,\n -3.6760e-16, -5.4073e-05],\n [ 2.8054e-06, 2.0132e-05, -5.1456e-05, ..., 4.6699e-06,\n -7.4626e-15, 1.6501e-05],\n [ 9.1971e-06, -1.0070e-04, -9.4052e-05, ..., 3.1487e-05,\n -1.8700e-15, 3.3967e-05],\n ...,\n [-4.3714e-06, -3.6980e-05, -4.2132e-05, ..., -1.0561e-04,\n -6.8541e-16, 4.5185e-05],\n [-1.2728e-05, -4.1654e-05, 2.3512e-05, ..., -9.5335e-05,\n 6.1973e-15, 9.6555e-05],\n [-3.3969e-06, -6.9386e-05, 9.9327e-05, ..., 4.8945e-06,\n 7.7783e-15, 6.3467e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.3648e-09, 5.9209e-09, 6.1990e-09, ..., 6.3399e-09, 1.5939e-11,\n 5.4520e-09],\n [2.9963e-09, 1.6589e-08, 1.0601e-08, ..., 1.3126e-08, 1.0125e-11,\n 8.2170e-09],\n [1.7926e-09, 1.1513e-08, 1.2759e-08, ..., 8.8502e-09, 2.4057e-11,\n 1.4364e-08],\n ...,\n [3.0310e-09, 1.1686e-08, 1.3834e-08, ..., 1.2641e-08, 2.1579e-11,\n 1.6039e-08],\n [3.1418e-09, 1.1282e-08, 1.2126e-08, ..., 1.2456e-08, 1.8113e-11,\n 3.8286e-08],\n [3.1862e-09, 1.1905e-08, 1.4944e-08, ..., 1.7467e-08, 3.6336e-11,\n 8.1483e-09]], device='cuda:0')" + "step": "tensor(12520.)", + "exp_avg": "tensor([[-7.2368e-06, -1.1073e-07, -4.4978e-09, ..., -8.5510e-06,\n 1.4223e-06, 1.2094e-05],\n [ 3.7071e-06, 9.5835e-08, -1.7412e-08, ..., -5.0400e-07,\n 5.4481e-06, -3.7365e-06],\n [-2.8565e-06, -6.0232e-07, -2.4080e-33, ..., -7.4501e-07,\n 4.5857e-06, -3.0325e-08],\n ...,\n [ 0.0000e+00, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 0.0000e+00],\n [ 1.5191e-05, 6.0268e-07, -2.5183e-08, ..., -9.7206e-07,\n -4.0751e-06, 4.1295e-06],\n [-2.4974e-06, -3.3868e-06, 3.0848e-08, ..., 4.4032e-06,\n -8.5272e-06, 9.1030e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.6016e-10, 1.8050e-10, 3.9481e-12, ..., 1.6111e-09, 7.7856e-10,\n 4.3555e-10],\n [6.5044e-10, 3.1379e-11, 2.9507e-13, ..., 2.3564e-09, 1.2419e-09,\n 6.2676e-11],\n [4.6684e-10, 1.2257e-09, 1.5144e-14, ..., 1.0586e-09, 2.6087e-09,\n 3.8964e-11],\n ...,\n [0.0000e+00, 1.9922e-21, 0.0000e+00, ..., 5.8361e-23, 6.9702e-23,\n 0.0000e+00],\n [5.0624e-09, 6.0870e-10, 1.3764e-11, ..., 1.5517e-09, 6.7616e-10,\n 5.6453e-10],\n [1.0861e-09, 1.6294e-09, 4.6904e-12, ..., 8.3689e-10, 5.6864e-09,\n 2.6142e-10]], device='cuda:0')" }, "3": { - "step": "tensor(6260.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-3.5884e-06, -9.1305e-07, -3.8677e-06, ..., 5.6785e-08,\n -4.7949e-06, 3.2131e-06],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-1.6671e-05, 2.1392e-05, 3.0510e-06, ..., -7.4079e-06,\n -1.3666e-05, -2.0398e-05],\n [-3.5488e-07, 6.6073e-06, 3.5814e-06, ..., 6.1645e-06,\n -1.3975e-06, 1.5459e-06],\n [-1.5385e-10, 6.2621e-11, 1.7855e-11, ..., -4.4535e-11,\n 3.1088e-11, 5.2681e-11]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.8987e-12, 4.9801e-12, 4.0329e-14, ..., 5.2389e-18, 1.8822e-13,\n 1.6074e-13],\n [8.2366e-10, 3.1522e-09, 2.5344e-10, ..., 2.2229e-10, 2.8496e-10,\n 1.9323e-10],\n [3.2335e-13, 4.8761e-13, 1.7999e-13, ..., 3.1182e-14, 4.4236e-14,\n 1.3357e-13],\n ...,\n [4.0422e-09, 5.9532e-09, 8.4945e-10, ..., 1.0579e-09, 1.9066e-09,\n 1.7891e-09],\n [4.5963e-09, 3.2513e-09, 9.1882e-10, ..., 1.3384e-09, 9.1669e-10,\n 8.4945e-10],\n [1.3125e-12, 1.1983e-12, 6.5030e-13, ..., 4.3585e-14, 1.5564e-13,\n 1.8954e-13]], device='cuda:0')" + "step": "tensor(12520.)", + "exp_avg": "tensor([ 3.8338e-05, 1.5834e-05, 1.8492e-04, -6.5094e-05, -1.9471e-04,\n 5.6052e-45, -6.7887e-05, -2.9346e-05, 3.3579e-06, -2.4709e-04,\n -1.0341e-04, 2.5436e-06, 1.3127e-04, 3.9198e-05, -2.8213e-05,\n 1.0745e-04, -4.4677e-04, 1.3987e-05, 5.2284e-05, 9.3746e-06,\n -3.6078e-05, 1.2300e-05, 1.0411e-04, -9.9970e-05, -3.1225e-04,\n -1.8167e-05, 4.0361e-05, 5.6052e-45, 1.3360e-05, 2.9073e-05,\n 6.3603e-05, 3.4925e-05, -4.7478e-06, -1.5589e-04, -3.5119e-05,\n -1.3531e-05, 9.9754e-05, 5.4368e-05, 1.3799e-04, -5.4927e-05,\n -1.5107e-04, 5.4087e-05, 3.1507e-06, 7.3043e-05, 9.3905e-05,\n -1.5327e-04, 1.9220e-04, -1.1672e-04, -2.3976e-04, 8.6001e-05,\n 5.6052e-45, -4.8713e-06, -4.6724e-05, -1.0336e-04, 1.5337e-04,\n -6.6736e-05, 5.5120e-05, -1.4183e-04, 1.1549e-04, 1.0652e-04,\n 4.1970e-05, -1.3084e-05, 5.1940e-05, 1.4866e-05, 4.0846e-05,\n -6.2122e-05, 6.7902e-05, 2.3593e-05, 9.0107e-05, 4.6437e-05,\n 5.4268e-05, -5.6052e-45, -2.2738e-05, -1.1862e-04, -1.7789e-05,\n 1.3623e-04, 8.2502e-05, 1.7110e-05, 2.9336e-05, 6.2896e-05,\n 1.3278e-04, -5.7928e-05, 6.6922e-05, -5.6052e-45, 2.5500e-06,\n -9.9526e-05, 3.0418e-04, 1.6249e-04, -8.4341e-05, 2.7578e-05,\n 1.8996e-04, -3.5378e-06, 2.0701e-05, -1.7009e-04, -8.6813e-07,\n -1.2712e-04, 3.1232e-05, -2.0433e-05, 1.7426e-05, 1.5201e-05,\n -7.8730e-05, -3.4225e-05, -7.0286e-05, -4.7577e-05, -3.7810e-05,\n -5.0647e-05, -9.1227e-05, 7.8203e-05, 7.1191e-05, 9.1648e-05,\n 3.5480e-04, -1.2691e-04, 8.9211e-05, -2.0269e-05, -4.0072e-05,\n 4.1760e-06, -1.2247e-04, 5.3114e-05, -2.8885e-05, 5.6052e-45,\n 5.4588e-05, -2.2759e-04, -1.1325e-04, -5.6052e-45, 5.3218e-05,\n 1.6035e-04, -1.1660e-04, -2.3849e-06, 4.5767e-05, 1.3015e-04,\n -3.3889e-04, 3.2962e-06, 5.6052e-45, 1.6555e-04, -1.7252e-06,\n 2.7753e-05, -7.2577e-17, 2.9808e-05, 4.0751e-05, 6.1913e-05,\n 5.6052e-45, 5.6052e-45, -1.1975e-05, -2.0122e-04, 5.6052e-45,\n 5.6052e-45, -5.2588e-05, 1.0905e-05, 5.6052e-45, -6.1229e-05,\n 3.3126e-05, -8.8374e-05, 5.9280e-05, 2.3443e-04, 9.3245e-06,\n -4.3880e-05, 4.7527e-05, -1.2535e-04, 5.6052e-45, 2.1243e-05,\n -9.5787e-05, -7.3071e-06, -6.1816e-06, 1.3621e-04, 2.7513e-05,\n -1.3907e-04, 5.7287e-05, 5.6052e-45, 2.0030e-04, 4.2312e-04,\n 3.2210e-05, -1.9548e-04, 1.6744e-05, -1.4295e-04, -1.0301e-05,\n 4.8333e-05, 1.7491e-04, 4.7614e-05, -1.0770e-04, 2.0719e-04,\n -2.0302e-04, -1.1586e-05, 2.9632e-04, -1.3544e-04, 5.6052e-45,\n 5.6052e-45, 1.8066e-04, 2.1504e-05, 6.2024e-05, -4.6624e-05,\n -4.9966e-05, 1.3313e-04, 6.7837e-05, -1.3627e-04, -6.9981e-05,\n 7.0191e-05, 2.7019e-04, 2.3466e-04, 3.1086e-05, 5.6052e-45,\n -1.1904e-04, -8.0481e-05, -4.7839e-06, -3.6322e-05, -4.4722e-05,\n 1.5362e-04, 2.0214e-04, 1.0044e-04, -6.4273e-05, 5.6052e-45,\n -2.9045e-04, -4.4491e-17, -4.7793e-05, 4.7031e-05, 1.3973e-04,\n 3.1927e-05, -9.8028e-05, 8.3025e-05, -2.5517e-05, 7.0862e-05,\n -6.4046e-05, 4.5396e-05, -1.1648e-04, -2.0059e-05, 8.7001e-05,\n -5.6052e-45, 2.2055e-05, 1.8401e-04, 1.5473e-04, -1.0084e-05,\n -4.4574e-05, 8.5887e-05, 5.4844e-05, 5.6215e-05, 5.6052e-45,\n -2.7584e-04, 1.5147e-05, 5.6052e-45, 9.3455e-05, -1.1566e-06,\n -9.7674e-06, -1.3733e-43, 2.0191e-04, -8.9031e-05, 3.4434e-05,\n 9.4515e-06, 1.0362e-04, 1.9286e-04, 4.0914e-05, 3.8426e-34,\n 6.9085e-05, -7.6557e-05, -1.0770e-04, 3.8685e-05, 5.6416e-05,\n 6.8888e-05, 5.6052e-45, 3.9085e-05, -6.9866e-05, -7.2529e-05,\n 5.6052e-45, 1.4369e-04, -8.0630e-05, 3.3343e-05, -5.3500e-05,\n 6.6456e-05, 9.9716e-05, 1.1840e-04, 3.1793e-05, 1.1513e-04,\n 4.8116e-05, 5.1183e-05, 1.0271e-04, -1.4210e-04, 1.0144e-04,\n 7.2953e-05, -2.2398e-04, -1.1546e-07, 9.9781e-05, 3.4274e-05,\n 5.6052e-45, 1.8358e-04, 1.9391e-05, -7.3410e-04, -1.2678e-04,\n 4.5389e-05, 4.7737e-05, 5.5905e-05, 6.2708e-05, 3.1626e-05,\n -5.9282e-05, -3.2450e-04, 2.9844e-05, 4.1126e-05, -1.7190e-04,\n -3.3179e-05, -4.1625e-05, -6.6495e-05, -4.7444e-05, 6.0293e-05,\n 5.9508e-05, 1.4603e-04, 2.1163e-05, 2.5824e-04, -7.6519e-05,\n -2.4071e-05, 3.2787e-05, -1.6573e-05, -8.7245e-05, -8.7377e-05,\n -1.9887e-04, 5.6052e-45, 3.1261e-05, -6.0945e-05, 2.6664e-04,\n 5.6052e-45, 1.7670e-05, -1.1690e-05, 7.0645e-05, -3.6721e-05,\n 2.4997e-05, -2.6209e-04, -5.4700e-05, -1.5361e-04, 6.1355e-05,\n 5.6052e-45, 1.0565e-04, -8.9283e-05, -2.1914e-04, 1.4611e-05,\n -1.3683e-04, 8.2620e-06, 1.8445e-04, -2.6210e-04, -7.0572e-05,\n 7.4014e-05, -3.1608e-04, 1.0134e-04, 5.6052e-45, 2.8914e-05,\n 8.4659e-05, 7.4354e-05, -2.1309e-05, -4.2984e-05, 4.8063e-04,\n -1.2969e-04, -6.9257e-05, 1.1480e-04, 2.9987e-05, -1.7517e-05,\n -4.6342e-04, -1.9304e-05, 1.3261e-05, 5.6052e-45, 8.3010e-06,\n -1.8000e-05, 3.4677e-05, 1.9226e-05, 6.3529e-05, -2.0970e-05,\n 1.1591e-04, -2.7765e-04, -5.6052e-45, 4.1092e-05, -8.4599e-05,\n 3.5604e-05, 4.7008e-05, 1.2554e-04, -1.0503e-04, 1.1108e-05,\n 9.1951e-05, -1.7860e-04, 6.0412e-05, 1.0916e-04, -1.1216e-06,\n -1.0128e-04, -8.4402e-11, 4.9091e-05, 5.6052e-45, -3.3904e-06,\n -1.4325e-05, 2.9898e-05, 1.5733e-04, 6.0061e-05, 5.6052e-45,\n 8.4418e-05, 1.0916e-04, -2.9743e-05, 7.3438e-05, -8.2105e-06,\n -3.1672e-05, -1.4579e-04, 3.0832e-05, 1.7042e-05, -5.2231e-05,\n 1.5488e-04, -6.4987e-05, -6.4436e-08, 5.6052e-45, -6.3597e-05,\n 3.6016e-05, -3.4403e-04, 1.5039e-04, -3.9174e-04, 1.4728e-04,\n -3.7639e-04, 2.3566e-05, 8.7664e-06, 4.7571e-05, -1.0533e-04,\n -2.2126e-05, 5.6052e-45, -2.0378e-05, 1.0171e-04, -7.1242e-05,\n 1.7051e-05, -2.4669e-06, 1.5573e-08, 7.8447e-05, 1.1643e-04,\n 2.4580e-04, -1.7523e-05, 9.8476e-07, 5.6052e-45, 5.6052e-45,\n 1.2230e-04, -4.3845e-05, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 2.4881e-05, -1.7767e-04, -3.8357e-05, 9.2008e-05, -1.7059e-04,\n -5.6169e-05, -2.3600e-04, 7.0385e-05, -2.4619e-05, -5.6052e-45,\n -1.0185e-04, -6.7543e-05, 2.1006e-05, 5.6052e-45, -9.0692e-05,\n 6.5383e-05, -9.0332e-05, 2.4078e-05, 4.3493e-05, 4.5471e-05,\n 1.2336e-04, -4.6968e-06, -3.2414e-05, -9.3189e-05, -1.5567e-06,\n -3.5074e-05, -1.1660e-04, 9.1233e-05, 5.6052e-45, 6.5415e-05,\n -2.2935e-04, 3.0779e-05, 5.6052e-45, -6.7117e-05, 5.3318e-05,\n 7.1927e-05, 5.8392e-05, -1.1579e-05, 3.4361e-05, -4.6428e-06,\n -1.3955e-06, 5.6052e-45, -1.7393e-04, -2.5234e-04, 5.3000e-06,\n -9.8991e-06, 1.9618e-04, 7.6443e-05, 7.3790e-05, -1.5924e-04,\n 4.1227e-05, -8.3633e-06, -5.5199e-05, -6.4029e-06, 1.6722e-04,\n -1.3585e-04, 7.9882e-05, -8.2658e-05, 1.7762e-04, 1.1994e-04,\n 7.7944e-05, 1.0208e-06, -1.4618e-04, 3.1729e-05, -9.3161e-05,\n -9.3007e-06, -1.0199e-04, -3.0214e-04, -5.0228e-05, 1.7326e-05,\n 5.6052e-45, -1.0791e-04, 6.9524e-06, 6.8538e-05, 4.8785e-05,\n -5.9602e-05, 1.0645e-04, 9.5111e-05, 9.3567e-05, -9.1263e-05,\n 2.6586e-05, -1.1018e-04, -1.3177e-04, 5.5136e-05, -1.5823e-05,\n 1.6781e-06, -4.4973e-05, -3.1025e-05, 5.6052e-45, 1.4700e-04,\n 7.3040e-05, -6.5949e-05, -1.0560e-04, 1.5101e-04, 1.0070e-04,\n -1.7089e-04, 5.6052e-45, -4.0446e-05, 6.8111e-05, 3.2299e-05,\n -1.3718e-05, 1.0884e-04, 5.8561e-05, 1.4967e-04, -1.7478e-04,\n 5.5226e-05, 3.9817e-05, 5.6052e-45, -3.4492e-05, -1.7767e-04,\n 9.1644e-05, -2.8791e-05, -7.3023e-05, -1.3001e-05, 1.3151e-04,\n 5.6052e-45, -1.0105e-04, -1.2606e-04, 2.2201e-05, 7.5053e-05,\n -1.3393e-04, 2.2534e-04, -1.2907e-05, 1.5013e-04, 3.4749e-05,\n 4.0745e-05, -5.3109e-05, -4.3454e-07, -4.7733e-05, 3.9174e-05,\n 5.6052e-45, -1.8733e-05, 1.0224e-05, -2.5377e-05, -1.4553e-04,\n -7.4754e-05, 7.3736e-05, 5.6052e-45, 6.1199e-05, 5.6052e-45,\n 3.5704e-04, -1.8321e-05, 8.8511e-05, -7.7434e-05, 8.3819e-05,\n -4.7208e-05, 2.4364e-05, 4.1879e-06, -9.5273e-05, -8.7779e-05,\n 1.3414e-04, -5.6052e-45, -1.0957e-04, -3.2330e-05, 5.6052e-45,\n 1.9042e-04, 1.5884e-04, 1.2635e-05, -5.5440e-05, -6.6424e-05,\n -2.0537e-05, 5.6052e-45, -8.2701e-05, 8.2325e-05, 1.6928e-04,\n 9.8139e-05, 2.1447e-04, -4.1205e-05, 1.7929e-05, 3.8252e-05,\n 3.1297e-06, -6.9015e-05, -1.2699e-04, -6.4814e-05, -3.5941e-04,\n -2.7355e-05, 2.1245e-06, 1.4619e-04, 2.1627e-05, 1.1990e-04,\n 1.0346e-04, 4.6594e-05, -1.4110e-04, -1.2765e-05, -5.9671e-05,\n 5.6052e-45, -2.1466e-04, 5.6052e-45, 6.2670e-05, 5.4257e-05,\n 4.1252e-06, 2.0069e-07, 1.2483e-05, -4.7608e-05, -1.3693e-05,\n 2.1601e-05, 5.6052e-45, 1.5222e-04, -1.4990e-04, -6.2353e-05,\n -1.1900e-05, 4.3843e-05, 5.6052e-45, 5.6052e-45, 8.6596e-06,\n -2.0068e-05, 6.5634e-10, 1.6175e-04, -9.2979e-05, -4.5357e-06,\n -7.3964e-05, 5.6052e-45, 2.4775e-05, -2.5325e-05, -1.1181e-04,\n -5.8519e-05, -5.6319e-06, 1.3475e-04, -4.5591e-05, -3.5382e-05,\n -6.6902e-05, 4.7747e-05, -1.7127e-04, 8.5852e-05, 6.7748e-05,\n 2.0767e-04, 2.6949e-06, -2.6934e-06, 4.6227e-05, 2.4897e-05,\n 2.0819e-04, -6.8990e-06, 9.0135e-06, -1.8648e-04, 8.0874e-05,\n 2.2404e-05, -5.8739e-05, -2.7193e-05, -1.2047e-05, 3.8849e-05,\n 1.1425e-04, 5.6052e-45, -9.5623e-06, 7.1133e-05, -2.5090e-05,\n 1.3318e-04, -2.5409e-04, -4.0115e-06, 1.8578e-06, 5.6052e-45,\n 6.8821e-06, 1.0957e-04, 9.0231e-05, 5.6052e-45, -1.6217e-04,\n 5.6052e-45, 3.1255e-05, 6.1926e-05, 2.9526e-05, -3.6155e-04,\n 5.8803e-05, 8.3979e-05, -2.3119e-08, -3.2408e-05, 5.4701e-05,\n 1.2841e-04, 4.6234e-05, 1.5196e-04, -4.5652e-05, -2.7551e-05,\n -8.2365e-05, -8.3169e-05, 4.9639e-05, -8.8392e-06, 1.3806e-04,\n 7.9803e-05, 5.6052e-45, -1.4632e-04, 1.3225e-05, 5.6052e-45,\n -8.0408e-06, -1.9889e-05, 5.7814e-06, -1.0916e-04, -4.1572e-05,\n -3.9116e-05, 7.4170e-05, 1.6995e-04, 9.8269e-05, 2.3556e-04,\n -8.8282e-44, 6.2668e-05, 1.8452e-05, -2.2965e-04, 1.0121e-04,\n -2.0938e-05, 9.3765e-05, -2.8954e-05, -9.6139e-05, 1.5137e-05,\n 5.8620e-05, -5.0483e-05, 2.6956e-05, 8.5260e-05, 5.6052e-45,\n 8.6723e-05, -9.7520e-06, -2.1795e-05, 5.6677e-05, 7.3113e-05,\n -6.0831e-05, -1.3910e-04, 1.7094e-05, 1.2432e-05, -3.1050e-06,\n 8.4306e-05, 1.9496e-04, -2.4183e-15, -9.6555e-05, -4.5010e-32,\n -5.3339e-05, -8.8532e-05, 2.4145e-05, 1.2026e-04, 1.7821e-04,\n 1.1394e-05, -1.7625e-05, -2.6625e-04, -3.3426e-06, 2.0769e-05,\n -4.1537e-05, 9.1242e-05, -8.4406e-05, 2.3981e-05, 5.6052e-45,\n 5.6052e-45, 1.4304e-04, 6.1875e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.2831e-07, 7.8835e-08, 1.1975e-07, 1.1083e-07, 2.7089e-07, 9.7589e-11,\n 2.7010e-07, 1.1604e-07, 8.4197e-08, 2.4342e-07, 1.1561e-07, 4.1218e-08,\n 1.3980e-07, 4.8864e-08, 1.3590e-07, 1.5475e-07, 1.8933e-07, 9.7735e-08,\n 1.1497e-07, 1.0996e-07, 4.2841e-08, 4.6955e-08, 1.0673e-07, 1.6627e-07,\n 2.1611e-07, 2.4904e-07, 1.0364e-07, 2.2161e-13, 1.6223e-07, 1.5843e-07,\n 5.4412e-08, 1.8164e-07, 1.2861e-07, 1.9029e-07, 1.1467e-07, 1.1823e-07,\n 5.0594e-08, 1.2410e-07, 1.3814e-07, 1.4044e-07, 2.1348e-07, 1.3688e-07,\n 2.2986e-07, 2.4630e-07, 1.7245e-07, 9.6979e-08, 1.4059e-07, 1.5480e-07,\n 3.3408e-07, 2.3735e-07, 1.9008e-10, 3.3312e-07, 1.2550e-07, 1.3018e-07,\n 1.3802e-07, 7.5437e-08, 1.7158e-07, 9.5840e-08, 6.6703e-08, 1.6074e-07,\n 7.3181e-08, 1.2519e-07, 3.1346e-07, 1.1711e-07, 5.4631e-08, 1.4526e-07,\n 2.4939e-07, 1.3793e-07, 6.6989e-08, 6.1171e-08, 1.5723e-07, 8.6475e-11,\n 9.4480e-08, 4.1507e-07, 2.1404e-07, 1.8806e-07, 7.2521e-08, 3.2092e-07,\n 7.8551e-08, 1.5137e-07, 9.5632e-08, 1.6666e-07, 2.2959e-07, 1.6572e-12,\n 9.5166e-08, 1.7150e-07, 1.6618e-07, 2.5501e-07, 1.0166e-07, 3.4299e-07,\n 1.7882e-07, 6.9997e-08, 1.6664e-07, 1.7929e-07, 9.3558e-08, 1.9799e-07,\n 1.4364e-07, 8.3178e-08, 1.3263e-07, 1.9286e-07, 1.6292e-07, 1.6182e-07,\n 3.0819e-07, 9.4559e-08, 1.6943e-07, 1.4908e-07, 1.2463e-07, 1.2261e-07,\n 1.2437e-07, 7.8194e-08, 1.8852e-07, 1.6853e-07, 1.3706e-07, 6.4679e-08,\n 1.9734e-07, 1.7178e-07, 1.9068e-07, 1.1823e-07, 7.6092e-08, 4.5368e-11,\n 1.4799e-07, 2.0619e-07, 1.2484e-07, 1.1369e-10, 1.4194e-07, 1.8260e-07,\n 1.4154e-07, 9.1256e-08, 2.1497e-07, 9.4471e-08, 1.2992e-07, 1.0975e-07,\n 4.9661e-12, 2.4187e-07, 1.9821e-07, 1.4403e-07, 2.0179e-14, 1.4593e-07,\n 1.0896e-07, 9.3644e-08, 5.0069e-11, 2.7533e-11, 1.0684e-07, 8.2629e-08,\n 1.0667e-13, 1.4189e-10, 2.1111e-07, 1.7993e-07, 3.8122e-10, 9.2490e-08,\n 1.2195e-07, 1.7369e-07, 1.2866e-07, 1.7901e-07, 4.6329e-08, 1.7994e-07,\n 1.4216e-07, 1.5305e-07, 1.5406e-10, 8.8328e-08, 2.0359e-07, 1.8017e-07,\n 1.9046e-07, 2.9282e-07, 1.1386e-07, 1.5698e-07, 2.7590e-08, 2.0057e-16,\n 1.4570e-07, 1.5285e-07, 2.1889e-07, 2.1374e-07, 1.5585e-07, 1.7286e-07,\n 1.7271e-07, 1.4085e-07, 1.1321e-07, 1.9387e-07, 1.7832e-07, 2.6723e-07,\n 2.0759e-07, 1.6794e-07, 1.5630e-07, 1.2358e-07, 8.4960e-15, 1.1719e-19,\n 2.5460e-07, 1.7721e-07, 8.2262e-08, 9.1652e-08, 9.4354e-08, 1.0355e-07,\n 1.9285e-07, 2.3172e-07, 1.9008e-07, 9.8755e-08, 2.4754e-07, 1.8396e-07,\n 1.5351e-07, 1.2836e-21, 4.3904e-08, 9.5908e-08, 1.7584e-07, 1.6028e-07,\n 2.5037e-07, 1.3867e-07, 1.9192e-07, 1.1454e-07, 5.2882e-08, 9.6215e-13,\n 2.1426e-07, 1.3499e-12, 9.3998e-08, 2.7865e-07, 1.7437e-07, 1.2379e-07,\n 1.5386e-07, 1.0282e-07, 1.7268e-07, 1.4373e-07, 5.2500e-08, 2.4251e-07,\n 2.2427e-07, 1.0942e-07, 6.4549e-08, 1.9005e-10, 1.7753e-07, 2.9705e-07,\n 2.7250e-07, 2.9689e-08, 1.4526e-07, 1.2964e-07, 1.6821e-07, 3.4815e-07,\n 1.2026e-10, 2.1915e-07, 1.8790e-07, 2.2820e-14, 9.8907e-08, 1.4693e-07,\n 2.9290e-08, 6.2556e-11, 2.5916e-07, 1.5958e-07, 1.1549e-07, 1.6191e-07,\n 9.2879e-08, 1.6551e-07, 1.6365e-07, 7.9796e-11, 2.0054e-07, 3.0862e-07,\n 1.0217e-07, 2.0116e-07, 1.5177e-07, 2.4505e-07, 2.4397e-14, 8.5326e-08,\n 1.1957e-07, 3.1166e-07, 1.4366e-19, 1.3975e-07, 1.6399e-07, 1.0319e-07,\n 1.8390e-07, 9.3911e-08, 9.0408e-08, 1.0758e-07, 1.8804e-07, 2.3285e-07,\n 1.7552e-07, 2.0642e-07, 1.1103e-07, 1.5153e-07, 2.8263e-07, 1.4024e-07,\n 2.3541e-07, 3.2482e-13, 8.4294e-08, 2.6203e-07, 3.5467e-10, 2.9966e-07,\n 7.3008e-08, 2.4355e-07, 1.8710e-07, 1.2610e-07, 2.8211e-07, 1.9532e-07,\n 1.1553e-07, 1.8804e-07, 1.3760e-07, 1.2104e-07, 6.1363e-08, 9.1622e-08,\n 2.1649e-07, 8.9807e-08, 1.2477e-07, 1.9518e-07, 1.2794e-07, 5.9230e-08,\n 1.6406e-07, 1.4811e-07, 5.7825e-08, 1.0195e-07, 1.5871e-07, 3.5050e-08,\n 4.2095e-08, 8.2458e-08, 1.8160e-07, 1.7110e-07, 1.8040e-07, 2.0593e-14,\n 1.7658e-07, 1.3343e-07, 2.0231e-07, 4.0905e-14, 5.4089e-08, 3.0430e-07,\n 7.3112e-08, 1.1244e-07, 1.6697e-07, 4.4856e-08, 8.9548e-08, 1.8273e-07,\n 1.2095e-07, 1.4840e-10, 1.3726e-07, 1.5653e-07, 1.4608e-07, 2.0512e-07,\n 1.6992e-07, 6.4597e-08, 1.4089e-07, 1.2128e-07, 7.1303e-08, 1.8041e-07,\n 1.5103e-07, 1.3927e-07, 1.1146e-19, 7.2460e-09, 2.6282e-07, 1.6153e-07,\n 1.7683e-07, 7.9917e-08, 2.4417e-07, 1.6417e-07, 2.2003e-07, 5.5896e-08,\n 1.8729e-07, 1.8227e-07, 2.1909e-07, 1.5788e-07, 9.2848e-08, 3.0344e-11,\n 1.4666e-07, 3.5292e-07, 6.7597e-08, 8.0603e-08, 1.5194e-07, 1.0514e-07,\n 1.7822e-07, 2.0553e-07, 1.2584e-10, 1.8304e-07, 1.3752e-07, 5.8689e-08,\n 9.7881e-08, 2.1554e-07, 1.1277e-07, 7.1661e-08, 8.8973e-08, 5.8726e-08,\n 8.8394e-08, 1.2571e-07, 1.2477e-07, 1.0719e-07, 2.1908e-11, 7.9464e-08,\n 5.9748e-11, 9.1225e-08, 3.7134e-07, 1.6494e-07, 9.4660e-08, 1.6226e-07,\n 1.2486e-10, 2.5789e-07, 9.2184e-08, 2.2079e-07, 2.6215e-07, 9.3403e-08,\n 1.3190e-07, 2.6655e-07, 2.2141e-07, 4.3144e-08, 2.1729e-07, 2.3754e-07,\n 5.2850e-08, 5.2293e-11, 4.2943e-10, 5.9558e-08, 1.3728e-07, 2.5096e-07,\n 2.2236e-07, 2.9772e-07, 1.6785e-07, 2.5167e-07, 2.2849e-08, 1.9994e-07,\n 1.7409e-07, 2.2715e-07, 2.2258e-07, 7.7301e-16, 8.5321e-08, 1.7055e-07,\n 9.9993e-08, 1.6192e-07, 4.2466e-08, 2.3894e-07, 1.2571e-07, 1.2247e-07,\n 1.5328e-07, 2.2692e-07, 2.5414e-07, 5.2282e-14, 3.4294e-21, 1.2066e-07,\n 3.5301e-08, 2.8841e-11, 7.9289e-14, 1.6582e-13, 1.8588e-07, 1.6123e-07,\n 1.4180e-07, 1.5382e-07, 2.2172e-07, 2.4935e-08, 2.4100e-07, 1.8557e-07,\n 1.9297e-07, 4.3236e-13, 1.0109e-07, 1.3145e-07, 1.0630e-07, 5.2429e-13,\n 1.2806e-07, 2.9403e-07, 1.0930e-07, 1.2879e-07, 9.5214e-08, 7.3850e-08,\n 3.1836e-07, 8.4224e-08, 1.5709e-07, 1.9552e-07, 2.1044e-07, 1.6637e-07,\n 9.4918e-08, 3.0636e-07, 1.6249e-11, 1.3069e-07, 9.4134e-08, 4.3312e-08,\n 3.9587e-15, 1.4510e-07, 1.7645e-07, 2.3014e-07, 8.1495e-08, 1.0928e-07,\n 1.7427e-07, 9.9783e-08, 9.4182e-08, 1.6025e-12, 5.7380e-08, 2.4504e-07,\n 7.6571e-08, 2.6336e-08, 1.4169e-07, 1.2426e-07, 5.2072e-08, 2.2054e-07,\n 1.6611e-07, 1.7326e-07, 1.7315e-07, 1.8136e-07, 1.8374e-07, 1.7230e-07,\n 2.3466e-07, 2.1170e-07, 1.5341e-07, 1.4902e-07, 1.4816e-07, 1.0382e-07,\n 2.3125e-07, 6.8401e-08, 2.1317e-07, 6.1400e-08, 1.7588e-07, 1.9143e-07,\n 1.9782e-07, 5.7935e-08, 8.5786e-10, 3.7417e-07, 1.4126e-07, 1.3105e-07,\n 1.5596e-07, 8.6862e-08, 2.0110e-07, 1.7027e-07, 1.4499e-07, 2.0062e-07,\n 1.1743e-07, 2.2115e-07, 3.3995e-07, 2.4315e-07, 1.1944e-07, 1.4273e-07,\n 1.2757e-07, 8.3071e-08, 1.1882e-10, 1.6534e-07, 1.3527e-07, 9.0788e-08,\n 1.4204e-07, 1.3316e-07, 1.4192e-07, 2.1360e-07, 3.4048e-15, 2.7393e-07,\n 1.0938e-07, 1.1590e-07, 1.8752e-07, 1.8352e-07, 1.5113e-07, 1.8544e-07,\n 1.5928e-07, 8.8225e-08, 1.5824e-07, 3.9677e-10, 7.2909e-08, 1.7175e-07,\n 2.9230e-07, 1.9404e-07, 2.5022e-07, 3.6183e-07, 1.7020e-07, 1.4298e-12,\n 1.8184e-07, 1.9170e-07, 2.4780e-07, 1.1442e-07, 2.2179e-07, 2.0976e-07,\n 1.5138e-07, 1.4580e-07, 1.4132e-07, 1.8396e-07, 1.4203e-07, 1.7264e-07,\n 1.6327e-07, 1.3491e-07, 2.7022e-14, 1.9990e-07, 1.9329e-07, 9.3029e-08,\n 2.0107e-07, 1.6634e-07, 1.2807e-07, 5.6572e-13, 1.6130e-07, 2.7593e-11,\n 1.8473e-07, 9.4365e-08, 1.7907e-07, 1.1016e-07, 7.6176e-08, 1.1599e-07,\n 1.9837e-07, 6.5648e-08, 1.3194e-07, 1.1039e-07, 1.8114e-07, 7.1754e-14,\n 7.9211e-08, 3.5429e-08, 9.5254e-18, 2.5400e-07, 1.8547e-07, 1.3168e-07,\n 1.0945e-07, 8.1519e-08, 1.6609e-07, 1.1073e-14, 1.1400e-07, 1.4864e-07,\n 1.0909e-07, 1.1116e-07, 7.3128e-08, 1.2005e-07, 1.1564e-07, 7.4782e-08,\n 2.4570e-07, 1.0353e-07, 1.8088e-07, 1.6858e-07, 1.9503e-07, 2.0353e-07,\n 2.8291e-07, 6.0443e-08, 1.7968e-07, 1.5253e-07, 1.6400e-07, 9.1780e-08,\n 1.8151e-07, 3.2965e-08, 1.5789e-07, 5.0069e-14, 2.4271e-07, 5.8095e-11,\n 1.8866e-07, 2.9964e-07, 2.8999e-07, 2.3843e-07, 2.1427e-07, 2.6388e-07,\n 9.8598e-08, 2.0798e-07, 4.9677e-15, 1.9585e-07, 1.9750e-07, 6.1221e-08,\n 3.7398e-08, 3.8916e-08, 1.3042e-12, 4.0648e-14, 1.7173e-07, 1.8718e-07,\n 1.5488e-16, 1.3757e-07, 2.0822e-07, 2.4024e-07, 2.7724e-07, 1.6496e-14,\n 1.6027e-07, 8.3112e-08, 8.7305e-08, 1.1929e-07, 2.0394e-07, 1.7410e-07,\n 2.1293e-07, 7.2864e-08, 1.7607e-07, 2.0699e-07, 2.5212e-07, 1.9218e-07,\n 1.1574e-07, 1.4342e-07, 2.6811e-07, 6.7538e-08, 1.3887e-07, 8.7046e-08,\n 3.1437e-07, 8.2063e-08, 2.2938e-07, 1.2597e-07, 1.0912e-07, 2.8958e-07,\n 2.5934e-07, 5.7909e-08, 1.2146e-07, 8.0593e-08, 1.4381e-07, 1.4497e-15,\n 7.3044e-08, 1.5426e-07, 1.3671e-07, 1.7924e-07, 3.0557e-07, 1.3344e-07,\n 8.1144e-11, 1.5608e-13, 2.7785e-07, 1.4119e-07, 1.6066e-07, 3.6924e-10,\n 1.7762e-07, 6.8190e-11, 2.1135e-07, 1.1818e-07, 1.3039e-07, 2.2939e-07,\n 1.0824e-07, 1.1487e-07, 5.6391e-10, 6.2336e-08, 1.1870e-07, 1.0706e-07,\n 2.2749e-07, 1.4476e-07, 3.6668e-08, 8.5882e-08, 1.7310e-07, 1.9516e-07,\n 1.5165e-07, 2.1458e-07, 2.1055e-07, 1.3111e-07, 3.5269e-13, 4.4671e-07,\n 2.8273e-07, 2.7797e-15, 2.5016e-07, 1.0818e-07, 5.8329e-08, 1.7170e-07,\n 1.8592e-07, 5.4891e-08, 1.7747e-07, 1.6642e-07, 1.0317e-07, 2.1918e-07,\n 2.4807e-10, 3.3138e-07, 1.5726e-07, 2.2528e-07, 1.3155e-07, 7.6904e-08,\n 1.8493e-07, 2.7469e-07, 4.6596e-07, 2.2559e-07, 8.2020e-08, 1.5538e-07,\n 1.5293e-07, 2.4268e-07, 3.3305e-10, 7.1665e-08, 1.7651e-07, 1.5316e-07,\n 1.1554e-07, 1.0802e-07, 1.5200e-07, 1.7261e-07, 5.2591e-08, 1.1501e-07,\n 1.1969e-07, 1.6211e-07, 1.4236e-07, 2.2191e-10, 1.4357e-07, 6.6674e-10,\n 1.4838e-07, 1.0033e-07, 1.2919e-07, 1.3396e-07, 1.9060e-07, 2.6000e-07,\n 4.6359e-08, 1.0982e-07, 1.8557e-07, 1.8185e-07, 6.8053e-08, 1.1152e-07,\n 1.9576e-07, 2.1048e-08, 3.5304e-13, 5.3363e-20, 1.9559e-07, 1.2772e-07],\n device='cuda:0')" }, "4": { - "step": "tensor(6260.)", - "exp_avg": "tensor([ 5.6052e-45, 5.0984e-05, 5.6052e-45, ..., -3.8832e-04,\n 1.5855e-04, -6.5029e-10], device='cuda:0')", - "exp_avg_sq": "tensor([8.6871e-10, 4.7850e-07, 1.6634e-10, ..., 1.5138e-06, 1.2812e-06,\n 1.4478e-10], device='cuda:0')" + "step": "tensor(12520.)", + "exp_avg": "tensor([[ 3.7913e-06, 2.3616e-06, 2.6045e-06, ..., 5.6052e-45,\n 2.3472e-06, 5.2559e-06],\n [ 4.5047e-07, -3.1322e-06, -3.4756e-07, ..., -5.6052e-45,\n 1.6621e-05, 2.3197e-06],\n [ 1.2454e-05, -1.6514e-06, 2.9350e-06, ..., -5.6052e-45,\n -8.7886e-06, 8.9616e-06],\n ...,\n [-5.3137e-06, 1.4171e-06, -4.4351e-06, ..., -5.6052e-45,\n -1.2921e-05, -4.1211e-06],\n [ 4.1929e-07, 9.0702e-07, 2.7188e-06, ..., -5.6052e-45,\n 2.0623e-06, 1.0666e-05],\n [ 9.3573e-06, -3.4758e-06, 1.0540e-05, ..., -5.6052e-45,\n -8.1107e-06, 5.2951e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.1060e-10, 1.2806e-10, 2.2165e-10, ..., 1.4957e-22, 7.5602e-10,\n 2.4200e-10],\n [1.3156e-09, 2.0569e-10, 4.8627e-10, ..., 7.6442e-21, 1.5856e-09,\n 1.2756e-09],\n [2.0091e-09, 2.0868e-10, 1.5649e-09, ..., 3.3338e-21, 1.4527e-09,\n 1.1337e-09],\n ...,\n [1.8264e-09, 3.1459e-10, 5.5927e-10, ..., 2.9167e-21, 7.8048e-10,\n 7.1168e-10],\n [2.1710e-09, 2.3202e-10, 9.5616e-10, ..., 4.3469e-23, 1.3147e-09,\n 1.4398e-09],\n [1.9604e-09, 2.8769e-10, 1.5251e-09, ..., 2.7858e-21, 1.0443e-09,\n 1.1364e-09]], device='cuda:0')" }, "5": { - "step": "tensor(6260.)", - "exp_avg": "tensor([[ 5.6052e-45, -1.0455e-06, -5.6052e-45, ..., 4.6188e-07,\n 3.0311e-06, -1.9946e-11],\n [ 5.6052e-45, 1.4908e-06, -5.6052e-45, ..., -2.0474e-06,\n -1.1939e-06, 3.8515e-11],\n [ 5.6052e-45, 9.1025e-07, 5.6052e-45, ..., 1.0417e-06,\n 3.8167e-06, 1.4708e-10],\n ...,\n [-5.6052e-45, -2.0839e-06, 5.6052e-45, ..., -5.8479e-06,\n 2.3616e-06, -2.1579e-10],\n [ 5.6052e-45, 1.5614e-06, 5.6052e-45, ..., -3.0806e-06,\n -1.6603e-06, 6.2062e-11],\n [-5.6052e-45, -6.4165e-07, 5.6052e-45, ..., 6.4397e-06,\n -2.3729e-06, -1.1015e-10]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.6938e-14, 2.0620e-11, 2.8350e-14, ..., 1.4765e-10, 1.0219e-10,\n 1.0152e-12],\n [8.3953e-14, 7.7299e-11, 7.5424e-14, ..., 1.1041e-10, 8.8559e-11,\n 1.9811e-12],\n [5.6070e-14, 4.3398e-11, 4.6499e-13, ..., 2.5987e-10, 1.1993e-10,\n 1.2245e-12],\n ...,\n [1.3614e-14, 2.9772e-11, 1.1203e-12, ..., 3.1999e-10, 1.2742e-10,\n 1.8369e-12],\n [2.8116e-13, 2.3667e-10, 1.0623e-13, ..., 3.8768e-10, 2.0683e-10,\n 2.1229e-12],\n [8.6972e-15, 4.2253e-11, 1.0766e-13, ..., 4.0483e-10, 1.4063e-10,\n 1.0507e-12]], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 1.1599e-07, 2.2857e-07, -3.4151e-11, ..., -3.1298e-08,\n 2.0240e-07, 2.0271e-08],\n [-5.3855e-07, 3.7336e-06, 2.1577e-09, ..., -2.6119e-06,\n 1.2509e-06, 3.5843e-09],\n [-6.0029e-07, -2.2290e-07, -3.1713e-09, ..., 5.7409e-07,\n 8.5152e-07, 4.8019e-07],\n ...,\n [ 2.3804e-06, -2.5157e-07, -5.1364e-09, ..., 1.6934e-08,\n -1.8594e-09, 3.6059e-06],\n [-4.2838e-10, -9.1303e-06, 1.7475e-25, ..., 1.0324e-06,\n -2.1559e-06, 1.9967e-08],\n [-7.0799e-06, 8.2117e-07, 7.1301e-08, ..., 2.0003e-06,\n -1.6733e-07, 6.6642e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.2898e-12, 3.3457e-12, 1.9259e-13, ..., 2.5661e-11, 3.3699e-12,\n 1.1815e-13],\n [9.6188e-11, 7.7773e-11, 1.1753e-13, ..., 1.4002e-10, 4.5675e-11,\n 4.5412e-12],\n [3.7172e-12, 7.0033e-12, 1.5784e-14, ..., 2.1418e-12, 5.2960e-11,\n 2.5451e-12],\n ...,\n [1.9349e-10, 5.0861e-11, 4.3159e-15, ..., 7.7778e-12, 3.3042e-11,\n 8.4707e-10],\n [2.1370e-12, 3.5325e-11, 6.6160e-17, ..., 1.2003e-10, 9.3455e-11,\n 1.2041e-12],\n [3.2000e-11, 8.0542e-12, 4.6934e-12, ..., 1.0697e-10, 2.7268e-11,\n 2.2689e-10]], device='cuda:0')" }, - "15": { - "step": "tensor(8764.)", + "6": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-1.5622e-06, 2.7535e-05, 1.4905e-05, ..., 1.4736e-05,\n -7.9414e-05, -1.2797e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.2895e-09, 5.8635e-09, 3.0205e-09, ..., 4.9408e-09, 4.8330e-09,\n 3.4006e-09], device='cuda:0')" + }, + "7": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[-4.0175e-07, 2.6153e-07, -4.4164e-07, ..., 1.5109e-08,\n -3.6510e-07, -2.9767e-08],\n [ 3.6301e-07, -1.6075e-07, -1.7987e-07, ..., -2.7110e-07,\n 1.6054e-07, 4.4112e-08],\n [ 4.3648e-07, 1.2500e-07, 4.6372e-07, ..., 3.8124e-07,\n 4.4902e-07, 1.5755e-07],\n ...,\n [-1.0879e-07, 4.6580e-07, 2.0011e-07, ..., -6.7723e-08,\n -4.2241e-07, -1.6531e-07],\n [-1.9719e-08, -5.7121e-07, 9.1336e-07, ..., -1.8679e-07,\n 3.8208e-06, -3.0580e-07],\n [ 8.3522e-08, 1.4345e-06, 2.5744e-06, ..., -8.0709e-08,\n -3.2101e-06, 3.6498e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4865e-12, 2.8154e-12, 1.4179e-11, ..., 2.5227e-12, 2.0677e-12,\n 2.4300e-12],\n [2.4543e-12, 8.7770e-12, 1.0210e-11, ..., 3.8514e-12, 4.0809e-12,\n 3.1522e-12],\n [2.9186e-12, 6.6856e-12, 5.1441e-12, ..., 3.7565e-12, 2.6595e-12,\n 5.2788e-12],\n ...,\n [1.5313e-12, 5.9537e-12, 6.3315e-12, ..., 5.8116e-12, 2.9812e-12,\n 5.9506e-12],\n [2.2939e-12, 8.7969e-12, 2.5779e-11, ..., 5.3767e-12, 3.7568e-12,\n 5.0467e-12],\n [2.9056e-12, 4.8691e-12, 5.6937e-11, ..., 4.2881e-12, 5.2000e-12,\n 3.3737e-12]], device='cuda:0')" + }, + "14": { + "step": "tensor(11268.)", "exp_avg": "tensor([5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.5001e-08], device='cuda:0')" + "exp_avg_sq": "tensor([3.3510e-10], device='cuda:0')" }, - "16": { - "step": "tensor(8764.)", + "15": { + "step": "tensor(11268.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.7827e-11, 1.5960e-10, 5.8611e-11], device='cuda:0')" + "exp_avg_sq": "tensor([4.3230e-13, 2.4472e-11, 1.8399e-11], device='cuda:0')" }, - "17": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([1.9904e-07, 1.4055e-08, 8.2488e-09, 1.5782e-08, 1.2417e-08],\n device='cuda:0')" + "16": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.6032e-08, 2.6823e-09, 2.9023e-09, 3.1003e-09], device='cuda:0')" + }, + "18": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.7754e-14, 1.7326e-14, 0.0000e+00, ..., 1.1432e-13, 4.1116e-14,\n 1.3197e-15],\n [5.9214e-15, 1.5222e-14, 0.0000e+00, ..., 1.3994e-14, 4.9209e-14,\n 1.3868e-14],\n [1.7796e-15, 6.2805e-15, 0.0000e+00, ..., 2.0989e-15, 1.3607e-14,\n 4.4564e-15],\n ...,\n [2.2550e-16, 1.2128e-15, 0.0000e+00, ..., 1.7296e-15, 2.3952e-14,\n 1.3494e-16],\n [4.3435e-14, 4.5663e-14, 0.0000e+00, ..., 5.5269e-14, 2.3166e-13,\n 1.3902e-14],\n [1.3567e-16, 1.0544e-16, 0.0000e+00, ..., 2.0869e-16, 2.0700e-15,\n 4.9030e-16]], device='cuda:0')" }, "19": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.5531e-16, 7.4714e-16, 1.3315e-16, ..., 3.0119e-17, 3.5662e-16,\n 1.9227e-16],\n [8.3769e-14, 8.4160e-14, 6.4408e-17, ..., 1.5492e-14, 3.0981e-15,\n 6.6014e-15],\n [8.7360e-13, 1.0002e-12, 9.9412e-17, ..., 8.0651e-14, 1.0915e-13,\n 3.3130e-14],\n ...,\n [2.6245e-14, 1.1832e-14, 2.2663e-15, ..., 7.5450e-16, 7.0051e-15,\n 8.6179e-16],\n [3.3201e-15, 2.7099e-15, 6.9125e-17, ..., 6.1889e-17, 3.4370e-16,\n 3.9804e-17],\n [3.0003e-12, 3.5136e-12, 5.3567e-16, ..., 2.7095e-13, 3.8508e-13,\n 1.3151e-13]], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.9263e-11, 1.7902e-11, 4.0904e-12, 7.4258e-12, 2.7016e-12, 2.6180e-14,\n 4.6482e-13, 7.3727e-13, 1.8842e-12, 1.0666e-13, 5.9756e-12, 3.0389e-11,\n 1.2522e-11, 1.5169e-11, 2.2901e-12, 1.4282e-11, 3.4040e-11, 2.8216e-12,\n 1.8873e-11, 3.0916e-11, 3.6904e-13, 1.2989e-11, 2.8081e-12, 2.7765e-11,\n 5.6171e-12, 1.1368e-11, 5.0664e-12, 3.1516e-12, 8.5820e-13, 5.3166e-12,\n 1.5905e-11, 7.9710e-12, 2.8963e-13, 7.3115e-11, 3.7909e-11, 1.8431e-11,\n 1.9111e-12, 2.1076e-13, 1.4486e-11, 1.4118e-11, 6.9264e-11, 1.7133e-12,\n 3.0573e-13, 9.6821e-13, 2.4442e-12, 1.7758e-11, 7.6355e-12, 4.9773e-12,\n 4.8433e-12, 5.6777e-12, 2.3226e-13, 8.1755e-12, 2.0765e-11, 1.2668e-11,\n 2.6864e-12, 1.4009e-12, 9.5468e-12, 1.2161e-13, 3.6722e-12, 1.3203e-12,\n 4.1125e-12, 9.0858e-12, 5.8595e-12, 3.3960e-12, 3.3284e-13, 2.1675e-12,\n 7.8816e-12, 4.6363e-13, 9.9344e-12, 9.3506e-12, 8.9432e-14, 3.0187e-12,\n 7.9856e-12, 3.6302e-13, 3.2038e-12, 2.7449e-14, 2.1242e-12, 5.4243e-11,\n 1.0785e-12, 9.9677e-14, 1.6628e-11, 2.2256e-11, 3.6474e-15, 1.2411e-13,\n 3.9648e-12, 1.3771e-11, 4.1435e-13, 7.0479e-12, 9.0351e-13, 1.5671e-11,\n 2.4885e-12, 2.2729e-11, 2.8972e-11, 2.0501e-12, 2.8702e-12, 1.1426e-11,\n 7.5324e-13, 4.4918e-11, 1.4983e-11, 5.3667e-11, 1.5317e-11, 1.0613e-12,\n 2.1178e-13, 6.4812e-12, 5.3865e-13, 5.3503e-11, 1.3430e-12, 3.8052e-12,\n 8.5742e-12, 6.6151e-14, 6.9027e-11, 1.2787e-12, 2.0530e-11, 8.7188e-12,\n 2.9576e-12, 4.0686e-11, 1.0137e-12, 4.9319e-12, 1.3617e-11, 6.5153e-12,\n 7.1071e-12, 1.3151e-11, 1.3059e-12, 2.1952e-13, 8.5604e-14, 4.7821e-12,\n 1.1405e-10, 8.8927e-13, 7.4857e-12, 1.4846e-12, 1.9978e-13, 3.6675e-11,\n 3.0824e-12, 5.1624e-12, 2.0893e-11, 1.4919e-11, 1.2609e-11, 3.8658e-11,\n 1.6669e-11, 3.6672e-15, 1.5205e-13, 2.4671e-13, 1.3425e-11, 5.5091e-11,\n 3.0856e-11, 2.0272e-12, 2.0734e-12, 2.6766e-12, 1.6512e-11, 3.7393e-14,\n 8.5571e-13, 1.7190e-11, 5.9344e-12, 6.6091e-12, 1.4785e-11, 5.0866e-13,\n 4.6443e-12, 1.2202e-11, 2.3706e-12, 3.9524e-11, 1.8740e-11, 3.0752e-12,\n 7.2945e-12, 1.9805e-11, 1.2520e-12, 8.9577e-12, 8.8182e-11, 1.3398e-11,\n 9.2578e-12, 4.2689e-12, 6.1745e-13, 1.4729e-11, 6.0208e-11, 1.5144e-11,\n 1.9901e-11, 1.4497e-13, 2.0066e-12, 6.9286e-12, 7.8958e-12, 8.4651e-12,\n 1.0389e-11, 4.8507e-12, 9.7399e-13, 5.8485e-12, 1.1132e-11, 3.0891e-14,\n 2.9158e-12, 1.1975e-11, 3.0375e-12, 4.8272e-11, 3.6084e-11, 3.7422e-12,\n 2.4773e-12, 1.0730e-12, 7.1612e-15, 1.7292e-10, 2.0264e-12, 3.4890e-12,\n 3.8521e-12, 8.4457e-11, 8.4143e-16, 7.8870e-11, 1.4836e-11, 2.2572e-12,\n 1.4411e-11, 1.7707e-10, 1.6280e-11, 5.0807e-12, 7.3738e-14, 2.2854e-11,\n 1.8452e-12, 3.3353e-11, 5.1931e-12, 1.0581e-11, 8.0999e-14, 1.3474e-11,\n 1.0361e-10, 5.8654e-14, 1.1420e-12, 3.3494e-12, 2.4659e-12, 6.8929e-11,\n 8.6808e-12, 4.9563e-11, 6.8710e-12, 8.3435e-15, 1.7702e-13, 1.0141e-12,\n 3.7587e-13, 1.4471e-11, 1.2372e-13, 1.3224e-13, 2.5119e-11, 9.8235e-12,\n 7.6614e-12, 1.7070e-12, 2.0433e-12, 1.5466e-11, 1.9364e-12, 1.7015e-12,\n 1.9739e-11, 8.0333e-14, 4.3457e-12, 2.6647e-11, 4.6451e-12, 1.3307e-13,\n 2.4868e-13, 1.1182e-11, 1.2625e-12, 4.0219e-11, 2.0968e-11, 2.1068e-12,\n 1.8112e-11, 2.1385e-12, 6.0488e-11, 1.0696e-12], device='cuda:0')" }, "20": { - "step": "tensor(8764.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.1993e-13, 7.8042e-11, 5.4291e-10, 1.3689e-11, 9.3312e-11, 3.6256e-10,\n 4.1016e-12, 5.5735e-10, 4.3543e-11, 1.0119e-12, 2.5140e-09, 8.0110e-12,\n 2.5368e-12, 5.7583e-11, 5.9518e-11, 4.7192e-10, 1.5927e-10, 1.5854e-11,\n 7.8933e-10, 2.3707e-10, 2.3740e-10, 2.2826e-11, 8.1327e-11, 1.6566e-10,\n 9.8799e-11, 1.6054e-11, 7.3321e-12, 1.0956e-09, 7.3622e-13, 2.3163e-11,\n 4.4634e-11, 2.1903e-10, 3.6287e-10, 1.3728e-11, 1.9251e-11, 5.2008e-11,\n 1.0201e-12, 6.5832e-11, 3.9647e-10, 2.0890e-12, 8.4284e-12, 2.6809e-11,\n 7.1898e-10, 1.2297e-10, 8.5002e-12, 6.6589e-13, 2.9509e-12, 2.3593e-11,\n 5.7936e-12, 1.2868e-13, 4.4890e-13, 5.2104e-12, 2.7854e-11, 1.5026e-10,\n 1.9683e-12, 2.2300e-10, 3.9255e-10, 1.7645e-09, 2.0088e-10, 3.7724e-13,\n 2.8785e-09, 9.7474e-14, 9.6493e-12, 1.4906e-10, 4.3695e-10, 6.4135e-10,\n 1.0443e-11, 1.1028e-11, 2.3170e-09, 2.1662e-11, 3.3466e-10, 9.1967e-11,\n 1.7187e-10, 2.9924e-11, 4.5219e-13, 4.3080e-11, 3.1179e-10, 5.0752e-12,\n 4.3665e-15, 1.7548e-13, 4.6004e-13, 6.8517e-12, 7.2313e-10, 3.0455e-10,\n 4.7121e-11, 1.1366e-11, 2.4642e-10, 4.2242e-11, 6.5283e-11, 1.6307e-09,\n 9.3666e-12, 6.1040e-11, 1.1250e-11, 4.6669e-10, 7.3266e-11, 1.3251e-11,\n 6.1616e-14, 1.8314e-11, 5.9164e-10, 1.1752e-10, 3.2017e-13, 4.3448e-10,\n 3.5014e-12, 2.1819e-13, 1.8493e-10, 3.2666e-11, 6.0012e-13, 7.8919e-12,\n 2.4542e-11, 3.2153e-11, 3.0758e-13, 6.6013e-12, 2.6035e-12, 7.4218e-13,\n 3.7562e-11, 1.5889e-11, 4.6463e-10, 1.2856e-11, 2.8099e-10, 1.2304e-10,\n 5.9203e-12, 1.0562e-10, 1.5968e-10, 1.2383e-11, 2.0975e-10, 2.3904e-11,\n 9.2949e-10, 1.5843e-11, 3.6681e-11, 2.5330e-12, 1.0826e-12, 1.6974e-11,\n 5.9689e-12, 7.8825e-11, 1.5607e-11, 2.5941e-10, 9.3860e-13, 6.0176e-12,\n 1.6391e-10, 2.3186e-11, 4.9209e-13, 1.3455e-12, 2.4220e-09, 6.1719e-09,\n 1.5019e-10, 6.5763e-13, 1.9748e-10, 1.6797e-10, 1.3035e-10, 3.4693e-12,\n 6.5308e-13, 1.1726e-10, 4.2792e-10, 1.2478e-10, 2.1098e-10, 6.3273e-11,\n 1.2145e-12, 6.0776e-12, 9.4243e-12, 6.4635e-10, 3.3048e-12, 9.6857e-11,\n 1.2147e-10, 3.0845e-12, 3.1292e-12, 4.4131e-09, 4.0578e-10, 9.9501e-13,\n 1.2544e-09, 9.1192e-14, 1.1757e-10, 6.3366e-12, 1.5588e-10, 3.3647e-11,\n 6.2527e-11, 8.6558e-10, 1.4609e-10, 6.8550e-11, 1.4921e-11, 3.9012e-10,\n 6.0051e-11, 1.4413e-10, 3.6449e-09, 4.6543e-11, 2.3450e-12, 3.6594e-14,\n 6.0651e-12, 5.6026e-10, 2.8969e-11, 1.2245e-10, 6.1443e-10, 2.3419e-10,\n 4.9078e-12, 2.7892e-12, 4.5074e-10, 7.8403e-11, 8.0988e-13, 1.7552e-10,\n 1.2943e-12, 1.5009e-10, 3.3170e-10, 1.8609e-11, 8.1966e-11, 2.3163e-12,\n 3.8530e-11, 1.1874e-09, 1.8415e-11, 1.3468e-10, 4.1609e-13, 1.0197e-09,\n 1.9328e-10, 1.1887e-12, 9.3050e-12, 7.6532e-13, 7.1683e-10, 7.2042e-12,\n 5.1082e-11, 3.0326e-10, 1.5739e-11, 1.3423e-10, 1.0798e-10, 8.3552e-11,\n 4.8426e-12, 2.7325e-10, 2.0664e-11, 9.5733e-11, 1.1143e-09, 1.0008e-11,\n 4.3131e-13, 3.5069e-12, 4.2312e-11, 6.1618e-11, 2.4125e-12, 8.0132e-11,\n 3.3469e-11, 3.8450e-10, 4.6410e-11, 2.6779e-10, 2.8359e-11, 4.7193e-12,\n 1.7013e-11, 2.8592e-12, 7.7423e-10, 5.1262e-11, 7.1542e-12, 1.5243e-10,\n 5.5489e-12, 1.6012e-11, 5.0363e-10, 1.3755e-11, 1.7402e-10, 9.9315e-13,\n 1.1153e-10, 7.5897e-12, 9.6023e-13, 1.8449e-09], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([9.6867e-14, 8.8582e-14, 5.9571e-15, 3.8874e-14, 4.5606e-15, 3.7613e-16,\n 8.2706e-16, 3.8406e-15, 2.2806e-15, 1.3397e-16, 1.1007e-14, 5.5119e-14,\n 3.1735e-14, 7.1388e-14, 7.3486e-15, 3.3640e-14, 1.1033e-13, 4.8938e-15,\n 6.1049e-14, 8.7960e-14, 1.8228e-16, 5.4636e-14, 6.1776e-15, 1.1331e-13,\n 2.2025e-14, 2.2063e-14, 1.2813e-14, 1.0472e-14, 3.1144e-15, 9.4101e-15,\n 4.8574e-14, 2.3649e-14, 1.8446e-15, 1.9684e-13, 1.4688e-13, 4.5965e-14,\n 3.7942e-15, 3.8399e-18, 3.0101e-14, 2.5521e-14, 2.9298e-13, 4.0870e-15,\n 1.3007e-15, 4.2477e-15, 4.8831e-15, 3.9947e-14, 1.6632e-14, 1.0500e-14,\n 7.4072e-15, 8.7724e-15, 1.8198e-16, 2.2338e-14, 4.3112e-14, 2.7616e-14,\n 3.6799e-15, 6.6063e-15, 3.0886e-14, 2.5444e-16, 1.2120e-14, 3.7396e-15,\n 7.4253e-15, 6.7780e-14, 1.6435e-14, 8.1567e-15, 5.0368e-18, 3.4627e-15,\n 1.7209e-14, 2.5172e-16, 1.8343e-14, 3.0274e-14, 2.1045e-15, 6.9360e-15,\n 8.8664e-15, 1.2188e-15, 5.8595e-15, 1.7740e-15, 1.2386e-14, 2.7541e-13,\n 9.4394e-16, 2.8168e-19, 6.9647e-14, 5.7702e-14, 5.3028e-16, 6.1310e-16,\n 7.7989e-15, 3.0768e-14, 1.6512e-15, 3.2994e-14, 2.6480e-15, 2.9221e-14,\n 7.5376e-15, 5.1937e-14, 1.0532e-13, 2.5993e-15, 7.9409e-15, 4.0271e-14,\n 1.6310e-15, 9.5674e-14, 5.8310e-14, 1.5090e-13, 4.5905e-14, 4.9408e-15,\n 2.5898e-15, 1.1487e-14, 2.2469e-16, 1.2573e-13, 1.4039e-15, 8.9709e-15,\n 1.7853e-14, 9.3489e-17, 1.7618e-13, 3.9861e-15, 5.8181e-14, 1.8307e-14,\n 5.3969e-15, 1.6422e-13, 3.9937e-15, 1.0516e-14, 4.1058e-14, 1.5805e-14,\n 1.3077e-14, 2.7389e-14, 1.0975e-15, 2.1182e-17, 8.7359e-16, 7.3591e-15,\n 5.1570e-13, 3.3724e-15, 2.3909e-14, 2.2643e-15, 7.3207e-17, 1.0938e-13,\n 7.5428e-15, 1.5359e-14, 4.0106e-14, 3.3692e-14, 3.5416e-14, 9.9776e-14,\n 4.5602e-14, 6.1868e-16, 3.8992e-15, 1.4137e-15, 2.9497e-14, 1.6193e-13,\n 1.9389e-13, 8.0249e-15, 8.3292e-15, 6.2655e-15, 3.4205e-14, 9.4836e-18,\n 8.5657e-15, 3.7694e-14, 1.9160e-14, 7.5240e-15, 3.4686e-14, 3.0816e-15,\n 6.8540e-15, 2.0789e-14, 3.8997e-15, 1.5632e-13, 3.6544e-14, 3.4802e-15,\n 1.6994e-14, 6.6021e-14, 3.6383e-15, 1.1641e-14, 3.0312e-13, 2.5411e-14,\n 2.8329e-14, 1.0879e-14, 3.1642e-16, 5.8747e-14, 1.4503e-13, 5.6519e-14,\n 3.7462e-14, 5.5766e-16, 3.1358e-15, 3.0992e-14, 2.4186e-14, 2.2108e-14,\n 2.6828e-14, 9.3872e-15, 1.1828e-15, 8.7811e-15, 2.7868e-14, 1.6659e-20,\n 4.4918e-15, 2.6640e-14, 2.6751e-15, 1.2087e-13, 9.4181e-14, 4.7135e-15,\n 7.3672e-15, 5.8709e-15, 6.7503e-16, 6.7030e-13, 2.5798e-15, 5.5868e-15,\n 1.1658e-14, 2.3212e-13, 1.9997e-16, 2.1191e-13, 2.6771e-14, 3.0833e-15,\n 4.7935e-14, 4.5128e-13, 3.5263e-14, 1.2821e-14, 2.4171e-17, 3.5713e-14,\n 2.5275e-15, 8.3138e-14, 3.5262e-14, 3.2291e-14, 4.8407e-17, 2.3399e-14,\n 4.9294e-13, 2.0110e-17, 4.2875e-15, 4.0122e-15, 3.1496e-15, 3.0320e-13,\n 4.8304e-14, 1.2061e-13, 2.0313e-14, 1.7752e-16, 8.6044e-18, 3.0069e-15,\n 3.7228e-16, 4.0689e-14, 1.0000e-15, 9.3130e-17, 1.4817e-13, 4.3378e-14,\n 2.3187e-14, 3.9763e-15, 2.6135e-15, 4.4984e-14, 5.3146e-15, 1.6989e-15,\n 4.8875e-14, 1.8714e-17, 1.3039e-14, 6.8632e-14, 7.0438e-15, 2.2812e-17,\n 5.8573e-17, 2.1390e-14, 1.9913e-15, 1.0854e-13, 4.2606e-14, 2.9612e-15,\n 4.0569e-14, 4.2322e-15, 1.9371e-13, 5.6323e-15], device='cuda:0')" }, "21": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([9.0178e-15, 8.9369e-15, 1.2429e-12, 7.7579e-15, 3.8173e-14, 4.3680e-14,\n 2.0705e-14, 1.4487e-13, 9.3181e-16, 4.9983e-15, 1.5520e-12, 3.0714e-15,\n 4.5256e-16, 2.1294e-14, 1.1661e-13, 3.9978e-13, 1.7198e-13, 1.2328e-15,\n 2.9308e-12, 1.4703e-12, 1.3249e-12, 1.7419e-15, 2.8055e-15, 1.0151e-13,\n 8.3815e-13, 1.1969e-14, 1.9103e-14, 1.0568e-12, 3.4702e-16, 2.6460e-14,\n 8.9885e-15, 1.7224e-13, 1.2616e-12, 3.3812e-15, 1.5910e-14, 3.9088e-13,\n 5.6021e-15, 3.1617e-14, 7.7586e-13, 4.7535e-17, 3.7867e-15, 1.6426e-15,\n 7.9892e-13, 4.4127e-14, 6.8087e-15, 5.5667e-16, 3.9877e-15, 3.2681e-15,\n 1.9064e-15, 5.8425e-15, 5.9088e-16, 6.4834e-15, 4.4155e-14, 3.4232e-14,\n 2.9376e-17, 3.9913e-14, 2.2243e-14, 1.3450e-12, 9.8734e-14, 1.0873e-15,\n 6.2782e-12, 4.0055e-17, 2.8034e-16, 1.8418e-14, 8.5380e-13, 5.2084e-13,\n 6.9864e-15, 1.1270e-14, 8.1773e-12, 3.0598e-15, 6.0947e-13, 1.9274e-15,\n 2.3779e-14, 1.6920e-13, 1.8588e-15, 1.4201e-14, 3.1758e-13, 2.5062e-14,\n 5.3606e-15, 4.7602e-17, 2.4777e-15, 1.6100e-14, 1.7954e-13, 8.9332e-13,\n 4.4266e-14, 6.1732e-14, 3.3289e-14, 2.5720e-15, 1.6080e-14, 4.4464e-12,\n 2.1690e-14, 1.9174e-14, 5.5202e-15, 5.6426e-13, 1.0226e-14, 3.7022e-14,\n 1.1247e-16, 5.1014e-14, 1.0288e-12, 2.0802e-13, 7.4023e-16, 1.1554e-12,\n 5.4363e-16, 1.5944e-15, 5.2175e-14, 5.7095e-14, 2.2521e-15, 7.8795e-15,\n 5.4757e-15, 4.6538e-15, 7.6421e-17, 8.5042e-15, 1.4205e-14, 2.0159e-15,\n 9.1908e-15, 1.0417e-15, 1.2284e-12, 3.0224e-14, 3.8674e-13, 1.7415e-14,\n 1.5542e-14, 1.0410e-13, 1.3142e-13, 8.7197e-15, 4.2823e-13, 1.9156e-16,\n 9.6404e-13, 2.3466e-16, 1.1063e-14, 8.8750e-16, 8.3089e-16, 9.2924e-16,\n 1.2699e-15, 3.1328e-14, 3.2617e-15, 6.6934e-13, 1.5291e-15, 7.9330e-16,\n 9.9519e-14, 3.4940e-15, 1.4785e-15, 6.1303e-15, 1.0704e-11, 1.2150e-11,\n 2.3288e-14, 2.7729e-17, 2.1330e-13, 1.8302e-13, 3.0762e-13, 1.5766e-16,\n 1.2138e-15, 2.3569e-13, 4.5166e-13, 1.2830e-14, 3.9659e-14, 1.1669e-13,\n 9.1738e-18, 1.9789e-14, 4.3969e-15, 4.4531e-14, 5.9502e-16, 1.5875e-13,\n 6.0380e-15, 1.8267e-14, 5.7948e-15, 1.0323e-11, 2.3791e-13, 6.2095e-16,\n 1.6442e-12, 1.1797e-17, 1.2934e-14, 9.8434e-15, 1.0511e-12, 4.4848e-14,\n 2.7584e-14, 5.0742e-13, 4.6946e-14, 3.3520e-15, 2.1569e-14, 4.4199e-13,\n 1.1521e-15, 4.1826e-14, 9.5431e-12, 1.5816e-14, 8.5754e-16, 7.2295e-16,\n 2.7494e-16, 4.4939e-13, 6.5772e-14, 1.2209e-13, 3.8333e-13, 3.0985e-14,\n 1.9097e-15, 2.3607e-16, 2.6435e-13, 1.5665e-15, 1.2847e-16, 1.0332e-14,\n 1.8781e-15, 4.8620e-14, 8.8260e-14, 1.6152e-15, 8.5733e-15, 2.0126e-15,\n 2.5897e-15, 2.8121e-12, 1.9434e-14, 1.1778e-14, 6.5453e-15, 1.6461e-12,\n 6.1084e-14, 8.3693e-16, 7.0096e-16, 1.0873e-14, 7.0094e-14, 2.0724e-16,\n 1.8027e-15, 5.0665e-13, 7.3660e-14, 2.1447e-14, 1.9831e-13, 3.9376e-13,\n 6.2489e-16, 2.2070e-13, 1.1134e-14, 3.2003e-14, 2.2468e-13, 3.9735e-15,\n 1.2764e-16, 3.4319e-16, 5.0606e-15, 1.0917e-13, 3.5808e-16, 1.5794e-13,\n 3.8421e-15, 4.4955e-13, 4.6897e-13, 1.7738e-13, 2.8389e-14, 1.0114e-16,\n 2.6552e-15, 5.2505e-16, 2.8633e-13, 1.1562e-14, 2.7011e-15, 7.3885e-14,\n 6.4501e-16, 5.2525e-16, 2.6571e-12, 1.2501e-14, 1.8451e-14, 1.6249e-15,\n 8.0254e-14, 1.3035e-14, 1.7264e-15, 5.1606e-12], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.6614e-13, 8.4806e-14, 1.1676e-14, 3.8164e-14, 6.0014e-15, 2.5494e-16,\n 1.1563e-15, 6.2805e-15, 3.8334e-15, 7.5124e-17, 1.8245e-14, 1.0298e-13,\n 3.6319e-14, 7.0773e-14, 1.2368e-14, 6.5240e-14, 1.4754e-13, 7.8009e-15,\n 5.4221e-14, 1.2802e-13, 2.0608e-16, 6.1174e-14, 1.4159e-14, 1.1953e-13,\n 2.9426e-14, 3.4074e-14, 2.3595e-14, 1.6556e-14, 6.4134e-15, 1.3718e-14,\n 7.1327e-14, 3.7111e-14, 2.8844e-15, 2.5164e-13, 1.6213e-13, 7.9180e-14,\n 2.7419e-15, 7.9699e-19, 6.3934e-14, 4.8558e-14, 2.2532e-13, 9.5070e-15,\n 3.1372e-15, 7.5036e-15, 8.3721e-15, 5.7086e-14, 3.5260e-14, 2.4576e-14,\n 1.2170e-14, 1.8318e-14, 2.9740e-16, 3.6146e-14, 6.7688e-14, 3.9705e-14,\n 6.6988e-15, 9.0625e-15, 4.4125e-14, 2.2942e-16, 2.0462e-14, 7.8585e-15,\n 9.8411e-15, 4.7735e-14, 2.6806e-14, 1.5535e-14, 8.2755e-17, 5.4171e-15,\n 3.7262e-14, 6.8019e-16, 4.4168e-14, 4.5315e-14, 2.3135e-15, 1.5323e-14,\n 2.7757e-14, 2.6303e-15, 7.6182e-15, 1.6258e-15, 1.3191e-14, 2.3970e-13,\n 1.4225e-15, 2.2605e-19, 8.0323e-14, 1.0022e-13, 5.8418e-16, 1.3264e-15,\n 2.0211e-14, 6.4128e-14, 2.9160e-15, 3.7315e-14, 2.9300e-15, 6.8963e-14,\n 1.3027e-14, 9.7827e-14, 9.3301e-14, 5.4370e-15, 1.2916e-14, 3.0860e-14,\n 2.1391e-15, 1.5338e-13, 7.0255e-14, 2.2477e-13, 6.7519e-14, 7.3750e-15,\n 2.9535e-15, 1.7667e-14, 3.3190e-16, 1.7659e-13, 2.3918e-15, 1.9913e-14,\n 2.4128e-14, 6.2038e-17, 2.3180e-13, 7.7812e-15, 9.2346e-14, 3.2123e-14,\n 7.1040e-15, 1.2852e-13, 5.4902e-15, 1.3299e-14, 6.2287e-14, 3.1498e-14,\n 2.0139e-14, 3.8792e-14, 2.0192e-15, 5.8413e-17, 1.6268e-15, 1.3718e-14,\n 4.5847e-13, 4.9907e-15, 3.7795e-14, 4.1805e-15, 8.4516e-17, 1.5883e-13,\n 1.5734e-14, 2.4457e-14, 6.5860e-14, 4.8689e-14, 5.5854e-14, 1.3037e-13,\n 7.4210e-14, 7.5454e-16, 2.9484e-15, 2.3123e-15, 4.2967e-14, 1.8847e-13,\n 1.4339e-13, 1.2015e-14, 1.1893e-14, 1.3989e-14, 5.4140e-14, 2.1173e-17,\n 7.8547e-15, 5.1718e-14, 3.0082e-14, 1.8958e-14, 4.1052e-14, 4.6853e-15,\n 1.1625e-14, 4.4238e-14, 6.2869e-15, 1.6752e-13, 6.1538e-14, 8.5564e-15,\n 1.7159e-14, 8.9069e-14, 6.7279e-15, 2.5584e-14, 3.1487e-13, 4.2708e-14,\n 4.2439e-14, 2.0854e-14, 8.6969e-16, 3.8355e-14, 2.0652e-13, 7.1779e-14,\n 5.9846e-14, 1.1393e-15, 4.6647e-15, 3.7040e-14, 1.8065e-14, 4.0471e-14,\n 2.7438e-14, 1.2120e-14, 1.9724e-15, 1.5978e-14, 4.6672e-14, 4.7891e-18,\n 8.9490e-15, 3.4172e-14, 8.1649e-15, 1.9816e-13, 1.2043e-13, 9.8808e-15,\n 1.3480e-14, 7.0822e-15, 8.8664e-16, 7.0952e-13, 4.0994e-15, 7.5332e-15,\n 1.8704e-14, 2.9761e-13, 5.0013e-16, 3.2837e-13, 4.7737e-14, 5.0112e-15,\n 6.7591e-14, 6.1222e-13, 4.8790e-14, 1.1942e-14, 6.1053e-17, 7.9454e-14,\n 3.0576e-15, 1.4384e-13, 2.9601e-14, 5.1117e-14, 5.7872e-17, 4.4943e-14,\n 3.4793e-13, 1.3557e-17, 7.2163e-15, 7.4746e-15, 4.5802e-15, 2.8905e-13,\n 4.5029e-14, 2.1087e-13, 3.5307e-14, 3.2684e-16, 5.8982e-18, 6.8323e-15,\n 9.4974e-16, 6.2199e-14, 2.0006e-15, 2.0115e-16, 1.1234e-13, 4.6439e-14,\n 3.5346e-14, 8.5473e-15, 5.2912e-15, 6.9384e-14, 1.1026e-14, 4.1249e-15,\n 8.9082e-14, 1.7685e-17, 2.1152e-14, 8.3378e-14, 1.3221e-14, 4.4099e-17,\n 5.4105e-17, 3.5619e-14, 1.9885e-15, 1.7317e-13, 6.6904e-14, 4.9079e-15,\n 5.4377e-14, 7.1806e-15, 2.0058e-13, 7.8484e-15], device='cuda:0')" }, "22": { - "step": "tensor(8764.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([4.0793e-16, 1.0945e-13, 8.6739e-13, 1.2175e-14, 1.3361e-13, 5.0171e-13,\n 5.9456e-15, 7.4453e-13, 6.4339e-14, 8.6220e-15, 3.0403e-12, 4.2549e-14,\n 1.0934e-14, 1.0009e-13, 1.2302e-13, 4.8617e-13, 2.5052e-13, 2.8407e-14,\n 8.3725e-13, 4.4801e-13, 3.9436e-13, 3.8306e-14, 8.4289e-14, 3.2659e-13,\n 3.1427e-13, 2.2742e-14, 6.5085e-15, 1.2420e-12, 1.3864e-16, 4.4434e-14,\n 1.9260e-14, 3.0216e-13, 5.8063e-13, 4.1986e-14, 3.3833e-14, 2.2632e-13,\n 2.9407e-15, 4.3429e-14, 6.1968e-13, 3.9705e-16, 3.3267e-14, 4.1939e-14,\n 1.0914e-12, 1.8445e-13, 4.7093e-14, 4.3847e-15, 1.1398e-15, 4.7641e-15,\n 1.1228e-14, 4.7003e-16, 2.7100e-16, 9.0374e-15, 5.6394e-14, 1.3651e-13,\n 8.4639e-17, 2.4207e-13, 5.3233e-13, 2.3837e-12, 2.0653e-13, 4.9799e-15,\n 3.8594e-12, 1.3696e-15, 5.3130e-15, 1.9044e-13, 6.3871e-13, 9.0115e-13,\n 2.1200e-14, 3.2292e-14, 3.1561e-12, 3.8184e-14, 4.7945e-13, 9.5208e-14,\n 2.1875e-13, 1.5512e-13, 1.0819e-15, 3.0035e-14, 4.5679e-13, 1.8012e-14,\n 1.1691e-16, 1.1513e-15, 1.6685e-14, 5.6910e-15, 9.8360e-13, 5.5363e-13,\n 8.3787e-14, 8.0855e-14, 2.6791e-13, 6.5380e-14, 1.0621e-13, 1.9333e-12,\n 6.2764e-14, 9.7896e-14, 1.2642e-15, 6.5938e-13, 1.0736e-13, 1.3940e-14,\n 5.0579e-15, 1.8356e-14, 8.5667e-13, 2.2852e-13, 4.2371e-15, 6.4020e-13,\n 7.4603e-15, 2.7138e-15, 1.9618e-13, 1.3398e-13, 1.3892e-15, 5.1669e-14,\n 3.8607e-14, 5.2483e-14, 1.7708e-15, 2.3383e-15, 2.5628e-14, 7.2272e-16,\n 5.1947e-14, 2.7562e-14, 7.0725e-13, 9.0456e-15, 4.0704e-13, 1.7852e-13,\n 4.9482e-15, 1.3647e-13, 2.4665e-13, 2.8277e-14, 3.8743e-13, 1.6676e-15,\n 1.0495e-12, 2.8051e-15, 6.0632e-14, 4.0628e-16, 2.4191e-15, 2.2911e-15,\n 1.9686e-15, 9.9643e-14, 2.3584e-14, 4.3529e-13, 7.5459e-15, 3.5295e-15,\n 2.4382e-13, 3.4407e-14, 1.3033e-15, 1.5490e-16, 3.0149e-12, 7.7764e-12,\n 1.4344e-13, 1.0369e-15, 3.2450e-13, 2.5957e-13, 2.1455e-13, 6.9981e-15,\n 6.4661e-15, 2.7550e-13, 4.4754e-13, 1.8203e-13, 2.9965e-13, 9.8337e-14,\n 4.0543e-18, 4.6656e-15, 1.6586e-14, 7.8219e-13, 2.2559e-16, 1.5679e-13,\n 1.7379e-13, 5.2151e-15, 2.6513e-15, 5.8403e-12, 5.8065e-13, 2.4705e-15,\n 1.4965e-12, 3.5406e-16, 1.1999e-13, 3.4896e-15, 3.5015e-13, 6.7521e-14,\n 1.0598e-13, 1.0213e-12, 1.2146e-13, 6.9751e-14, 5.6347e-15, 5.6201e-13,\n 6.7834e-14, 1.5148e-13, 4.8679e-12, 3.6676e-14, 6.4318e-15, 2.1666e-16,\n 1.1842e-14, 6.0417e-13, 1.2458e-13, 1.9560e-13, 8.4897e-13, 2.6533e-13,\n 8.5589e-15, 5.6568e-15, 4.7506e-13, 1.1022e-13, 1.1048e-16, 2.4764e-13,\n 8.2817e-15, 1.3021e-13, 3.4607e-13, 3.2593e-14, 7.9568e-14, 2.1403e-14,\n 3.7735e-14, 1.6543e-12, 1.6008e-14, 1.9434e-13, 7.5694e-16, 1.3929e-12,\n 2.0173e-13, 4.0381e-18, 7.6203e-16, 1.9958e-15, 9.6633e-13, 3.1329e-15,\n 7.6846e-14, 4.4655e-13, 2.3358e-14, 1.9849e-13, 2.1791e-13, 1.8011e-13,\n 1.0456e-14, 3.1575e-13, 6.1296e-14, 1.1047e-13, 1.4887e-12, 3.3346e-14,\n 1.2811e-15, 1.5146e-15, 5.5081e-14, 1.0899e-13, 4.0316e-16, 1.9015e-13,\n 1.2845e-14, 5.9236e-13, 2.1498e-13, 3.8806e-13, 1.1106e-13, 3.2749e-17,\n 2.5784e-14, 6.5634e-15, 9.0019e-13, 9.6964e-14, 2.8915e-14, 2.2937e-13,\n 9.5292e-15, 2.7026e-14, 8.4867e-13, 2.8381e-14, 2.4829e-13, 5.7266e-16,\n 9.9488e-14, 1.0327e-14, 5.7826e-16, 2.6076e-12], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1739e-14, 4.4097e-14, 0.0000e+00, ..., 6.4209e-14, 1.0500e-13,\n 1.6065e-14],\n [5.3846e-15, 6.0095e-15, 0.0000e+00, ..., 9.9170e-15, 3.0590e-14,\n 1.9445e-17],\n [7.2292e-15, 3.9271e-15, 0.0000e+00, ..., 6.6041e-15, 1.4110e-14,\n 1.1364e-15],\n ...,\n [1.2348e-14, 1.5224e-14, 0.0000e+00, ..., 1.8804e-14, 3.5582e-14,\n 1.1319e-14],\n [3.7596e-14, 1.1937e-14, 0.0000e+00, ..., 1.1866e-14, 7.5295e-14,\n 7.1681e-15],\n [1.8402e-16, 2.5771e-16, 0.0000e+00, ..., 2.7789e-15, 4.4290e-15,\n 9.0399e-16]], device='cuda:0')" }, "23": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.9207e-12, 2.0368e-12, 1.4657e-15, ..., 1.4359e-13, 2.5794e-13,\n 4.7783e-14],\n [1.5085e-12, 1.7074e-12, 1.0688e-17, ..., 9.5156e-14, 1.9920e-13,\n 4.1447e-14],\n [4.5036e-13, 5.2945e-13, 1.1137e-15, ..., 4.0548e-14, 4.9911e-14,\n 1.5882e-14],\n ...,\n [4.4377e-14, 2.7858e-14, 2.1659e-16, ..., 2.7505e-15, 7.6648e-15,\n 1.1605e-15],\n [1.5197e-13, 1.6166e-13, 2.0907e-16, ..., 1.5548e-14, 1.8926e-14,\n 7.5120e-15],\n [1.3670e-13, 1.9878e-13, 1.7502e-16, ..., 1.8780e-14, 1.7775e-14,\n 1.0470e-14]], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.5859e-11, 3.5298e-12, 7.7962e-12, 5.8672e-12, 2.0007e-12, 1.0732e-12,\n 4.6161e-12, 2.7465e-15, 1.8407e-12, 3.1221e-13, 3.4660e-11, 6.8629e-12,\n 9.2335e-12, 5.9910e-12, 1.7532e-12, 4.3267e-12, 1.8132e-11, 1.1861e-12,\n 3.4328e-12, 3.6565e-11, 1.4433e-13, 1.3882e-11, 6.0429e-12, 3.4911e-12,\n 4.6718e-13, 1.4560e-11, 4.5994e-12, 7.3127e-13, 5.1133e-13, 4.0727e-12,\n 2.0117e-11, 4.1887e-12, 5.1384e-13, 1.4719e-10, 1.4539e-11, 1.0187e-11,\n 1.7446e-12, 1.2643e-16, 1.3594e-11, 6.8561e-11, 8.6226e-12, 5.2634e-12,\n 9.3043e-13, 5.3526e-12, 3.1255e-11, 2.3190e-11, 1.3114e-11, 7.6734e-12,\n 1.9333e-12, 3.3303e-11, 3.7344e-13, 6.0195e-11, 4.1723e-11, 9.5154e-12,\n 9.6556e-13, 1.7445e-13, 2.2890e-11, 7.2778e-13, 1.0452e-11, 2.1074e-12,\n 4.9184e-12, 5.6401e-12, 3.0007e-11, 1.5927e-11, 7.2672e-13, 1.8803e-12,\n 1.1223e-11, 2.0015e-12, 1.5715e-11, 3.7784e-13, 5.5506e-16, 6.4480e-12,\n 1.1859e-11, 2.0831e-13, 5.0294e-12, 6.1464e-13, 1.7430e-12, 3.3452e-11,\n 4.9180e-13, 2.1023e-13, 2.6876e-12, 2.3570e-11, 6.7220e-16, 1.7468e-12,\n 4.8944e-13, 1.7755e-11, 7.1450e-12, 4.0036e-12, 7.0637e-12, 1.0346e-10,\n 1.3860e-12, 2.3328e-11, 2.9751e-11, 4.4995e-12, 5.6453e-11, 4.2181e-12,\n 4.9947e-12, 3.1711e-11, 1.0657e-11, 1.2427e-11, 2.7605e-11, 1.5541e-12,\n 2.9936e-13, 1.1153e-11, 4.1331e-13, 5.9530e-11, 3.3535e-12, 6.7878e-12,\n 2.0734e-12, 9.5880e-15, 4.8682e-11, 3.4834e-12, 4.8349e-11, 1.4759e-11,\n 1.6148e-12, 2.6859e-12, 3.2001e-12, 2.7195e-12, 2.0520e-12, 4.9871e-12,\n 1.4928e-11, 8.7300e-12, 1.4832e-12, 1.1831e-15, 8.7792e-15, 5.3721e-12,\n 2.9889e-11, 1.9380e-11, 1.5682e-12, 2.6039e-11, 2.0431e-14, 1.2868e-10,\n 5.8543e-12, 1.0883e-11, 1.8670e-11, 5.0126e-11, 1.5903e-11, 5.0533e-11,\n 1.3784e-11, 6.2882e-15, 1.8031e-13, 9.4148e-13, 6.0515e-12, 5.4490e-11,\n 9.5011e-12, 2.5728e-12, 2.2768e-12, 8.8244e-13, 5.4165e-11, 1.1631e-12,\n 9.4988e-13, 4.5247e-12, 4.9440e-12, 1.9678e-11, 6.7082e-12, 8.9844e-13,\n 4.7362e-12, 5.0301e-11, 1.4221e-12, 6.9256e-11, 1.7602e-11, 2.5589e-11,\n 2.6713e-12, 2.3285e-11, 1.7811e-11, 1.4676e-11, 1.4140e-10, 1.9892e-11,\n 2.1877e-11, 2.8293e-12, 9.0249e-13, 3.8055e-12, 2.6226e-11, 8.0679e-12,\n 1.5047e-11, 1.6116e-13, 1.3256e-12, 8.8420e-13, 1.7950e-12, 6.3191e-12,\n 9.2826e-13, 5.1205e-12, 3.3793e-12, 4.5292e-12, 3.4943e-11, 1.1877e-13,\n 3.5394e-12, 1.0837e-11, 2.0812e-12, 5.6176e-11, 5.9231e-12, 8.4627e-12,\n 9.1395e-12, 7.2221e-13, 3.0725e-15, 1.6072e-10, 1.9107e-12, 1.9807e-12,\n 1.1824e-11, 3.0788e-11, 1.2483e-15, 8.7402e-12, 3.3121e-11, 3.9188e-13,\n 1.9907e-11, 6.6256e-11, 1.0441e-11, 2.1446e-12, 8.0105e-15, 9.0554e-11,\n 1.8615e-12, 3.1055e-11, 9.5425e-13, 4.2669e-12, 7.2201e-13, 8.6128e-12,\n 1.1386e-11, 3.9154e-14, 8.6644e-13, 1.1215e-12, 3.7456e-12, 1.4553e-11,\n 2.6961e-12, 5.1593e-11, 9.0939e-12, 3.7150e-13, 1.1480e-13, 5.5451e-13,\n 2.0609e-12, 4.7226e-12, 1.4330e-13, 5.0325e-13, 8.2279e-12, 6.6630e-12,\n 2.6766e-11, 8.3860e-12, 1.9602e-12, 1.4948e-11, 2.2359e-12, 3.9218e-12,\n 1.4336e-11, 2.9780e-14, 1.8749e-11, 2.3263e-11, 8.0784e-12, 1.8374e-14,\n 6.4159e-16, 8.0161e-11, 3.8701e-13, 2.9458e-11, 1.0106e-11, 1.7380e-12,\n 1.3270e-11, 1.3883e-11, 3.2225e-11, 9.0876e-13], device='cuda:0')" }, "24": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.0230e-09, 8.0157e-10, 2.6289e-10, 1.6809e-10, 1.5985e-11, 1.0599e-09,\n 8.9518e-12, 9.4931e-10, 9.3016e-10, 8.4808e-13, 1.1558e-09, 7.8877e-11,\n 1.0520e-10, 7.3939e-12, 1.1479e-12, 6.7288e-12, 1.6298e-14, 1.1833e-11,\n 2.3205e-10, 2.7523e-10, 6.2284e-11, 1.1466e-10, 1.3775e-10, 1.4321e-12,\n 8.2523e-11, 8.7357e-11, 3.3447e-11, 2.1659e-10, 2.8090e-11, 1.1837e-12,\n 1.4129e-11, 7.9831e-11, 1.2811e-10, 1.6332e-10, 9.8834e-14, 6.4038e-14,\n 8.5916e-11, 1.1999e-12, 8.9497e-11, 6.9386e-11, 1.8575e-13, 4.3623e-10,\n 1.1106e-09, 2.9004e-13, 2.9699e-11, 1.0042e-12, 1.4265e-10, 8.4263e-13,\n 3.1238e-11, 2.8867e-10, 1.5551e-12, 2.2965e-11, 4.7091e-13, 1.3609e-10,\n 7.7234e-12, 3.1966e-12, 2.0937e-09, 1.8270e-09, 6.3372e-12, 2.3114e-13,\n 2.7311e-09, 1.7717e-11, 2.1144e-10, 7.3854e-10, 5.8752e-11, 1.2707e-09,\n 2.9746e-12, 1.1423e-11, 1.3448e-10, 2.2154e-11, 2.6750e-10, 5.0061e-11,\n 4.4895e-11, 1.7149e-11, 1.0120e-11, 2.8589e-11, 1.3946e-10, 6.4449e-14,\n 8.8297e-11, 2.6362e-11, 1.3120e-10, 3.0217e-10, 1.1328e-11, 5.1418e-13,\n 1.5264e-12, 7.8820e-11, 3.1737e-10, 1.8116e-12, 4.8434e-10, 2.9670e-11,\n 1.0835e-11, 7.9672e-13, 1.4400e-11, 2.7176e-10, 5.9937e-10, 3.1807e-10,\n 2.0657e-12, 1.5521e-12, 2.6140e-10, 2.0403e-11, 1.3407e-10, 1.5306e-12,\n 3.4018e-11, 3.3000e-13, 1.8447e-11, 6.3303e-13, 1.2380e-10, 3.9803e-14,\n 1.4558e-10, 3.7539e-11, 6.9607e-12, 6.7533e-12, 6.4692e-11, 6.9795e-13,\n 1.6085e-10, 1.5080e-10, 1.1380e-11, 1.5028e-09, 5.1519e-14, 5.6382e-10,\n 3.4387e-11, 5.1214e-12, 9.1239e-11, 8.1096e-13, 1.6607e-12, 1.5693e-12,\n 9.7241e-10, 5.8524e-11, 2.8285e-13, 4.4398e-12, 1.9573e-11, 3.1168e-11,\n 7.4341e-11, 1.1445e-10, 3.7004e-12, 9.2173e-11, 2.0576e-10, 1.8562e-11,\n 4.4633e-13, 9.7780e-12, 8.5617e-11, 2.0464e-10, 2.6048e-10, 5.1425e-09,\n 3.6112e-12, 9.4819e-14, 1.4950e-10, 1.9657e-11, 1.0414e-12, 1.3636e-11,\n 3.3437e-11, 8.8599e-12, 1.5211e-11, 9.6551e-11, 7.8017e-12, 7.6469e-12,\n 3.6603e-11, 8.3352e-10, 7.1576e-12, 2.7392e-09, 4.2394e-11, 4.3677e-12,\n 1.8972e-13, 6.2628e-10, 1.5702e-10, 3.1268e-10, 3.1468e-12, 5.9545e-11,\n 1.0928e-09, 2.4846e-12, 5.7293e-10, 1.4422e-10, 8.4830e-11, 1.5044e-11,\n 1.2335e-10, 8.6686e-10, 3.1102e-10, 1.4229e-10, 2.5602e-11, 3.8556e-10,\n 9.6085e-10, 9.3314e-11, 1.0849e-09, 1.0699e-13, 1.2255e-10, 6.6086e-14,\n 1.6491e-10, 8.2827e-12, 2.6211e-11, 2.8956e-12, 6.5461e-10, 4.2437e-13,\n 1.0453e-10, 2.3697e-12, 3.9231e-11, 4.8971e-11, 3.1702e-11, 9.6950e-10,\n 5.2964e-11, 2.3978e-12, 9.1668e-11, 4.1578e-12, 5.2834e-12, 1.9952e-11,\n 3.7452e-11, 1.8002e-10, 1.7409e-11, 1.6495e-11, 1.2925e-11, 2.4164e-12,\n 6.0482e-12, 1.8753e-10, 3.3481e-13, 6.7682e-10, 5.8337e-11, 3.1398e-11,\n 2.9510e-10, 6.5080e-11, 6.3438e-10, 1.8061e-10, 6.4647e-13, 2.5271e-10,\n 6.6151e-12, 3.1956e-13, 2.8328e-11, 8.1832e-12, 1.3475e-10, 5.2636e-10,\n 2.1241e-13, 4.6390e-12, 7.4813e-11, 4.8696e-11, 8.1406e-13, 6.3026e-12,\n 1.5230e-11, 3.0480e-10, 3.5024e-11, 2.2773e-11, 8.2215e-11, 2.1221e-11,\n 4.4963e-10, 4.1024e-11, 4.8891e-10, 1.7084e-10, 1.7357e-11, 7.8772e-12,\n 1.7137e-11, 1.7206e-10, 2.8074e-10, 3.6378e-12, 2.9607e-10, 2.9954e-12,\n 2.3350e-10, 1.7914e-11, 1.0012e-10, 1.2559e-10], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.2854e-13, 1.0860e-14, 1.2315e-14, 2.6649e-14, 3.1170e-15, 4.2087e-15,\n 1.0821e-14, 3.6218e-17, 2.2144e-15, 2.1244e-16, 1.2360e-13, 1.6859e-14,\n 1.9205e-14, 1.4293e-14, 4.9111e-15, 1.1782e-14, 4.3670e-14, 2.4134e-15,\n 7.5322e-15, 8.1294e-14, 1.8358e-18, 4.7634e-14, 1.8503e-14, 9.6343e-15,\n 1.9690e-15, 2.6768e-14, 1.0265e-14, 2.5550e-15, 2.0060e-15, 1.0057e-14,\n 7.6999e-14, 7.7304e-15, 1.2707e-15, 5.8276e-13, 3.3482e-14, 2.1460e-14,\n 4.2414e-15, 3.7183e-17, 3.7433e-14, 1.5231e-13, 1.6718e-14, 1.7605e-14,\n 2.1244e-15, 4.4303e-14, 9.3291e-14, 4.5603e-14, 5.9680e-14, 1.9615e-14,\n 3.0849e-15, 1.2764e-13, 3.2825e-16, 1.8131e-13, 1.0383e-13, 1.6166e-14,\n 8.2482e-16, 5.8211e-16, 7.5136e-14, 4.7070e-16, 2.9158e-14, 5.2548e-15,\n 1.1036e-14, 1.6242e-14, 1.2934e-13, 4.3737e-14, 5.1413e-16, 2.6377e-15,\n 2.9932e-14, 2.2076e-15, 3.2373e-14, 1.8827e-15, 6.5784e-16, 1.3039e-14,\n 2.4182e-14, 1.2445e-15, 9.0177e-15, 4.2134e-15, 5.5755e-15, 8.6200e-14,\n 7.8833e-16, 2.2400e-16, 6.3897e-15, 6.2026e-14, 5.9717e-17, 1.3954e-14,\n 2.2586e-15, 4.3151e-14, 3.2110e-14, 9.1629e-15, 1.2801e-14, 3.6737e-13,\n 3.7987e-15, 5.5658e-14, 1.0410e-13, 6.6684e-15, 3.6365e-13, 8.0989e-15,\n 6.4447e-15, 6.3716e-14, 2.7720e-14, 3.1964e-14, 1.1694e-13, 1.1719e-14,\n 1.5013e-15, 2.8983e-14, 1.7292e-16, 1.1061e-13, 6.5554e-15, 1.7042e-14,\n 3.2502e-15, 4.1294e-16, 1.0594e-13, 1.3226e-14, 3.0235e-13, 3.0144e-14,\n 1.4017e-15, 4.7266e-15, 8.8722e-15, 4.1306e-15, 7.3992e-15, 1.6700e-14,\n 4.2870e-14, 2.2215e-14, 1.6544e-15, 5.2720e-18, 2.0681e-16, 9.0086e-15,\n 6.5767e-14, 1.1511e-13, 3.4997e-15, 8.7621e-14, 2.6036e-17, 4.1693e-13,\n 1.4609e-14, 2.5379e-14, 2.7695e-14, 1.8208e-13, 3.6096e-14, 1.4671e-13,\n 3.2830e-14, 4.6769e-17, 1.8511e-15, 3.4620e-15, 1.0268e-14, 1.7011e-13,\n 2.0039e-14, 8.7198e-15, 7.0728e-15, 2.5143e-15, 1.8878e-13, 1.6618e-15,\n 4.1060e-15, 7.5846e-15, 1.4410e-14, 6.7585e-14, 1.1089e-14, 4.3932e-15,\n 7.0255e-15, 1.4948e-13, 3.1911e-15, 2.1243e-13, 3.6980e-14, 9.8402e-14,\n 3.7507e-15, 7.8857e-14, 1.0831e-13, 3.2835e-14, 4.8477e-13, 3.7704e-14,\n 9.4665e-14, 1.0554e-14, 6.6949e-16, 5.4511e-15, 4.9425e-14, 2.4610e-14,\n 2.7759e-14, 5.3062e-16, 1.5243e-15, 2.1011e-15, 3.1993e-15, 1.0938e-14,\n 1.6861e-15, 1.1973e-14, 5.7216e-15, 7.7797e-15, 7.4844e-14, 7.6297e-19,\n 6.4106e-15, 2.6364e-14, 2.7464e-15, 1.5261e-13, 7.4028e-15, 2.1852e-14,\n 4.1924e-14, 2.6114e-15, 7.5946e-17, 6.3469e-13, 3.4962e-15, 3.0640e-15,\n 4.7055e-14, 4.4240e-14, 2.0859e-16, 1.4827e-14, 1.1315e-13, 4.6509e-16,\n 6.1012e-14, 1.2438e-13, 1.7398e-14, 3.3746e-15, 2.6090e-18, 2.8827e-13,\n 3.8872e-15, 6.9610e-14, 4.5288e-15, 9.3483e-15, 8.4023e-16, 1.6557e-14,\n 2.1331e-14, 1.5921e-17, 2.6621e-15, 1.3714e-15, 9.1188e-15, 4.1993e-14,\n 1.2298e-14, 1.6589e-13, 3.1582e-14, 1.4530e-15, 5.2239e-17, 5.8138e-15,\n 3.2562e-15, 1.3576e-14, 7.4440e-16, 8.1867e-16, 1.4101e-14, 2.6788e-14,\n 6.7488e-14, 2.7367e-14, 3.0716e-15, 3.2806e-14, 6.8812e-15, 5.8805e-15,\n 3.6413e-14, 1.2630e-17, 1.5852e-13, 5.6780e-14, 1.3227e-14, 1.2510e-17,\n 2.2784e-16, 3.7817e-13, 4.0881e-16, 9.1271e-14, 1.9284e-14, 2.6040e-15,\n 3.0391e-14, 2.8784e-14, 7.0270e-14, 3.4083e-15], device='cuda:0')" }, "25": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([9.6101e-13, 8.2159e-13, 1.6356e-12, 2.8099e-15, 2.2933e-14, 2.6360e-12,\n 2.6477e-14, 1.0062e-12, 4.3483e-13, 1.4672e-15, 3.4396e-13, 1.1310e-12,\n 1.2910e-13, 4.9336e-16, 1.1538e-15, 1.0696e-14, 3.2744e-15, 8.3424e-16,\n 7.9754e-14, 8.5893e-13, 5.0928e-13, 1.3484e-14, 1.2159e-14, 1.6671e-14,\n 8.1004e-13, 2.9282e-14, 1.5713e-14, 2.8758e-14, 2.2691e-14, 1.8402e-15,\n 4.5338e-16, 8.7272e-14, 1.6531e-14, 2.3813e-13, 5.1983e-16, 7.9841e-16,\n 3.3696e-14, 7.4575e-18, 5.1415e-14, 4.3339e-14, 6.1144e-16, 2.0648e-12,\n 5.8626e-12, 2.3142e-15, 2.4145e-13, 1.6165e-16, 6.1374e-14, 3.1307e-17,\n 8.2277e-15, 6.9342e-14, 2.7152e-16, 2.1631e-15, 5.9609e-17, 1.4306e-13,\n 5.6892e-15, 1.9306e-15, 1.0085e-12, 2.2820e-12, 2.7856e-15, 5.6396e-16,\n 3.7730e-12, 4.0117e-15, 6.3858e-13, 3.5921e-13, 2.1123e-14, 1.8864e-12,\n 2.4746e-15, 1.5133e-14, 1.9776e-15, 6.3963e-15, 7.8329e-14, 5.3832e-15,\n 7.0858e-15, 1.6388e-14, 1.0796e-14, 3.5636e-15, 5.0355e-14, 1.5898e-15,\n 2.4400e-14, 4.5718e-15, 3.5851e-13, 4.8325e-14, 3.1680e-14, 8.3345e-15,\n 1.5097e-15, 1.0189e-12, 1.5190e-13, 9.5060e-16, 6.6262e-13, 3.6644e-15,\n 2.0980e-15, 8.5635e-16, 3.9533e-16, 1.3557e-13, 1.7329e-12, 8.0318e-14,\n 1.7895e-15, 1.2873e-15, 1.4794e-13, 1.3121e-14, 2.1683e-13, 1.9137e-15,\n 1.5014e-14, 1.6078e-15, 7.3794e-16, 1.5079e-16, 1.1281e-13, 4.2591e-16,\n 3.9565e-14, 4.2211e-15, 4.2024e-16, 8.6993e-14, 1.7445e-13, 6.4508e-16,\n 1.1454e-13, 4.5695e-13, 1.1278e-15, 3.4766e-12, 5.6139e-17, 8.8322e-13,\n 1.3218e-15, 5.4757e-16, 4.0329e-14, 1.3317e-17, 1.5410e-15, 7.2449e-17,\n 1.2251e-12, 1.6984e-14, 1.0437e-18, 5.4498e-16, 9.1874e-16, 2.5736e-15,\n 3.4174e-14, 1.7286e-13, 1.7393e-15, 2.1270e-14, 7.5874e-13, 3.4609e-15,\n 4.8203e-16, 2.9950e-14, 8.0484e-15, 4.3249e-14, 4.2107e-14, 1.2532e-11,\n 7.1476e-15, 1.2904e-17, 9.7904e-14, 2.5868e-15, 3.5982e-16, 9.6821e-15,\n 7.4417e-14, 1.1189e-15, 1.3599e-16, 3.7339e-14, 1.7502e-15, 2.2986e-14,\n 7.3665e-15, 7.6218e-13, 1.0375e-15, 2.6030e-12, 1.0109e-14, 1.6070e-16,\n 3.8201e-15, 1.8684e-12, 3.2264e-13, 4.5104e-15, 7.2104e-15, 8.4308e-15,\n 3.1168e-12, 5.5915e-17, 8.9352e-13, 1.4694e-14, 7.8961e-14, 4.6568e-15,\n 5.7419e-14, 8.6807e-13, 1.3385e-13, 3.3779e-14, 5.7687e-15, 2.8160e-13,\n 6.7345e-13, 3.7896e-15, 3.7282e-13, 2.0408e-16, 7.4216e-14, 1.2257e-15,\n 1.2261e-13, 1.0801e-14, 3.4623e-14, 5.1256e-15, 1.8818e-13, 3.4923e-15,\n 9.8650e-15, 1.0118e-15, 2.1464e-15, 5.0713e-15, 1.8061e-14, 2.7849e-12,\n 8.6472e-15, 2.3193e-15, 1.5038e-14, 2.3684e-16, 1.0228e-15, 4.9835e-14,\n 9.1863e-14, 3.8533e-14, 1.4689e-14, 7.3623e-16, 2.5878e-15, 5.5487e-15,\n 4.8842e-15, 2.2589e-13, 4.1152e-16, 2.2848e-12, 2.6448e-15, 2.9855e-15,\n 9.6407e-14, 8.5921e-15, 1.2520e-13, 1.6112e-14, 4.6731e-15, 5.3705e-13,\n 4.5530e-15, 8.3135e-16, 9.3663e-14, 3.5652e-16, 2.5047e-15, 9.5625e-13,\n 1.1955e-16, 3.6407e-16, 9.0466e-15, 3.9139e-14, 8.3861e-17, 1.7643e-15,\n 4.6055e-16, 1.0173e-12, 1.9630e-14, 7.0282e-16, 1.0247e-12, 1.7074e-15,\n 6.5624e-13, 1.2011e-14, 2.4236e-13, 2.4108e-13, 7.3316e-15, 3.1184e-16,\n 1.7054e-16, 2.6483e-13, 3.1279e-13, 3.0113e-14, 7.9391e-14, 1.4730e-15,\n 3.9973e-13, 5.0032e-14, 1.1566e-14, 5.2155e-14], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.8657e-13, 1.6450e-14, 2.2196e-14, 3.0104e-14, 4.6704e-15, 6.6862e-15,\n 1.3537e-14, 4.2514e-17, 4.0055e-15, 3.7026e-16, 1.1057e-13, 2.4871e-14,\n 2.7783e-14, 2.7758e-14, 8.8854e-15, 2.1638e-14, 7.9055e-14, 3.3601e-15,\n 1.0988e-14, 1.5804e-13, 2.8826e-17, 6.3277e-14, 3.0131e-14, 1.6331e-14,\n 2.4791e-15, 4.7539e-14, 2.1815e-14, 4.4880e-15, 3.5907e-15, 1.2059e-14,\n 9.1154e-14, 1.8642e-14, 3.1821e-15, 5.1885e-13, 6.3600e-14, 4.4339e-14,\n 3.2992e-15, 2.8117e-17, 6.1023e-14, 2.4103e-13, 3.0196e-14, 2.5435e-14,\n 5.2963e-15, 2.9453e-14, 1.0220e-13, 8.1077e-14, 6.0271e-14, 3.7048e-14,\n 5.0085e-15, 1.0731e-13, 3.3235e-16, 2.4580e-13, 1.3554e-13, 3.0277e-14,\n 1.7414e-15, 1.4189e-15, 1.0209e-13, 1.3024e-15, 4.4905e-14, 1.1745e-14,\n 1.2673e-14, 2.6459e-14, 1.2894e-13, 6.9184e-14, 8.9229e-16, 5.0048e-15,\n 4.9891e-14, 4.6559e-15, 6.6793e-14, 2.3253e-15, 5.4358e-16, 3.0764e-14,\n 3.8134e-14, 1.3141e-15, 1.4390e-14, 4.7519e-15, 1.0240e-14, 1.3866e-13,\n 1.2930e-15, 1.9855e-16, 1.4231e-14, 1.0560e-13, 1.5940e-16, 1.1815e-14,\n 2.9626e-15, 7.5522e-14, 3.5880e-14, 1.8102e-14, 2.2005e-14, 4.1579e-13,\n 6.8461e-15, 1.0109e-13, 9.4744e-14, 1.3988e-14, 2.4193e-13, 1.3003e-14,\n 1.5775e-14, 1.1208e-13, 4.7499e-14, 5.8096e-14, 1.1882e-13, 1.0444e-14,\n 2.8456e-15, 3.2425e-14, 5.0105e-16, 2.0337e-13, 8.1982e-15, 3.2858e-14,\n 5.9488e-15, 7.3689e-16, 1.6715e-13, 1.7900e-14, 2.0633e-13, 5.0843e-14,\n 4.2654e-15, 8.7277e-15, 1.6491e-14, 8.2345e-15, 1.0020e-14, 2.5787e-14,\n 4.4145e-14, 2.6596e-14, 3.3294e-15, 3.4521e-18, 2.1885e-16, 1.6420e-14,\n 1.2742e-13, 8.8181e-14, 7.9302e-15, 7.9304e-14, 3.4555e-17, 5.2179e-13,\n 2.6532e-14, 4.6748e-14, 5.9527e-14, 1.6792e-13, 7.0830e-14, 1.7317e-13,\n 5.9343e-14, 1.0178e-16, 2.3479e-15, 6.3108e-15, 1.8836e-14, 1.8336e-13,\n 4.0316e-14, 1.3899e-14, 1.1452e-14, 4.6218e-15, 1.8361e-13, 1.8791e-15,\n 6.6780e-15, 1.4987e-14, 2.3939e-14, 5.8277e-14, 1.9339e-14, 6.3966e-15,\n 1.2735e-14, 1.7041e-13, 4.2343e-15, 2.8235e-13, 5.8503e-14, 7.7071e-14,\n 6.3270e-15, 1.0404e-13, 8.2673e-14, 4.6109e-14, 4.9356e-13, 6.7543e-14,\n 9.3054e-14, 1.3832e-14, 1.7268e-15, 1.0803e-14, 9.0837e-14, 3.7867e-14,\n 4.8886e-14, 1.2456e-15, 2.9454e-15, 4.2999e-15, 4.1415e-15, 2.8985e-14,\n 2.4732e-15, 1.2711e-14, 9.2592e-15, 1.4020e-14, 1.4601e-13, 1.2852e-17,\n 1.1496e-14, 3.2121e-14, 5.4655e-15, 2.3345e-13, 1.9893e-14, 2.4606e-14,\n 4.3401e-14, 4.1492e-15, 2.0005e-16, 6.4480e-13, 4.5575e-15, 5.2491e-15,\n 5.4972e-14, 1.0563e-13, 5.0468e-16, 3.8819e-14, 1.0420e-13, 8.7908e-16,\n 8.2428e-14, 2.3262e-13, 3.4346e-14, 5.5300e-15, 4.6276e-18, 3.2642e-13,\n 3.6231e-15, 1.3042e-13, 6.7414e-15, 2.0798e-14, 1.1351e-15, 2.8880e-14,\n 3.7977e-14, 1.5631e-16, 5.2759e-15, 2.5425e-15, 7.6225e-15, 6.3641e-14,\n 1.4393e-14, 2.1611e-13, 4.3016e-14, 2.9859e-15, 4.1440e-17, 5.1302e-15,\n 4.3234e-15, 2.2142e-14, 1.5911e-15, 8.8182e-16, 3.6999e-14, 3.3950e-14,\n 1.1500e-13, 3.9530e-14, 4.5540e-15, 6.3214e-14, 1.3009e-14, 1.1149e-14,\n 6.3583e-14, 2.7714e-17, 8.8254e-14, 7.7374e-14, 2.3637e-14, 9.4120e-17,\n 4.7122e-16, 2.7276e-13, 6.5713e-16, 1.1955e-13, 3.0876e-14, 4.0090e-15,\n 4.1039e-14, 4.1474e-14, 1.0480e-13, 6.1555e-15], device='cuda:0')" }, "26": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.3120e-12, 1.0002e-12, 5.5968e-13, 1.8523e-13, 4.5025e-14, 1.3670e-12,\n 8.7021e-15, 1.2956e-12, 1.1661e-12, 4.2786e-16, 1.4467e-12, 3.3647e-13,\n 1.9459e-13, 1.0411e-14, 4.6384e-16, 3.9911e-15, 1.4771e-16, 1.6223e-14,\n 2.7970e-13, 5.6800e-13, 2.2269e-13, 1.4606e-13, 1.5530e-13, 5.7656e-15,\n 3.2253e-13, 1.0648e-13, 8.4187e-14, 2.3910e-13, 3.5255e-14, 6.6146e-16,\n 4.1729e-15, 1.7078e-13, 1.6334e-13, 3.7378e-13, 5.6207e-17, 5.4212e-16,\n 1.1373e-13, 2.8594e-16, 1.1828e-13, 8.2595e-14, 3.2251e-15, 6.2172e-13,\n 1.6848e-12, 8.2941e-16, 1.6722e-13, 1.6142e-15, 1.2335e-13, 1.2565e-15,\n 3.8720e-14, 3.6328e-13, 2.6012e-15, 3.0955e-14, 7.1100e-16, 1.0679e-13,\n 1.0523e-14, 7.4590e-16, 2.6326e-12, 2.3092e-12, 9.2773e-16, 4.0897e-15,\n 3.3998e-12, 2.6279e-14, 2.6239e-13, 9.5413e-13, 7.3285e-14, 1.6286e-12,\n 9.6659e-16, 1.9370e-14, 1.7076e-13, 2.9944e-14, 3.3515e-13, 5.0157e-14,\n 7.2468e-14, 6.4982e-14, 1.3369e-14, 8.8998e-15, 1.7667e-13, 7.6487e-15,\n 1.4000e-13, 3.4743e-14, 1.9047e-13, 3.7854e-13, 1.0884e-14, 2.9454e-15,\n 1.0601e-14, 2.3277e-13, 3.9881e-13, 2.2106e-15, 6.5429e-13, 4.3815e-14,\n 1.5754e-14, 6.9896e-15, 6.9960e-15, 3.3527e-13, 7.4376e-13, 4.3253e-13,\n 1.9280e-14, 1.6597e-16, 4.0012e-13, 6.9615e-14, 2.0947e-13, 3.4782e-16,\n 5.7562e-14, 2.0286e-15, 1.0078e-14, 9.1749e-15, 1.6156e-13, 1.9466e-17,\n 2.1629e-13, 4.8516e-14, 9.6402e-15, 7.7550e-14, 1.2182e-13, 5.4323e-16,\n 2.2196e-13, 2.1858e-13, 1.6255e-14, 2.0107e-12, 2.7147e-16, 6.8187e-13,\n 2.6294e-14, 5.9131e-15, 1.1328e-13, 5.7592e-16, 1.4934e-14, 1.2633e-17,\n 1.1745e-12, 4.8869e-14, 7.4110e-18, 2.0267e-15, 2.6640e-14, 1.6014e-14,\n 5.8836e-14, 2.0099e-13, 2.5633e-15, 1.1947e-13, 3.8296e-13, 2.1720e-14,\n 4.7076e-16, 1.1687e-14, 1.1076e-13, 2.1176e-13, 3.9876e-13, 6.6778e-12,\n 1.3603e-15, 1.2411e-16, 2.1038e-13, 2.6244e-14, 4.2726e-17, 1.1541e-14,\n 1.3533e-13, 1.3747e-14, 1.0390e-14, 1.2196e-13, 1.0347e-14, 4.4825e-14,\n 9.2015e-15, 1.0341e-12, 8.2889e-15, 3.4467e-12, 1.7938e-14, 6.4712e-15,\n 6.1375e-16, 8.9965e-13, 1.8373e-13, 3.9794e-13, 4.6634e-15, 7.6071e-14,\n 1.3792e-12, 3.6022e-15, 6.9290e-13, 1.6275e-13, 2.0154e-13, 1.9231e-14,\n 1.7162e-13, 1.1100e-12, 3.4226e-13, 1.9144e-13, 2.0680e-14, 4.7902e-13,\n 1.1943e-12, 1.0238e-13, 1.3663e-12, 6.0842e-15, 1.5301e-13, 4.6911e-16,\n 2.1750e-13, 3.7606e-15, 8.4406e-14, 2.0682e-15, 8.2201e-13, 3.6923e-16,\n 1.3280e-13, 3.0846e-15, 2.9450e-14, 6.3959e-14, 2.5732e-14, 1.2160e-12,\n 6.9186e-14, 6.3322e-16, 7.7156e-14, 6.1222e-15, 3.0372e-15, 8.4645e-14,\n 3.0892e-14, 2.2716e-13, 2.5935e-14, 2.1864e-14, 3.0185e-14, 1.1746e-14,\n 1.8021e-15, 2.1564e-13, 4.9356e-16, 9.8633e-13, 7.5364e-14, 2.8700e-14,\n 3.7604e-13, 8.2067e-14, 7.9665e-13, 2.3206e-13, 1.7480e-15, 4.3693e-13,\n 3.9002e-15, 3.1810e-16, 1.2259e-13, 7.7637e-15, 1.7336e-13, 7.6012e-13,\n 6.3998e-17, 1.0088e-15, 1.1248e-13, 6.0236e-14, 1.4120e-16, 3.0388e-14,\n 3.4835e-15, 5.6906e-13, 6.6507e-14, 2.9973e-14, 3.2110e-13, 5.7111e-15,\n 5.6321e-13, 5.6414e-14, 5.7121e-13, 3.1341e-13, 5.1141e-14, 1.0841e-14,\n 2.3240e-14, 2.1811e-13, 3.7990e-13, 4.9127e-14, 3.7951e-13, 4.7615e-16,\n 2.7306e-13, 1.6262e-14, 1.2647e-13, 2.8007e-13], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.9964e-15, 1.2436e-14, 0.0000e+00, ..., 2.6322e-14, 4.2445e-14,\n 5.1848e-15],\n [8.3171e-16, 1.1563e-15, 0.0000e+00, ..., 3.2315e-17, 1.0417e-15,\n 3.3267e-17],\n [6.7224e-15, 5.2533e-15, 0.0000e+00, ..., 5.4073e-15, 5.8400e-15,\n 3.7985e-15],\n ...,\n [3.3475e-15, 2.7510e-15, 0.0000e+00, ..., 1.9406e-15, 1.8953e-14,\n 5.4700e-16],\n [1.9644e-14, 6.0793e-15, 0.0000e+00, ..., 5.2391e-15, 4.2938e-14,\n 9.5091e-15],\n [3.9726e-15, 6.9843e-16, 0.0000e+00, ..., 9.6362e-16, 8.2776e-15,\n 8.7026e-17]], device='cuda:0')" }, "27": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[9.0724e-15, 5.7067e-17, 1.1529e-15, ..., 8.9041e-17, 3.2111e-16,\n 1.5754e-17],\n [1.9741e-12, 1.9534e-12, 1.3587e-15, ..., 1.3769e-13, 2.6459e-13,\n 5.5371e-14],\n [1.5819e-13, 1.8550e-13, 7.5436e-17, ..., 1.4633e-14, 2.1704e-14,\n 9.1127e-15],\n ...,\n [1.6362e-12, 1.7432e-12, 1.1952e-15, ..., 9.5188e-14, 2.2561e-13,\n 3.9069e-14],\n [2.2743e-14, 3.1343e-14, 4.4885e-16, ..., 2.5514e-15, 1.9969e-15,\n 6.3769e-16],\n [2.8793e-12, 2.9832e-12, 8.1891e-16, ..., 2.2180e-13, 3.6894e-13,\n 8.9299e-14]], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.5247e-11, 6.8379e-13, 5.4273e-12, 7.0522e-12, 2.1475e-12, 5.4130e-13,\n 7.1233e-12, 1.7336e-13, 3.1968e-12, 4.8788e-13, 1.6207e-11, 5.6094e-11,\n 9.5496e-12, 1.1230e-11, 1.1717e-11, 4.8500e-11, 5.4356e-12, 5.3887e-12,\n 1.8189e-11, 2.1148e-11, 6.7816e-14, 2.2721e-12, 4.5827e-12, 5.6818e-12,\n 5.2623e-12, 9.7554e-12, 7.7554e-12, 1.5233e-12, 2.2259e-13, 2.5354e-12,\n 1.3473e-11, 8.9015e-12, 1.3958e-12, 6.8463e-11, 2.4798e-11, 1.5199e-11,\n 4.5214e-13, 8.5694e-15, 6.3756e-12, 2.0587e-10, 3.0712e-11, 4.3178e-12,\n 1.3535e-12, 8.4712e-13, 3.5508e-11, 6.2530e-11, 9.7401e-12, 2.1578e-11,\n 2.6881e-12, 1.7274e-11, 3.3312e-13, 3.6776e-11, 9.3766e-12, 8.4474e-12,\n 1.3425e-12, 1.1710e-13, 1.6014e-11, 5.9427e-13, 1.0643e-11, 9.3140e-13,\n 5.4694e-12, 1.0471e-11, 2.6396e-11, 1.4068e-11, 3.9677e-13, 9.8171e-13,\n 1.0554e-11, 6.8709e-13, 6.6926e-12, 1.2716e-12, 2.3215e-14, 6.4858e-12,\n 2.7412e-11, 2.0133e-12, 5.5499e-12, 3.7100e-14, 3.6041e-13, 2.2390e-11,\n 1.4449e-12, 4.7712e-16, 1.3253e-11, 1.5949e-11, 2.2839e-14, 1.3538e-12,\n 9.4492e-12, 4.0361e-11, 3.2444e-12, 1.3936e-11, 2.7606e-12, 2.3020e-11,\n 1.6302e-11, 1.0027e-11, 2.5487e-11, 4.6504e-12, 8.2121e-12, 1.2587e-12,\n 4.2814e-12, 2.9312e-11, 7.1121e-12, 5.9196e-11, 5.2439e-11, 1.2831e-12,\n 2.2806e-14, 7.0428e-12, 3.9581e-13, 4.6438e-11, 3.7218e-12, 3.3990e-12,\n 1.0128e-11, 6.3158e-14, 5.0203e-12, 3.2760e-14, 3.2719e-11, 3.2050e-11,\n 2.5265e-12, 2.8018e-11, 2.3924e-12, 4.5695e-12, 6.7701e-12, 6.9537e-12,\n 2.4234e-12, 5.5138e-13, 9.9429e-13, 1.3190e-15, 1.2920e-13, 6.2232e-12,\n 5.2887e-11, 8.7350e-12, 2.2088e-11, 1.0043e-11, 5.5464e-14, 6.3733e-11,\n 1.0275e-11, 9.4976e-12, 2.3194e-11, 1.3533e-11, 1.5082e-11, 2.0238e-11,\n 1.3747e-11, 5.2964e-14, 4.0117e-13, 7.3915e-13, 5.4424e-12, 6.9256e-11,\n 1.3232e-11, 8.0834e-13, 1.2479e-11, 1.5075e-12, 1.2741e-11, 3.1518e-13,\n 6.1401e-13, 2.1590e-11, 1.5751e-11, 1.9711e-11, 4.1855e-12, 7.0563e-13,\n 1.2551e-11, 3.7155e-11, 6.5183e-12, 8.2133e-12, 1.0147e-11, 1.1650e-11,\n 6.3249e-12, 8.4453e-12, 7.9071e-12, 7.4425e-12, 5.3425e-11, 4.1148e-11,\n 6.0065e-12, 2.2663e-11, 2.2563e-12, 1.6282e-12, 3.3625e-11, 3.1992e-12,\n 2.4014e-11, 1.7861e-12, 6.8724e-13, 2.1683e-12, 2.3188e-12, 2.5784e-12,\n 3.0392e-12, 2.6159e-12, 9.5288e-13, 6.7880e-12, 1.0445e-10, 1.5956e-14,\n 5.1784e-12, 4.3423e-12, 3.0655e-12, 3.1329e-11, 9.2098e-12, 1.1295e-12,\n 4.6577e-12, 6.4217e-12, 4.8775e-14, 5.6899e-11, 2.9939e-12, 4.5595e-13,\n 1.4220e-11, 6.3100e-11, 5.2840e-17, 6.1705e-12, 3.0627e-11, 6.8161e-12,\n 5.6916e-12, 4.4435e-11, 1.0990e-11, 2.3677e-12, 2.3600e-15, 5.3291e-11,\n 1.0825e-12, 5.3399e-11, 3.2808e-14, 1.1841e-11, 1.6995e-13, 1.4227e-11,\n 2.0026e-11, 3.4044e-16, 1.6929e-12, 2.3540e-12, 2.0038e-12, 6.5169e-11,\n 3.4452e-12, 1.3669e-11, 3.0776e-12, 1.2663e-13, 7.0499e-14, 2.5115e-13,\n 2.3026e-12, 4.5880e-12, 3.8464e-13, 5.6439e-14, 2.4006e-11, 1.7135e-12,\n 1.2596e-12, 8.7835e-12, 7.4715e-12, 1.0994e-11, 4.5467e-12, 3.5279e-12,\n 1.1922e-12, 2.7007e-15, 9.0518e-12, 5.8695e-12, 2.6259e-12, 8.3881e-14,\n 5.4195e-14, 4.4840e-11, 2.0835e-12, 5.1698e-11, 1.6434e-11, 1.7940e-12,\n 2.4676e-11, 6.9742e-12, 1.4569e-11, 2.7690e-12], device='cuda:0')" }, "28": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.8275e-13, 1.0709e-09, 1.1051e-10, 2.2790e-10, 4.3589e-11, 3.6154e-12,\n 1.1150e-10, 5.5229e-10, 3.2279e-10, 4.0162e-12, 1.2754e-11, 7.6643e-12,\n 1.7272e-11, 1.6601e-11, 3.2868e-12, 1.6170e-11, 5.7352e-13, 6.1628e-11,\n 4.8793e-11, 3.9397e-10, 6.8054e-12, 1.7374e-10, 1.0770e-09, 2.0057e-10,\n 2.9179e-10, 2.0739e-12, 6.2027e-12, 1.1590e-09, 2.6211e-14, 1.8952e-12,\n 6.0882e-12, 2.2400e-10, 1.2838e-10, 9.2009e-11, 1.9934e-11, 5.3140e-11,\n 3.6320e-11, 1.3112e-11, 8.8112e-11, 1.7991e-11, 1.6516e-11, 8.3762e-11,\n 7.6916e-10, 7.3242e-11, 1.3070e-11, 6.8431e-11, 2.6055e-11, 5.7225e-11,\n 1.1959e-12, 1.3096e-10, 7.9155e-11, 1.6907e-09, 1.3236e-11, 3.0983e-11,\n 5.0705e-12, 4.4681e-10, 3.9520e-09, 8.5826e-10, 6.4905e-11, 9.1987e-11,\n 7.4703e-10, 1.3046e-11, 1.4794e-10, 1.5815e-09, 3.6001e-11, 5.7806e-10,\n 3.9181e-11, 3.7267e-11, 1.4097e-09, 2.8804e-10, 2.0274e-13, 1.8106e-11,\n 1.2323e-11, 1.7499e-11, 1.3675e-13, 5.6828e-12, 5.5020e-13, 2.1669e-12,\n 1.1423e-10, 9.7044e-12, 1.0330e-12, 2.9553e-10, 4.4539e-11, 2.1777e-10,\n 4.5448e-13, 3.5095e-11, 2.4448e-10, 4.0325e-12, 8.7486e-10, 5.2360e-12,\n 1.7529e-10, 7.2198e-11, 2.4697e-10, 3.6282e-11, 1.6476e-10, 1.8268e-09,\n 2.7869e-12, 1.2581e-12, 1.0911e-10, 4.4612e-11, 1.3005e-12, 1.8862e-11,\n 8.4743e-12, 9.9945e-11, 2.2070e-12, 3.1014e-11, 1.3886e-10, 5.4402e-14,\n 2.1135e-11, 1.0535e-12, 8.3291e-14, 1.4899e-11, 1.1906e-13, 8.5270e-12,\n 1.4380e-10, 2.4785e-10, 1.5327e-10, 1.1705e-09, 1.1623e-12, 7.2934e-12,\n 4.8385e-10, 9.6386e-13, 1.8671e-12, 1.5220e-13, 2.0574e-12, 1.7542e-12,\n 1.2481e-10, 1.9406e-11, 2.2806e-11, 9.4347e-11, 4.5598e-11, 7.4168e-12,\n 1.8566e-11, 5.7642e-12, 1.2406e-11, 3.5590e-14, 3.1650e-11, 2.6439e-11,\n 1.0074e-10, 1.8509e-09, 1.2854e-10, 7.4087e-13, 6.4208e-11, 3.1505e-09,\n 8.1255e-11, 1.3938e-11, 8.6079e-12, 1.1188e-11, 8.9197e-12, 2.2465e-11,\n 4.3763e-12, 3.0513e-12, 2.3979e-12, 2.3258e-10, 1.9681e-12, 9.6439e-13,\n 9.8869e-13, 4.4741e-10, 1.1792e-11, 7.5836e-11, 6.8840e-11, 6.0298e-14,\n 1.8968e-12, 7.1669e-10, 4.4378e-12, 4.6410e-10, 4.2665e-13, 9.0151e-11,\n 1.3380e-09, 2.1401e-11, 6.8716e-10, 1.5101e-11, 3.2299e-11, 1.1939e-13,\n 1.6429e-12, 1.7593e-11, 6.9715e-11, 9.1625e-14, 5.7456e-10, 1.2251e-11,\n 2.7666e-10, 1.9662e-10, 9.6688e-12, 6.6900e-12, 8.6640e-13, 3.5636e-11,\n 9.9327e-12, 4.0818e-12, 1.4544e-11, 6.4307e-11, 3.4322e-10, 2.0404e-14,\n 1.1355e-10, 1.8856e-10, 1.4860e-10, 4.6534e-09, 1.4901e-12, 8.1025e-11,\n 1.4469e-11, 1.1971e-10, 4.1146e-11, 1.3417e-10, 4.2286e-11, 2.2543e-11,\n 4.6009e-11, 4.1437e-11, 2.4022e-13, 8.8304e-11, 2.1006e-13, 1.0103e-09,\n 7.6855e-12, 1.8901e-12, 1.5560e-12, 1.0183e-09, 3.1667e-10, 7.5710e-11,\n 5.6010e-11, 8.7498e-10, 2.8785e-09, 7.8649e-13, 2.4175e-12, 2.5938e-10,\n 1.1565e-12, 1.7044e-10, 1.3209e-11, 3.5924e-12, 8.1013e-10, 6.5357e-10,\n 7.2375e-13, 2.7424e-12, 6.0560e-10, 3.3599e-11, 4.7505e-11, 2.5389e-11,\n 3.1477e-11, 5.6344e-13, 1.9187e-11, 6.3196e-13, 9.6362e-11, 1.5906e-11,\n 4.6666e-13, 8.0201e-11, 6.6866e-10, 3.3050e-11, 1.0297e-10, 2.6526e-11,\n 7.7154e-10, 7.4028e-12, 3.1025e-10, 5.4171e-11, 9.3630e-12, 4.1234e-14,\n 2.4752e-11, 7.3684e-10, 1.3928e-11, 1.5281e-09], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.7281e-14, 3.3842e-15, 9.9367e-15, 2.3043e-14, 4.1104e-15, 2.5398e-15,\n 1.4664e-14, 9.7165e-16, 4.9708e-15, 6.0484e-16, 3.0398e-14, 1.4829e-13,\n 2.0846e-14, 2.8092e-14, 4.5059e-14, 2.0108e-13, 1.6401e-14, 1.3248e-14,\n 6.1324e-14, 4.1933e-14, 7.4162e-18, 6.0168e-15, 1.7056e-14, 1.1564e-14,\n 1.8764e-14, 2.3529e-14, 1.6144e-14, 4.8055e-15, 5.7192e-16, 4.4383e-15,\n 3.6471e-14, 2.1180e-14, 3.2850e-15, 1.7179e-13, 6.4869e-14, 4.5882e-14,\n 9.6862e-16, 3.6595e-18, 1.5468e-14, 1.2586e-12, 6.8915e-14, 1.3813e-14,\n 4.5200e-15, 2.5277e-15, 1.2020e-13, 2.7592e-13, 3.0071e-14, 9.8290e-14,\n 2.9193e-15, 3.5827e-14, 3.3192e-16, 9.2884e-14, 1.7997e-14, 1.8666e-14,\n 1.8999e-15, 4.4382e-16, 5.1319e-14, 5.0513e-16, 4.3481e-14, 3.2350e-15,\n 1.2669e-14, 7.5181e-14, 1.2585e-13, 4.9270e-14, 1.8564e-16, 1.1359e-15,\n 2.8576e-14, 6.0666e-16, 1.0957e-14, 4.1259e-15, 9.0982e-16, 1.9561e-14,\n 6.3739e-14, 4.7507e-15, 1.3305e-14, 3.8936e-16, 1.2954e-15, 6.4215e-14,\n 2.0492e-15, 3.6490e-17, 4.9248e-14, 3.4320e-14, 3.9863e-17, 6.5771e-15,\n 3.7444e-14, 1.1744e-13, 9.3946e-15, 5.9470e-14, 6.7560e-15, 5.2749e-14,\n 9.2383e-14, 2.5115e-14, 6.2969e-14, 6.8099e-15, 1.7763e-14, 1.2299e-15,\n 8.6431e-15, 5.2064e-14, 1.6738e-14, 1.6566e-13, 3.4333e-13, 5.8542e-15,\n 1.3095e-16, 1.6056e-14, 2.0979e-16, 1.0270e-13, 1.1348e-14, 9.2587e-15,\n 3.0305e-14, 2.6187e-19, 1.3196e-14, 6.6232e-16, 1.0011e-13, 6.3167e-14,\n 3.5290e-15, 7.3768e-14, 8.0041e-15, 8.7656e-15, 1.6982e-14, 1.8294e-14,\n 3.1793e-15, 2.2631e-15, 1.8406e-15, 7.8048e-18, 5.1456e-16, 1.1733e-14,\n 1.3469e-13, 2.7127e-14, 1.1240e-13, 2.2766e-14, 1.1967e-17, 1.6643e-13,\n 4.7684e-14, 1.8458e-14, 7.2304e-14, 2.4563e-14, 2.5810e-14, 3.4379e-14,\n 2.9623e-14, 2.9111e-18, 1.9999e-15, 4.5015e-15, 6.4739e-15, 1.9779e-13,\n 3.3972e-14, 2.3903e-15, 5.6602e-14, 3.9656e-15, 2.2534e-14, 6.3833e-17,\n 1.6805e-15, 5.6099e-14, 9.7122e-14, 7.9871e-14, 4.2153e-15, 6.0548e-15,\n 5.9443e-14, 9.9064e-14, 1.4330e-14, 1.7663e-14, 1.3818e-14, 1.9478e-14,\n 3.1019e-14, 1.5571e-14, 2.4598e-14, 1.7418e-14, 9.6770e-14, 1.3572e-13,\n 1.2358e-14, 5.3369e-14, 4.1593e-15, 2.3697e-15, 7.3788e-14, 6.2942e-15,\n 7.0227e-14, 1.0750e-14, 1.4899e-15, 5.0379e-15, 4.5225e-15, 9.0607e-15,\n 4.0943e-15, 5.7055e-15, 1.0743e-15, 1.2615e-14, 3.2120e-13, 1.0858e-16,\n 8.2249e-15, 7.8925e-15, 3.4143e-15, 6.8724e-14, 1.3606e-14, 1.8775e-15,\n 1.6367e-14, 2.9539e-14, 2.0413e-16, 1.2637e-13, 1.0124e-14, 7.0020e-16,\n 6.6009e-14, 1.5632e-13, 2.0867e-17, 1.4057e-14, 1.1170e-13, 2.2429e-14,\n 2.1048e-14, 8.8363e-14, 2.1390e-14, 6.1483e-15, 8.6269e-18, 1.1394e-13,\n 1.7061e-15, 1.4490e-13, 3.7800e-16, 3.1674e-14, 1.7786e-16, 3.2102e-14,\n 2.8220e-14, 1.5317e-16, 5.6751e-15, 4.1434e-15, 5.0669e-15, 2.9515e-13,\n 1.2494e-14, 3.5775e-14, 8.5487e-15, 1.3712e-15, 1.9482e-18, 1.4405e-15,\n 3.1706e-15, 1.0471e-14, 1.4812e-15, 1.0755e-18, 8.9453e-14, 5.7871e-15,\n 4.5769e-15, 3.1735e-14, 2.7153e-14, 2.2729e-14, 2.0491e-14, 7.9068e-15,\n 6.1535e-15, 5.5133e-17, 3.0382e-14, 9.8261e-15, 4.7894e-15, 8.4312e-17,\n 9.7652e-17, 1.4908e-13, 4.7709e-15, 1.6880e-13, 2.7977e-14, 3.0928e-15,\n 1.0042e-13, 1.2646e-14, 2.6281e-14, 1.7900e-14], device='cuda:0')" }, "29": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.6610e-15, 1.7988e-12, 4.3504e-14, 4.8459e-15, 7.2993e-15, 1.8780e-14,\n 2.5412e-15, 2.0544e-13, 5.6539e-14, 1.1659e-14, 4.4370e-14, 2.2212e-15,\n 1.0941e-14, 9.3559e-14, 1.1890e-14, 2.8054e-14, 2.6399e-16, 1.0513e-13,\n 2.0875e-15, 6.3324e-13, 1.2573e-14, 4.4593e-14, 4.9453e-12, 8.2392e-14,\n 8.4192e-13, 3.4552e-16, 1.5181e-15, 7.5987e-13, 9.2814e-16, 5.0364e-17,\n 1.6463e-15, 1.7647e-13, 2.8476e-14, 1.5709e-13, 2.2449e-15, 4.8715e-14,\n 3.2877e-15, 2.2782e-15, 8.9581e-14, 1.5697e-15, 3.4665e-14, 1.8763e-14,\n 1.0185e-12, 8.6138e-14, 2.7182e-14, 2.0844e-13, 1.3616e-15, 2.3424e-14,\n 1.9402e-15, 1.5753e-13, 4.1719e-14, 5.0133e-12, 7.7189e-15, 2.2593e-15,\n 1.1697e-14, 3.5702e-13, 8.7091e-12, 3.8658e-13, 2.4313e-14, 5.3051e-14,\n 1.0308e-13, 2.4739e-14, 3.9959e-14, 7.8766e-13, 1.8160e-15, 2.7784e-13,\n 3.5539e-14, 2.1976e-13, 5.2786e-12, 4.5900e-13, 2.3670e-15, 3.6921e-14,\n 4.7492e-15, 4.5383e-14, 1.7605e-14, 7.7830e-16, 7.2892e-15, 2.5674e-16,\n 3.0300e-14, 8.1863e-15, 2.6703e-17, 1.8579e-13, 2.2008e-15, 1.9224e-12,\n 6.2212e-16, 4.2292e-13, 3.4294e-14, 4.4561e-15, 1.7962e-12, 1.5537e-14,\n 4.5679e-13, 5.5143e-15, 6.8971e-14, 2.3650e-15, 3.4035e-14, 1.9379e-12,\n 8.8079e-14, 3.5435e-16, 7.5601e-14, 2.9511e-14, 9.9252e-15, 1.3183e-15,\n 3.7868e-15, 1.4411e-14, 2.9552e-15, 1.0707e-13, 2.9516e-13, 7.4517e-17,\n 4.6535e-15, 4.4381e-15, 2.2999e-15, 3.8898e-14, 2.3353e-14, 5.1608e-16,\n 6.6549e-13, 2.9716e-13, 1.0505e-13, 2.6156e-12, 5.7894e-17, 2.1176e-15,\n 2.7585e-13, 2.4399e-15, 3.0008e-16, 6.1583e-16, 9.9360e-16, 2.2103e-16,\n 2.3356e-15, 1.0401e-15, 1.3168e-15, 7.6072e-15, 8.2389e-15, 1.5423e-16,\n 5.9987e-16, 3.2204e-15, 4.5193e-15, 1.0582e-15, 1.6875e-14, 2.7009e-14,\n 2.0722e-13, 5.8652e-12, 1.9162e-14, 4.6404e-15, 1.2254e-14, 1.3851e-12,\n 1.4998e-14, 1.6811e-14, 8.8568e-16, 2.5619e-16, 1.0522e-15, 7.6465e-15,\n 1.4395e-15, 4.5057e-16, 3.3716e-15, 7.7289e-13, 4.6316e-16, 2.2015e-15,\n 9.0375e-17, 2.9481e-13, 4.3968e-17, 5.2846e-15, 2.8364e-14, 6.2226e-16,\n 1.7281e-14, 2.5554e-12, 2.5655e-16, 4.2762e-14, 4.2195e-15, 4.8171e-14,\n 1.7929e-12, 3.4149e-14, 1.4550e-12, 3.0393e-16, 3.9265e-14, 6.9802e-16,\n 1.2175e-14, 2.9404e-14, 8.7905e-15, 2.6223e-15, 1.2672e-13, 3.9362e-16,\n 2.9046e-14, 2.0571e-13, 5.3545e-14, 4.1260e-14, 4.9007e-15, 2.1817e-14,\n 2.5133e-14, 2.0090e-15, 1.5090e-14, 3.6834e-14, 1.6533e-13, 5.2647e-15,\n 2.1308e-14, 1.4876e-13, 1.4021e-14, 1.3493e-11, 2.7847e-17, 5.7994e-15,\n 5.9814e-16, 1.7656e-14, 2.4841e-15, 1.7878e-13, 3.3168e-15, 5.6370e-14,\n 1.2268e-14, 7.1858e-16, 1.6148e-15, 1.6011e-14, 1.6687e-15, 2.0695e-12,\n 5.1078e-15, 5.6810e-16, 1.1716e-15, 3.4308e-12, 1.2818e-14, 1.2126e-14,\n 6.1130e-15, 2.0144e-12, 5.5013e-12, 7.9478e-15, 1.1194e-15, 7.5302e-13,\n 1.5007e-16, 6.6709e-14, 6.6722e-15, 2.4435e-16, 1.5302e-13, 4.8083e-13,\n 2.5065e-17, 2.3278e-15, 1.0818e-13, 2.5810e-14, 2.3512e-13, 6.6417e-13,\n 2.4354e-15, 3.3148e-15, 2.6228e-14, 2.1342e-15, 1.7417e-13, 1.4163e-16,\n 7.0958e-15, 1.0169e-13, 7.7536e-13, 2.3746e-14, 9.0759e-14, 4.2750e-15,\n 6.7858e-13, 2.4557e-16, 2.3042e-13, 1.2936e-13, 1.3065e-15, 2.5884e-17,\n 2.7797e-15, 4.8951e-14, 1.0702e-15, 2.9867e-12], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.0847e-14, 3.3436e-15, 1.8174e-14, 3.0951e-14, 4.9777e-15, 3.4575e-15,\n 2.1957e-14, 1.5730e-15, 9.0649e-15, 9.9095e-16, 5.3492e-14, 2.0471e-13,\n 3.1888e-14, 4.5705e-14, 5.3298e-14, 2.0833e-13, 2.7999e-14, 1.8185e-14,\n 5.9803e-14, 9.1248e-14, 1.0282e-18, 1.0718e-14, 2.2205e-14, 2.6256e-14,\n 2.5624e-14, 3.2386e-14, 3.4628e-14, 8.1598e-15, 1.4103e-15, 7.9083e-15,\n 6.0913e-14, 3.8288e-14, 7.3504e-15, 2.4852e-13, 1.0725e-13, 6.8785e-14,\n 7.7714e-16, 1.6814e-17, 2.8362e-14, 7.4912e-13, 1.0717e-13, 2.1774e-14,\n 7.3486e-15, 4.9894e-15, 1.2352e-13, 2.1928e-13, 4.4757e-14, 9.7544e-14,\n 7.2911e-15, 6.0279e-14, 5.7950e-16, 1.5634e-13, 3.0852e-14, 2.8983e-14,\n 3.4585e-15, 1.1417e-15, 6.9715e-14, 1.5466e-15, 5.0136e-14, 5.2514e-15,\n 1.6009e-14, 5.1402e-14, 1.1438e-13, 6.2630e-14, 6.5428e-16, 2.8200e-15,\n 4.7681e-14, 1.4885e-15, 2.9484e-14, 6.2790e-15, 5.8753e-16, 3.1072e-14,\n 9.1566e-14, 1.0910e-14, 1.7046e-14, 3.2715e-16, 2.0935e-15, 9.7719e-14,\n 3.0267e-15, 4.2455e-17, 5.8674e-14, 7.1384e-14, 1.5306e-16, 7.6612e-15,\n 4.5091e-14, 1.7467e-13, 1.4729e-14, 6.0740e-14, 9.4947e-15, 1.0151e-13,\n 7.5237e-14, 4.4700e-14, 8.9299e-14, 1.5069e-14, 3.6023e-14, 3.6815e-15,\n 1.4014e-14, 1.0803e-13, 3.3918e-14, 2.3850e-13, 2.2641e-13, 7.7021e-15,\n 2.2492e-16, 2.1692e-14, 3.1347e-16, 1.6966e-13, 9.5463e-15, 1.6368e-14,\n 3.2297e-14, 6.3450e-18, 1.7882e-14, 1.8033e-16, 1.3655e-13, 1.1431e-13,\n 8.0626e-15, 9.7751e-14, 1.1638e-14, 1.4456e-14, 3.0759e-14, 2.9360e-14,\n 7.7058e-15, 1.8306e-15, 2.1208e-15, 2.0353e-17, 1.2141e-15, 1.9675e-14,\n 2.2439e-13, 4.0356e-14, 9.7228e-14, 3.2544e-14, 7.2775e-19, 2.5658e-13,\n 4.8711e-14, 4.2744e-14, 7.9006e-14, 4.6404e-14, 6.5672e-14, 7.4527e-14,\n 6.0308e-14, 6.2631e-18, 2.9806e-15, 5.2637e-15, 1.8728e-14, 2.4254e-13,\n 5.5706e-14, 4.5204e-15, 5.7017e-14, 7.9422e-15, 4.2947e-14, 5.2062e-16,\n 3.6794e-15, 7.2950e-14, 7.1550e-14, 6.5430e-14, 1.2776e-14, 5.1294e-15,\n 3.8419e-14, 1.3569e-13, 2.0153e-14, 3.7886e-14, 3.6004e-14, 3.9525e-14,\n 1.7129e-14, 3.6125e-14, 3.5594e-14, 2.4265e-14, 1.9629e-13, 1.4246e-13,\n 2.9048e-14, 1.0120e-13, 5.6229e-15, 5.5447e-15, 1.2014e-13, 1.4799e-14,\n 8.0496e-14, 1.0994e-14, 1.9813e-15, 1.0926e-14, 5.9589e-15, 1.3013e-14,\n 9.0296e-15, 7.6178e-15, 2.5445e-15, 2.1111e-14, 4.0969e-13, 2.0355e-16,\n 1.7311e-14, 1.3622e-14, 8.8594e-15, 1.2930e-13, 3.3751e-14, 3.4999e-15,\n 2.1956e-14, 3.2275e-14, 7.4865e-16, 2.4637e-13, 8.0979e-15, 1.2001e-15,\n 6.6234e-14, 2.3585e-13, 1.9153e-16, 2.8274e-14, 1.0566e-13, 1.8747e-14,\n 2.7902e-14, 1.7326e-13, 3.6953e-14, 7.2275e-15, 7.3518e-17, 1.9414e-13,\n 2.3672e-15, 2.1498e-13, 3.5647e-16, 5.2072e-14, 1.4092e-16, 5.2708e-14,\n 6.9201e-14, 1.8003e-16, 9.3974e-15, 7.5157e-15, 4.2214e-15, 2.7041e-13,\n 1.7121e-14, 6.3262e-14, 1.6086e-14, 1.8027e-15, 3.0352e-18, 2.3401e-15,\n 6.0554e-15, 2.0424e-14, 2.6446e-15, 3.2020e-17, 1.0588e-13, 8.5362e-15,\n 6.1324e-15, 4.0665e-14, 2.1752e-14, 4.9786e-14, 2.2764e-14, 1.0385e-14,\n 4.8449e-15, 1.2928e-16, 4.1602e-14, 2.0418e-14, 8.7709e-15, 1.0257e-16,\n 4.2603e-17, 1.5535e-13, 4.9534e-15, 2.1347e-13, 5.5387e-14, 4.8690e-15,\n 8.1136e-14, 2.2445e-14, 5.5484e-14, 1.5380e-14], device='cuda:0')" }, "30": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.2515e-15, 1.4744e-12, 1.7234e-13, 2.6171e-13, 6.8332e-14, 8.7499e-15,\n 1.5258e-13, 7.5904e-13, 4.4648e-13, 1.8463e-14, 1.2592e-14, 2.0386e-14,\n 5.9811e-14, 1.1176e-13, 2.6125e-14, 9.6760e-15, 9.8563e-16, 1.1291e-13,\n 5.7326e-14, 5.8234e-13, 4.5937e-15, 2.5249e-13, 1.0723e-12, 3.4961e-13,\n 4.8463e-13, 4.0284e-15, 1.4723e-14, 1.3100e-12, 2.7162e-17, 3.1825e-15,\n 5.7611e-15, 3.3790e-13, 1.8781e-13, 2.9381e-13, 4.6819e-15, 1.3228e-13,\n 5.6911e-14, 1.2576e-14, 1.5498e-13, 1.0247e-14, 6.0178e-14, 1.2844e-13,\n 1.1008e-12, 1.3560e-13, 7.0476e-14, 1.2894e-13, 2.0679e-14, 1.4235e-14,\n 6.9117e-16, 1.5911e-13, 1.1883e-13, 2.4304e-12, 2.7380e-14, 2.1679e-14,\n 1.4800e-14, 4.8014e-13, 5.2971e-12, 1.1690e-12, 6.5505e-14, 1.3658e-13,\n 9.8619e-13, 4.0275e-14, 1.4616e-13, 1.9194e-12, 5.3514e-14, 8.0207e-13,\n 6.1632e-14, 1.1806e-13, 2.0025e-12, 4.3415e-13, 2.1863e-16, 1.3577e-14,\n 2.6777e-14, 9.8458e-14, 2.2376e-14, 2.0564e-15, 2.3968e-15, 9.9063e-17,\n 1.7231e-13, 2.3957e-14, 6.7697e-16, 3.7194e-13, 5.9599e-14, 5.0067e-13,\n 3.6906e-15, 1.3692e-13, 2.7866e-13, 2.2614e-15, 1.2965e-12, 4.0931e-15,\n 3.1890e-13, 1.0352e-13, 2.5784e-13, 4.9875e-14, 2.3351e-13, 2.2210e-12,\n 5.8833e-14, 1.0351e-16, 1.8980e-13, 9.6300e-14, 5.7243e-15, 2.8366e-14,\n 2.5034e-14, 1.2534e-13, 1.0906e-15, 1.5206e-13, 2.3373e-13, 1.3373e-15,\n 6.0036e-14, 9.4919e-16, 1.0361e-15, 4.3616e-14, 2.2644e-14, 1.2166e-14,\n 1.5847e-13, 3.7752e-13, 2.3986e-13, 1.4233e-12, 2.2535e-15, 9.2480e-15,\n 5.1699e-13, 4.0077e-16, 3.0512e-15, 3.2800e-16, 1.2800e-14, 3.6361e-15,\n 1.3318e-13, 1.0524e-14, 1.1536e-14, 8.2599e-14, 6.8880e-14, 1.7614e-15,\n 1.5481e-14, 8.8106e-16, 1.2353e-14, 6.1258e-16, 6.8428e-14, 4.1912e-14,\n 1.7040e-13, 2.5864e-12, 1.7986e-13, 4.4767e-16, 1.2621e-13, 3.9580e-12,\n 7.1334e-14, 7.0219e-15, 1.7382e-14, 1.7257e-14, 1.6008e-14, 3.7849e-14,\n 3.3284e-16, 7.5509e-15, 1.1053e-15, 3.7423e-13, 2.3500e-15, 3.3413e-16,\n 1.0920e-17, 6.2451e-13, 8.3538e-15, 8.9040e-14, 3.2078e-14, 5.0972e-17,\n 7.1477e-15, 8.6917e-13, 8.0614e-15, 6.1611e-13, 3.0372e-15, 1.4564e-13,\n 1.5388e-12, 2.4307e-14, 7.0753e-13, 9.5102e-15, 1.0383e-13, 2.1121e-16,\n 6.1590e-15, 8.6060e-15, 5.6995e-14, 7.4518e-16, 6.3985e-13, 1.7900e-14,\n 3.2685e-13, 1.7567e-13, 1.3540e-14, 1.9758e-14, 1.6262e-15, 6.5624e-14,\n 5.2954e-14, 1.2216e-15, 6.9104e-14, 1.0630e-13, 4.7806e-13, 7.7926e-18,\n 1.6630e-13, 2.7275e-13, 1.4428e-13, 6.2127e-12, 1.2268e-16, 1.1649e-13,\n 2.1962e-14, 1.0454e-13, 3.2786e-14, 2.2096e-13, 3.4455e-14, 6.8828e-14,\n 4.7212e-14, 5.8928e-14, 1.1710e-15, 1.2827e-13, 2.0567e-15, 1.3057e-12,\n 1.8671e-15, 3.2607e-16, 3.4141e-16, 1.3074e-12, 4.2234e-13, 7.0489e-14,\n 8.0704e-14, 1.2431e-12, 3.8674e-12, 5.4870e-15, 4.5811e-15, 3.7094e-13,\n 2.0480e-15, 1.9977e-13, 5.0186e-14, 1.6094e-15, 1.0731e-12, 8.5025e-13,\n 7.7994e-16, 6.6092e-15, 7.5873e-13, 5.7072e-14, 7.5559e-14, 1.5407e-13,\n 1.3715e-14, 4.5816e-15, 8.6443e-14, 2.3527e-16, 2.5273e-13, 7.1763e-15,\n 6.2811e-16, 1.5477e-13, 7.4696e-13, 7.1678e-14, 1.5019e-13, 4.3775e-14,\n 1.0692e-12, 1.2680e-14, 4.5927e-13, 8.4724e-14, 1.3897e-14, 3.7484e-17,\n 2.9367e-14, 9.7165e-13, 2.0230e-14, 2.1193e-12], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.6028e-15, 1.8922e-14, 0.0000e+00, ..., 6.5091e-14, 3.7039e-14,\n 3.9596e-14],\n [8.9504e-15, 3.3431e-15, 0.0000e+00, ..., 2.7960e-15, 3.2386e-14,\n 1.4604e-15],\n [7.2898e-15, 1.2864e-14, 0.0000e+00, ..., 8.9481e-15, 6.0702e-14,\n 3.5889e-15],\n ...,\n [1.7624e-15, 4.7981e-15, 0.0000e+00, ..., 6.1026e-15, 9.0703e-15,\n 5.5995e-17],\n [4.0241e-14, 3.5772e-14, 0.0000e+00, ..., 5.2018e-14, 2.2189e-13,\n 4.5234e-14],\n [3.2486e-16, 4.3437e-16, 0.0000e+00, ..., 1.6754e-15, 3.9182e-15,\n 4.0773e-17]], device='cuda:0')" }, "31": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.4534e-13, 6.7218e-13, 3.3979e-16, ..., 4.7111e-14, 7.7059e-14,\n 2.2394e-14],\n [8.7013e-15, 1.1241e-14, 2.0206e-17, ..., 8.1730e-16, 1.0188e-15,\n 2.2014e-16],\n [5.7045e-13, 6.2788e-13, 2.4815e-16, ..., 5.0196e-14, 6.0521e-14,\n 2.1837e-14],\n ...,\n [4.0982e-12, 4.6013e-12, 9.1760e-17, ..., 3.7207e-13, 4.6027e-13,\n 2.0902e-13],\n [3.0288e-13, 3.4681e-13, 7.4077e-16, ..., 2.4310e-14, 4.1763e-14,\n 1.2300e-14],\n [3.4903e-15, 3.6789e-15, 6.3655e-19, ..., 2.4239e-16, 2.9372e-16,\n 6.6882e-17]], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.4719e-11, 1.0792e-11, 1.3723e-11, 4.2706e-13, 3.8803e-12, 4.1044e-13,\n 4.2098e-12, 3.6002e-13, 2.4599e-12, 1.5766e-12, 3.2389e-11, 3.1925e-11,\n 4.3816e-12, 3.1632e-12, 6.1892e-12, 8.3730e-13, 1.5719e-11, 4.6759e-12,\n 1.5701e-11, 4.3716e-11, 7.1908e-14, 1.2532e-11, 2.7613e-12, 2.5654e-11,\n 1.8922e-12, 4.1552e-12, 2.7475e-11, 4.4416e-13, 1.5600e-12, 4.7632e-12,\n 8.2629e-12, 2.7199e-11, 3.5945e-14, 4.8952e-11, 2.8165e-11, 3.3436e-11,\n 1.2682e-12, 4.5202e-14, 1.8169e-11, 1.1997e-10, 5.0420e-11, 1.4199e-12,\n 1.2904e-12, 1.3007e-12, 9.1687e-12, 2.0929e-11, 1.2756e-11, 5.8952e-12,\n 1.2840e-12, 1.7735e-11, 4.7534e-13, 4.4411e-11, 2.2259e-11, 1.4830e-11,\n 6.5648e-13, 1.3634e-12, 9.9812e-12, 1.1677e-12, 7.3999e-12, 2.3397e-12,\n 2.8590e-12, 7.3995e-12, 1.2180e-11, 2.9960e-11, 3.1387e-13, 7.4675e-12,\n 7.1923e-12, 1.7388e-12, 1.4327e-11, 2.0801e-12, 1.4588e-14, 5.7965e-12,\n 2.3789e-11, 2.3185e-12, 1.4197e-12, 1.8377e-14, 3.7318e-12, 4.1752e-11,\n 1.6336e-12, 1.1724e-14, 1.7012e-11, 5.4330e-11, 2.4282e-14, 3.9961e-13,\n 6.7475e-12, 4.0221e-11, 1.8160e-12, 8.2494e-12, 6.0349e-12, 1.5448e-12,\n 8.5460e-12, 3.0814e-11, 9.3865e-12, 1.0890e-11, 3.4924e-11, 8.7048e-12,\n 1.5435e-11, 5.2368e-11, 8.1629e-12, 6.3347e-11, 5.9010e-12, 1.2085e-12,\n 2.3544e-15, 6.2484e-12, 6.8590e-14, 7.5510e-11, 2.1614e-12, 8.5421e-12,\n 6.4952e-12, 4.5150e-14, 5.7254e-11, 2.5326e-12, 1.7785e-11, 2.6784e-11,\n 4.6797e-12, 3.7291e-11, 8.0381e-13, 2.9611e-12, 4.6327e-12, 2.4863e-11,\n 2.5928e-12, 1.9853e-12, 1.1053e-12, 1.8561e-14, 1.8374e-13, 1.3359e-11,\n 2.2355e-11, 3.2935e-12, 1.1078e-11, 2.2704e-11, 2.4789e-13, 5.0905e-11,\n 1.2432e-11, 1.8611e-12, 2.1549e-11, 8.1525e-12, 4.0793e-11, 7.9048e-12,\n 6.2293e-12, 4.4817e-15, 1.5885e-14, 1.5355e-13, 2.7937e-11, 1.0358e-10,\n 1.8137e-11, 3.5707e-12, 4.3817e-12, 6.9322e-12, 6.1217e-12, 5.2312e-13,\n 1.6016e-12, 1.4188e-11, 1.0289e-11, 2.5182e-12, 3.0819e-12, 1.8229e-12,\n 4.9023e-12, 5.5175e-11, 2.4414e-12, 2.6448e-12, 8.7403e-12, 2.7362e-12,\n 2.6054e-12, 7.8057e-12, 2.4774e-12, 1.1625e-11, 5.3307e-11, 3.8566e-12,\n 1.8206e-11, 3.3010e-12, 1.6733e-13, 6.0455e-12, 1.1173e-11, 1.7478e-12,\n 2.2615e-11, 3.8445e-12, 2.6695e-12, 2.5498e-12, 2.8422e-12, 7.6846e-12,\n 9.2791e-12, 2.7685e-12, 4.1421e-12, 6.8790e-12, 6.0781e-11, 3.5520e-14,\n 4.7656e-12, 1.7914e-11, 3.1811e-12, 5.8374e-11, 1.0141e-11, 5.9710e-12,\n 3.4330e-12, 1.1971e-12, 5.2521e-15, 1.6188e-10, 2.2618e-12, 4.8284e-13,\n 4.7448e-12, 2.2198e-11, 2.5477e-14, 6.5852e-11, 4.5392e-12, 5.6645e-12,\n 5.5651e-12, 1.5292e-11, 2.7762e-11, 5.6534e-13, 1.2930e-14, 1.0598e-11,\n 3.0614e-12, 7.7826e-12, 2.9004e-13, 1.9369e-11, 3.8356e-13, 4.2177e-11,\n 4.8938e-11, 1.9263e-13, 4.8194e-13, 4.9028e-12, 1.9970e-12, 2.7325e-11,\n 2.5411e-12, 3.3084e-11, 4.4623e-12, 6.1255e-16, 5.6744e-15, 4.3750e-13,\n 6.7903e-13, 7.1505e-12, 1.8821e-13, 6.3489e-13, 7.3872e-12, 4.2855e-12,\n 2.9245e-11, 2.2510e-13, 2.0553e-12, 1.3692e-11, 3.8252e-12, 4.4809e-12,\n 8.1256e-12, 3.3136e-14, 3.2174e-12, 1.1479e-11, 4.7170e-12, 1.1381e-13,\n 2.6524e-13, 2.1925e-11, 1.5196e-12, 4.9066e-12, 3.9460e-11, 6.0445e-13,\n 2.4610e-11, 2.1760e-12, 5.8590e-11, 1.4014e-12], device='cuda:0')" }, "32": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.3704e-10, 4.0588e-12, 3.2403e-10, 1.3054e-11, 2.7765e-13, 4.4397e-12,\n 6.5231e-11, 6.3225e-10, 2.3554e-10, 1.3337e-11, 2.3389e-09, 1.8284e-11,\n 1.1943e-13, 1.1348e-13, 1.8093e-12, 4.3637e-11, 2.7197e-12, 2.1168e-10,\n 3.5121e-10, 1.3230e-10, 2.3802e-10, 6.4327e-12, 2.1297e-11, 9.3231e-10,\n 6.1740e-12, 8.2342e-11, 2.9282e-10, 5.2814e-12, 3.9858e-13, 1.1249e-10,\n 4.5240e-12, 3.9469e-10, 1.0224e-10, 8.4786e-11, 2.5070e-11, 2.6507e-10,\n 6.2862e-12, 3.3589e-11, 2.4102e-10, 1.7951e-11, 8.3601e-12, 6.8894e-11,\n 2.3175e-10, 5.8352e-12, 4.2184e-13, 2.1732e-12, 1.0931e-10, 1.7637e-12,\n 2.2461e-10, 2.0441e-10, 3.4597e-11, 2.0692e-10, 2.7347e-12, 1.0259e-10,\n 3.8502e-11, 2.9612e-10, 2.3360e-10, 8.7333e-10, 4.6564e-11, 1.6413e-10,\n 3.1248e-09, 2.2425e-12, 3.1136e-12, 2.9620e-10, 3.7241e-12, 6.1854e-10,\n 7.4389e-12, 6.1583e-12, 5.0796e-13, 1.1869e-10, 1.7197e-10, 2.4968e-10,\n 2.0618e-10, 5.5984e-11, 1.4047e-12, 1.4501e-12, 2.6868e-11, 1.9371e-11,\n 2.3736e-10, 7.9065e-12, 1.3342e-13, 1.0217e-09, 8.6336e-10, 2.5397e-13,\n 7.6306e-11, 2.2308e-11, 3.5511e-10, 1.0780e-10, 1.0322e-09, 4.3590e-12,\n 2.7074e-10, 1.2455e-10, 1.5616e-11, 5.0759e-12, 4.4331e-11, 7.6381e-10,\n 1.2425e-10, 1.5977e-12, 5.7399e-10, 4.3265e-11, 3.0396e-10, 8.1099e-13,\n 1.4367e-11, 7.0667e-11, 1.2290e-11, 6.1992e-11, 3.1232e-11, 3.8719e-13,\n 4.1928e-11, 2.5725e-12, 1.6617e-10, 1.6033e-15, 1.6106e-12, 7.6604e-13,\n 5.0717e-10, 8.0895e-11, 1.9108e-10, 4.2991e-10, 8.0863e-12, 1.0021e-09,\n 4.4733e-11, 4.0653e-11, 1.1061e-10, 5.1078e-13, 9.3635e-12, 3.7687e-12,\n 3.7795e-12, 5.8135e-11, 1.1435e-11, 4.5176e-12, 2.7771e-12, 7.8899e-11,\n 3.1612e-12, 5.2906e-14, 1.3916e-12, 8.2332e-12, 2.7405e-12, 1.1289e-11,\n 5.8531e-11, 9.7701e-10, 1.8900e-10, 1.1332e-11, 1.2358e-09, 6.7512e-09,\n 5.0407e-10, 1.7280e-11, 6.1736e-12, 2.4818e-10, 1.4077e-13, 2.3532e-11,\n 2.7238e-11, 1.1127e-12, 8.5317e-12, 2.3238e-10, 2.1381e-10, 8.2778e-11,\n 2.7482e-11, 5.5386e-10, 6.7346e-12, 3.8248e-11, 3.4120e-12, 5.5316e-13,\n 1.0488e-10, 2.3444e-10, 2.5194e-12, 1.5589e-10, 1.7133e-10, 5.6059e-12,\n 6.3551e-10, 1.9503e-12, 9.0484e-10, 2.3887e-10, 2.0918e-10, 4.6449e-13,\n 8.6060e-12, 2.3952e-11, 9.5102e-11, 3.7851e-10, 5.7122e-10, 1.1701e-12,\n 1.5281e-09, 1.2222e-12, 1.1493e-09, 1.2189e-11, 7.3756e-12, 7.4539e-11,\n 3.6334e-12, 6.5783e-12, 8.9600e-11, 1.9766e-11, 3.4503e-11, 1.1356e-11,\n 5.0405e-10, 2.3913e-10, 3.1207e-11, 4.5238e-09, 1.5369e-11, 1.3213e-12,\n 8.6961e-12, 1.1582e-11, 7.1860e-10, 3.6666e-12, 1.4375e-10, 1.8233e-12,\n 7.0633e-12, 1.3769e-10, 4.2337e-12, 4.8795e-10, 4.2915e-12, 1.7576e-09,\n 1.2049e-12, 1.6769e-10, 3.6761e-12, 4.5026e-10, 2.5934e-11, 7.9970e-12,\n 3.4833e-13, 1.8676e-11, 3.0052e-09, 2.4819e-10, 3.9893e-11, 2.6574e-12,\n 9.3059e-13, 4.5826e-11, 2.2160e-11, 6.0783e-11, 7.8254e-10, 3.3173e-11,\n 1.0376e-11, 8.3865e-12, 5.8348e-12, 2.7071e-11, 2.2924e-12, 1.3463e-10,\n 3.6220e-11, 1.8520e-10, 1.8065e-11, 1.0274e-12, 1.0190e-10, 2.8061e-10,\n 4.1888e-10, 2.9830e-12, 2.4926e-10, 1.7474e-10, 1.6336e-12, 2.4927e-12,\n 2.7904e-10, 7.9819e-11, 3.9149e-11, 4.2685e-12, 2.1579e-10, 7.8449e-13,\n 1.8526e-10, 2.6434e-09, 1.9510e-10, 1.7543e-12], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.4833e-14, 3.4591e-14, 3.8331e-14, 2.2686e-15, 8.5952e-15, 1.2184e-15,\n 6.1402e-15, 2.9299e-15, 3.3565e-15, 3.7844e-15, 5.9629e-14, 5.2491e-14,\n 9.0689e-15, 8.0156e-15, 2.0436e-14, 6.0798e-15, 3.7511e-14, 8.1373e-15,\n 3.8418e-14, 1.0395e-13, 3.4598e-17, 4.3993e-14, 1.3161e-14, 5.4036e-14,\n 6.3513e-15, 4.8086e-15, 1.3120e-13, 1.3675e-15, 5.4254e-15, 9.1422e-15,\n 1.2502e-14, 1.9811e-13, 3.3802e-16, 8.3697e-14, 8.3762e-14, 1.2734e-13,\n 1.5882e-15, 5.1210e-17, 5.3006e-14, 3.0366e-13, 1.4912e-13, 2.3614e-15,\n 3.4383e-15, 5.9213e-15, 1.1886e-14, 3.7538e-14, 5.2619e-14, 1.4196e-14,\n 1.6389e-15, 4.1185e-14, 5.1914e-16, 1.3606e-13, 4.6260e-14, 3.2304e-14,\n 7.6817e-16, 9.7224e-15, 2.1504e-14, 1.2922e-15, 3.7463e-14, 8.0673e-15,\n 6.5750e-15, 3.0348e-14, 3.8669e-14, 1.3454e-13, 3.7840e-16, 2.1731e-14,\n 1.9597e-14, 2.7882e-15, 3.3278e-14, 4.5096e-15, 4.2255e-18, 2.1140e-14,\n 6.0811e-14, 7.6662e-15, 2.1463e-15, 6.9492e-17, 2.6528e-14, 1.2137e-13,\n 3.4017e-15, 6.2794e-19, 9.9310e-14, 2.9902e-13, 1.4746e-16, 2.2445e-15,\n 3.0362e-14, 1.1745e-13, 6.9536e-15, 3.2190e-14, 1.2854e-14, 8.1125e-15,\n 2.3257e-14, 7.3838e-14, 1.3592e-14, 2.8760e-14, 1.1938e-13, 1.7368e-14,\n 4.1283e-14, 1.1986e-13, 2.0807e-14, 1.8348e-13, 1.3047e-14, 7.3233e-15,\n 3.3594e-16, 1.1969e-14, 3.4816e-17, 2.3248e-13, 4.2945e-15, 4.3286e-14,\n 1.0439e-14, 1.6608e-20, 1.7535e-13, 7.4572e-15, 4.4817e-14, 4.6614e-14,\n 8.3324e-15, 1.1715e-13, 2.5156e-15, 4.8449e-15, 9.2287e-15, 1.0746e-13,\n 3.1597e-15, 2.8445e-15, 1.1728e-15, 8.6840e-16, 1.7054e-15, 4.4225e-14,\n 4.0029e-14, 8.2864e-15, 3.4095e-14, 8.5597e-14, 1.2918e-17, 9.5537e-14,\n 4.7969e-14, 5.6970e-15, 3.5717e-14, 1.1732e-14, 1.3922e-13, 1.7833e-14,\n 1.5778e-14, 6.9011e-19, 6.5274e-18, 7.5573e-16, 6.7137e-14, 4.2867e-13,\n 6.2858e-14, 1.3575e-14, 1.0753e-14, 4.1396e-14, 7.2456e-15, 5.2136e-16,\n 1.0500e-14, 2.8969e-14, 4.0715e-14, 4.1078e-15, 4.0637e-15, 1.4568e-14,\n 8.6754e-15, 1.4018e-13, 3.6885e-15, 1.1513e-14, 1.9370e-14, 4.0681e-15,\n 3.1531e-15, 2.6602e-14, 5.9418e-15, 2.5242e-14, 9.9902e-14, 7.3898e-15,\n 8.7661e-14, 1.0873e-14, 5.2717e-17, 1.1346e-14, 1.8154e-14, 5.3349e-15,\n 6.4190e-14, 2.6652e-14, 3.9324e-15, 8.2297e-15, 5.9376e-15, 1.6430e-14,\n 2.1019e-14, 4.2527e-15, 8.8365e-15, 1.1272e-14, 1.4112e-13, 2.2919e-18,\n 8.0325e-15, 6.5344e-14, 3.6390e-15, 1.2736e-13, 1.7069e-14, 1.4700e-14,\n 1.3283e-14, 3.7626e-15, 1.3549e-17, 6.6320e-13, 2.4298e-15, 3.5779e-16,\n 9.9010e-15, 4.2544e-14, 8.7788e-16, 2.8918e-13, 7.9642e-15, 1.5635e-14,\n 1.1098e-14, 3.4133e-14, 7.1615e-14, 9.9347e-16, 9.3283e-19, 1.8443e-14,\n 6.3142e-15, 1.7632e-14, 9.1535e-16, 6.7831e-14, 2.7697e-16, 1.2225e-13,\n 1.1531e-13, 2.3340e-17, 1.3415e-15, 1.0509e-14, 3.8912e-15, 6.6426e-14,\n 8.5898e-15, 7.0356e-14, 1.2228e-14, 5.5549e-17, 4.1190e-17, 1.6006e-15,\n 9.6471e-16, 2.1588e-14, 5.6355e-16, 3.6485e-16, 2.5565e-14, 1.4992e-14,\n 9.2185e-14, 1.5268e-15, 3.0369e-15, 2.9244e-14, 1.6344e-14, 8.2069e-15,\n 1.5961e-14, 1.8039e-17, 9.1347e-15, 3.2724e-14, 7.7134e-15, 8.9883e-18,\n 1.1399e-16, 3.5872e-14, 2.4012e-15, 1.6764e-14, 1.1604e-13, 5.6502e-16,\n 8.8243e-14, 4.1007e-15, 1.5625e-13, 4.4170e-15], device='cuda:0')" }, "33": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.7465e-14, 1.4399e-14, 1.1569e-13, 8.5127e-15, 3.0408e-14, 1.9474e-14,\n 3.0939e-15, 5.8285e-13, 2.7183e-14, 1.8326e-14, 3.5208e-12, 7.9546e-15,\n 6.0908e-15, 3.4792e-16, 3.7409e-14, 1.2381e-15, 2.3465e-15, 5.9981e-13,\n 1.9314e-13, 8.2329e-14, 1.5116e-13, 2.3235e-14, 1.0849e-15, 1.7995e-12,\n 1.6809e-15, 2.3294e-14, 1.6941e-13, 3.1222e-15, 3.5197e-15, 1.5648e-13,\n 7.8220e-16, 7.5907e-13, 1.3175e-14, 9.7796e-14, 3.6290e-15, 1.5106e-12,\n 3.1228e-15, 5.9205e-15, 1.3671e-13, 5.3397e-16, 3.6647e-15, 1.2505e-14,\n 8.5548e-14, 1.1389e-13, 4.9029e-16, 1.7119e-16, 3.7054e-14, 2.9464e-17,\n 7.4913e-13, 6.5511e-14, 2.1234e-14, 9.4424e-15, 1.1806e-14, 2.6311e-14,\n 3.9381e-14, 6.1628e-14, 2.8930e-14, 3.7812e-13, 1.0718e-14, 8.2085e-14,\n 3.9255e-12, 6.7872e-16, 1.0703e-14, 2.2583e-14, 5.4147e-16, 3.5809e-13,\n 3.9197e-16, 6.4562e-16, 1.6134e-14, 2.9075e-14, 8.6974e-14, 1.1361e-14,\n 1.8690e-14, 1.4887e-13, 9.3487e-15, 3.7274e-16, 3.6284e-16, 2.9686e-15,\n 6.6097e-14, 1.4753e-13, 2.2688e-17, 2.9965e-12, 6.8059e-13, 5.6044e-15,\n 3.5542e-14, 2.3513e-14, 1.6462e-13, 4.1971e-14, 3.5983e-12, 3.0383e-15,\n 1.2731e-12, 8.4414e-13, 3.4493e-16, 2.7712e-14, 3.8158e-15, 1.7722e-13,\n 4.3932e-13, 3.2452e-15, 2.4325e-13, 1.9962e-14, 3.4355e-13, 2.4477e-15,\n 9.1005e-15, 8.9586e-15, 2.9330e-15, 5.4670e-13, 1.5938e-15, 3.5843e-15,\n 7.1212e-15, 1.0814e-15, 7.0709e-14, 3.2431e-16, 7.2682e-15, 4.2584e-15,\n 8.1106e-13, 4.0519e-13, 1.6761e-13, 1.4931e-13, 1.6076e-15, 1.3680e-12,\n 1.6935e-15, 1.3768e-15, 2.0451e-13, 1.3415e-16, 6.2212e-15, 7.8302e-16,\n 3.3821e-15, 1.1579e-14, 1.2365e-14, 7.1076e-15, 1.5093e-14, 4.1993e-14,\n 1.8535e-15, 4.6177e-17, 8.6316e-16, 1.5976e-15, 6.9711e-15, 3.7280e-15,\n 2.9662e-14, 8.1718e-13, 4.3040e-13, 3.8605e-15, 1.3851e-12, 2.6961e-11,\n 5.1574e-13, 5.3624e-15, 7.6554e-16, 1.6167e-13, 3.1858e-16, 3.9414e-14,\n 1.1078e-13, 1.5203e-15, 1.2861e-14, 1.8148e-13, 3.4834e-14, 5.4028e-13,\n 2.0412e-15, 2.3090e-13, 2.0064e-15, 1.0851e-13, 8.6967e-16, 1.1134e-15,\n 2.8733e-15, 2.3762e-13, 1.4287e-15, 1.4345e-15, 4.2079e-13, 4.6628e-16,\n 8.6485e-13, 1.5160e-15, 1.9958e-12, 7.3858e-14, 2.8097e-13, 2.5337e-16,\n 2.0364e-15, 9.8592e-15, 2.4764e-14, 1.3596e-13, 8.3776e-14, 4.3857e-15,\n 1.0676e-12, 2.5366e-15, 3.2729e-13, 3.0742e-14, 6.8337e-16, 2.3861e-14,\n 1.6083e-15, 1.0029e-14, 2.0674e-13, 8.7461e-16, 1.8524e-15, 2.7830e-15,\n 2.1737e-12, 3.3534e-13, 1.3120e-15, 1.1165e-11, 3.9668e-16, 4.5520e-15,\n 9.4409e-16, 1.4309e-15, 5.6612e-13, 1.7387e-14, 2.8889e-14, 6.9675e-15,\n 6.4624e-15, 2.9013e-14, 4.1027e-14, 2.5227e-13, 2.2687e-15, 5.5189e-12,\n 3.0213e-15, 4.6846e-14, 3.3777e-16, 6.5314e-13, 1.3613e-14, 1.4314e-14,\n 1.8054e-15, 1.1856e-15, 4.6437e-12, 6.8329e-13, 7.4388e-14, 6.6217e-16,\n 6.1370e-17, 8.0276e-14, 9.9646e-15, 1.2015e-14, 1.5705e-13, 3.8924e-15,\n 5.2621e-16, 1.9912e-15, 2.5860e-14, 8.2038e-16, 4.4444e-16, 2.1955e-13,\n 6.3162e-15, 5.5867e-14, 5.4582e-15, 2.0003e-16, 6.6807e-13, 2.8624e-13,\n 8.1146e-13, 5.3368e-16, 2.2847e-14, 8.4443e-14, 5.7392e-16, 9.9213e-17,\n 1.1154e-13, 3.8079e-14, 2.9390e-15, 5.6256e-15, 5.4989e-14, 2.5293e-16,\n 2.3312e-13, 2.0199e-12, 9.3309e-14, 1.8445e-14], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.4353e-13, 5.0604e-14, 4.0212e-14, 2.6127e-15, 8.3924e-15, 3.2630e-15,\n 1.1555e-14, 3.7839e-15, 5.6599e-15, 2.6326e-15, 1.0575e-13, 1.1361e-13,\n 1.3641e-14, 1.5119e-14, 3.2148e-14, 3.4800e-15, 7.0444e-14, 1.1984e-14,\n 5.0493e-14, 1.8227e-13, 2.8518e-17, 5.8116e-14, 1.6246e-14, 1.0780e-13,\n 1.0526e-14, 1.3136e-14, 1.2332e-13, 2.6041e-15, 9.4469e-15, 1.2488e-14,\n 3.9232e-14, 1.2734e-13, 3.8015e-16, 1.6764e-13, 1.1943e-13, 1.4785e-13,\n 2.6692e-15, 9.1919e-17, 8.0975e-14, 4.2012e-13, 1.6880e-13, 8.3617e-15,\n 7.6502e-15, 8.1109e-15, 3.0919e-14, 6.9858e-14, 6.1588e-14, 2.8771e-14,\n 3.4059e-15, 6.0040e-14, 4.4701e-16, 1.8884e-13, 7.5654e-14, 4.4989e-14,\n 1.3060e-15, 1.0423e-14, 4.4319e-14, 2.8697e-15, 3.8222e-14, 1.2821e-14,\n 6.3638e-15, 3.7796e-14, 5.8054e-14, 1.3335e-13, 5.2203e-16, 1.9537e-14,\n 3.5766e-14, 3.7266e-15, 6.1958e-14, 1.0597e-14, 1.0571e-19, 2.9044e-14,\n 7.6751e-14, 1.3170e-14, 3.9400e-15, 3.1940e-16, 2.1622e-14, 1.7257e-13,\n 2.4916e-15, 1.7113e-17, 8.0572e-14, 2.3608e-13, 2.0135e-16, 3.6016e-15,\n 3.4258e-14, 1.6808e-13, 1.0385e-14, 4.0846e-14, 1.9664e-14, 6.8092e-15,\n 3.9316e-14, 1.3614e-13, 3.0555e-14, 3.4724e-14, 1.5235e-13, 2.5063e-14,\n 4.7186e-14, 1.7891e-13, 4.0776e-14, 2.6614e-13, 2.8453e-14, 7.9216e-15,\n 7.1194e-16, 1.7463e-14, 2.1098e-17, 2.5480e-13, 4.4303e-15, 4.2527e-14,\n 1.8998e-14, 2.2000e-17, 1.9274e-13, 1.3769e-14, 8.0118e-14, 9.2088e-14,\n 1.1635e-14, 1.2375e-13, 4.2968e-15, 8.3673e-15, 2.0920e-14, 1.1432e-13,\n 7.8812e-15, 5.6791e-15, 2.0822e-15, 4.5962e-16, 2.1193e-15, 3.9274e-14,\n 9.4061e-14, 1.6596e-14, 5.1296e-14, 6.8422e-14, 6.9723e-17, 2.0857e-13,\n 5.8397e-14, 9.7562e-15, 7.0740e-14, 2.6349e-14, 1.7257e-13, 2.7270e-14,\n 2.9279e-14, 2.8104e-17, 6.0851e-17, 1.3346e-15, 8.5739e-14, 3.4909e-13,\n 7.9683e-14, 1.8883e-14, 2.0884e-14, 3.7110e-14, 2.0331e-14, 5.5469e-16,\n 1.0804e-14, 4.5113e-14, 5.0053e-14, 7.6131e-15, 9.4500e-15, 1.3186e-14,\n 1.3776e-14, 1.8845e-13, 6.9650e-15, 1.1866e-14, 2.9203e-14, 8.0261e-15,\n 6.4061e-15, 3.5872e-14, 1.2737e-14, 3.6418e-14, 1.8771e-13, 1.2753e-14,\n 8.6499e-14, 1.5941e-14, 2.9096e-16, 1.7240e-14, 3.9413e-14, 9.5440e-15,\n 7.3192e-14, 2.3495e-14, 6.7182e-15, 1.4533e-14, 5.9758e-15, 3.4915e-14,\n 2.5422e-14, 7.2883e-15, 9.9275e-15, 1.9765e-14, 2.5671e-13, 1.8965e-17,\n 1.4995e-14, 5.1633e-14, 8.7483e-15, 2.3672e-13, 3.4187e-14, 1.7333e-14,\n 1.7627e-14, 7.6629e-15, 1.2857e-17, 6.5393e-13, 5.3199e-15, 8.8767e-16,\n 2.3515e-14, 7.9614e-14, 1.5418e-15, 2.8350e-13, 1.6090e-14, 1.4290e-14,\n 2.6447e-14, 5.7677e-14, 8.5336e-14, 1.6158e-15, 4.4219e-17, 3.7993e-14,\n 6.6344e-15, 3.2600e-14, 2.1983e-15, 8.5271e-14, 3.1094e-16, 1.3918e-13,\n 1.6565e-13, 2.8558e-17, 2.7345e-15, 1.2254e-14, 3.0577e-15, 1.1917e-13,\n 1.4302e-14, 1.4171e-13, 2.1391e-14, 1.4722e-16, 3.3725e-17, 3.3793e-15,\n 1.6136e-15, 3.4601e-14, 1.2516e-15, 7.4273e-16, 3.3547e-14, 2.2136e-14,\n 1.2701e-13, 1.1537e-15, 4.9456e-15, 6.1441e-14, 2.0919e-14, 1.1863e-14,\n 3.6515e-14, 1.8836e-17, 1.7182e-14, 4.0525e-14, 1.4038e-14, 4.0817e-19,\n 1.5847e-16, 7.1199e-14, 3.3886e-15, 2.2562e-14, 1.3264e-13, 1.2295e-15,\n 7.3504e-14, 6.7546e-15, 2.0368e-13, 8.6268e-15], device='cuda:0')" }, "34": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([4.1093e-13, 5.3756e-15, 4.6288e-13, 1.1291e-14, 3.0781e-14, 7.0792e-15,\n 9.4547e-14, 9.0312e-13, 3.2668e-13, 3.0089e-14, 2.7952e-12, 7.4747e-14,\n 2.1146e-15, 3.9472e-16, 4.3101e-14, 4.2523e-14, 7.1624e-15, 3.4571e-13,\n 3.9103e-13, 2.6445e-13, 3.7342e-13, 9.5383e-15, 1.7621e-14, 1.2986e-12,\n 3.0337e-14, 1.2209e-13, 3.9008e-13, 1.6024e-15, 1.1860e-15, 1.7314e-13,\n 3.6910e-15, 5.3275e-13, 1.5083e-13, 2.4440e-13, 2.1868e-15, 5.0661e-13,\n 2.2015e-14, 1.4839e-14, 3.4890e-13, 2.2926e-15, 2.4290e-14, 1.0649e-13,\n 3.8666e-13, 5.4594e-14, 4.7329e-15, 5.2953e-15, 9.7702e-14, 3.1827e-16,\n 3.3413e-13, 2.6554e-13, 5.3096e-14, 2.8510e-13, 9.9932e-15, 7.8240e-14,\n 6.5082e-14, 3.3459e-13, 3.1379e-13, 1.1816e-12, 5.2411e-14, 2.1268e-13,\n 4.1171e-12, 3.8485e-15, 2.7446e-15, 3.6172e-13, 6.2634e-15, 8.5445e-13,\n 8.8135e-17, 1.1259e-14, 1.7067e-15, 1.7391e-13, 2.4217e-13, 2.6577e-13,\n 2.7264e-13, 1.6631e-13, 1.0875e-14, 2.0833e-16, 4.0128e-14, 1.3531e-14,\n 3.2204e-13, 7.5559e-14, 2.4305e-16, 1.1814e-12, 1.1677e-12, 2.2031e-15,\n 7.6085e-14, 5.2907e-14, 4.1411e-13, 1.5891e-13, 1.5244e-12, 3.1121e-15,\n 4.9297e-13, 2.9685e-13, 7.7004e-15, 8.0223e-15, 6.7538e-14, 9.7092e-13,\n 2.3396e-13, 8.1992e-15, 7.9591e-13, 1.0941e-13, 4.5652e-13, 9.2705e-16,\n 6.7238e-14, 8.5846e-14, 6.6704e-15, 2.7455e-13, 4.6473e-14, 1.0845e-15,\n 8.2091e-14, 5.6320e-16, 2.4248e-13, 5.5127e-17, 1.4291e-14, 1.9309e-15,\n 6.1423e-13, 1.7652e-13, 2.9272e-13, 5.4835e-13, 1.5163e-14, 1.3601e-12,\n 3.4283e-14, 4.3308e-14, 1.8644e-13, 2.5278e-15, 4.2415e-14, 2.7801e-16,\n 1.3489e-15, 3.0278e-14, 4.9917e-15, 2.3901e-15, 5.3687e-15, 3.7276e-14,\n 3.1377e-15, 2.7148e-15, 4.0531e-15, 1.4184e-14, 2.6480e-15, 2.0465e-14,\n 9.3986e-14, 1.3288e-12, 3.0350e-13, 8.1138e-15, 1.5888e-12, 8.3849e-12,\n 5.0110e-13, 2.1964e-14, 2.6249e-14, 3.6118e-13, 3.9773e-16, 3.4094e-14,\n 1.3736e-13, 6.4263e-15, 4.2792e-15, 3.4078e-13, 3.0157e-13, 1.3215e-13,\n 1.1694e-14, 7.5609e-13, 1.2546e-14, 4.2853e-14, 2.7252e-16, 6.5574e-16,\n 1.4459e-13, 3.2505e-13, 3.6925e-16, 2.1270e-13, 2.8935e-13, 1.1519e-14,\n 6.9128e-13, 4.8212e-16, 9.8309e-13, 2.6651e-13, 3.5793e-13, 6.0503e-16,\n 1.8077e-14, 3.4167e-14, 8.2916e-14, 4.5738e-13, 6.4171e-13, 1.6561e-15,\n 1.8137e-12, 1.9914e-16, 1.5372e-12, 3.4304e-14, 1.4060e-14, 1.1824e-13,\n 1.9777e-14, 4.2684e-15, 2.2883e-13, 3.1867e-14, 5.1510e-14, 1.6357e-14,\n 8.3866e-13, 3.3952e-13, 2.3704e-14, 5.9794e-12, 5.5191e-16, 2.4917e-15,\n 1.4896e-14, 7.2326e-15, 7.7376e-13, 2.7459e-14, 1.3362e-13, 2.4037e-14,\n 5.8376e-15, 2.0023e-13, 1.8704e-14, 6.6841e-13, 1.2921e-14, 2.2130e-12,\n 1.2144e-16, 1.7864e-13, 1.6136e-15, 6.2171e-13, 3.4993e-14, 4.7749e-15,\n 6.0358e-16, 2.8962e-14, 3.9919e-12, 4.1746e-13, 1.0926e-13, 1.6016e-14,\n 2.7823e-15, 6.9826e-14, 7.2873e-14, 5.0647e-14, 1.0516e-12, 6.6216e-14,\n 1.8227e-14, 1.8425e-15, 7.4548e-15, 4.1824e-14, 2.2103e-16, 2.5440e-13,\n 1.6788e-14, 2.7453e-13, 6.9405e-14, 2.4673e-15, 3.4061e-13, 2.6199e-13,\n 6.1289e-13, 6.2326e-15, 2.7959e-13, 2.7031e-13, 1.2978e-14, 5.1604e-15,\n 3.9285e-13, 1.2360e-13, 5.7393e-14, 2.6176e-14, 3.0857e-13, 7.3449e-17,\n 2.0853e-13, 3.4854e-12, 2.7577e-13, 5.4441e-15], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.1158e-16, 1.8473e-15, 1.1269e-15, ..., 2.5506e-16, 1.3965e-15,\n 8.3917e-16],\n [3.6412e-17, 5.4473e-18, 5.9371e-17, ..., 1.5844e-16, 1.8419e-16,\n 6.5646e-18],\n [1.7748e-16, 7.8058e-16, 2.4613e-16, ..., 1.2594e-16, 7.5843e-16,\n 1.4512e-16],\n ...,\n [4.4412e-15, 1.0535e-14, 1.0917e-14, ..., 1.9399e-15, 1.6380e-14,\n 2.3706e-14],\n [2.9656e-14, 4.6907e-14, 7.2397e-14, ..., 1.6077e-14, 7.0837e-14,\n 1.3006e-13],\n [4.0689e-13, 9.0251e-13, 1.0785e-12, ..., 2.2959e-13, 1.3584e-12,\n 2.1312e-12]], device='cuda:0')" }, "35": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.3943e-14, 4.0070e-14, 2.0902e-17, ..., 3.0345e-15, 3.4960e-15,\n 1.4958e-15],\n [1.7394e-15, 9.9156e-16, 1.0149e-16, ..., 4.2828e-17, 4.9975e-16,\n 2.3068e-16],\n [1.2043e-12, 1.3832e-12, 1.7058e-15, ..., 9.7715e-14, 1.6664e-13,\n 4.3322e-14],\n ...,\n [4.0308e-12, 4.1485e-12, 4.5403e-15, ..., 2.7310e-13, 4.8852e-13,\n 1.3434e-13],\n [1.1167e-16, 3.8533e-18, 4.2223e-17, ..., 2.1187e-17, 2.4605e-17,\n 7.1822e-18],\n [2.2587e-12, 2.5723e-12, 1.6423e-15, ..., 1.9248e-13, 3.0881e-13,\n 8.0374e-14]], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.0314e-14, 3.3148e-16, 2.9592e-15, 1.9474e-16, 1.3039e-15, 1.9277e-15,\n 2.8465e-16, 2.4624e-16, 1.1996e-17, 2.0097e-15, 1.6301e-15, 2.8193e-15,\n 7.0024e-17, 5.9280e-17, 7.2126e-16, 4.7305e-16, 1.5605e-16, 6.1762e-15,\n 2.1077e-15, 2.6996e-17, 1.0628e-16, 8.6245e-16, 1.9914e-15, 7.6815e-17,\n 3.7204e-15, 4.3633e-16, 6.1780e-16, 6.2993e-17, 1.4358e-16, 2.9370e-16,\n 2.9313e-16, 4.8755e-15, 7.0656e-16, 3.7821e-17, 1.4267e-18, 1.4576e-15,\n 5.1870e-16, 2.0897e-16, 6.2485e-18, 2.7861e-19, 8.4653e-16, 1.0593e-16,\n 5.1726e-16, 2.4424e-16, 4.1781e-17, 4.7565e-16, 6.0228e-16, 6.1107e-16,\n 3.2265e-16, 1.3710e-15, 5.3205e-16, 1.5133e-16, 1.9746e-16, 1.1628e-16,\n 4.3620e-18, 5.9307e-18, 1.3574e-16, 1.0895e-17, 4.3675e-16, 1.8574e-17,\n 1.5425e-15, 5.1444e-16, 1.2140e-18, 9.3588e-16, 1.8100e-15, 6.1647e-16,\n 9.2429e-16, 1.8302e-16, 2.3439e-17, 1.4448e-17, 9.4702e-16, 1.0004e-16,\n 7.9599e-18, 3.9108e-16, 2.2787e-16, 6.1424e-16, 4.5178e-16, 1.1700e-16,\n 1.9377e-21, 4.2429e-16, 3.2312e-16, 2.8441e-15, 4.3521e-17, 4.0209e-16,\n 5.4699e-17, 2.7599e-16, 1.6088e-16, 4.4518e-19, 2.7374e-16, 3.5529e-17,\n 1.2708e-18, 2.0618e-16, 1.8437e-16, 5.2543e-15, 1.7312e-15, 1.2039e-15,\n 1.3116e-16, 6.1889e-15, 4.2750e-16, 7.9281e-16, 7.3686e-17, 8.4764e-17,\n 4.1748e-17, 4.5342e-16, 5.5114e-18, 1.5812e-15, 2.5966e-15, 2.8862e-16,\n 3.2772e-16, 4.9268e-17, 4.9838e-16, 5.6595e-15, 4.2812e-17, 6.4838e-15,\n 8.9936e-16, 4.0066e-15, 3.9317e-22, 3.6752e-15, 2.6772e-15, 1.3250e-15,\n 5.0707e-15, 2.2685e-17, 2.6473e-15, 9.9822e-18, 1.4448e-15, 2.4065e-15,\n 2.1809e-15, 1.5796e-15, 2.9158e-15, 4.4125e-15, 5.0745e-16, 9.8972e-16,\n 2.9806e-17, 1.7008e-15, 4.8863e-16, 4.4693e-16, 1.0334e-15, 1.3992e-15,\n 1.1757e-18, 2.7299e-16, 3.4322e-15, 1.2631e-17, 7.2189e-16, 2.6635e-16,\n 1.6673e-15, 6.9357e-15, 1.3791e-15, 1.7712e-17, 4.3742e-15, 6.6333e-17,\n 1.4108e-16, 1.1461e-15, 4.7587e-15, 3.9113e-16, 1.6805e-15, 2.5211e-15,\n 1.8565e-17, 4.6817e-18, 1.7161e-16, 8.4369e-16, 8.0491e-16, 2.2323e-16,\n 3.3605e-15, 1.7382e-15, 1.9356e-16, 1.8677e-18, 3.7297e-17, 5.2840e-18,\n 1.0551e-16, 4.7513e-16, 9.6075e-17, 5.8284e-17, 4.0236e-16, 4.8742e-16,\n 4.4367e-17, 1.2448e-17, 4.6642e-16, 1.0769e-16, 6.6878e-17, 9.5316e-18,\n 7.7713e-16, 2.2605e-18, 1.6808e-16, 7.2560e-16, 2.0657e-16, 9.6323e-17,\n 7.1981e-16, 2.4165e-16, 2.4248e-15, 7.9167e-17, 6.7617e-16, 5.5733e-17,\n 6.1244e-16, 6.0815e-15, 8.5835e-16, 1.8291e-15, 9.3560e-16, 2.7348e-15,\n 3.1280e-19, 3.3211e-17, 1.5908e-16, 2.3619e-16, 4.0719e-16, 3.3544e-15,\n 8.4212e-17, 2.5214e-16, 1.8855e-17, 2.8399e-16, 3.2712e-17, 7.5963e-16,\n 1.2999e-19, 5.1081e-16, 3.4512e-17, 3.7624e-16, 3.4617e-15, 1.7549e-15,\n 2.5780e-15, 6.9720e-16, 8.8226e-16, 2.7447e-15, 6.9701e-16, 1.3310e-17,\n 3.5289e-16, 3.1390e-15, 2.0079e-17, 4.1441e-16, 2.5094e-15, 8.9219e-16,\n 7.8705e-16, 7.6145e-16, 6.5903e-16, 1.9622e-17, 2.8943e-16, 1.5289e-16,\n 7.0928e-16, 1.9467e-16, 4.9021e-18, 4.4970e-17, 1.8160e-16, 1.9125e-17,\n 1.7592e-15, 4.8522e-16, 7.4282e-17, 5.8803e-16, 1.8011e-17, 6.7251e-16,\n 1.8063e-15, 2.3177e-16, 7.6796e-16, 2.3940e-16, 1.8263e-16, 1.8782e-15,\n 7.2947e-16, 2.0783e-15, 1.9442e-15, 4.2143e-16, 6.6860e-32, 2.9556e-33,\n 1.2058e-30, 8.4780e-32, 1.3500e-30, 5.3059e-32, 3.4804e-32, 7.3728e-31,\n 4.8315e-31, 7.0326e-31, 2.1231e-32, 2.6297e-32, 7.3749e-31, 6.6280e-31,\n 3.6539e-32, 8.0516e-32, 2.9661e-31, 1.8897e-34, 1.8751e-30, 2.9679e-31,\n 9.7664e-32, 6.6711e-31, 2.9669e-31, 3.0458e-31, 2.2270e-31, 7.1131e-32,\n 1.7476e-31, 2.0001e-32, 1.5112e-30, 3.1060e-31, 2.6981e-31, 2.4031e-31,\n 1.3007e-30, 3.9401e-31, 4.2503e-32, 1.5061e-31, 2.3934e-31, 3.4151e-33,\n 3.2874e-32, 1.0312e-32, 7.0995e-34, 3.1730e-32, 1.5712e-31, 2.2556e-33,\n 2.5770e-31, 7.5760e-32, 4.0034e-31, 2.0685e-33, 3.2743e-31, 2.8207e-33,\n 8.6317e-32, 9.8595e-36, 1.2122e-31, 5.7705e-31, 1.6744e-31, 3.2136e-32,\n 1.2536e-30, 1.6128e-31, 2.3295e-30, 1.8744e-31, 5.3851e-33, 2.9954e-32,\n 2.7193e-31, 1.6123e-31, 4.1539e-32, 2.1649e-31, 7.9232e-31, 1.0382e-30,\n 5.5290e-31, 2.2720e-32, 1.9344e-30, 1.4101e-30, 1.4819e-30, 2.4733e-30,\n 5.1169e-30, 1.6090e-31, 3.1887e-30, 6.7814e-32, 2.3486e-31, 4.6835e-32,\n 6.9021e-31, 2.7339e-30, 1.6335e-30, 1.0643e-30, 2.8318e-31, 5.1218e-31,\n 2.4925e-30, 5.6058e-30, 3.1394e-31, 8.5984e-31, 4.9495e-31, 2.5918e-30,\n 2.7195e-31, 1.5891e-32, 1.8033e-31, 1.7217e-31, 1.6011e-30, 5.4435e-33,\n 3.2645e-31, 2.3607e-33, 1.8561e-31, 8.7641e-31, 1.2885e-30, 1.1043e-31,\n 2.9130e-32, 1.2725e-31, 2.2018e-31, 1.2627e-32, 7.7560e-33, 3.6255e-32,\n 4.1911e-32, 7.1666e-33, 1.6588e-31, 2.0733e-31, 2.9327e-31, 2.6599e-31,\n 1.8916e-31, 2.4980e-33, 8.4374e-35, 4.8609e-33, 3.4428e-33, 3.5764e-31,\n 4.9587e-31, 3.6294e-31, 1.7137e-31, 4.6496e-31, 2.3046e-33, 7.7919e-31,\n 8.2957e-33, 9.5284e-32, 3.8812e-32, 1.2368e-30, 4.9435e-31, 7.7294e-31,\n 6.4305e-31, 1.3620e-32, 1.2115e-32, 1.4301e-31, 2.8631e-31, 8.8615e-31,\n 9.3671e-32, 4.4439e-32, 2.5498e-32, 3.8622e-33, 1.4537e-30, 1.3673e-31,\n 4.1095e-31, 3.0963e-32, 4.8686e-32, 3.1490e-30, 5.3531e-31, 6.0867e-33,\n 4.3445e-31, 5.1571e-31, 1.4968e-30, 8.8979e-31, 1.1397e-30, 5.4522e-31,\n 8.2280e-33, 6.4161e-30, 1.4537e-30, 1.7828e-32, 1.7584e-30, 4.4670e-31,\n 3.8878e-31, 2.2894e-32, 2.6394e-31, 1.2581e-31, 1.3106e-31, 5.1510e-31,\n 4.1196e-31, 4.9024e-35, 3.8223e-31, 3.8634e-32, 5.9482e-33, 9.0908e-32,\n 9.3085e-31, 6.5872e-31, 3.4059e-33, 5.1883e-31, 1.5061e-31, 1.0817e-31,\n 1.8127e-30, 1.8156e-30, 1.9217e-31, 9.8759e-33, 1.8552e-30, 1.7189e-30,\n 8.0153e-31, 1.0195e-31, 1.0403e-31, 4.1315e-31, 4.8450e-31, 5.4260e-31,\n 2.6700e-30, 1.8794e-31, 5.8873e-32, 9.2952e-31, 1.7942e-31, 8.9672e-31,\n 1.4152e-31, 1.5340e-31, 1.4548e-31, 6.9298e-31, 1.3521e-30, 9.7505e-31,\n 6.4951e-33, 2.4628e-32, 3.1648e-31, 1.3076e-34, 9.5980e-32, 6.0221e-31,\n 6.4276e-32, 1.8651e-31, 2.7517e-31, 5.4922e-31, 1.7160e-31, 2.7302e-31,\n 5.6508e-31, 3.0924e-31, 2.6703e-32, 6.1914e-32, 1.9724e-32, 3.5845e-32,\n 2.4363e-31, 1.6156e-31, 4.5570e-31, 1.5651e-30, 9.8681e-31, 6.7719e-31,\n 4.8466e-31, 5.7883e-31, 2.0906e-32, 1.1991e-30, 3.6289e-32, 5.4685e-32,\n 2.5796e-31, 1.4958e-31, 2.7254e-31, 5.5055e-32, 4.2238e-32, 8.4782e-36,\n 2.5585e-32, 2.1172e-32, 3.9383e-31, 7.0014e-31, 5.4466e-32, 4.7057e-31,\n 2.0704e-31, 6.1879e-31, 5.1131e-33, 1.7376e-31, 4.9524e-32, 7.3310e-31,\n 1.1369e-31, 6.5556e-32, 1.3605e-12, 2.1297e-11, 3.7488e-12, 3.5751e-12,\n 2.8287e-11, 2.5133e-11, 3.4284e-13, 1.8274e-13, 4.5813e-13, 4.5853e-12,\n 6.0733e-15, 1.2224e-12, 2.2190e-11, 7.4642e-13, 1.2282e-12, 5.6237e-12,\n 8.2560e-12, 1.6959e-12, 1.5116e-11, 5.7033e-12, 6.3257e-15, 1.7765e-14,\n 3.0679e-12, 2.0211e-11, 1.7027e-11, 5.9319e-14, 7.6696e-12, 4.1250e-13,\n 2.1580e-12, 4.3781e-13, 9.5440e-12, 7.9694e-12, 3.0889e-11, 3.3157e-13,\n 6.3194e-12, 9.0715e-12, 5.3514e-12, 5.4162e-12, 4.7768e-12, 1.8880e-13,\n 2.9253e-11, 9.5797e-14, 4.7323e-13, 8.3320e-13, 1.4167e-11, 1.2115e-12,\n 2.6492e-12, 1.1055e-12, 2.1264e-13, 2.0248e-12, 4.0587e-12, 7.2498e-13,\n 1.1838e-11, 1.1173e-11, 1.2041e-12, 8.4383e-13, 1.3057e-12, 9.4636e-14,\n 7.6454e-12, 2.9003e-12, 6.1819e-12, 6.1182e-12, 6.4417e-13, 1.6681e-13,\n 3.4439e-16, 1.6790e-14, 5.1860e-14, 1.5510e-13, 2.2368e-12, 1.5940e-13,\n 1.9918e-12, 5.1944e-12, 7.4757e-12, 1.1916e-12, 2.7004e-12, 3.4097e-14,\n 1.5229e-12, 4.5095e-11, 2.3661e-13, 2.2733e-12, 3.0521e-12, 2.0873e-11,\n 7.6607e-15, 5.9203e-12, 1.7058e-12, 8.2389e-12, 5.2601e-12, 6.1081e-12,\n 4.9910e-16, 1.9483e-11, 1.3097e-11, 1.1923e-14, 5.8656e-13, 1.4109e-11,\n 6.5532e-13, 2.7371e-12, 3.8685e-14, 9.7216e-12, 1.8718e-12, 9.0977e-12,\n 4.7419e-12, 1.6444e-11, 2.0710e-13, 3.9760e-12, 8.1519e-12, 2.9706e-11,\n 1.3136e-11, 2.3185e-14, 4.9807e-13, 4.9876e-12, 3.1175e-12, 5.7570e-13,\n 8.7833e-13, 1.0383e-12, 3.0794e-12, 4.5799e-13, 3.8802e-12, 7.7007e-14,\n 1.1918e-11, 1.5063e-13, 1.1677e-11, 1.3378e-11, 4.1219e-12, 1.2147e-13,\n 1.4087e-12, 1.6491e-13, 3.8565e-12, 9.1402e-12, 9.1266e-13, 2.3676e-13,\n 9.5032e-12, 4.7154e-13, 5.5501e-12, 3.1273e-12, 1.6111e-12, 1.0973e-14,\n 2.2388e-13, 2.3713e-11, 3.0558e-13, 1.6451e-11, 2.8406e-15, 8.1430e-12,\n 3.0854e-12, 3.0821e-12, 9.0966e-13, 1.1083e-11, 4.1693e-12, 1.6192e-12,\n 6.7212e-12, 2.0977e-14, 1.5276e-11, 9.6218e-13, 1.1846e-13, 1.5884e-12,\n 3.1406e-12, 1.2318e-11, 2.3737e-12, 5.6421e-12, 8.7515e-14, 1.4191e-11,\n 4.1491e-12, 4.2403e-12, 3.3275e-12, 2.6024e-11, 2.3701e-14, 7.3628e-13,\n 2.2582e-11, 1.7625e-12, 2.0821e-12, 1.0832e-13, 3.6080e-12, 9.3345e-12,\n 8.1110e-12, 1.2335e-11, 2.9931e-13, 3.6713e-12, 2.1424e-12, 1.9171e-12,\n 1.9594e-12, 1.2343e-12, 3.1232e-12, 7.8353e-12, 6.0178e-13, 1.3361e-11,\n 1.4921e-11, 3.6257e-12, 3.5609e-12, 3.3892e-12, 2.4513e-14, 7.2524e-13,\n 7.3862e-13, 1.5929e-12, 7.2061e-12, 9.1273e-13, 3.9714e-13, 5.4934e-12,\n 1.5159e-15, 1.3355e-11, 1.1293e-13, 1.4633e-11, 6.2254e-12, 1.7499e-11,\n 4.5170e-13, 2.9932e-12, 1.1754e-11, 4.4227e-12, 4.2940e-13, 1.1345e-11,\n 8.6434e-13, 5.4989e-12, 2.7934e-14, 8.4723e-13, 2.4746e-11, 5.3986e-12,\n 2.4836e-11, 2.7945e-12, 3.9900e-12, 9.8829e-15, 1.3801e-12, 3.4802e-12,\n 3.2002e-14, 2.3427e-13, 3.0064e-12, 9.6260e-13, 1.3831e-12, 7.8780e-13,\n 8.9259e-12, 8.9410e-13, 2.2579e-13, 7.4531e-14, 2.4679e-14, 4.2981e-14,\n 5.3855e-13, 7.3816e-12, 5.1498e-13, 1.0530e-12, 1.3161e-13, 1.2567e-11,\n 6.1626e-12, 1.1950e-11, 2.7190e-11, 5.6315e-13, 1.4717e-11, 6.0062e-12,\n 1.3337e-12, 4.9955e-11, 1.0780e-12, 5.1407e-12, 1.7236e-12, 1.2368e-12,\n 6.4342e-13, 5.5270e-12, 2.9774e-12, 1.4381e-13, 7.4478e-13, 1.2798e-11],\n device='cuda:0')" }, "36": { - "step": "tensor(8764.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.9630e-11, 6.4308e-14, 7.0677e-10, 3.1892e-09, 7.7908e-11, 1.6625e-12,\n 4.6623e-10, 2.7422e-10, 1.9516e-10, 4.5028e-13, 2.6470e-11, 1.4448e-13,\n 1.5009e-10, 8.9559e-12, 4.6517e-12, 5.9808e-11, 8.0309e-12, 7.5476e-11,\n 2.8692e-12, 6.4542e-11, 1.6414e-11, 3.6871e-13, 4.8866e-10, 9.8955e-10,\n 7.8442e-11, 1.1833e-11, 1.1954e-10, 1.0578e-10, 4.4242e-12, 2.5316e-11,\n 3.8326e-11, 5.9120e-11, 1.1867e-10, 9.5351e-11, 1.0585e-12, 8.0900e-11,\n 2.8072e-10, 3.8440e-11, 2.1128e-10, 1.6108e-12, 4.3050e-10, 2.5058e-12,\n 2.7255e-10, 3.2849e-10, 6.7134e-12, 8.4620e-13, 2.7044e-11, 1.0838e-11,\n 3.3517e-12, 4.1787e-10, 8.5819e-11, 4.0808e-10, 1.2937e-13, 5.1899e-11,\n 1.5961e-11, 4.7651e-12, 1.1344e-09, 1.6948e-11, 1.0237e-10, 1.7492e-13,\n 5.3531e-13, 1.2223e-12, 1.5560e-10, 7.8118e-11, 6.0733e-11, 3.6797e-10,\n 7.4480e-11, 2.6947e-12, 8.7918e-11, 3.5221e-10, 7.1592e-11, 4.3159e-11,\n 2.8855e-11, 4.3237e-13, 7.7552e-14, 3.7172e-11, 6.5912e-11, 7.6675e-12,\n 6.4776e-10, 6.5600e-14, 9.0413e-13, 7.1298e-11, 9.7339e-10, 2.0641e-13,\n 7.8869e-11, 2.3711e-12, 8.3965e-11, 9.9942e-12, 1.2573e-10, 8.7686e-10,\n 1.2795e-10, 3.1163e-11, 5.2619e-12, 8.8529e-11, 5.8537e-10, 3.0886e-09,\n 1.1974e-12, 1.6243e-12, 1.9372e-11, 5.6148e-11, 1.0912e-10, 2.1011e-11,\n 2.8248e-11, 9.7950e-11, 5.9949e-10, 1.4335e-12, 1.2462e-13, 3.5521e-11,\n 4.3503e-11, 3.3646e-10, 3.3580e-10, 4.4509e-13, 2.7398e-13, 3.5228e-12,\n 4.7880e-12, 1.1014e-10, 1.0625e-10, 2.8646e-11, 1.2524e-10, 5.6840e-12,\n 9.7543e-10, 3.4627e-10, 4.2329e-13, 3.1684e-11, 9.5476e-11, 2.7712e-12,\n 9.6786e-13, 2.3837e-11, 8.1758e-13, 5.6233e-11, 7.8928e-12, 3.3200e-11,\n 2.8633e-10, 4.3690e-11, 3.0191e-11, 2.7120e-11, 1.1908e-10, 1.2576e-11,\n 2.3776e-11, 2.8813e-13, 4.4210e-12, 6.3811e-10, 1.8392e-10, 1.9962e-11,\n 6.4689e-10, 3.4992e-13, 1.1726e-13, 1.4554e-10, 2.0158e-11, 6.5028e-12,\n 4.0279e-12, 4.4505e-11, 2.3954e-11, 9.9318e-13, 4.3045e-12, 6.4449e-12,\n 4.0476e-12, 1.0913e-09, 3.2413e-10, 1.1093e-09, 2.4765e-11, 3.4097e-11,\n 1.2467e-09, 3.5765e-14, 2.3681e-11, 3.4855e-09, 4.0020e-10, 7.4479e-11,\n 9.3965e-10, 1.2967e-12, 2.3052e-11, 2.0446e-10, 4.1493e-11, 8.8977e-15,\n 2.7528e-11, 1.0691e-09, 6.1608e-12, 1.3412e-12, 1.5908e-09, 4.7090e-10,\n 5.6151e-13, 4.9074e-12, 6.0426e-13, 2.8938e-11, 1.1527e-10, 6.5215e-14,\n 4.0773e-12, 9.2737e-10, 4.9010e-12, 1.3420e-13, 1.0325e-12, 1.8583e-10,\n 2.1775e-10, 1.8150e-10, 2.3777e-10, 2.6682e-11, 2.9835e-13, 7.5353e-10,\n 9.9265e-11, 4.0827e-11, 5.6342e-11, 2.5986e-11, 2.2288e-11, 4.5382e-12,\n 2.8286e-12, 4.0610e-12, 2.5449e-12, 1.5309e-11, 9.6918e-13, 2.2968e-10,\n 8.1371e-11, 6.4467e-12, 7.3896e-12, 3.3280e-11, 2.0425e-09, 2.7046e-12,\n 1.2271e-11, 1.2545e-12, 5.8856e-10, 8.7437e-10, 9.8668e-14, 4.5623e-11,\n 3.3808e-11, 1.1856e-10, 2.0341e-13, 2.8428e-12, 3.9162e-10, 4.4522e-10,\n 1.2438e-13, 2.7352e-13, 6.4307e-14, 6.8425e-12, 6.4420e-11, 1.9220e-12,\n 1.4831e-11, 1.7162e-10, 2.9162e-11, 4.1234e-14, 2.3764e-10, 2.9689e-12,\n 3.7789e-10, 2.9202e-10, 8.0176e-10, 5.9665e-10, 1.4228e-11, 1.2400e-12,\n 1.9249e-12, 2.0914e-10, 4.7640e-10, 6.1008e-12, 6.0539e-10, 1.5409e-12,\n 1.0060e-10, 2.0657e-09, 2.3163e-14, 1.3229e-09], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.5570e-13, 2.5447e-16, 2.5447e-12, ..., 6.5901e-15, 6.6343e-14,\n 1.8339e-13],\n [6.1769e-13, 6.4375e-17, 3.6094e-12, ..., 1.0330e-14, 9.8033e-14,\n 2.5890e-13],\n [3.4880e-13, 4.4368e-17, 2.0400e-12, ..., 5.3900e-15, 5.8059e-14,\n 1.3191e-13],\n ...,\n [2.6393e-13, 3.7672e-17, 1.6007e-12, ..., 2.4757e-15, 3.9936e-14,\n 1.0617e-13],\n [3.4674e-14, 9.2735e-17, 1.9099e-13, ..., 6.6429e-16, 3.7543e-15,\n 1.4394e-14],\n [1.8346e-13, 2.2107e-16, 1.0765e-12, ..., 3.1331e-15, 2.5072e-14,\n 7.3672e-14]], device='cuda:0')" }, "37": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.5459e-14, 8.8869e-15, 1.6644e-12, 5.9221e-12, 1.4045e-14, 1.7471e-14,\n 2.1610e-13, 9.0987e-14, 2.7946e-14, 9.4511e-17, 7.1918e-14, 3.2797e-15,\n 2.7413e-13, 1.5893e-14, 3.7025e-14, 1.3129e-15, 5.4452e-14, 3.0844e-14,\n 2.1762e-15, 4.9708e-14, 6.7668e-15, 2.6617e-15, 1.6706e-13, 4.8945e-12,\n 1.9256e-13, 3.3839e-15, 9.8710e-14, 3.9655e-15, 1.2135e-14, 2.9447e-15,\n 3.4997e-14, 1.1959e-13, 1.3219e-14, 1.2682e-13, 2.5867e-16, 5.6441e-13,\n 7.1543e-13, 1.6696e-14, 3.5726e-13, 6.1999e-15, 1.2556e-12, 9.5293e-16,\n 2.2387e-13, 1.1421e-12, 3.6029e-15, 1.4811e-14, 2.0425e-15, 3.3571e-16,\n 9.9852e-16, 3.2677e-13, 8.3637e-14, 1.1380e-13, 4.9160e-16, 3.8791e-15,\n 2.0100e-15, 7.3967e-16, 2.9532e-13, 7.1488e-14, 8.6529e-15, 2.5871e-15,\n 1.6042e-14, 1.4219e-15, 9.1865e-14, 2.3094e-15, 1.0417e-14, 9.8864e-14,\n 8.8179e-14, 4.9338e-16, 5.2141e-15, 7.7953e-13, 3.1482e-14, 2.0597e-15,\n 7.8847e-15, 2.2892e-15, 3.6034e-16, 2.3798e-15, 1.7396e-14, 2.1265e-15,\n 7.9175e-13, 6.4925e-17, 8.6817e-16, 6.1566e-15, 1.0212e-12, 5.8312e-15,\n 1.3280e-13, 2.8282e-16, 2.7571e-14, 1.8845e-15, 4.9030e-14, 5.1856e-13,\n 1.1645e-13, 5.4867e-15, 4.4623e-15, 3.9198e-15, 9.3034e-13, 1.3053e-11,\n 6.7216e-15, 3.7691e-15, 6.9636e-15, 1.4723e-13, 1.5826e-14, 1.4043e-15,\n 7.7150e-14, 3.1614e-14, 4.6015e-13, 1.6201e-16, 3.5957e-16, 1.2439e-13,\n 8.8787e-15, 4.3465e-13, 1.3172e-12, 4.2798e-16, 1.0766e-16, 3.5323e-16,\n 6.4609e-15, 4.1326e-14, 8.2048e-14, 3.7220e-15, 4.4577e-14, 2.6863e-14,\n 7.2404e-13, 7.5071e-13, 1.0236e-16, 3.6195e-14, 7.0546e-13, 2.7783e-16,\n 2.4804e-15, 3.0636e-15, 4.6047e-16, 1.2045e-14, 6.5631e-16, 1.5384e-14,\n 1.5940e-13, 8.6540e-15, 3.5422e-15, 1.3523e-15, 1.2714e-13, 6.7951e-16,\n 4.5126e-14, 9.8072e-15, 5.2416e-16, 4.1180e-13, 4.4143e-14, 1.1887e-13,\n 1.8182e-12, 4.7055e-16, 4.9821e-16, 1.3760e-13, 4.1087e-14, 1.1801e-15,\n 1.6048e-15, 4.4318e-14, 1.0121e-15, 4.7449e-16, 2.3385e-16, 2.2025e-13,\n 6.7193e-16, 4.0899e-12, 1.4603e-13, 3.3124e-13, 3.1803e-15, 3.2119e-14,\n 5.2574e-12, 2.1116e-15, 1.6286e-15, 6.4549e-12, 9.8516e-13, 1.7067e-13,\n 1.0404e-12, 7.4688e-15, 2.7980e-17, 9.4625e-14, 3.1224e-14, 7.0360e-15,\n 8.3228e-16, 1.1376e-12, 5.1080e-15, 3.2047e-15, 1.7929e-12, 9.4216e-13,\n 8.5547e-15, 1.0680e-14, 2.7632e-14, 2.2664e-14, 2.2711e-14, 5.9398e-16,\n 9.7633e-16, 1.4034e-12, 1.5076e-15, 7.6165e-16, 7.3162e-15, 2.7992e-14,\n 7.2507e-13, 3.0808e-13, 8.8355e-14, 1.1582e-13, 2.1051e-17, 4.0650e-12,\n 1.1430e-13, 1.3630e-15, 5.8035e-15, 8.4247e-16, 2.5636e-15, 5.2865e-14,\n 3.8480e-16, 7.4017e-16, 3.2752e-14, 9.4031e-17, 1.0643e-14, 8.5613e-14,\n 5.6179e-14, 5.4440e-16, 2.8542e-16, 9.6060e-15, 2.7409e-12, 8.5759e-17,\n 2.1831e-16, 9.1559e-16, 6.2113e-14, 1.9949e-12, 1.2271e-15, 1.3539e-14,\n 2.5679e-14, 2.2378e-13, 4.4323e-15, 4.6320e-15, 2.5011e-14, 1.6991e-13,\n 2.6896e-15, 1.5520e-15, 1.0333e-14, 1.5854e-15, 1.3810e-13, 1.2703e-15,\n 4.8188e-16, 2.7692e-13, 3.1023e-14, 1.8706e-16, 8.3491e-13, 7.1523e-16,\n 6.6416e-14, 3.9515e-13, 9.6176e-13, 1.8286e-12, 5.3014e-15, 6.1319e-15,\n 1.1504e-14, 1.4174e-13, 1.6304e-12, 3.7324e-15, 1.4442e-12, 1.1121e-16,\n 5.4625e-14, 1.5026e-12, 7.2910e-16, 8.7612e-12], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.6486e-11, 2.2935e-11, 1.2265e-11, 6.5120e-11, 1.9763e-13, 1.8888e-12,\n 3.4589e-11, 6.4949e-11, 5.2533e-11, 2.6160e-11, 1.1339e-12, 5.4622e-11,\n 2.6985e-12, 1.3457e-13, 7.1031e-12, 6.7866e-14, 2.9668e-11, 5.6184e-11,\n 1.5713e-11, 3.3568e-12, 6.1177e-12, 4.5027e-12, 2.1434e-11, 2.7477e-13,\n 7.1422e-13, 6.0605e-12, 6.2491e-13, 9.1365e-12, 4.3298e-12, 1.3593e-11,\n 2.9724e-14, 7.2944e-12, 7.3810e-12, 2.5707e-12, 3.3096e-13, 1.4471e-11,\n 4.9551e-13, 8.3210e-11, 1.3954e-11, 9.9308e-13, 3.1348e-11, 2.0420e-12,\n 6.0001e-12, 5.4256e-13, 1.0948e-11, 1.8777e-11, 2.3578e-13, 2.4529e-14,\n 1.5403e-11, 1.3995e-11, 1.9992e-11, 8.3323e-13, 8.6888e-12, 3.2567e-11,\n 4.1604e-13, 3.6390e-12, 3.2918e-11, 1.3060e-13, 1.5106e-12, 2.7120e-12,\n 1.1384e-11, 8.9107e-12, 1.8331e-12, 1.7418e-11, 1.0950e-13, 2.7455e-11,\n 3.7394e-12, 7.1491e-12, 8.0502e-12, 2.9477e-12, 1.3848e-12, 2.7131e-11,\n 3.3242e-11, 6.9644e-11, 4.4943e-13, 5.4375e-14, 2.7425e-12, 2.3416e-11,\n 3.5482e-13, 6.7341e-14, 5.5043e-12, 3.4902e-13, 1.3955e-11, 2.3972e-12,\n 2.6378e-12, 1.3635e-12, 2.6643e-12, 3.8306e-12, 3.9489e-12, 4.6070e-11,\n 4.0148e-14, 2.7389e-11, 1.1358e-11, 3.0518e-11, 7.4374e-11, 4.7404e-11,\n 4.8139e-12, 6.4900e-11, 2.3330e-12, 8.2026e-13, 2.8887e-11, 9.9306e-15,\n 1.2593e-11, 7.6079e-12, 7.1720e-13, 2.0128e-11, 1.2097e-14, 1.1854e-10,\n 2.4559e-11, 2.5478e-11, 9.0040e-14, 7.3697e-13, 2.2822e-12, 2.2767e-11,\n 7.1768e-12, 1.9132e-11, 4.0457e-14, 2.3647e-11, 2.9735e-11, 1.1921e-11,\n 3.9755e-11, 1.5705e-11, 1.2275e-11, 1.7592e-12, 8.3169e-11, 4.3614e-11,\n 1.1085e-11, 4.5811e-12, 9.4973e-12, 3.3803e-14, 9.8900e-11, 9.6118e-12,\n 1.2486e-13, 3.6872e-14, 6.4239e-11, 2.4780e-11, 3.7312e-12, 1.6396e-12,\n 1.5162e-13, 4.9645e-14, 4.0954e-11, 8.5182e-14, 6.1449e-11, 1.1957e-12,\n 1.3872e-12, 2.9407e-12, 2.4470e-11, 2.0524e-11, 5.9608e-12, 7.1781e-14,\n 1.1818e-12, 3.7711e-13, 4.4751e-14, 4.0461e-11, 3.5801e-12, 5.3081e-11,\n 1.2736e-12, 2.9756e-11, 4.0260e-12, 4.0324e-11, 1.0541e-11, 1.7158e-12,\n 1.8102e-11, 1.5748e-11, 2.1849e-11, 4.8154e-11, 1.0117e-10, 4.7801e-14,\n 1.8983e-13, 9.2062e-12, 5.3236e-12, 8.1050e-14, 3.2647e-11, 4.2022e-12,\n 4.7199e-11, 5.8859e-14, 2.2057e-11, 2.4613e-12, 1.0619e-11, 1.0657e-11,\n 1.1362e-10, 1.1313e-11, 7.3053e-12, 1.0861e-14, 2.1285e-12, 3.2992e-11,\n 1.3476e-13, 1.2053e-11, 3.0708e-11, 2.7361e-12, 3.1374e-11, 2.7281e-12,\n 7.9092e-12, 7.2395e-12, 4.7837e-11, 2.5599e-11, 3.0120e-11, 2.2110e-13,\n 9.5950e-12, 6.1451e-11, 6.5702e-14, 5.6998e-12, 2.9381e-12, 4.3085e-11,\n 4.2094e-11, 6.0207e-12, 1.9915e-11, 4.1082e-12, 2.9427e-13, 1.1225e-13,\n 1.3331e-13, 6.7038e-12, 3.4542e-11, 3.0744e-11, 1.1094e-11, 3.8593e-11,\n 4.6876e-12, 2.3188e-11, 1.6892e-13, 6.0844e-13, 1.1619e-14, 3.2749e-12,\n 3.6326e-11, 2.7071e-12, 9.1296e-13, 1.8697e-11, 5.2632e-12, 1.6435e-11,\n 1.1687e-12, 2.3182e-11, 3.2444e-13, 1.2686e-11, 3.9431e-11, 1.6632e-13,\n 3.2833e-11, 7.3674e-13, 7.7961e-12, 2.9264e-14, 6.1904e-11, 1.2477e-12,\n 5.6744e-12, 1.2810e-12, 1.4987e-12, 8.7782e-12, 1.2919e-11, 8.9522e-12,\n 3.2361e-11, 1.2306e-11, 5.2388e-11, 5.0338e-12, 2.7247e-11, 5.9067e-16,\n 2.1476e-12, 1.0119e-11, 1.3080e-12, 6.9621e-12], device='cuda:0')" }, "38": { - "step": "tensor(8764.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.0323e-14, 2.7353e-17, 1.1186e-12, 3.9899e-12, 1.2290e-13, 4.0112e-15,\n 6.7168e-13, 4.3161e-13, 2.9209e-13, 3.2621e-16, 2.0006e-14, 1.2192e-15,\n 3.0832e-13, 5.9485e-14, 4.1193e-14, 5.7594e-14, 4.7310e-14, 1.2485e-13,\n 2.5295e-15, 1.5961e-13, 5.4187e-14, 1.5209e-15, 6.0891e-13, 1.4967e-12,\n 2.3992e-13, 1.4431e-14, 2.1113e-13, 1.0841e-13, 4.6060e-15, 4.3187e-14,\n 5.9317e-14, 1.5385e-13, 1.7917e-13, 2.8602e-13, 2.4070e-15, 2.6976e-13,\n 4.9410e-13, 1.5576e-14, 3.5277e-13, 9.2125e-15, 6.0443e-13, 5.1828e-15,\n 5.0698e-13, 4.5863e-13, 3.4464e-14, 2.4732e-14, 1.9067e-14, 1.1685e-15,\n 3.3374e-16, 5.4389e-13, 1.3379e-13, 5.9225e-13, 4.4823e-16, 3.8703e-14,\n 8.3100e-15, 3.3616e-15, 1.5811e-12, 2.7741e-14, 1.2550e-13, 2.2885e-15,\n 5.1750e-18, 1.1422e-14, 1.5268e-13, 1.0291e-13, 9.3539e-14, 5.4264e-13,\n 1.0082e-13, 5.4707e-15, 1.3052e-13, 5.5692e-13, 1.1232e-13, 3.6929e-14,\n 7.2593e-14, 7.4935e-16, 2.2944e-16, 2.9428e-14, 1.0744e-13, 1.8864e-15,\n 8.9084e-13, 2.7185e-16, 1.9731e-15, 8.9175e-14, 1.3956e-12, 1.4081e-15,\n 9.6472e-14, 6.4881e-15, 7.5982e-14, 1.8313e-14, 2.0949e-13, 1.1307e-12,\n 2.1686e-13, 5.5580e-14, 1.6181e-15, 1.3257e-13, 8.6477e-13, 4.0145e-12,\n 3.2239e-15, 1.0164e-14, 5.6751e-14, 1.6014e-13, 1.6637e-13, 3.4380e-14,\n 1.3256e-13, 1.0913e-13, 7.1970e-13, 1.1634e-14, 4.3576e-16, 1.3735e-13,\n 7.5956e-14, 5.0412e-13, 5.6092e-13, 8.3124e-15, 2.6314e-15, 5.5896e-15,\n 2.1232e-15, 1.7683e-13, 1.7544e-13, 4.9412e-14, 1.9068e-13, 9.4817e-15,\n 1.1480e-12, 4.1502e-13, 9.7755e-16, 6.6789e-14, 3.1102e-13, 6.1591e-15,\n 2.3711e-17, 1.9461e-15, 1.4092e-18, 3.6917e-14, 1.4169e-14, 5.3991e-14,\n 3.1450e-13, 7.3871e-14, 4.8766e-14, 4.2348e-14, 2.3139e-13, 5.3104e-15,\n 4.6735e-14, 5.4359e-16, 7.4282e-15, 7.3857e-13, 3.0197e-13, 2.8086e-14,\n 6.5753e-13, 8.3666e-15, 7.6071e-17, 2.3205e-13, 5.2123e-14, 3.8765e-15,\n 1.5036e-14, 1.3733e-13, 2.0839e-14, 1.6403e-15, 7.2941e-15, 6.8089e-14,\n 1.9860e-16, 1.5930e-12, 4.7624e-13, 1.4037e-12, 8.1956e-15, 6.1281e-14,\n 1.8702e-12, 2.4095e-17, 3.9360e-14, 4.8640e-12, 6.3750e-13, 1.5331e-13,\n 1.1892e-12, 1.0943e-14, 1.8337e-14, 2.0835e-13, 1.3464e-13, 1.5651e-14,\n 4.4711e-14, 1.3640e-12, 1.8978e-15, 6.7298e-16, 1.9237e-12, 7.1071e-13,\n 7.3489e-16, 3.6424e-15, 8.8211e-16, 4.1726e-14, 1.7527e-13, 2.9244e-16,\n 6.7188e-15, 1.0822e-12, 2.4535e-14, 2.9706e-16, 3.6275e-16, 2.0998e-13,\n 4.1261e-13, 2.7636e-13, 2.5236e-13, 4.1763e-14, 1.0940e-16, 1.1722e-12,\n 1.8506e-13, 3.4723e-14, 4.2097e-14, 4.2122e-14, 1.7502e-14, 5.8807e-14,\n 1.1146e-16, 7.0767e-15, 1.6885e-14, 2.5822e-14, 1.9219e-15, 3.7923e-13,\n 7.9136e-14, 8.6622e-15, 1.3633e-14, 7.8159e-14, 2.8624e-12, 6.9997e-16,\n 1.9944e-14, 2.2387e-15, 8.2745e-13, 1.3199e-12, 8.0070e-17, 8.2649e-14,\n 2.1166e-14, 1.8621e-13, 1.2422e-15, 1.6793e-15, 5.4587e-13, 6.1267e-13,\n 3.1125e-16, 6.5520e-16, 9.1210e-17, 1.1760e-14, 1.0343e-13, 2.0254e-14,\n 5.7083e-15, 3.3704e-13, 9.2712e-14, 7.2345e-18, 4.5831e-13, 2.7588e-16,\n 5.4195e-13, 4.6849e-13, 9.6875e-13, 8.3577e-13, 2.9288e-14, 3.5227e-15,\n 4.5502e-15, 3.2068e-13, 8.0284e-13, 1.5966e-14, 9.8521e-13, 1.4032e-15,\n 1.1428e-13, 2.8712e-12, 1.4654e-16, 2.1266e-12], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1535e-10, 3.0533e-11, 4.3705e-11, ..., 5.5667e-12, 3.3978e-11,\n 6.2744e-11],\n [1.1910e-11, 3.0958e-12, 4.6347e-12, ..., 6.1720e-13, 3.4306e-12,\n 6.2202e-12],\n [1.2974e-11, 3.3450e-12, 4.8291e-12, ..., 5.9429e-13, 3.8511e-12,\n 7.1941e-12],\n [1.3593e-11, 3.7530e-12, 5.1103e-12, ..., 6.4459e-13, 4.0581e-12,\n 7.5345e-12]], device='cuda:0')" }, "39": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.3287e-15, 2.8412e-14, 5.7585e-15, ..., 2.2481e-17, 2.7446e-15,\n 8.8547e-14],\n [1.1228e-14, 1.5998e-15, 4.5647e-16, ..., 1.1126e-14, 1.5131e-17,\n 1.2690e-13],\n [1.8176e-16, 4.5866e-14, 1.1575e-15, ..., 7.6955e-16, 2.8640e-15,\n 7.1286e-14],\n ...,\n [2.1896e-13, 2.3608e-14, 3.8155e-13, ..., 1.5384e-14, 4.8418e-14,\n 3.8897e-12],\n [6.7009e-14, 6.4420e-14, 7.3286e-13, ..., 1.0622e-14, 1.4265e-14,\n 1.1902e-12],\n [2.0955e-13, 5.0239e-12, 6.3409e-11, ..., 1.4566e-12, 4.5216e-14,\n 3.5225e-11]], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.8925e-09, 2.9804e-10, 3.2248e-10, 3.4448e-10], device='cuda:0')" }, "40": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.5145e-14, 8.7059e-14, 2.3444e-14, 8.7920e-15, 7.2324e-14, 2.0166e-14,\n 8.0724e-15, 2.6342e-14, 6.9733e-15, 1.8897e-14, 6.3951e-14, 1.5864e-14,\n 5.8738e-16, 7.4363e-15, 2.3692e-15, 1.2043e-15, 9.7101e-14, 3.2818e-14,\n 1.6408e-14, 8.8112e-14, 7.6863e-15, 1.4613e-13, 2.0125e-14, 1.1030e-14,\n 3.4993e-14, 6.2112e-14, 4.3474e-16, 1.6799e-15, 4.4480e-14, 7.5792e-14,\n 5.4747e-14, 6.0144e-14, 3.3057e-14, 7.4873e-14, 1.1826e-13, 3.5776e-13,\n 9.9211e-14, 1.5131e-14, 1.1349e-13, 7.4755e-14, 3.0345e-14, 4.6800e-14,\n 7.1710e-14, 3.8089e-14, 7.4092e-14, 3.0880e-14, 5.6507e-14, 2.4083e-14,\n 1.5941e-15, 2.0761e-13, 2.7188e-14, 4.2850e-14, 3.2691e-15, 1.9833e-13,\n 5.5641e-15, 3.4481e-14, 8.4382e-14, 1.7709e-15, 1.0381e-13, 2.1349e-14,\n 3.4665e-13, 2.0904e-13, 7.2312e-15, 2.4174e-14, 1.5699e-15, 5.7084e-15,\n 2.1709e-14, 1.1264e-14, 3.0382e-14, 1.3133e-13, 5.3434e-14, 2.7376e-14,\n 1.4342e-14, 3.9922e-14, 1.4312e-13, 2.1198e-13, 4.8665e-14, 1.1942e-14,\n 7.9511e-14, 8.0744e-15, 1.5838e-13, 1.7086e-14, 4.2189e-15, 2.9034e-15,\n 3.4663e-15, 1.2862e-14, 3.0628e-13, 3.7446e-14, 1.3371e-15, 7.6506e-14,\n 9.7319e-16, 4.5714e-14, 6.1071e-15, 4.0631e-13, 7.0068e-16, 3.0123e-14,\n 6.5593e-14, 4.8359e-16, 1.4380e-15, 3.9008e-14, 1.1404e-13, 3.2269e-16,\n 1.7050e-13, 1.0403e-14, 1.9342e-14, 1.8310e-14, 2.5740e-14, 1.5740e-13,\n 1.0521e-13, 5.8303e-14, 1.4436e-14, 4.0554e-14, 2.5538e-14, 3.0853e-14,\n 1.7170e-14, 3.9287e-14, 3.2895e-14, 5.8959e-14, 9.5506e-14, 3.9464e-13,\n 2.7802e-15, 3.6920e-14, 6.7302e-14, 1.9550e-15, 1.1736e-15, 1.4651e-15,\n 5.2608e-14, 2.2792e-13, 2.2241e-15, 3.8657e-15, 2.0488e-14, 1.8053e-13,\n 2.1866e-16, 1.1639e-13, 2.7596e-14, 4.7052e-14, 1.0072e-13, 3.2759e-15,\n 2.9862e-14, 5.9983e-16, 3.3503e-14, 5.2831e-15, 5.7097e-15, 8.4666e-15,\n 1.3740e-15, 6.6137e-17, 1.9055e-14, 1.1986e-14, 4.3123e-15, 6.1089e-15,\n 1.1337e-15, 5.0472e-15, 3.6591e-14, 6.8353e-14, 4.9481e-15, 4.8410e-15,\n 1.4708e-14, 5.2513e-15, 2.4859e-14, 3.8742e-15, 1.0541e-13, 7.5205e-16,\n 9.9696e-16, 1.4647e-13, 5.1810e-14, 4.2099e-13, 4.3614e-13, 6.3527e-14,\n 8.6886e-14, 4.8971e-13, 1.4039e-14, 1.7278e-14, 2.7690e-14, 1.6993e-13,\n 1.3753e-14, 1.3269e-13, 3.1145e-15, 6.3977e-14, 5.8312e-14, 7.2656e-13,\n 1.6868e-13, 2.7733e-13, 1.9416e-14, 4.2320e-14, 5.2556e-13, 1.0523e-14,\n 1.3601e-16, 4.9245e-16, 1.7713e-13, 4.4115e-14, 1.6918e-13, 7.8994e-14,\n 4.2430e-14, 6.9635e-15, 2.6735e-13, 2.6251e-13, 1.3705e-13, 4.1193e-14,\n 1.4794e-13, 9.8007e-16, 4.1027e-14, 9.7258e-14, 5.9907e-16, 1.3074e-15,\n 2.3583e-14, 1.9708e-13, 1.3758e-13, 2.1386e-14, 4.0060e-15, 1.8882e-16,\n 2.4194e-13, 1.2046e-13, 3.2557e-15, 2.1128e-15, 6.7493e-14, 2.1707e-14,\n 2.9344e-14, 1.2363e-13, 1.1054e-13, 2.6901e-13, 4.0288e-15, 4.5813e-14,\n 1.0513e-15, 3.1173e-13, 1.8793e-14, 1.0702e-14, 5.4499e-15, 3.8075e-15,\n 2.7771e-14, 4.4473e-15, 5.7992e-14, 6.4470e-15, 2.0624e-15, 3.3705e-14,\n 1.7810e-14, 2.8647e-15, 2.9459e-15, 1.6293e-14, 1.3534e-14, 5.8217e-15,\n 4.8282e-15, 6.4888e-15, 7.6596e-15, 1.0499e-14, 3.4417e-14, 7.9722e-15,\n 8.4199e-15, 4.9582e-15, 2.9682e-15, 1.3860e-14, 3.7584e-14, 4.9676e-14,\n 4.1535e-15, 3.2607e-14, 5.6423e-15, 4.2879e-14, 8.4407e-29, 7.0265e-30,\n 1.9682e-29, 4.3508e-30, 4.2494e-31, 1.1820e-29, 5.9495e-30, 1.8098e-29,\n 1.3687e-31, 1.3118e-29, 2.7468e-30, 5.6413e-30, 2.5285e-29, 9.7334e-31,\n 5.7340e-31, 1.5023e-30, 3.6751e-30, 1.2357e-30, 4.5808e-29, 8.9920e-30,\n 8.6274e-31, 7.9236e-32, 2.0579e-30, 1.3913e-29, 1.3324e-30, 3.1006e-31,\n 4.8275e-31, 3.4124e-30, 7.3035e-32, 2.4674e-30, 1.1017e-29, 1.0404e-29,\n 6.4017e-30, 3.5724e-31, 2.0950e-30, 2.4323e-30, 7.4271e-30, 1.6423e-30,\n 5.2264e-30, 1.1583e-29, 7.3499e-30, 4.5252e-30, 1.5682e-30, 1.3082e-29,\n 1.2933e-29, 2.1671e-30, 1.0078e-30, 2.0294e-30, 1.3229e-29, 3.9866e-30,\n 1.5049e-30, 3.5198e-31, 4.9033e-30, 5.4664e-30, 1.2889e-29, 1.3593e-29,\n 9.6940e-30, 5.6400e-29, 7.3573e-30, 5.6715e-30, 2.0126e-29, 1.0410e-29,\n 1.1841e-29, 1.1494e-29, 2.8551e-30, 3.6572e-30, 5.9154e-31, 1.9680e-30,\n 7.1937e-30, 1.9450e-30, 9.8505e-30, 6.4511e-31, 1.6547e-31, 6.1522e-30,\n 1.6055e-30, 5.3248e-30, 3.4023e-30, 3.0932e-30, 9.4665e-30, 3.9796e-30,\n 3.7966e-30, 2.9967e-29, 1.7294e-30, 2.4605e-30, 5.1727e-30, 3.2551e-31,\n 3.0388e-30, 8.6414e-32, 2.9275e-30, 2.4560e-31, 1.1998e-30, 7.3588e-30,\n 3.9509e-31, 3.2279e-30, 1.0248e-30, 2.7430e-30, 4.3334e-30, 2.1011e-30,\n 1.6791e-30, 3.6043e-30, 3.4643e-30, 7.6103e-31, 1.2319e-29, 1.3622e-29,\n 1.9491e-30, 4.9489e-30, 3.3867e-30, 1.4666e-29, 5.8439e-30, 1.9629e-30,\n 5.7381e-31, 3.1382e-30, 7.6454e-30, 7.4669e-31, 3.3266e-30, 2.2969e-31,\n 1.1563e-30, 3.2431e-30, 5.3409e-31, 1.6963e-30, 2.1167e-30, 2.6349e-30,\n 9.5742e-30, 3.8322e-30, 1.5330e-29, 2.1663e-29, 1.1121e-29, 3.0795e-29,\n 5.9912e-31, 5.9607e-31, 5.9812e-30, 2.2017e-30, 2.3128e-31, 5.7999e-32,\n 2.1734e-30, 5.5509e-31, 2.5643e-30, 1.6184e-30, 8.4384e-30, 1.0996e-30,\n 2.0486e-30, 2.2416e-29, 2.2010e-30, 3.8451e-31, 9.7904e-31, 3.0181e-31,\n 2.9730e-31, 3.6557e-30, 6.2651e-30, 6.9432e-30, 3.8967e-31, 9.9206e-30,\n 1.0265e-30, 9.5704e-31, 2.3944e-29, 4.7698e-30, 7.7252e-31, 3.2327e-31,\n 2.1118e-30, 9.1737e-31, 4.7371e-30, 1.4980e-31, 3.2839e-30, 7.5332e-31,\n 1.2375e-29, 1.5380e-30, 2.8736e-30, 1.0831e-29, 4.3516e-30, 2.1411e-30,\n 9.0107e-30, 3.4445e-30, 5.8203e-30, 5.6167e-31, 5.4247e-29, 7.4995e-31,\n 1.1484e-29, 1.8012e-30, 2.4005e-30, 6.2073e-30, 2.2239e-31, 1.4170e-30,\n 6.0043e-30, 4.0848e-30, 3.6602e-30, 3.2839e-30, 6.3212e-30, 1.1953e-30,\n 4.5822e-30, 4.9429e-30, 5.0203e-30, 4.6950e-30, 1.0303e-29, 1.7504e-29,\n 2.8025e-30, 7.7241e-31, 8.7769e-30, 2.4372e-30, 2.1052e-31, 5.6937e-31,\n 6.6612e-30, 1.5174e-29, 6.1195e-30, 2.6774e-30, 7.3808e-31, 1.2908e-31,\n 5.1443e-30, 8.7182e-30, 1.3048e-30, 5.0749e-31, 1.4413e-30, 1.9751e-30,\n 2.4832e-30, 1.4318e-30, 5.1527e-30, 7.2316e-30, 1.4815e-29, 1.4758e-30,\n 1.5502e-30, 4.1617e-30, 5.2636e-30, 3.3255e-30, 3.2275e-31, 9.9260e-30,\n 3.1777e-31, 1.4515e-30, 5.2872e-30, 5.0664e-31, 6.3890e-30, 9.7770e-30,\n 1.5239e-30, 9.8678e-30, 9.7136e-30, 5.7028e-30, 4.6842e-30, 6.1771e-30,\n 1.4558e-31, 4.2700e-30, 1.3208e-30, 3.6093e-30, 5.4000e-31, 1.8030e-30,\n 3.4567e-31, 2.2219e-29, 3.5678e-30, 5.7287e-30, 6.8786e-31, 2.0568e-30,\n 6.0030e-30, 2.3060e-29, 5.5003e-30, 9.8317e-30, 1.1851e-29, 9.2013e-30,\n 1.4768e-30, 1.9612e-30, 3.4788e-10, 1.2949e-11, 7.6257e-11, 5.0113e-12,\n 1.9719e-11, 6.5656e-12, 1.3437e-12, 2.7567e-11, 1.5221e-11, 6.2665e-12,\n 2.1759e-10, 4.2437e-12, 1.2675e-11, 1.6928e-10, 2.4902e-11, 3.3790e-11,\n 5.6192e-12, 2.0842e-13, 1.4270e-11, 2.0052e-11, 4.2210e-11, 3.0554e-11,\n 1.8410e-11, 4.5499e-12, 4.4739e-13, 3.6656e-12, 1.0497e-12, 2.6352e-11,\n 2.3850e-10, 6.8227e-12, 7.6239e-11, 1.8002e-11, 1.7092e-11, 4.3223e-11,\n 1.2676e-13, 1.0433e-10, 5.3419e-11, 1.6816e-11, 1.0923e-10, 2.6239e-12,\n 4.2858e-12, 8.2766e-12, 7.7732e-12, 2.1107e-11, 1.2013e-11, 3.6095e-13,\n 2.5675e-12, 2.7433e-11, 4.5336e-12, 5.2524e-11, 1.8009e-10, 3.7930e-11,\n 3.9867e-12, 8.6938e-11, 1.1127e-11, 6.0571e-11, 5.7130e-11, 9.2661e-12,\n 8.2164e-11, 6.8985e-12, 2.9697e-12, 5.2948e-14, 8.5901e-13, 1.4189e-12,\n 3.5311e-11, 2.1241e-11, 1.6212e-11, 6.7588e-12, 2.1550e-11, 3.1988e-12,\n 4.4225e-11, 9.5513e-11, 5.8262e-11, 1.4663e-12, 2.5276e-12, 7.6492e-11,\n 5.5062e-11, 1.4766e-10, 6.6375e-11, 4.9021e-12, 2.7529e-12, 5.6820e-12,\n 2.7132e-11, 1.0354e-11, 2.5674e-11, 4.7312e-12, 3.1181e-11, 3.6114e-12,\n 9.1986e-12, 1.0001e-10, 1.3906e-10, 1.1325e-11, 4.9171e-11, 3.3700e-11,\n 1.9910e-11, 4.9652e-11, 4.8828e-11, 3.2807e-12, 2.0547e-11, 1.6652e-11,\n 1.3848e-11, 1.3286e-10, 1.8153e-11, 1.1041e-10, 5.7137e-11, 1.0587e-11,\n 2.2030e-11, 2.5839e-12, 5.4595e-12, 5.2986e-13, 5.9191e-11, 6.1576e-12,\n 5.7803e-11, 8.0753e-12, 1.4014e-10, 3.5535e-11, 2.2901e-11, 1.2172e-10,\n 1.9574e-11, 1.2025e-11, 1.4237e-12, 1.9075e-11, 2.8620e-11, 6.9586e-12,\n 7.0343e-12, 2.9340e-11, 5.8325e-12, 3.0517e-12, 7.0696e-11, 2.8692e-12,\n 4.0013e-12, 4.5644e-12, 4.9664e-12, 1.9822e-11, 2.7583e-11, 1.2416e-11,\n 1.2267e-11, 8.0770e-13, 2.5899e-11, 1.8075e-11, 3.1322e-12, 1.5902e-11,\n 5.5741e-12, 3.4864e-12, 3.6045e-11, 8.3349e-12, 8.8860e-14, 2.4011e-11,\n 1.8041e-11, 5.5734e-12, 4.9788e-11, 2.8234e-11, 9.3456e-12, 3.8407e-12,\n 9.4224e-11, 3.3546e-12, 3.4165e-12, 3.8340e-11, 1.4283e-11, 3.4636e-11,\n 3.3228e-11, 1.1873e-11, 5.1124e-11, 3.0696e-11, 2.5525e-12, 4.9076e-12,\n 6.5310e-14, 5.6935e-12, 3.6090e-11, 5.2279e-14, 1.8607e-10, 9.5236e-14,\n 1.1242e-11, 4.4806e-12, 1.0187e-10, 3.9330e-11, 1.7492e-11, 3.6858e-11,\n 9.4581e-11, 1.4402e-11, 1.6280e-12, 6.7438e-12, 1.1209e-11, 6.0224e-12,\n 1.9767e-12, 4.3233e-12, 7.7375e-12, 1.7606e-12, 8.2297e-11, 4.9482e-13,\n 1.7918e-11, 3.7079e-12, 1.3120e-10, 6.4230e-11, 2.6852e-12, 9.0749e-12,\n 5.0386e-11, 2.5126e-12, 2.5212e-12, 5.9518e-11, 1.4114e-11, 2.9122e-11,\n 8.5989e-12, 3.9610e-11, 4.4214e-12, 1.0042e-11, 4.7260e-11, 2.2182e-12,\n 3.8633e-11, 1.8185e-11, 6.4113e-12, 2.8980e-11, 1.3964e-10, 2.8406e-12,\n 1.2981e-11, 2.4154e-11, 1.1055e-13, 2.0113e-11, 1.8420e-11, 4.3582e-11,\n 1.7524e-12, 3.0019e-11, 6.3448e-12, 9.9957e-12, 2.8282e-11, 1.1240e-13,\n 4.5111e-11, 7.5748e-12, 6.2779e-11, 2.1252e-11, 2.9277e-11, 5.2945e-12,\n 1.3798e-11, 4.9843e-12, 8.6423e-11, 1.7265e-11, 1.1756e-11, 8.2241e-11,\n 7.4187e-11, 7.0942e-11, 1.5025e-11, 7.1914e-11, 2.6119e-12, 3.0682e-13,\n 6.8722e-12, 3.2420e-12, 1.9956e-12, 4.5018e-12, 5.6549e-11, 1.8659e-12,\n 4.8979e-12, 2.5635e-11, 9.4656e-12, 3.5081e-12, 1.4673e-12, 6.6031e-11],\n device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1535e-10, 3.0533e-11, 4.3705e-11, ..., 5.5667e-12, 3.3978e-11,\n 6.2744e-11],\n [1.1910e-11, 3.0958e-12, 4.6347e-12, ..., 6.1720e-13, 3.4306e-12,\n 6.2202e-12],\n [1.2974e-11, 3.3450e-12, 4.8291e-12, ..., 5.9429e-13, 3.8511e-12,\n 7.1941e-12],\n [1.3593e-11, 3.7530e-12, 5.1103e-12, ..., 6.4459e-13, 4.0581e-12,\n 7.5345e-12]], device='cuda:0')" }, "41": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[7.0129e-12, 5.1322e-12, 1.6543e-12, ..., 2.6463e-11, 9.1368e-13,\n 1.0311e-11],\n [3.3078e-12, 1.3539e-12, 7.7966e-13, ..., 6.2211e-12, 2.9554e-13,\n 5.4023e-12],\n [7.6799e-13, 4.5394e-14, 1.9020e-13, ..., 2.4405e-13, 6.0986e-14,\n 2.5552e-13],\n ...,\n [2.8066e-12, 2.0270e-13, 7.5638e-13, ..., 1.2478e-12, 1.7443e-13,\n 6.3619e-13],\n [1.4509e-12, 2.4110e-13, 3.6032e-13, ..., 1.2242e-12, 9.1229e-14,\n 2.9545e-13],\n [3.5213e-12, 1.0554e-12, 8.1556e-13, ..., 4.5646e-12, 3.0867e-13,\n 3.2842e-12]], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.8925e-09, 2.9804e-10, 3.2248e-10, 3.4448e-10], device='cuda:0')" }, "42": { - "step": "tensor(8764.)", - "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.1164e-10, 7.9542e-11, 7.0319e-13, 3.0647e-10, 9.4652e-11, 8.1540e-12,\n 1.1098e-11, 1.7089e-11, 1.9350e-12, 4.2246e-11, 5.4314e-11, 6.2087e-11,\n 9.2479e-11, 1.8490e-11, 1.0209e-10, 4.5941e-11, 1.9513e-10, 2.9521e-11,\n 3.0031e-12, 2.3815e-10, 3.0253e-11, 2.0781e-11, 6.2395e-11, 2.4811e-10,\n 1.3559e-11, 3.3841e-10, 6.0574e-11, 6.3012e-11, 3.2952e-12, 1.9604e-11,\n 2.8212e-10, 7.1360e-12, 1.6534e-10, 5.3957e-12, 1.0927e-10, 2.2432e-12,\n 4.2114e-11, 8.9309e-11, 5.4893e-11, 5.1855e-11, 3.4582e-12, 7.2655e-11,\n 2.5000e-10, 6.7228e-12, 1.1753e-10, 8.7337e-12, 6.7053e-12, 2.5084e-12,\n 7.6104e-11, 2.3768e-11, 2.8983e-11, 1.5994e-11, 1.2276e-11, 3.3027e-11,\n 5.0585e-10, 5.3100e-12, 1.4718e-11, 7.4270e-12, 1.5985e-12, 1.2711e-10,\n 3.9548e-11, 1.4358e-12, 8.2997e-11, 3.4168e-10, 1.3997e-11, 2.1245e-11,\n 1.5300e-10, 2.3939e-12, 4.7801e-10, 4.8924e-12, 6.8626e-11, 4.8232e-11,\n 3.2646e-11, 1.7061e-12, 1.1505e-11, 1.4888e-11, 8.0696e-11, 2.7540e-11,\n 4.4024e-11, 8.4964e-11, 9.2142e-12, 8.8058e-12, 7.9102e-10, 2.6528e-11,\n 1.7633e-12, 3.3005e-12, 6.7526e-12, 3.5460e-12, 6.9178e-11, 5.4487e-12,\n 1.8402e-12, 1.3802e-10, 9.3379e-12, 2.6370e-10, 6.2581e-11, 8.3362e-12,\n 1.9336e-10, 6.9874e-11, 3.1944e-10, 2.3889e-12, 3.6331e-11, 3.2379e-10,\n 4.2518e-10, 7.1089e-10, 8.8288e-12, 3.0960e-11, 1.8531e-12, 1.1313e-10,\n 5.5332e-11, 2.7987e-12, 4.9630e-12, 5.3797e-11, 6.0694e-12, 9.3324e-12,\n 8.2018e-12, 4.4149e-11, 4.5993e-12, 3.5798e-11, 4.0315e-12, 3.2218e-11,\n 2.6046e-10, 1.5149e-12, 6.4147e-10, 2.4133e-11, 4.6628e-10, 4.2828e-12,\n 1.5270e-10, 1.8750e-10, 2.1687e-10, 1.8004e-11, 2.3020e-10, 1.7510e-10,\n 2.8048e-11, 2.1107e-12, 2.8050e-12, 2.4795e-10, 2.1663e-10, 1.5081e-10,\n 2.6720e-11, 4.1817e-12, 4.5429e-11, 2.9924e-10, 7.7673e-11, 1.1616e-10,\n 6.0744e-11, 1.3548e-12, 7.6270e-11, 5.4821e-11, 5.1697e-12, 2.2155e-12,\n 1.2027e-12, 2.0060e-10, 7.7313e-12, 2.4239e-11, 1.5777e-10, 3.3157e-10,\n 1.0613e-10, 2.4195e-10, 2.7772e-11, 2.0487e-10, 2.1822e-10, 1.8605e-10,\n 8.5146e-11, 2.9401e-10, 1.4928e-12, 3.7042e-10, 4.0671e-11, 4.6889e-11,\n 2.6784e-11, 3.7318e-12, 7.6186e-12, 1.7536e-12, 3.9101e-11, 1.1380e-11,\n 5.5170e-11, 1.7376e-10, 9.3171e-11, 3.0606e-12, 8.8128e-11, 1.9640e-12,\n 4.0693e-12, 1.1736e-10, 2.0876e-12, 4.8503e-11, 3.0126e-12, 7.5501e-11,\n 1.2519e-10, 3.4669e-11, 5.6208e-11, 1.2093e-10, 2.1901e-10, 1.3485e-10,\n 1.3235e-10, 2.4167e-12, 1.8420e-11, 2.7692e-12, 3.8212e-12, 3.1253e-11,\n 1.4140e-12, 4.1368e-11, 1.2635e-10, 1.4302e-10, 1.8522e-11, 5.4793e-11,\n 1.7455e-11, 2.8774e-10, 3.0225e-12, 2.0895e-10, 8.4883e-12, 1.9427e-11,\n 5.9626e-12, 5.0386e-11, 5.4723e-12, 2.9273e-11, 2.1902e-10, 3.4888e-11,\n 1.7893e-11, 2.4911e-12, 5.8439e-12, 1.6920e-11, 9.9264e-11, 4.4048e-12,\n 2.0221e-12, 4.5602e-11, 1.1560e-10, 6.2695e-10, 6.9219e-11, 1.4612e-11,\n 1.2149e-11, 3.4990e-10, 1.2232e-10, 2.3325e-11, 3.8856e-10, 1.1436e-11,\n 1.7366e-11, 4.2195e-11, 1.7961e-11, 1.0467e-10, 1.8293e-11, 3.1772e-13,\n 1.6707e-10, 1.0731e-11, 1.5213e-10, 1.9367e-11, 2.9896e-12, 5.5907e-12,\n 1.8701e-10, 7.1360e-12, 2.7064e-10, 3.1534e-10, 1.7637e-10, 3.2053e-11,\n 6.6694e-11, 5.5198e-12, 8.1453e-12, 5.6828e-11], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1535e-10, 3.0533e-11, 4.3705e-11, ..., 5.5667e-12, 3.3978e-11,\n 6.2744e-11],\n [1.1910e-11, 3.0958e-12, 4.6347e-12, ..., 6.1720e-13, 3.4306e-12,\n 6.2202e-12],\n [1.2974e-11, 3.3450e-12, 4.8291e-12, ..., 5.9429e-13, 3.8511e-12,\n 7.1941e-12],\n [1.3593e-11, 3.7530e-12, 5.1103e-12, ..., 6.4459e-13, 4.0581e-12,\n 7.5345e-12]], device='cuda:0')" }, "43": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.1870e-11, 3.8887e-10, 4.8592e-11, ..., 1.0065e-09, 9.4838e-10,\n 4.3853e-10],\n [4.1278e-12, 2.6559e-11, 3.1405e-12, ..., 6.9973e-11, 6.8197e-11,\n 2.9378e-11],\n [2.8808e-12, 1.6713e-11, 2.2569e-12, ..., 4.3530e-11, 3.8708e-11,\n 2.0274e-11],\n [4.5178e-12, 3.0267e-11, 3.5942e-12, ..., 7.7510e-11, 7.5151e-11,\n 3.2104e-11],\n [4.4566e-12, 2.5497e-11, 3.5416e-12, ..., 6.6158e-11, 5.8476e-11,\n 3.1350e-11]], device='cuda:0')" - }, - "44": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([2.2115e-08, 1.5616e-09, 9.1647e-10, 1.7536e-09, 1.3796e-09],\n device='cuda:0')" - }, - "45": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.1972e-11, 3.8904e-10, 4.8667e-11, ..., 1.0072e-09, 9.4838e-10,\n 4.3920e-10],\n [4.1298e-12, 2.6562e-11, 3.1419e-12, ..., 6.9986e-11, 6.8197e-11,\n 2.9391e-11],\n [2.8906e-12, 1.6730e-11, 2.2640e-12, ..., 4.3593e-11, 3.8708e-11,\n 2.0338e-11],\n [4.5201e-12, 3.0271e-11, 3.5959e-12, ..., 7.7525e-11, 7.5151e-11,\n 3.2119e-11],\n [4.4733e-12, 2.5526e-11, 3.5539e-12, ..., 6.6268e-11, 5.8476e-11,\n 3.1460e-11]], device='cuda:0')" - }, - "46": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([2.2117e-08, 1.5617e-09, 9.1663e-10, 1.7536e-09, 1.3799e-09],\n device='cuda:0')" - }, - "47": { - "step": "tensor(8764.)", - "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.1870e-11, 3.8887e-10, 4.8592e-11, ..., 1.0065e-09, 9.4838e-10,\n 4.3853e-10],\n [4.1278e-12, 2.6559e-11, 3.1405e-12, ..., 6.9973e-11, 6.8197e-11,\n 2.9378e-11],\n [2.8808e-12, 1.6713e-11, 2.2569e-12, ..., 4.3530e-11, 3.8708e-11,\n 2.0274e-11],\n [4.5178e-12, 3.0267e-11, 3.5942e-12, ..., 7.7510e-11, 7.5151e-11,\n 3.2104e-11],\n [4.4566e-12, 2.5497e-11, 3.5416e-12, ..., 6.6158e-11, 5.8476e-11,\n 3.1350e-11]], device='cuda:0')" - }, - "48": { - "step": "tensor(8764.)", - "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([2.2115e-08, 1.5616e-09, 9.1647e-10, 1.7536e-09, 1.3796e-09],\n device='cuda:0')" - }, - "6": { - "step": "tensor(5008.)", - "exp_avg": "tensor([[ 1.8240e-05, -1.2752e-05, -5.5179e-07, ..., 5.6834e-07,\n 1.2192e-05, -8.3750e-06],\n [ 1.5906e-35, -3.6146e-35, -2.9685e-36, ..., -2.7258e-37,\n 7.0841e-36, 1.5844e-36],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 1.8623e-06, 2.5461e-06, -6.8691e-07, ..., -1.2447e-06,\n 2.1467e-06, -5.1925e-07],\n [ 7.7225e-09, -2.6913e-08, -2.2121e-08, ..., -5.4497e-08,\n -1.6411e-08, -7.5942e-09],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.3824e-08, 2.5738e-08, 5.0652e-09, ..., 2.9159e-09, 3.2724e-09,\n 4.1916e-09],\n [1.4478e-12, 5.4542e-13, 5.7486e-14, ..., 1.1885e-13, 3.9909e-13,\n 5.9657e-14],\n [1.0917e-11, 1.2662e-12, 6.8646e-13, ..., 2.1001e-13, 1.5280e-12,\n 1.1719e-13],\n ...,\n [9.9311e-11, 4.2032e-10, 7.3648e-11, ..., 5.1009e-11, 5.4706e-11,\n 4.1776e-11],\n [6.0297e-12, 1.1521e-12, 1.0116e-12, ..., 1.0148e-13, 3.1438e-12,\n 2.7017e-14],\n [8.4315e-12, 1.0098e-11, 3.7098e-13, ..., 4.3361e-13, 1.4261e-12,\n 1.0713e-13]], device='cuda:0')" - }, - "7": { - "step": "tensor(5008.)", - "exp_avg": "tensor([ 3.2883e-04, 7.6862e-34, 5.6052e-45, ..., -5.7914e-06,\n -1.1233e-06, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.2949e-06, 2.3062e-10, 1.2913e-09, ..., 9.1614e-08, 2.9298e-10,\n 2.3497e-09], device='cuda:0')" + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.8925e-09, 2.9804e-10, 3.2248e-10, 3.4448e-10], device='cuda:0')" }, "8": { - "step": "tensor(5008.)", - "exp_avg": "tensor([[ 1.2036e-06, -1.2062e-36, 5.6052e-45, ..., 2.0552e-06,\n 3.1110e-08, -5.6052e-45],\n [-5.1410e-06, 4.5052e-37, 5.6052e-45, ..., -2.3500e-06,\n -1.8133e-08, 5.6052e-45],\n [ 2.4959e-06, -1.7744e-36, -5.6052e-45, ..., 2.8970e-06,\n 6.9501e-08, -5.6052e-45],\n ...,\n [ 3.4357e-06, 5.4740e-36, -5.6052e-45, ..., -2.6215e-06,\n -2.3646e-08, -5.6052e-45],\n [-2.7981e-06, -7.8645e-36, 5.6052e-45, ..., 1.5933e-06,\n -1.0148e-07, -5.6052e-45],\n [-3.8832e-06, 3.9572e-36, 5.6052e-45, ..., 4.8201e-08,\n 4.2424e-08, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.1442e-10, 8.5329e-13, 1.6967e-12, ..., 2.2218e-11, 3.2155e-12,\n 2.5280e-12],\n [2.4314e-10, 1.0161e-12, 1.9001e-12, ..., 2.7811e-11, 4.2402e-12,\n 4.2827e-12],\n [2.0055e-10, 8.6655e-13, 1.7515e-12, ..., 5.0805e-11, 4.7325e-12,\n 5.5158e-12],\n ...,\n [4.7750e-10, 1.4760e-12, 3.5246e-12, ..., 3.7713e-11, 7.4399e-12,\n 8.0114e-12],\n [2.5125e-10, 2.0858e-12, 3.3771e-12, ..., 2.4608e-11, 7.5347e-12,\n 6.0035e-12],\n [3.5152e-10, 7.8564e-13, 1.7668e-12, ..., 3.2431e-11, 5.1345e-12,\n 9.3739e-12]], device='cuda:0')" + "step": "tensor(10016.)", + "exp_avg": "tensor([[ 3.5830e-07, -4.7272e-07, 6.6890e-14, ..., -5.9283e-07,\n -7.0022e-07, 1.6392e-07],\n [ 5.5262e-08, 1.1677e-07, -4.7579e-16, ..., -2.6206e-07,\n -6.7251e-06, -2.1945e-07],\n [ 1.6351e-07, -6.5632e-07, 4.2770e-11, ..., 2.4396e-07,\n -2.4826e-06, -1.8223e-08],\n ...,\n [-2.7947e-07, -1.5997e-07, 1.3550e-10, ..., 6.8615e-07,\n -9.7374e-07, -1.7039e-09],\n [-8.7589e-07, 5.9438e-07, 4.6189e-12, ..., 1.4602e-06,\n -1.6519e-08, 4.9783e-08],\n [ 1.6671e-06, -1.7836e-07, -8.5272e-11, ..., -4.7100e-07,\n 5.3849e-08, -1.7018e-11]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.7138e-11, 3.7846e-12, 1.8628e-15, ..., 1.6709e-11, 3.3457e-11,\n 4.4967e-11],\n [3.3173e-11, 1.7046e-11, 3.4975e-15, ..., 7.7845e-12, 4.2268e-10,\n 2.0913e-11],\n [7.2414e-11, 9.6392e-12, 4.7727e-14, ..., 4.4724e-12, 6.8948e-11,\n 1.8952e-11],\n ...,\n [2.8560e-11, 6.7778e-11, 7.7726e-14, ..., 6.2342e-12, 1.4878e-10,\n 5.1638e-13],\n [4.8135e-11, 1.4569e-11, 1.1268e-13, ..., 2.2046e-11, 5.6775e-11,\n 5.9974e-12],\n [8.4850e-11, 1.4482e-11, 1.5858e-14, ..., 3.3600e-11, 1.0312e-11,\n 2.8457e-13]], device='cuda:0')" }, "9": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[ 1.6150e-04, -1.7041e-04, -4.2214e-05, ..., 9.5297e-06,\n -4.8514e-06, -8.6529e-06],\n [ 5.0013e-04, -1.6913e-04, 8.3829e-05, ..., 7.2260e-05,\n -1.0958e-04, -2.0804e-05],\n [-1.8585e-04, 1.0236e-04, 2.3827e-05, ..., -1.2988e-05,\n -5.3291e-05, -3.2328e-05],\n ...,\n [-7.6993e-05, 1.1819e-04, -9.4086e-05, ..., 5.6729e-05,\n -8.4099e-05, -2.4769e-05],\n [ 7.0313e-04, -5.0192e-04, 2.5318e-05, ..., 1.8532e-04,\n 1.8747e-04, -1.2058e-04],\n [-1.1502e-04, 7.2347e-06, 4.3522e-05, ..., -4.2736e-05,\n -1.3255e-06, 3.2622e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.5626e-07, 2.1562e-07, 4.1847e-08, ..., 3.4197e-08, 4.6768e-08,\n 2.4449e-08],\n [1.9213e-07, 1.3485e-07, 1.7159e-08, ..., 2.4223e-08, 2.2050e-08,\n 2.0725e-08],\n [2.1262e-07, 1.7915e-07, 2.7540e-08, ..., 3.1153e-08, 5.7775e-08,\n 2.3324e-08],\n ...,\n [1.4770e-07, 2.3503e-07, 3.5631e-08, ..., 4.1050e-08, 3.9825e-08,\n 2.6045e-08],\n [3.2427e-07, 2.1834e-07, 2.4829e-08, ..., 2.9111e-08, 3.5394e-08,\n 1.9353e-08],\n [2.5135e-07, 1.1875e-07, 2.8371e-08, ..., 5.8032e-08, 3.6608e-08,\n 2.5142e-08]], device='cuda:0')" + "step": "tensor(10016.)", + "exp_avg": "tensor([-3.4355e-06, -1.5383e-05, -3.4650e-06, ..., -1.2359e-05,\n 3.8409e-07, 1.3553e-05], device='cuda:0')", + "exp_avg_sq": "tensor([3.3921e-09, 6.1093e-09, 2.8624e-09, ..., 5.2694e-09, 3.1605e-09,\n 4.1730e-09], device='cuda:0')" }, "10": { - "step": "tensor(1252.)", - "exp_avg": "tensor([ 0.0026, 0.0059, -0.0014, ..., -0.0007, 0.0087, -0.0020],\n device='cuda:0')", - "exp_avg_sq": "tensor([6.6055e-05, 4.4298e-05, 4.8169e-05, ..., 4.9103e-05, 4.3568e-05,\n 7.2110e-05], device='cuda:0')" + "step": "tensor(10016.)", + "exp_avg": "tensor([[ 1.4383e-07, -1.3417e-06, -4.8123e-08, ..., -9.2854e-08,\n 2.5828e-07, -2.5039e-07],\n [ 8.9377e-08, 1.2158e-06, 1.3673e-07, ..., 1.8003e-07,\n 2.9341e-07, 2.8339e-07],\n [-9.1742e-09, -5.8510e-07, 2.3210e-08, ..., 2.5089e-08,\n -3.6084e-07, -1.2081e-07],\n ...,\n [ 9.4840e-08, -1.2304e-06, -1.4321e-07, ..., 1.0502e-07,\n 6.8465e-07, -1.8084e-07],\n [ 1.8060e-07, -1.2089e-06, -4.3010e-07, ..., 2.1643e-07,\n 1.7319e-07, -7.5282e-07],\n [ 1.3763e-07, -2.2046e-07, -5.3914e-08, ..., 3.7226e-08,\n 2.6425e-07, -2.1010e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0251e-12, 1.7401e-12, 2.7347e-12, ..., 1.6625e-12, 1.7281e-12,\n 2.6461e-12],\n [1.8418e-12, 4.9405e-12, 2.6799e-12, ..., 4.2973e-12, 2.3866e-12,\n 2.6414e-12],\n [3.0638e-12, 6.5872e-12, 2.8719e-12, ..., 3.9160e-12, 3.1680e-12,\n 4.7629e-12],\n ...,\n [3.5742e-12, 6.6160e-12, 2.8266e-12, ..., 4.3170e-12, 2.8925e-12,\n 5.3263e-12],\n [2.5809e-12, 6.1012e-12, 2.4197e-12, ..., 5.6444e-12, 1.7958e-12,\n 2.8557e-12],\n [1.7516e-12, 7.0823e-12, 3.8597e-12, ..., 9.2378e-12, 2.9795e-12,\n 2.2880e-12]], device='cuda:0')" }, "11": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[ 2.8928e-05, 2.1383e-05, -2.3880e-06, ..., 5.7854e-06,\n -1.4844e-05, -1.5299e-05],\n [-2.3608e-06, -5.2299e-05, 1.3657e-06, ..., -5.3216e-06,\n -5.0624e-05, 7.2171e-06],\n [ 4.2256e-05, 5.7482e-06, 4.6595e-07, ..., -7.8958e-06,\n -4.3823e-06, 3.4916e-06],\n ...,\n [-1.6918e-05, 3.7653e-06, 2.8550e-06, ..., -1.5309e-05,\n 2.5331e-05, -1.5662e-05],\n [-9.7358e-06, 2.7078e-05, -2.9074e-06, ..., 4.5295e-06,\n -3.0897e-05, -4.8744e-06],\n [-4.4109e-06, -1.4746e-05, 1.3946e-05, ..., -3.4323e-05,\n -4.8931e-05, 1.3703e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.4546e-09, 2.3115e-09, 2.4270e-09, ..., 1.9932e-09, 2.2781e-09,\n 3.2835e-09],\n [6.8376e-09, 4.1689e-09, 4.6825e-09, ..., 5.5176e-09, 4.1088e-09,\n 3.8841e-09],\n [1.2571e-08, 4.7028e-09, 5.3241e-09, ..., 4.6011e-09, 4.2787e-09,\n 4.4514e-09],\n ...,\n [1.4762e-08, 4.2571e-09, 4.6193e-09, ..., 4.9616e-09, 4.5590e-09,\n 4.0981e-09],\n [8.6822e-09, 5.4070e-09, 5.2924e-09, ..., 3.7352e-09, 5.1507e-09,\n 5.0505e-09],\n [8.1439e-09, 5.0661e-09, 5.5968e-09, ..., 4.4442e-09, 4.4615e-09,\n 4.9511e-09]], device='cuda:0')" + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 6.0021e-08, -3.2553e-07, 4.9713e-09, ..., 2.2780e-06,\n 8.6043e-06, 3.6332e-05],\n [ 1.4480e-07, -2.4843e-07, -8.7837e-09, ..., 7.5468e-09,\n -3.0002e-07, 1.3550e-07],\n [ 4.7906e-07, 1.9650e-07, -3.2210e-32, ..., 3.9265e-07,\n -1.2447e-06, 1.5269e-09],\n ...,\n [-1.7850e-07, 6.4171e-08, -6.0749e-15, ..., -3.1413e-08,\n 4.4758e-07, 9.7309e-07],\n [ 2.4173e-08, -2.8099e-08, -9.6631e-15, ..., -1.3745e-06,\n 1.3046e-06, 1.3819e-09],\n [-1.1925e-07, -3.4211e-09, 6.4164e-09, ..., 4.0321e-06,\n -2.1851e-08, -3.2193e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.9185e-11, 2.3644e-11, 4.5448e-14, ..., 1.3217e-10, 3.1798e-11,\n 1.6684e-10],\n [3.8224e-11, 1.0157e-12, 4.6828e-14, ..., 4.7451e-12, 6.5152e-11,\n 6.1060e-11],\n [1.2827e-11, 7.4580e-12, 2.1721e-18, ..., 2.8518e-12, 1.0346e-10,\n 5.8547e-13],\n ...,\n [1.6589e-10, 1.7890e-12, 3.8632e-17, ..., 7.1532e-12, 1.4612e-11,\n 1.1697e-11],\n [1.2481e-11, 1.8018e-11, 4.8229e-18, ..., 8.4913e-11, 6.2744e-10,\n 6.5968e-13],\n [2.0206e-11, 3.5634e-11, 8.5991e-13, ..., 2.2366e-10, 5.5520e-11,\n 1.6105e-12]], device='cuda:0')" + }, + "12": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 4.9924e-05, 9.5775e-06, -1.5585e-05, ..., 2.1194e-05,\n 6.8780e-06, 1.9381e-05], device='cuda:0')", + "exp_avg_sq": "tensor([3.5543e-09, 1.9272e-09, 3.4568e-09, ..., 3.8770e-09, 4.4961e-09,\n 3.5167e-09], device='cuda:0')" + }, + "13": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 2.1761e-07, 8.9785e-10, 3.9884e-07, ..., 1.8240e-07,\n -1.3000e-07, 2.1703e-08],\n [ 3.5875e-07, -1.1533e-07, -7.1832e-07, ..., 3.2054e-07,\n 9.5066e-08, 4.1268e-08],\n [ 1.8207e-07, -1.3665e-07, 7.6200e-07, ..., -6.0928e-07,\n -2.0485e-07, -6.8308e-08],\n ...,\n [-3.0917e-07, -1.2670e-07, 3.6646e-07, ..., 4.5317e-07,\n -2.3019e-07, -2.4820e-07],\n [ 7.5420e-08, 4.3427e-07, 5.6661e-07, ..., -2.4217e-07,\n -3.0063e-07, -1.0919e-07],\n [ 9.4264e-08, -6.9451e-08, 3.5249e-07, ..., -3.6601e-08,\n -1.2295e-07, -1.8545e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.2304e-12, 5.8640e-13, 9.8039e-12, ..., 1.0803e-12, 7.0289e-13,\n 6.5312e-13],\n [1.5815e-12, 1.0095e-12, 1.5282e-11, ..., 2.8355e-12, 7.6226e-13,\n 8.8480e-13],\n [2.5201e-12, 9.2520e-13, 1.4950e-11, ..., 8.4340e-12, 1.4941e-12,\n 1.2202e-12],\n ...,\n [2.9555e-12, 1.2850e-12, 3.6760e-12, ..., 1.8609e-12, 1.5176e-12,\n 1.2512e-12],\n [2.8887e-12, 2.2663e-12, 2.9999e-12, ..., 6.7842e-12, 1.1652e-12,\n 1.1676e-12],\n [2.3223e-12, 1.5954e-12, 1.6244e-12, ..., 2.7359e-12, 1.0949e-12,\n 1.1456e-12]], device='cuda:0')" } }, "param_groups": [ { "lr": 0.01, - "name": "scale_256", + "name": "shared", "betas": [ 0.9, 0.999 @@ -248,13 +238,12 @@ "initial_lr": 0.01, "params": [ 0, - 1, - 2 + 1 ] }, { "lr": 0.01, - "name": "scale_512", + "name": "scale_384", "betas": [ 0.9, 0.999 @@ -270,9 +259,9 @@ "decoupled_weight_decay": true, "initial_lr": 0.01, "params": [ + 2, 3, - 4, - 5 + 4 ] }, { @@ -293,9 +282,9 @@ "decoupled_weight_decay": true, "initial_lr": 0.01, "params": [ + 5, 6, - 7, - 8 + 7 ] }, { @@ -316,9 +305,9 @@ "decoupled_weight_decay": true, "initial_lr": 0.01, "params": [ + 8, 9, - 10, - 11 + 10 ] }, { @@ -339,9 +328,9 @@ "decoupled_weight_decay": true, "initial_lr": 0.01, "params": [ + 11, 12, - 13, - 14 + 13 ] }, { @@ -362,6 +351,7 @@ "decoupled_weight_decay": true, "initial_lr": 0.005, "params": [ + 14, 15, 16, 17, @@ -390,12 +380,7 @@ 40, 41, 42, - 43, - 44, - 45, - 46, - 47, - 48 + 43 ] } ] @@ -428,20 +413,26 @@ ] }, "metrics": { - "val_acc": 75.532 + "val_acc": 82.316 }, "train_config": { "name": "david_training", - "run_id": "20251012_032356", + "run_id": "20251012_041353", "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", - "model_variant": "clip_vit_b16", + "model_variant": "clip_vit_l14", "num_classes": 1000, - "preset": "high_accuracy", + "preset": "clip_vit_l14", "custom_config_path": null, "num_classes_override": null, "use_belly_override": null, "belly_expand_override": null, "progressive_training_override": true, + "scale_warmup_epochs_override": { + "384": 0, + "768": 1, + "1024": 2, + "1280": 3 + }, "num_epochs": 20, "batch_size": 1024, "learning_rate": 0.01, @@ -458,8 +449,8 @@ "gradient_clip": 5.0, "scheduler_type": "cosine_restarts", "min_lr": 1e-06, - "freeze_strategy": "performance", - "freeze_threshold": 70.0, + "freeze_strategy": "never", + "freeze_threshold": 90.0, "unfreeze_on_plateau": true, "patience": 10, "track_gradients": true,