| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1329, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0007524454477050414, |
| "grad_norm": 187.81736755371094, |
| "learning_rate": 0.0, |
| "loss": 10.1133, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0015048908954100827, |
| "grad_norm": 151.3889923095703, |
| "learning_rate": 1.4925373134328358e-07, |
| "loss": 9.2148, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.002257336343115124, |
| "grad_norm": 195.20321655273438, |
| "learning_rate": 2.9850746268656716e-07, |
| "loss": 11.0938, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0030097817908201654, |
| "grad_norm": 348.9756774902344, |
| "learning_rate": 4.4776119402985074e-07, |
| "loss": 9.832, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.003762227238525207, |
| "grad_norm": 163.2272491455078, |
| "learning_rate": 5.970149253731343e-07, |
| "loss": 11.2422, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.004514672686230248, |
| "grad_norm": 176.78646850585938, |
| "learning_rate": 7.462686567164179e-07, |
| "loss": 11.4922, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.005267118133935289, |
| "grad_norm": 171.82962036132812, |
| "learning_rate": 8.955223880597015e-07, |
| "loss": 10.2266, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.006019563581640331, |
| "grad_norm": 193.08480834960938, |
| "learning_rate": 1.044776119402985e-06, |
| "loss": 10.5117, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.006772009029345372, |
| "grad_norm": 162.14559936523438, |
| "learning_rate": 1.1940298507462686e-06, |
| "loss": 9.6094, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.007524454477050414, |
| "grad_norm": 150.88475036621094, |
| "learning_rate": 1.3432835820895524e-06, |
| "loss": 8.7812, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.008276899924755455, |
| "grad_norm": 173.12478637695312, |
| "learning_rate": 1.4925373134328358e-06, |
| "loss": 10.4766, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.009029345372460496, |
| "grad_norm": 172.7010498046875, |
| "learning_rate": 1.6417910447761196e-06, |
| "loss": 9.1289, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.009781790820165538, |
| "grad_norm": 140.74220275878906, |
| "learning_rate": 1.791044776119403e-06, |
| "loss": 7.6797, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.010534236267870579, |
| "grad_norm": 163.679443359375, |
| "learning_rate": 1.9402985074626867e-06, |
| "loss": 9.1445, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.011286681715575621, |
| "grad_norm": 167.57139587402344, |
| "learning_rate": 2.08955223880597e-06, |
| "loss": 9.4844, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.012039127163280662, |
| "grad_norm": 181.3193359375, |
| "learning_rate": 2.238805970149254e-06, |
| "loss": 8.9805, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.012791572610985704, |
| "grad_norm": 121.30020904541016, |
| "learning_rate": 2.3880597014925373e-06, |
| "loss": 7.5312, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.013544018058690745, |
| "grad_norm": 128.07334899902344, |
| "learning_rate": 2.537313432835821e-06, |
| "loss": 7.7539, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.014296463506395787, |
| "grad_norm": 135.87106323242188, |
| "learning_rate": 2.686567164179105e-06, |
| "loss": 8.1602, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.015048908954100828, |
| "grad_norm": 128.00323486328125, |
| "learning_rate": 2.835820895522388e-06, |
| "loss": 6.6758, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01580135440180587, |
| "grad_norm": 137.32179260253906, |
| "learning_rate": 2.9850746268656716e-06, |
| "loss": 7.0859, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.01655379984951091, |
| "grad_norm": 165.83033752441406, |
| "learning_rate": 3.1343283582089558e-06, |
| "loss": 6.5977, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.01730624529721595, |
| "grad_norm": 147.80575561523438, |
| "learning_rate": 3.283582089552239e-06, |
| "loss": 7.1211, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.01805869074492099, |
| "grad_norm": 178.7413787841797, |
| "learning_rate": 3.4328358208955225e-06, |
| "loss": 6.6016, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.018811136192626036, |
| "grad_norm": 87.64071655273438, |
| "learning_rate": 3.582089552238806e-06, |
| "loss": 6.8906, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.019563581640331076, |
| "grad_norm": 206.30386352539062, |
| "learning_rate": 3.73134328358209e-06, |
| "loss": 5.9648, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.020316027088036117, |
| "grad_norm": 319.09381103515625, |
| "learning_rate": 3.8805970149253735e-06, |
| "loss": 6.6172, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.021068472535741158, |
| "grad_norm": 70.70848846435547, |
| "learning_rate": 4.029850746268657e-06, |
| "loss": 6.2734, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0218209179834462, |
| "grad_norm": 156.22230529785156, |
| "learning_rate": 4.17910447761194e-06, |
| "loss": 5.5195, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.022573363431151242, |
| "grad_norm": 56.316650390625, |
| "learning_rate": 4.3283582089552236e-06, |
| "loss": 5.3242, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.023325808878856283, |
| "grad_norm": 77.25025177001953, |
| "learning_rate": 4.477611940298508e-06, |
| "loss": 5.0781, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.024078254326561323, |
| "grad_norm": 48.7443962097168, |
| "learning_rate": 4.626865671641791e-06, |
| "loss": 5.1797, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.024830699774266364, |
| "grad_norm": 58.58309555053711, |
| "learning_rate": 4.7761194029850745e-06, |
| "loss": 5.3711, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.025583145221971408, |
| "grad_norm": 54.83428955078125, |
| "learning_rate": 4.925373134328359e-06, |
| "loss": 4.5898, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.02633559066967645, |
| "grad_norm": 153.2790069580078, |
| "learning_rate": 5.074626865671642e-06, |
| "loss": 4.7715, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.02708803611738149, |
| "grad_norm": 126.50374603271484, |
| "learning_rate": 5.2238805970149255e-06, |
| "loss": 4.9414, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.02784048156508653, |
| "grad_norm": 283.3973388671875, |
| "learning_rate": 5.37313432835821e-06, |
| "loss": 5.0352, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.028592927012791574, |
| "grad_norm": 84.64131164550781, |
| "learning_rate": 5.522388059701493e-06, |
| "loss": 4.5098, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.029345372460496615, |
| "grad_norm": 117.75650024414062, |
| "learning_rate": 5.671641791044776e-06, |
| "loss": 4.5391, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.030097817908201655, |
| "grad_norm": 292.9526672363281, |
| "learning_rate": 5.820895522388061e-06, |
| "loss": 5.1367, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.030850263355906696, |
| "grad_norm": 82.79414367675781, |
| "learning_rate": 5.970149253731343e-06, |
| "loss": 4.5117, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.03160270880361174, |
| "grad_norm": 389.5240783691406, |
| "learning_rate": 6.119402985074627e-06, |
| "loss": 5.6406, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.03235515425131678, |
| "grad_norm": 400.56854248046875, |
| "learning_rate": 6.2686567164179116e-06, |
| "loss": 5.3984, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.03310759969902182, |
| "grad_norm": 110.48643493652344, |
| "learning_rate": 6.417910447761194e-06, |
| "loss": 3.9082, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.033860045146726865, |
| "grad_norm": 102.55659484863281, |
| "learning_rate": 6.567164179104478e-06, |
| "loss": 4.4297, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0346124905944319, |
| "grad_norm": 192.07241821289062, |
| "learning_rate": 6.7164179104477625e-06, |
| "loss": 4.1855, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.035364936042136946, |
| "grad_norm": 148.1069793701172, |
| "learning_rate": 6.865671641791045e-06, |
| "loss": 4.291, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.03611738148984198, |
| "grad_norm": 238.80014038085938, |
| "learning_rate": 7.014925373134329e-06, |
| "loss": 4.5215, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.03686982693754703, |
| "grad_norm": 220.73306274414062, |
| "learning_rate": 7.164179104477612e-06, |
| "loss": 4.5684, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.03762227238525207, |
| "grad_norm": 35.45397186279297, |
| "learning_rate": 7.313432835820896e-06, |
| "loss": 3.9785, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03837471783295711, |
| "grad_norm": 257.580810546875, |
| "learning_rate": 7.46268656716418e-06, |
| "loss": 4.3066, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.03912716328066215, |
| "grad_norm": 131.84400939941406, |
| "learning_rate": 7.611940298507463e-06, |
| "loss": 4.2578, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0398796087283672, |
| "grad_norm": 121.55342102050781, |
| "learning_rate": 7.761194029850747e-06, |
| "loss": 4.0273, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.040632054176072234, |
| "grad_norm": 157.86026000976562, |
| "learning_rate": 7.91044776119403e-06, |
| "loss": 4.3867, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.04138449962377728, |
| "grad_norm": 66.99703216552734, |
| "learning_rate": 8.059701492537314e-06, |
| "loss": 3.707, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.042136945071482315, |
| "grad_norm": 35.16154861450195, |
| "learning_rate": 8.208955223880599e-06, |
| "loss": 3.2441, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.04288939051918736, |
| "grad_norm": 44.11848449707031, |
| "learning_rate": 8.35820895522388e-06, |
| "loss": 4.0781, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.0436418359668924, |
| "grad_norm": 67.31927490234375, |
| "learning_rate": 8.507462686567165e-06, |
| "loss": 3.3984, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.04439428141459744, |
| "grad_norm": 66.82479858398438, |
| "learning_rate": 8.656716417910447e-06, |
| "loss": 4.0352, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.045146726862302484, |
| "grad_norm": 56.568931579589844, |
| "learning_rate": 8.805970149253732e-06, |
| "loss": 3.6328, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04589917231000752, |
| "grad_norm": 124.09092712402344, |
| "learning_rate": 8.955223880597016e-06, |
| "loss": 4.0215, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.046651617757712566, |
| "grad_norm": 99.17076873779297, |
| "learning_rate": 9.104477611940299e-06, |
| "loss": 3.8984, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.04740406320541761, |
| "grad_norm": 232.51084899902344, |
| "learning_rate": 9.253731343283582e-06, |
| "loss": 4.1426, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.04815650865312265, |
| "grad_norm": 213.96304321289062, |
| "learning_rate": 9.402985074626867e-06, |
| "loss": 4.5879, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.04890895410082769, |
| "grad_norm": 47.709999084472656, |
| "learning_rate": 9.552238805970149e-06, |
| "loss": 3.8027, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.04966139954853273, |
| "grad_norm": 233.9221954345703, |
| "learning_rate": 9.701492537313434e-06, |
| "loss": 4.9902, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.05041384499623777, |
| "grad_norm": 239.6158905029297, |
| "learning_rate": 9.850746268656717e-06, |
| "loss": 5.2031, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.051166290443942816, |
| "grad_norm": 157.87442016601562, |
| "learning_rate": 1e-05, |
| "loss": 4.0195, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.05191873589164785, |
| "grad_norm": 63.87594223022461, |
| "learning_rate": 9.992076069730588e-06, |
| "loss": 3.9609, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.0526711813393529, |
| "grad_norm": 148.4019317626953, |
| "learning_rate": 9.984152139461173e-06, |
| "loss": 3.498, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.05342362678705794, |
| "grad_norm": 83.16004180908203, |
| "learning_rate": 9.97622820919176e-06, |
| "loss": 3.6074, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.05417607223476298, |
| "grad_norm": 115.74369049072266, |
| "learning_rate": 9.968304278922346e-06, |
| "loss": 4.2363, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.05492851768246802, |
| "grad_norm": 193.71302795410156, |
| "learning_rate": 9.960380348652933e-06, |
| "loss": 4.709, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.05568096313017306, |
| "grad_norm": 53.86237716674805, |
| "learning_rate": 9.95245641838352e-06, |
| "loss": 3.0889, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.056433408577878104, |
| "grad_norm": 190.28834533691406, |
| "learning_rate": 9.944532488114107e-06, |
| "loss": 3.9102, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.05718585402558315, |
| "grad_norm": 217.49977111816406, |
| "learning_rate": 9.936608557844692e-06, |
| "loss": 4.3711, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.057938299473288185, |
| "grad_norm": 113.78182220458984, |
| "learning_rate": 9.928684627575277e-06, |
| "loss": 3.9707, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.05869074492099323, |
| "grad_norm": 44.046791076660156, |
| "learning_rate": 9.920760697305864e-06, |
| "loss": 3.3438, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.059443190368698266, |
| "grad_norm": 118.92435455322266, |
| "learning_rate": 9.912836767036451e-06, |
| "loss": 3.291, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.06019563581640331, |
| "grad_norm": 32.814632415771484, |
| "learning_rate": 9.904912836767039e-06, |
| "loss": 2.9922, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.060948081264108354, |
| "grad_norm": 43.7454833984375, |
| "learning_rate": 9.896988906497624e-06, |
| "loss": 3.2129, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.06170052671181339, |
| "grad_norm": 70.15777587890625, |
| "learning_rate": 9.88906497622821e-06, |
| "loss": 3.8359, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.062452972159518436, |
| "grad_norm": 74.7402114868164, |
| "learning_rate": 9.881141045958796e-06, |
| "loss": 3.3516, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.06320541760722348, |
| "grad_norm": 28.77251625061035, |
| "learning_rate": 9.873217115689383e-06, |
| "loss": 3.2402, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.06395786305492852, |
| "grad_norm": 52.40445327758789, |
| "learning_rate": 9.86529318541997e-06, |
| "loss": 3.3154, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.06471030850263355, |
| "grad_norm": 56.26734924316406, |
| "learning_rate": 9.857369255150556e-06, |
| "loss": 3.5293, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.0654627539503386, |
| "grad_norm": 55.244136810302734, |
| "learning_rate": 9.849445324881141e-06, |
| "loss": 3.3105, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.06621519939804364, |
| "grad_norm": 39.8226432800293, |
| "learning_rate": 9.841521394611728e-06, |
| "loss": 3.3848, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.06696764484574869, |
| "grad_norm": 24.03954315185547, |
| "learning_rate": 9.833597464342315e-06, |
| "loss": 3.2285, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.06772009029345373, |
| "grad_norm": 90.6539535522461, |
| "learning_rate": 9.825673534072902e-06, |
| "loss": 3.1191, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06847253574115876, |
| "grad_norm": 100.41248321533203, |
| "learning_rate": 9.817749603803487e-06, |
| "loss": 3.1475, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.0692249811888638, |
| "grad_norm": 119.3066177368164, |
| "learning_rate": 9.809825673534073e-06, |
| "loss": 3.5273, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.06997742663656885, |
| "grad_norm": 132.11619567871094, |
| "learning_rate": 9.80190174326466e-06, |
| "loss": 3.5645, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.07072987208427389, |
| "grad_norm": 100.85049438476562, |
| "learning_rate": 9.793977812995247e-06, |
| "loss": 3.2793, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.07148231753197894, |
| "grad_norm": 100.26934051513672, |
| "learning_rate": 9.786053882725834e-06, |
| "loss": 4.3945, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.07223476297968397, |
| "grad_norm": 118.85063934326172, |
| "learning_rate": 9.77812995245642e-06, |
| "loss": 3.6211, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.07298720842738901, |
| "grad_norm": 103.21385955810547, |
| "learning_rate": 9.770206022187005e-06, |
| "loss": 3.7793, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.07373965387509406, |
| "grad_norm": 126.40459442138672, |
| "learning_rate": 9.762282091917592e-06, |
| "loss": 3.2441, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.0744920993227991, |
| "grad_norm": 167.4326171875, |
| "learning_rate": 9.754358161648179e-06, |
| "loss": 4.0898, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.07524454477050414, |
| "grad_norm": 112.69780731201172, |
| "learning_rate": 9.746434231378766e-06, |
| "loss": 3.5723, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07599699021820917, |
| "grad_norm": 30.508968353271484, |
| "learning_rate": 9.738510301109351e-06, |
| "loss": 3.5879, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.07674943566591422, |
| "grad_norm": 76.39116668701172, |
| "learning_rate": 9.730586370839936e-06, |
| "loss": 2.8037, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.07750188111361926, |
| "grad_norm": 38.17439651489258, |
| "learning_rate": 9.722662440570524e-06, |
| "loss": 3.2109, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.0782543265613243, |
| "grad_norm": 30.669612884521484, |
| "learning_rate": 9.71473851030111e-06, |
| "loss": 3.1318, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.07900677200902935, |
| "grad_norm": 120.1872787475586, |
| "learning_rate": 9.706814580031696e-06, |
| "loss": 3.4766, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.0797592174567344, |
| "grad_norm": 128.8622589111328, |
| "learning_rate": 9.698890649762283e-06, |
| "loss": 3.4961, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.08051166290443942, |
| "grad_norm": 59.050209045410156, |
| "learning_rate": 9.69096671949287e-06, |
| "loss": 3.1543, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.08126410835214447, |
| "grad_norm": 85.37413024902344, |
| "learning_rate": 9.683042789223455e-06, |
| "loss": 3.3301, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.08201655379984951, |
| "grad_norm": 105.35246276855469, |
| "learning_rate": 9.675118858954042e-06, |
| "loss": 3.3145, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.08276899924755456, |
| "grad_norm": 73.28331756591797, |
| "learning_rate": 9.667194928684628e-06, |
| "loss": 2.8535, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0835214446952596, |
| "grad_norm": 34.09514617919922, |
| "learning_rate": 9.659270998415215e-06, |
| "loss": 3.668, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.08427389014296463, |
| "grad_norm": 196.14305114746094, |
| "learning_rate": 9.651347068145802e-06, |
| "loss": 4.4473, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.08502633559066967, |
| "grad_norm": 226.3737030029297, |
| "learning_rate": 9.643423137876387e-06, |
| "loss": 4.4688, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.08577878103837472, |
| "grad_norm": 148.45594787597656, |
| "learning_rate": 9.635499207606974e-06, |
| "loss": 3.9121, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.08653122648607976, |
| "grad_norm": 22.061586380004883, |
| "learning_rate": 9.62757527733756e-06, |
| "loss": 3.1465, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0872836719337848, |
| "grad_norm": 115.12189483642578, |
| "learning_rate": 9.619651347068147e-06, |
| "loss": 3.4834, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.08803611738148984, |
| "grad_norm": 142.36178588867188, |
| "learning_rate": 9.611727416798734e-06, |
| "loss": 3.7148, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.08878856282919488, |
| "grad_norm": 171.8651123046875, |
| "learning_rate": 9.603803486529319e-06, |
| "loss": 4.502, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.08954100827689992, |
| "grad_norm": 133.11654663085938, |
| "learning_rate": 9.595879556259906e-06, |
| "loss": 4.0439, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.09029345372460497, |
| "grad_norm": 26.849279403686523, |
| "learning_rate": 9.587955625990491e-06, |
| "loss": 2.6719, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.09104589917231001, |
| "grad_norm": 114.27269744873047, |
| "learning_rate": 9.580031695721078e-06, |
| "loss": 4.2188, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.09179834462001504, |
| "grad_norm": 203.3996124267578, |
| "learning_rate": 9.572107765451665e-06, |
| "loss": 4.4941, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.09255079006772009, |
| "grad_norm": 165.19558715820312, |
| "learning_rate": 9.56418383518225e-06, |
| "loss": 4.5957, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.09330323551542513, |
| "grad_norm": 125.5126724243164, |
| "learning_rate": 9.556259904912838e-06, |
| "loss": 3.4189, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.09405568096313018, |
| "grad_norm": 79.48650360107422, |
| "learning_rate": 9.548335974643423e-06, |
| "loss": 3.2871, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.09480812641083522, |
| "grad_norm": 66.11795806884766, |
| "learning_rate": 9.54041204437401e-06, |
| "loss": 3.1514, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.09556057185854025, |
| "grad_norm": 74.2788314819336, |
| "learning_rate": 9.532488114104597e-06, |
| "loss": 3.3936, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.0963130173062453, |
| "grad_norm": 35.67583465576172, |
| "learning_rate": 9.524564183835183e-06, |
| "loss": 3.3691, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.09706546275395034, |
| "grad_norm": 52.17413330078125, |
| "learning_rate": 9.51664025356577e-06, |
| "loss": 2.8535, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.09781790820165538, |
| "grad_norm": 119.83101654052734, |
| "learning_rate": 9.508716323296355e-06, |
| "loss": 3.5293, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.09857035364936043, |
| "grad_norm": 146.635986328125, |
| "learning_rate": 9.500792393026942e-06, |
| "loss": 3.2285, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.09932279909706546, |
| "grad_norm": 34.28512191772461, |
| "learning_rate": 9.492868462757529e-06, |
| "loss": 3.127, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.1000752445447705, |
| "grad_norm": 84.61096954345703, |
| "learning_rate": 9.484944532488114e-06, |
| "loss": 3.1426, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.10082768999247554, |
| "grad_norm": 88.358642578125, |
| "learning_rate": 9.477020602218701e-06, |
| "loss": 2.7129, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.10158013544018059, |
| "grad_norm": 53.102962493896484, |
| "learning_rate": 9.469096671949287e-06, |
| "loss": 3.1367, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.10233258088788563, |
| "grad_norm": 55.46281814575195, |
| "learning_rate": 9.461172741679874e-06, |
| "loss": 3.3574, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.10308502633559068, |
| "grad_norm": 72.24687957763672, |
| "learning_rate": 9.45324881141046e-06, |
| "loss": 3.1113, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.1038374717832957, |
| "grad_norm": 84.3861083984375, |
| "learning_rate": 9.445324881141046e-06, |
| "loss": 3.3047, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.10458991723100075, |
| "grad_norm": 88.76127624511719, |
| "learning_rate": 9.437400950871633e-06, |
| "loss": 3.6875, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.1053423626787058, |
| "grad_norm": 55.702484130859375, |
| "learning_rate": 9.429477020602219e-06, |
| "loss": 3.3535, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.10609480812641084, |
| "grad_norm": 68.59583282470703, |
| "learning_rate": 9.421553090332806e-06, |
| "loss": 2.8721, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.10684725357411588, |
| "grad_norm": 89.981689453125, |
| "learning_rate": 9.413629160063393e-06, |
| "loss": 3.2344, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.10759969902182091, |
| "grad_norm": 92.87030792236328, |
| "learning_rate": 9.405705229793978e-06, |
| "loss": 3.5117, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.10835214446952596, |
| "grad_norm": 27.346115112304688, |
| "learning_rate": 9.397781299524565e-06, |
| "loss": 2.7305, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.109104589917231, |
| "grad_norm": 43.55765151977539, |
| "learning_rate": 9.38985736925515e-06, |
| "loss": 3.2832, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.10985703536493605, |
| "grad_norm": 91.49488067626953, |
| "learning_rate": 9.381933438985737e-06, |
| "loss": 3.1992, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.11060948081264109, |
| "grad_norm": 67.19734954833984, |
| "learning_rate": 9.374009508716324e-06, |
| "loss": 3.3184, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.11136192626034612, |
| "grad_norm": 47.810272216796875, |
| "learning_rate": 9.366085578446912e-06, |
| "loss": 3.3203, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.11211437170805116, |
| "grad_norm": 70.10330200195312, |
| "learning_rate": 9.358161648177497e-06, |
| "loss": 4.127, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.11286681715575621, |
| "grad_norm": 43.671142578125, |
| "learning_rate": 9.350237717908082e-06, |
| "loss": 2.5596, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.11361926260346125, |
| "grad_norm": 22.097169876098633, |
| "learning_rate": 9.34231378763867e-06, |
| "loss": 2.8418, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.1143717080511663, |
| "grad_norm": 26.284515380859375, |
| "learning_rate": 9.334389857369256e-06, |
| "loss": 3.3594, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.11512415349887133, |
| "grad_norm": 39.73884582519531, |
| "learning_rate": 9.326465927099843e-06, |
| "loss": 3.6699, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.11587659894657637, |
| "grad_norm": 27.474496841430664, |
| "learning_rate": 9.318541996830429e-06, |
| "loss": 2.9297, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.11662904439428141, |
| "grad_norm": 33.951560974121094, |
| "learning_rate": 9.310618066561014e-06, |
| "loss": 3.5488, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.11738148984198646, |
| "grad_norm": 26.033336639404297, |
| "learning_rate": 9.302694136291601e-06, |
| "loss": 2.9043, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.1181339352896915, |
| "grad_norm": 22.7733211517334, |
| "learning_rate": 9.294770206022188e-06, |
| "loss": 3.4277, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.11888638073739653, |
| "grad_norm": 78.39336395263672, |
| "learning_rate": 9.286846275752775e-06, |
| "loss": 3.4902, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.11963882618510158, |
| "grad_norm": 26.790979385375977, |
| "learning_rate": 9.27892234548336e-06, |
| "loss": 3.002, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.12039127163280662, |
| "grad_norm": 21.88675308227539, |
| "learning_rate": 9.270998415213946e-06, |
| "loss": 3.2793, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.12114371708051166, |
| "grad_norm": 36.12630844116211, |
| "learning_rate": 9.263074484944533e-06, |
| "loss": 3.1387, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.12189616252821671, |
| "grad_norm": 61.90777587890625, |
| "learning_rate": 9.25515055467512e-06, |
| "loss": 3.1348, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.12264860797592174, |
| "grad_norm": 53.83844757080078, |
| "learning_rate": 9.247226624405707e-06, |
| "loss": 3.1816, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.12340105342362678, |
| "grad_norm": 48.54594039916992, |
| "learning_rate": 9.239302694136292e-06, |
| "loss": 2.8086, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.12415349887133183, |
| "grad_norm": 36.44153594970703, |
| "learning_rate": 9.231378763866878e-06, |
| "loss": 3.2012, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.12490594431903687, |
| "grad_norm": 69.61957550048828, |
| "learning_rate": 9.223454833597465e-06, |
| "loss": 3.1953, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.1256583897667419, |
| "grad_norm": 103.36559295654297, |
| "learning_rate": 9.215530903328052e-06, |
| "loss": 3.1553, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.12641083521444696, |
| "grad_norm": 66.82914733886719, |
| "learning_rate": 9.207606973058639e-06, |
| "loss": 3.2227, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.127163280662152, |
| "grad_norm": 51.18250274658203, |
| "learning_rate": 9.199683042789224e-06, |
| "loss": 3.082, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.12791572610985705, |
| "grad_norm": 72.96109771728516, |
| "learning_rate": 9.19175911251981e-06, |
| "loss": 3.6699, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.12866817155756208, |
| "grad_norm": 75.40308380126953, |
| "learning_rate": 9.183835182250396e-06, |
| "loss": 3.2969, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.1294206170052671, |
| "grad_norm": 28.419187545776367, |
| "learning_rate": 9.175911251980984e-06, |
| "loss": 2.9824, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.13017306245297217, |
| "grad_norm": 53.255279541015625, |
| "learning_rate": 9.16798732171157e-06, |
| "loss": 3.2578, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.1309255079006772, |
| "grad_norm": 71.64155578613281, |
| "learning_rate": 9.160063391442156e-06, |
| "loss": 3.0547, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.13167795334838225, |
| "grad_norm": 28.97214126586914, |
| "learning_rate": 9.152139461172741e-06, |
| "loss": 3.0166, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.13243039879608728, |
| "grad_norm": 50.072044372558594, |
| "learning_rate": 9.144215530903328e-06, |
| "loss": 3.4219, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.13318284424379231, |
| "grad_norm": 21.372264862060547, |
| "learning_rate": 9.136291600633915e-06, |
| "loss": 2.9746, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.13393528969149737, |
| "grad_norm": 24.634939193725586, |
| "learning_rate": 9.128367670364502e-06, |
| "loss": 3.1484, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.1346877351392024, |
| "grad_norm": 62.78826904296875, |
| "learning_rate": 9.120443740095088e-06, |
| "loss": 3.3242, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.13544018058690746, |
| "grad_norm": 31.50160789489746, |
| "learning_rate": 9.112519809825675e-06, |
| "loss": 2.6777, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1361926260346125, |
| "grad_norm": 39.34284591674805, |
| "learning_rate": 9.10459587955626e-06, |
| "loss": 2.5, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.13694507148231752, |
| "grad_norm": 32.132713317871094, |
| "learning_rate": 9.096671949286847e-06, |
| "loss": 2.5635, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.13769751693002258, |
| "grad_norm": 41.65974044799805, |
| "learning_rate": 9.088748019017434e-06, |
| "loss": 3.3496, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.1384499623777276, |
| "grad_norm": 38.184654235839844, |
| "learning_rate": 9.08082408874802e-06, |
| "loss": 3.9766, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.13920240782543267, |
| "grad_norm": 75.4859848022461, |
| "learning_rate": 9.072900158478607e-06, |
| "loss": 3.1914, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.1399548532731377, |
| "grad_norm": 34.991886138916016, |
| "learning_rate": 9.064976228209192e-06, |
| "loss": 3.0166, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.14070729872084273, |
| "grad_norm": 69.22996520996094, |
| "learning_rate": 9.057052297939779e-06, |
| "loss": 3.0352, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.14145974416854779, |
| "grad_norm": 45.45055389404297, |
| "learning_rate": 9.049128367670366e-06, |
| "loss": 3.2051, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.14221218961625282, |
| "grad_norm": 37.568206787109375, |
| "learning_rate": 9.041204437400951e-06, |
| "loss": 3.2168, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.14296463506395787, |
| "grad_norm": 30.181642532348633, |
| "learning_rate": 9.033280507131538e-06, |
| "loss": 3.2676, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1437170805116629, |
| "grad_norm": 23.631832122802734, |
| "learning_rate": 9.025356576862124e-06, |
| "loss": 3.0547, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.14446952595936793, |
| "grad_norm": 40.3087043762207, |
| "learning_rate": 9.01743264659271e-06, |
| "loss": 2.9238, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.145221971407073, |
| "grad_norm": 28.530550003051758, |
| "learning_rate": 9.009508716323298e-06, |
| "loss": 2.5859, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.14597441685477802, |
| "grad_norm": 74.48800659179688, |
| "learning_rate": 9.001584786053883e-06, |
| "loss": 3.0322, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.14672686230248308, |
| "grad_norm": 106.76486206054688, |
| "learning_rate": 8.99366085578447e-06, |
| "loss": 3.7148, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.1474793077501881, |
| "grad_norm": 67.58019256591797, |
| "learning_rate": 8.985736925515056e-06, |
| "loss": 2.9502, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.14823175319789314, |
| "grad_norm": 54.92237091064453, |
| "learning_rate": 8.977812995245643e-06, |
| "loss": 2.8799, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.1489841986455982, |
| "grad_norm": 59.95341873168945, |
| "learning_rate": 8.96988906497623e-06, |
| "loss": 3.7773, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.14973664409330323, |
| "grad_norm": 38.49965286254883, |
| "learning_rate": 8.961965134706815e-06, |
| "loss": 3.5742, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.1504890895410083, |
| "grad_norm": 32.147010803222656, |
| "learning_rate": 8.954041204437402e-06, |
| "loss": 2.9717, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.15124153498871332, |
| "grad_norm": 33.17499542236328, |
| "learning_rate": 8.946117274167987e-06, |
| "loss": 2.9531, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.15199398043641835, |
| "grad_norm": 26.919301986694336, |
| "learning_rate": 8.938193343898574e-06, |
| "loss": 2.9121, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.1527464258841234, |
| "grad_norm": 21.65958595275879, |
| "learning_rate": 8.930269413629161e-06, |
| "loss": 2.9121, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.15349887133182843, |
| "grad_norm": 20.233850479125977, |
| "learning_rate": 8.922345483359747e-06, |
| "loss": 3.0332, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.1542513167795335, |
| "grad_norm": 31.14151382446289, |
| "learning_rate": 8.914421553090334e-06, |
| "loss": 3.0938, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.15500376222723852, |
| "grad_norm": 29.060792922973633, |
| "learning_rate": 8.90649762282092e-06, |
| "loss": 3.4043, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.15575620767494355, |
| "grad_norm": 43.115238189697266, |
| "learning_rate": 8.898573692551506e-06, |
| "loss": 3.208, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.1565086531226486, |
| "grad_norm": 25.682483673095703, |
| "learning_rate": 8.890649762282093e-06, |
| "loss": 3.2988, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.15726109857035364, |
| "grad_norm": 23.325185775756836, |
| "learning_rate": 8.882725832012679e-06, |
| "loss": 3.0186, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.1580135440180587, |
| "grad_norm": 19.003551483154297, |
| "learning_rate": 8.874801901743266e-06, |
| "loss": 2.5508, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.15876598946576373, |
| "grad_norm": 34.20718765258789, |
| "learning_rate": 8.866877971473851e-06, |
| "loss": 3.1582, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.1595184349134688, |
| "grad_norm": 25.554487228393555, |
| "learning_rate": 8.858954041204438e-06, |
| "loss": 3.0859, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.16027088036117382, |
| "grad_norm": 25.85458755493164, |
| "learning_rate": 8.851030110935025e-06, |
| "loss": 3.0215, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.16102332580887885, |
| "grad_norm": 89.72293853759766, |
| "learning_rate": 8.84310618066561e-06, |
| "loss": 3.2988, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.1617757712565839, |
| "grad_norm": 61.3826789855957, |
| "learning_rate": 8.835182250396197e-06, |
| "loss": 3.2188, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.16252821670428894, |
| "grad_norm": 35.555458068847656, |
| "learning_rate": 8.827258320126783e-06, |
| "loss": 3.2764, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.163280662151994, |
| "grad_norm": 88.35000610351562, |
| "learning_rate": 8.81933438985737e-06, |
| "loss": 3.1133, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.16403310759969902, |
| "grad_norm": 114.05996704101562, |
| "learning_rate": 8.811410459587957e-06, |
| "loss": 3.9531, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.16478555304740405, |
| "grad_norm": 78.45552062988281, |
| "learning_rate": 8.803486529318542e-06, |
| "loss": 2.957, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.1655379984951091, |
| "grad_norm": 31.660160064697266, |
| "learning_rate": 8.79556259904913e-06, |
| "loss": 2.8115, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.16629044394281414, |
| "grad_norm": 31.968887329101562, |
| "learning_rate": 8.787638668779716e-06, |
| "loss": 2.6855, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.1670428893905192, |
| "grad_norm": 49.5087890625, |
| "learning_rate": 8.779714738510302e-06, |
| "loss": 3.3604, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.16779533483822423, |
| "grad_norm": 22.69972038269043, |
| "learning_rate": 8.771790808240889e-06, |
| "loss": 2.6787, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.16854778028592926, |
| "grad_norm": 47.977970123291016, |
| "learning_rate": 8.763866877971474e-06, |
| "loss": 3.0547, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.16930022573363432, |
| "grad_norm": 65.0685043334961, |
| "learning_rate": 8.755942947702061e-06, |
| "loss": 3.3545, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.17005267118133935, |
| "grad_norm": 43.86599349975586, |
| "learning_rate": 8.748019017432648e-06, |
| "loss": 2.8574, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.1708051166290444, |
| "grad_norm": 72.90522003173828, |
| "learning_rate": 8.740095087163233e-06, |
| "loss": 3.7168, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.17155756207674944, |
| "grad_norm": 41.62009048461914, |
| "learning_rate": 8.73217115689382e-06, |
| "loss": 2.8691, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.17231000752445447, |
| "grad_norm": 53.0240478515625, |
| "learning_rate": 8.724247226624406e-06, |
| "loss": 2.8555, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.17306245297215953, |
| "grad_norm": 24.85753059387207, |
| "learning_rate": 8.716323296354993e-06, |
| "loss": 3.0605, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.17381489841986456, |
| "grad_norm": 21.22967529296875, |
| "learning_rate": 8.70839936608558e-06, |
| "loss": 3.2324, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.1745673438675696, |
| "grad_norm": 104.60713958740234, |
| "learning_rate": 8.700475435816165e-06, |
| "loss": 3.541, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.17531978931527464, |
| "grad_norm": 102.33487701416016, |
| "learning_rate": 8.692551505546752e-06, |
| "loss": 3.2305, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.17607223476297967, |
| "grad_norm": 35.243202209472656, |
| "learning_rate": 8.684627575277338e-06, |
| "loss": 3.1211, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.17682468021068473, |
| "grad_norm": 25.380643844604492, |
| "learning_rate": 8.676703645007925e-06, |
| "loss": 2.9746, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.17757712565838976, |
| "grad_norm": 94.3141860961914, |
| "learning_rate": 8.668779714738512e-06, |
| "loss": 3.5654, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.17832957110609482, |
| "grad_norm": 130.24545288085938, |
| "learning_rate": 8.660855784469097e-06, |
| "loss": 4.0391, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.17908201655379985, |
| "grad_norm": 76.28020477294922, |
| "learning_rate": 8.652931854199684e-06, |
| "loss": 2.9375, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.17983446200150488, |
| "grad_norm": 84.25048065185547, |
| "learning_rate": 8.64500792393027e-06, |
| "loss": 3.4512, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.18058690744920994, |
| "grad_norm": 25.853254318237305, |
| "learning_rate": 8.637083993660857e-06, |
| "loss": 2.8232, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.18133935289691497, |
| "grad_norm": 51.54713439941406, |
| "learning_rate": 8.629160063391444e-06, |
| "loss": 3.04, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.18209179834462003, |
| "grad_norm": 81.95057678222656, |
| "learning_rate": 8.621236133122029e-06, |
| "loss": 3.2344, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.18284424379232506, |
| "grad_norm": 41.63261032104492, |
| "learning_rate": 8.613312202852616e-06, |
| "loss": 3.0566, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.1835966892400301, |
| "grad_norm": 41.84830856323242, |
| "learning_rate": 8.605388272583201e-06, |
| "loss": 3.1377, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.18434913468773514, |
| "grad_norm": 37.13138961791992, |
| "learning_rate": 8.597464342313788e-06, |
| "loss": 2.3369, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.18510158013544017, |
| "grad_norm": 39.32001876831055, |
| "learning_rate": 8.589540412044375e-06, |
| "loss": 2.8877, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.18585402558314523, |
| "grad_norm": 32.617576599121094, |
| "learning_rate": 8.58161648177496e-06, |
| "loss": 2.7949, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.18660647103085026, |
| "grad_norm": 24.77182388305664, |
| "learning_rate": 8.573692551505548e-06, |
| "loss": 2.7861, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.1873589164785553, |
| "grad_norm": 26.878522872924805, |
| "learning_rate": 8.565768621236133e-06, |
| "loss": 2.8887, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.18811136192626035, |
| "grad_norm": 46.47650909423828, |
| "learning_rate": 8.55784469096672e-06, |
| "loss": 2.4863, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.18886380737396538, |
| "grad_norm": 33.06129455566406, |
| "learning_rate": 8.549920760697307e-06, |
| "loss": 2.7051, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.18961625282167044, |
| "grad_norm": 43.33230209350586, |
| "learning_rate": 8.541996830427893e-06, |
| "loss": 4.084, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.19036869826937547, |
| "grad_norm": 27.43387222290039, |
| "learning_rate": 8.53407290015848e-06, |
| "loss": 2.6455, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.1911211437170805, |
| "grad_norm": 34.015403747558594, |
| "learning_rate": 8.526148969889065e-06, |
| "loss": 3.3154, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.19187358916478556, |
| "grad_norm": 22.817705154418945, |
| "learning_rate": 8.518225039619652e-06, |
| "loss": 2.9551, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.1926260346124906, |
| "grad_norm": 81.8350601196289, |
| "learning_rate": 8.510301109350239e-06, |
| "loss": 2.792, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.19337848006019565, |
| "grad_norm": 84.2726058959961, |
| "learning_rate": 8.502377179080824e-06, |
| "loss": 2.9844, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.19413092550790068, |
| "grad_norm": 67.91531372070312, |
| "learning_rate": 8.494453248811411e-06, |
| "loss": 2.6729, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.1948833709556057, |
| "grad_norm": 20.062740325927734, |
| "learning_rate": 8.486529318541997e-06, |
| "loss": 2.79, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.19563581640331076, |
| "grad_norm": 59.18655014038086, |
| "learning_rate": 8.478605388272584e-06, |
| "loss": 3.4775, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1963882618510158, |
| "grad_norm": 59.75342559814453, |
| "learning_rate": 8.47068145800317e-06, |
| "loss": 3.0488, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.19714070729872085, |
| "grad_norm": 70.16788482666016, |
| "learning_rate": 8.462757527733758e-06, |
| "loss": 3.7871, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.19789315274642588, |
| "grad_norm": 39.20167541503906, |
| "learning_rate": 8.454833597464343e-06, |
| "loss": 2.959, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.1986455981941309, |
| "grad_norm": 55.009517669677734, |
| "learning_rate": 8.446909667194929e-06, |
| "loss": 3.0439, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.19939804364183597, |
| "grad_norm": 113.7911148071289, |
| "learning_rate": 8.438985736925516e-06, |
| "loss": 3.6328, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.200150489089541, |
| "grad_norm": 70.44924926757812, |
| "learning_rate": 8.431061806656103e-06, |
| "loss": 3.0176, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.20090293453724606, |
| "grad_norm": 45.5489616394043, |
| "learning_rate": 8.42313787638669e-06, |
| "loss": 2.9434, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.2016553799849511, |
| "grad_norm": 62.986175537109375, |
| "learning_rate": 8.415213946117275e-06, |
| "loss": 3.1152, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.20240782543265612, |
| "grad_norm": 37.91203689575195, |
| "learning_rate": 8.40729001584786e-06, |
| "loss": 3.6875, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.20316027088036118, |
| "grad_norm": 75.06108856201172, |
| "learning_rate": 8.399366085578447e-06, |
| "loss": 3.127, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.2039127163280662, |
| "grad_norm": 45.03618240356445, |
| "learning_rate": 8.391442155309034e-06, |
| "loss": 2.7373, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.20466516177577126, |
| "grad_norm": 20.670963287353516, |
| "learning_rate": 8.383518225039621e-06, |
| "loss": 2.6094, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.2054176072234763, |
| "grad_norm": 84.76927947998047, |
| "learning_rate": 8.375594294770207e-06, |
| "loss": 3.1133, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.20617005267118135, |
| "grad_norm": 36.80317306518555, |
| "learning_rate": 8.367670364500792e-06, |
| "loss": 2.7793, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.20692249811888638, |
| "grad_norm": 38.66841506958008, |
| "learning_rate": 8.35974643423138e-06, |
| "loss": 3.0479, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.2076749435665914, |
| "grad_norm": 47.253578186035156, |
| "learning_rate": 8.351822503961966e-06, |
| "loss": 3.1748, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.20842738901429647, |
| "grad_norm": 21.542232513427734, |
| "learning_rate": 8.343898573692553e-06, |
| "loss": 2.8145, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.2091798344620015, |
| "grad_norm": 55.47714614868164, |
| "learning_rate": 8.335974643423139e-06, |
| "loss": 2.498, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.20993227990970656, |
| "grad_norm": 54.076080322265625, |
| "learning_rate": 8.328050713153724e-06, |
| "loss": 2.8848, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.2106847253574116, |
| "grad_norm": 45.00600051879883, |
| "learning_rate": 8.320126782884311e-06, |
| "loss": 3.2305, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.21143717080511662, |
| "grad_norm": 27.784671783447266, |
| "learning_rate": 8.312202852614898e-06, |
| "loss": 2.8555, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.21218961625282168, |
| "grad_norm": 21.135204315185547, |
| "learning_rate": 8.304278922345485e-06, |
| "loss": 2.8125, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.2129420617005267, |
| "grad_norm": 27.72955894470215, |
| "learning_rate": 8.29635499207607e-06, |
| "loss": 3.0762, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.21369450714823177, |
| "grad_norm": 23.81853485107422, |
| "learning_rate": 8.288431061806656e-06, |
| "loss": 2.5938, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.2144469525959368, |
| "grad_norm": 41.66539764404297, |
| "learning_rate": 8.280507131537243e-06, |
| "loss": 2.8809, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.21519939804364183, |
| "grad_norm": 38.57289123535156, |
| "learning_rate": 8.27258320126783e-06, |
| "loss": 2.6543, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.21595184349134688, |
| "grad_norm": 46.27495193481445, |
| "learning_rate": 8.264659270998417e-06, |
| "loss": 2.6543, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.21670428893905191, |
| "grad_norm": 88.909912109375, |
| "learning_rate": 8.256735340729002e-06, |
| "loss": 3.042, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.21745673438675697, |
| "grad_norm": 56.075382232666016, |
| "learning_rate": 8.24881141045959e-06, |
| "loss": 2.9414, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.218209179834462, |
| "grad_norm": 26.580392837524414, |
| "learning_rate": 8.240887480190175e-06, |
| "loss": 3.0, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.21896162528216703, |
| "grad_norm": 61.84196853637695, |
| "learning_rate": 8.232963549920762e-06, |
| "loss": 2.8662, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.2197140707298721, |
| "grad_norm": 43.39738082885742, |
| "learning_rate": 8.225039619651349e-06, |
| "loss": 3.292, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.22046651617757712, |
| "grad_norm": 41.86874008178711, |
| "learning_rate": 8.217115689381934e-06, |
| "loss": 3.2148, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.22121896162528218, |
| "grad_norm": 20.22987937927246, |
| "learning_rate": 8.209191759112521e-06, |
| "loss": 3.0645, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.2219714070729872, |
| "grad_norm": 99.28943634033203, |
| "learning_rate": 8.201267828843106e-06, |
| "loss": 3.373, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.22272385252069224, |
| "grad_norm": 121.67118835449219, |
| "learning_rate": 8.193343898573693e-06, |
| "loss": 3.5176, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.2234762979683973, |
| "grad_norm": 105.28216552734375, |
| "learning_rate": 8.18541996830428e-06, |
| "loss": 3.1875, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.22422874341610233, |
| "grad_norm": 70.92816925048828, |
| "learning_rate": 8.177496038034866e-06, |
| "loss": 3.2969, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.22498118886380739, |
| "grad_norm": 22.64484977722168, |
| "learning_rate": 8.169572107765453e-06, |
| "loss": 3.3965, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.22573363431151242, |
| "grad_norm": 39.87211608886719, |
| "learning_rate": 8.161648177496038e-06, |
| "loss": 2.9863, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.22648607975921745, |
| "grad_norm": 62.50984573364258, |
| "learning_rate": 8.153724247226625e-06, |
| "loss": 2.3682, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.2272385252069225, |
| "grad_norm": 32.058197021484375, |
| "learning_rate": 8.145800316957212e-06, |
| "loss": 2.9736, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.22799097065462753, |
| "grad_norm": 39.14604568481445, |
| "learning_rate": 8.137876386687798e-06, |
| "loss": 3.2285, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.2287434161023326, |
| "grad_norm": 27.052776336669922, |
| "learning_rate": 8.129952456418385e-06, |
| "loss": 2.9492, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.22949586155003762, |
| "grad_norm": 83.68045806884766, |
| "learning_rate": 8.12202852614897e-06, |
| "loss": 3.6328, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.23024830699774265, |
| "grad_norm": 117.3252944946289, |
| "learning_rate": 8.114104595879557e-06, |
| "loss": 3.5703, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.2310007524454477, |
| "grad_norm": 65.46855926513672, |
| "learning_rate": 8.106180665610144e-06, |
| "loss": 2.8477, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.23175319789315274, |
| "grad_norm": 70.63121795654297, |
| "learning_rate": 8.09825673534073e-06, |
| "loss": 2.9668, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.2325056433408578, |
| "grad_norm": 37.77394104003906, |
| "learning_rate": 8.090332805071317e-06, |
| "loss": 3.0215, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.23325808878856283, |
| "grad_norm": 52.81776809692383, |
| "learning_rate": 8.082408874801902e-06, |
| "loss": 2.7529, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.23401053423626786, |
| "grad_norm": 97.96956634521484, |
| "learning_rate": 8.074484944532489e-06, |
| "loss": 3.8711, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.23476297968397292, |
| "grad_norm": 94.45693969726562, |
| "learning_rate": 8.066561014263076e-06, |
| "loss": 3.2363, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.23551542513167795, |
| "grad_norm": 113.64404296875, |
| "learning_rate": 8.058637083993661e-06, |
| "loss": 3.5781, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.236267870579383, |
| "grad_norm": 44.172882080078125, |
| "learning_rate": 8.050713153724248e-06, |
| "loss": 3.1006, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.23702031602708803, |
| "grad_norm": 51.807559967041016, |
| "learning_rate": 8.042789223454834e-06, |
| "loss": 2.8213, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.23777276147479307, |
| "grad_norm": 54.96208190917969, |
| "learning_rate": 8.03486529318542e-06, |
| "loss": 2.9482, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.23852520692249812, |
| "grad_norm": 34.15311813354492, |
| "learning_rate": 8.026941362916006e-06, |
| "loss": 2.998, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.23927765237020315, |
| "grad_norm": 57.66706085205078, |
| "learning_rate": 8.019017432646593e-06, |
| "loss": 2.8281, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.2400300978179082, |
| "grad_norm": 31.129295349121094, |
| "learning_rate": 8.01109350237718e-06, |
| "loss": 3.3594, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.24078254326561324, |
| "grad_norm": 67.8250732421875, |
| "learning_rate": 8.003169572107765e-06, |
| "loss": 3.6387, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.24153498871331827, |
| "grad_norm": 78.51448822021484, |
| "learning_rate": 7.995245641838353e-06, |
| "loss": 3.2305, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.24228743416102333, |
| "grad_norm": 82.17552185058594, |
| "learning_rate": 7.987321711568938e-06, |
| "loss": 2.8574, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.24303987960872836, |
| "grad_norm": 76.5505599975586, |
| "learning_rate": 7.979397781299525e-06, |
| "loss": 3.1113, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.24379232505643342, |
| "grad_norm": 19.232769012451172, |
| "learning_rate": 7.971473851030112e-06, |
| "loss": 3.1807, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.24454477050413845, |
| "grad_norm": 65.15279388427734, |
| "learning_rate": 7.963549920760697e-06, |
| "loss": 2.8574, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.24529721595184348, |
| "grad_norm": 83.49951171875, |
| "learning_rate": 7.955625990491284e-06, |
| "loss": 3.1113, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.24604966139954854, |
| "grad_norm": 32.631622314453125, |
| "learning_rate": 7.94770206022187e-06, |
| "loss": 2.7305, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.24680210684725357, |
| "grad_norm": 45.0832405090332, |
| "learning_rate": 7.939778129952457e-06, |
| "loss": 3.2754, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.24755455229495862, |
| "grad_norm": 20.239614486694336, |
| "learning_rate": 7.931854199683044e-06, |
| "loss": 2.8242, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.24830699774266365, |
| "grad_norm": 62.87268829345703, |
| "learning_rate": 7.92393026941363e-06, |
| "loss": 3.1045, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.24905944319036868, |
| "grad_norm": 74.40349578857422, |
| "learning_rate": 7.916006339144216e-06, |
| "loss": 3.1553, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.24981188863807374, |
| "grad_norm": 47.66246795654297, |
| "learning_rate": 7.908082408874802e-06, |
| "loss": 3.1406, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.2505643340857788, |
| "grad_norm": 52.15084457397461, |
| "learning_rate": 7.900158478605389e-06, |
| "loss": 3.4297, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.2513167795334838, |
| "grad_norm": 24.649290084838867, |
| "learning_rate": 7.892234548335976e-06, |
| "loss": 3.1738, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.2520692249811889, |
| "grad_norm": 94.12933349609375, |
| "learning_rate": 7.884310618066563e-06, |
| "loss": 3.291, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.2528216704288939, |
| "grad_norm": 80.10469818115234, |
| "learning_rate": 7.876386687797148e-06, |
| "loss": 3.1777, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.25357411587659895, |
| "grad_norm": 103.66165161132812, |
| "learning_rate": 7.868462757527733e-06, |
| "loss": 3.0996, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.254326561324304, |
| "grad_norm": 41.690093994140625, |
| "learning_rate": 7.86053882725832e-06, |
| "loss": 2.875, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.255079006772009, |
| "grad_norm": 29.56729507446289, |
| "learning_rate": 7.852614896988907e-06, |
| "loss": 3.1309, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.2558314522197141, |
| "grad_norm": 50.44151306152344, |
| "learning_rate": 7.844690966719494e-06, |
| "loss": 3.0566, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.2565838976674191, |
| "grad_norm": 64.72400665283203, |
| "learning_rate": 7.83676703645008e-06, |
| "loss": 3.165, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.25733634311512416, |
| "grad_norm": 20.706647872924805, |
| "learning_rate": 7.828843106180665e-06, |
| "loss": 2.7725, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.2580887885628292, |
| "grad_norm": 31.02082633972168, |
| "learning_rate": 7.820919175911252e-06, |
| "loss": 2.7695, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.2588412340105342, |
| "grad_norm": 21.273944854736328, |
| "learning_rate": 7.81299524564184e-06, |
| "loss": 2.6152, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.2595936794582393, |
| "grad_norm": 21.953739166259766, |
| "learning_rate": 7.805071315372426e-06, |
| "loss": 2.7334, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.26034612490594433, |
| "grad_norm": 27.68943977355957, |
| "learning_rate": 7.797147385103012e-06, |
| "loss": 3.1729, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.26109857035364936, |
| "grad_norm": 21.063129425048828, |
| "learning_rate": 7.789223454833597e-06, |
| "loss": 2.4844, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.2618510158013544, |
| "grad_norm": 27.426313400268555, |
| "learning_rate": 7.781299524564184e-06, |
| "loss": 2.748, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.2626034612490594, |
| "grad_norm": 21.69107437133789, |
| "learning_rate": 7.773375594294771e-06, |
| "loss": 2.7773, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.2633559066967645, |
| "grad_norm": 32.103153228759766, |
| "learning_rate": 7.765451664025358e-06, |
| "loss": 2.709, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.26410835214446954, |
| "grad_norm": 63.66896438598633, |
| "learning_rate": 7.757527733755943e-06, |
| "loss": 3.2031, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.26486079759217457, |
| "grad_norm": 29.75193214416504, |
| "learning_rate": 7.749603803486529e-06, |
| "loss": 3.168, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.2656132430398796, |
| "grad_norm": 25.284969329833984, |
| "learning_rate": 7.741679873217116e-06, |
| "loss": 2.5859, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.26636568848758463, |
| "grad_norm": 25.517702102661133, |
| "learning_rate": 7.733755942947703e-06, |
| "loss": 2.791, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.2671181339352897, |
| "grad_norm": 67.4950942993164, |
| "learning_rate": 7.72583201267829e-06, |
| "loss": 2.9072, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.26787057938299474, |
| "grad_norm": 31.88636016845703, |
| "learning_rate": 7.717908082408875e-06, |
| "loss": 2.8047, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.2686230248306998, |
| "grad_norm": 19.853824615478516, |
| "learning_rate": 7.70998415213946e-06, |
| "loss": 2.5537, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.2693754702784048, |
| "grad_norm": 68.49687957763672, |
| "learning_rate": 7.702060221870048e-06, |
| "loss": 3.1318, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.27012791572610984, |
| "grad_norm": 44.60654830932617, |
| "learning_rate": 7.694136291600635e-06, |
| "loss": 2.7461, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.2708803611738149, |
| "grad_norm": 54.63933181762695, |
| "learning_rate": 7.686212361331222e-06, |
| "loss": 2.6074, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.27163280662151995, |
| "grad_norm": 32.24433517456055, |
| "learning_rate": 7.678288431061807e-06, |
| "loss": 3.6348, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.272385252069225, |
| "grad_norm": 23.62687873840332, |
| "learning_rate": 7.670364500792394e-06, |
| "loss": 2.8691, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.27313769751693, |
| "grad_norm": 24.92971420288086, |
| "learning_rate": 7.66244057052298e-06, |
| "loss": 2.2783, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.27389014296463504, |
| "grad_norm": 72.9671859741211, |
| "learning_rate": 7.654516640253566e-06, |
| "loss": 3.1533, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.2746425884123401, |
| "grad_norm": 23.203428268432617, |
| "learning_rate": 7.646592709984154e-06, |
| "loss": 2.4209, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.27539503386004516, |
| "grad_norm": 26.601865768432617, |
| "learning_rate": 7.638668779714739e-06, |
| "loss": 2.5693, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.2761474793077502, |
| "grad_norm": 22.43524932861328, |
| "learning_rate": 7.630744849445326e-06, |
| "loss": 3.1426, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.2768999247554552, |
| "grad_norm": 25.846656799316406, |
| "learning_rate": 7.622820919175912e-06, |
| "loss": 2.8037, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.27765237020316025, |
| "grad_norm": 22.966304779052734, |
| "learning_rate": 7.614896988906498e-06, |
| "loss": 3.2334, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.27840481565086533, |
| "grad_norm": 31.72697639465332, |
| "learning_rate": 7.606973058637085e-06, |
| "loss": 2.6318, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.27915726109857036, |
| "grad_norm": 26.754465103149414, |
| "learning_rate": 7.5990491283676715e-06, |
| "loss": 2.3926, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.2799097065462754, |
| "grad_norm": 29.274229049682617, |
| "learning_rate": 7.591125198098257e-06, |
| "loss": 2.9092, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.2806621519939804, |
| "grad_norm": 19.97159194946289, |
| "learning_rate": 7.583201267828844e-06, |
| "loss": 2.5391, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.28141459744168545, |
| "grad_norm": 23.58123779296875, |
| "learning_rate": 7.57527733755943e-06, |
| "loss": 2.9385, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.28216704288939054, |
| "grad_norm": 22.98595428466797, |
| "learning_rate": 7.567353407290017e-06, |
| "loss": 2.6777, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.28291948833709557, |
| "grad_norm": 65.53314208984375, |
| "learning_rate": 7.559429477020603e-06, |
| "loss": 3.1846, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.2836719337848006, |
| "grad_norm": 63.19049835205078, |
| "learning_rate": 7.551505546751189e-06, |
| "loss": 2.5977, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.28442437923250563, |
| "grad_norm": 28.387653350830078, |
| "learning_rate": 7.543581616481776e-06, |
| "loss": 2.8359, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.28517682468021066, |
| "grad_norm": 26.09599494934082, |
| "learning_rate": 7.535657686212362e-06, |
| "loss": 2.6807, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.28592927012791575, |
| "grad_norm": 73.98117065429688, |
| "learning_rate": 7.527733755942949e-06, |
| "loss": 3.3809, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2866817155756208, |
| "grad_norm": 53.673030853271484, |
| "learning_rate": 7.519809825673535e-06, |
| "loss": 3.1035, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.2874341610233258, |
| "grad_norm": 49.4766845703125, |
| "learning_rate": 7.5118858954041205e-06, |
| "loss": 2.8252, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.28818660647103084, |
| "grad_norm": 27.39165496826172, |
| "learning_rate": 7.5039619651347075e-06, |
| "loss": 2.5625, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.28893905191873587, |
| "grad_norm": 26.559101104736328, |
| "learning_rate": 7.496038034865294e-06, |
| "loss": 2.7559, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.28969149736644095, |
| "grad_norm": 34.693641662597656, |
| "learning_rate": 7.488114104595881e-06, |
| "loss": 2.5527, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.290443942814146, |
| "grad_norm": 31.282804489135742, |
| "learning_rate": 7.480190174326466e-06, |
| "loss": 3.1592, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.291196388261851, |
| "grad_norm": 23.59662628173828, |
| "learning_rate": 7.472266244057052e-06, |
| "loss": 2.3418, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.29194883370955604, |
| "grad_norm": 37.472572326660156, |
| "learning_rate": 7.464342313787639e-06, |
| "loss": 2.3193, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.2927012791572611, |
| "grad_norm": 54.54274368286133, |
| "learning_rate": 7.4564183835182255e-06, |
| "loss": 3.084, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.29345372460496616, |
| "grad_norm": 52.258155822753906, |
| "learning_rate": 7.4484944532488126e-06, |
| "loss": 3.1748, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2942061700526712, |
| "grad_norm": 29.557064056396484, |
| "learning_rate": 7.440570522979398e-06, |
| "loss": 3.1416, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.2949586155003762, |
| "grad_norm": 21.459806442260742, |
| "learning_rate": 7.432646592709984e-06, |
| "loss": 2.6309, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.29571106094808125, |
| "grad_norm": 65.10810089111328, |
| "learning_rate": 7.424722662440571e-06, |
| "loss": 2.875, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.2964635063957863, |
| "grad_norm": 69.97591400146484, |
| "learning_rate": 7.416798732171157e-06, |
| "loss": 2.9814, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.29721595184349137, |
| "grad_norm": 58.1292839050293, |
| "learning_rate": 7.408874801901744e-06, |
| "loss": 3.0332, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.2979683972911964, |
| "grad_norm": 24.810606002807617, |
| "learning_rate": 7.40095087163233e-06, |
| "loss": 2.7246, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.2987208427389014, |
| "grad_norm": 35.730674743652344, |
| "learning_rate": 7.393026941362916e-06, |
| "loss": 3.0996, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.29947328818660646, |
| "grad_norm": 89.5046615600586, |
| "learning_rate": 7.385103011093503e-06, |
| "loss": 3.4258, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.3002257336343115, |
| "grad_norm": 32.95557403564453, |
| "learning_rate": 7.377179080824089e-06, |
| "loss": 2.3613, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.3009781790820166, |
| "grad_norm": 17.055770874023438, |
| "learning_rate": 7.369255150554676e-06, |
| "loss": 2.6543, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.3017306245297216, |
| "grad_norm": 37.67470932006836, |
| "learning_rate": 7.3613312202852615e-06, |
| "loss": 3.0732, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.30248306997742663, |
| "grad_norm": 28.4992618560791, |
| "learning_rate": 7.3534072900158486e-06, |
| "loss": 2.7207, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.30323551542513166, |
| "grad_norm": 35.26691818237305, |
| "learning_rate": 7.345483359746435e-06, |
| "loss": 2.8926, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.3039879608728367, |
| "grad_norm": 40.095428466796875, |
| "learning_rate": 7.337559429477021e-06, |
| "loss": 3.1211, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.3047404063205418, |
| "grad_norm": 18.42124366760254, |
| "learning_rate": 7.329635499207608e-06, |
| "loss": 2.501, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.3054928517682468, |
| "grad_norm": 21.575937271118164, |
| "learning_rate": 7.321711568938193e-06, |
| "loss": 2.5635, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.30624529721595184, |
| "grad_norm": 35.04372787475586, |
| "learning_rate": 7.31378763866878e-06, |
| "loss": 2.5801, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.30699774266365687, |
| "grad_norm": 38.118961334228516, |
| "learning_rate": 7.305863708399367e-06, |
| "loss": 3.166, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.3077501881113619, |
| "grad_norm": 25.873291015625, |
| "learning_rate": 7.297939778129954e-06, |
| "loss": 2.8125, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.308502633559067, |
| "grad_norm": 22.422454833984375, |
| "learning_rate": 7.29001584786054e-06, |
| "loss": 2.8438, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.309255079006772, |
| "grad_norm": 30.403430938720703, |
| "learning_rate": 7.282091917591125e-06, |
| "loss": 2.7852, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.31000752445447705, |
| "grad_norm": 47.75285339355469, |
| "learning_rate": 7.274167987321712e-06, |
| "loss": 3.0742, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.3107599699021821, |
| "grad_norm": 51.34233474731445, |
| "learning_rate": 7.266244057052298e-06, |
| "loss": 3.1562, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.3115124153498871, |
| "grad_norm": 24.690597534179688, |
| "learning_rate": 7.2583201267828854e-06, |
| "loss": 2.7275, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.3122648607975922, |
| "grad_norm": 26.2873592376709, |
| "learning_rate": 7.250396196513472e-06, |
| "loss": 2.9141, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.3130173062452972, |
| "grad_norm": 18.63983726501465, |
| "learning_rate": 7.242472266244057e-06, |
| "loss": 3.1152, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.31376975169300225, |
| "grad_norm": 55.87972640991211, |
| "learning_rate": 7.234548335974644e-06, |
| "loss": 3.2871, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.3145221971407073, |
| "grad_norm": 41.152286529541016, |
| "learning_rate": 7.22662440570523e-06, |
| "loss": 2.6465, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.3152746425884123, |
| "grad_norm": 47.55572509765625, |
| "learning_rate": 7.218700475435817e-06, |
| "loss": 3.0742, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.3160270880361174, |
| "grad_norm": 24.060579299926758, |
| "learning_rate": 7.2107765451664034e-06, |
| "loss": 3.0566, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.31677953348382243, |
| "grad_norm": 20.185791015625, |
| "learning_rate": 7.202852614896989e-06, |
| "loss": 3.1035, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.31753197893152746, |
| "grad_norm": 36.20608139038086, |
| "learning_rate": 7.194928684627576e-06, |
| "loss": 3.0059, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.3182844243792325, |
| "grad_norm": 34.9112434387207, |
| "learning_rate": 7.187004754358162e-06, |
| "loss": 2.8789, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.3190368698269376, |
| "grad_norm": 16.00592803955078, |
| "learning_rate": 7.179080824088749e-06, |
| "loss": 2.5615, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.3197893152746426, |
| "grad_norm": 39.905330657958984, |
| "learning_rate": 7.171156893819335e-06, |
| "loss": 2.6543, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.32054176072234764, |
| "grad_norm": 39.642520904541016, |
| "learning_rate": 7.163232963549921e-06, |
| "loss": 2.8311, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.32129420617005267, |
| "grad_norm": 34.66411209106445, |
| "learning_rate": 7.155309033280508e-06, |
| "loss": 3.3242, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.3220466516177577, |
| "grad_norm": 25.36420249938965, |
| "learning_rate": 7.147385103011094e-06, |
| "loss": 2.7295, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.3227990970654628, |
| "grad_norm": 26.767236709594727, |
| "learning_rate": 7.139461172741681e-06, |
| "loss": 2.9414, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.3235515425131678, |
| "grad_norm": 29.003219604492188, |
| "learning_rate": 7.131537242472267e-06, |
| "loss": 2.8301, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.32430398796087284, |
| "grad_norm": 17.377384185791016, |
| "learning_rate": 7.123613312202852e-06, |
| "loss": 2.8506, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.32505643340857787, |
| "grad_norm": 28.27768898010254, |
| "learning_rate": 7.1156893819334394e-06, |
| "loss": 2.8789, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.3258088788562829, |
| "grad_norm": 25.299936294555664, |
| "learning_rate": 7.107765451664026e-06, |
| "loss": 2.6748, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.326561324303988, |
| "grad_norm": 20.38831901550293, |
| "learning_rate": 7.099841521394613e-06, |
| "loss": 2.498, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.327313769751693, |
| "grad_norm": 33.19630432128906, |
| "learning_rate": 7.091917591125199e-06, |
| "loss": 3.2666, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.32806621519939805, |
| "grad_norm": 26.92388153076172, |
| "learning_rate": 7.083993660855785e-06, |
| "loss": 2.6367, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.3288186606471031, |
| "grad_norm": 33.628597259521484, |
| "learning_rate": 7.076069730586371e-06, |
| "loss": 2.7441, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.3295711060948081, |
| "grad_norm": 47.53849411010742, |
| "learning_rate": 7.0681458003169574e-06, |
| "loss": 2.251, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.3303235515425132, |
| "grad_norm": 90.70731353759766, |
| "learning_rate": 7.0602218700475445e-06, |
| "loss": 3.3291, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.3310759969902182, |
| "grad_norm": 90.42861938476562, |
| "learning_rate": 7.052297939778131e-06, |
| "loss": 3.8516, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.33182844243792325, |
| "grad_norm": 27.976673126220703, |
| "learning_rate": 7.044374009508717e-06, |
| "loss": 2.7227, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.3325808878856283, |
| "grad_norm": 32.398536682128906, |
| "learning_rate": 7.036450079239303e-06, |
| "loss": 3.0078, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 16.50065803527832, |
| "learning_rate": 7.02852614896989e-06, |
| "loss": 2.5225, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.3340857787810384, |
| "grad_norm": 17.448528289794922, |
| "learning_rate": 7.020602218700476e-06, |
| "loss": 3.0, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.33483822422874343, |
| "grad_norm": 42.91080093383789, |
| "learning_rate": 7.0126782884310625e-06, |
| "loss": 2.8887, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.33559066967644846, |
| "grad_norm": 43.43963623046875, |
| "learning_rate": 7.004754358161649e-06, |
| "loss": 2.7139, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.3363431151241535, |
| "grad_norm": 21.494733810424805, |
| "learning_rate": 6.996830427892235e-06, |
| "loss": 2.8984, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.3370955605718585, |
| "grad_norm": 19.013137817382812, |
| "learning_rate": 6.988906497622822e-06, |
| "loss": 2.6895, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.3378480060195636, |
| "grad_norm": 59.25289535522461, |
| "learning_rate": 6.980982567353408e-06, |
| "loss": 2.7393, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.33860045146726864, |
| "grad_norm": 35.03165817260742, |
| "learning_rate": 6.973058637083995e-06, |
| "loss": 2.5195, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.33935289691497367, |
| "grad_norm": 39.91790008544922, |
| "learning_rate": 6.9651347068145805e-06, |
| "loss": 3.0195, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.3401053423626787, |
| "grad_norm": 31.990583419799805, |
| "learning_rate": 6.957210776545167e-06, |
| "loss": 3.2422, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.34085778781038373, |
| "grad_norm": 20.681529998779297, |
| "learning_rate": 6.949286846275754e-06, |
| "loss": 2.4619, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.3416102332580888, |
| "grad_norm": 20.630233764648438, |
| "learning_rate": 6.94136291600634e-06, |
| "loss": 2.6602, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.34236267870579384, |
| "grad_norm": 45.344696044921875, |
| "learning_rate": 6.933438985736925e-06, |
| "loss": 2.5986, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.3431151241534989, |
| "grad_norm": 32.19722366333008, |
| "learning_rate": 6.925515055467512e-06, |
| "loss": 2.791, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.3438675696012039, |
| "grad_norm": 26.99687385559082, |
| "learning_rate": 6.9175911251980985e-06, |
| "loss": 2.3369, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.34462001504890893, |
| "grad_norm": 18.11928367614746, |
| "learning_rate": 6.9096671949286855e-06, |
| "loss": 2.8008, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.345372460496614, |
| "grad_norm": 26.664819717407227, |
| "learning_rate": 6.901743264659272e-06, |
| "loss": 3.1514, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.34612490594431905, |
| "grad_norm": 19.563344955444336, |
| "learning_rate": 6.893819334389857e-06, |
| "loss": 2.3389, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.3468773513920241, |
| "grad_norm": 23.58795928955078, |
| "learning_rate": 6.885895404120444e-06, |
| "loss": 2.8252, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.3476297968397291, |
| "grad_norm": 30.469026565551758, |
| "learning_rate": 6.87797147385103e-06, |
| "loss": 2.6855, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.34838224228743414, |
| "grad_norm": 28.226003646850586, |
| "learning_rate": 6.870047543581617e-06, |
| "loss": 2.8213, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.3491346877351392, |
| "grad_norm": 25.442476272583008, |
| "learning_rate": 6.8621236133122035e-06, |
| "loss": 2.2754, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.34988713318284426, |
| "grad_norm": 37.09988021850586, |
| "learning_rate": 6.854199683042789e-06, |
| "loss": 2.9346, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.3506395786305493, |
| "grad_norm": 22.494609832763672, |
| "learning_rate": 6.846275752773376e-06, |
| "loss": 2.9346, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.3513920240782543, |
| "grad_norm": 24.106929779052734, |
| "learning_rate": 6.838351822503962e-06, |
| "loss": 2.8359, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.35214446952595935, |
| "grad_norm": 40.518829345703125, |
| "learning_rate": 6.830427892234549e-06, |
| "loss": 2.3438, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.35289691497366443, |
| "grad_norm": 17.524858474731445, |
| "learning_rate": 6.822503961965135e-06, |
| "loss": 2.2739, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.35364936042136946, |
| "grad_norm": 27.322208404541016, |
| "learning_rate": 6.8145800316957216e-06, |
| "loss": 3.1016, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3544018058690745, |
| "grad_norm": 24.024763107299805, |
| "learning_rate": 6.806656101426308e-06, |
| "loss": 3.291, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.3551542513167795, |
| "grad_norm": 20.18027114868164, |
| "learning_rate": 6.798732171156894e-06, |
| "loss": 2.4229, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.35590669676448455, |
| "grad_norm": 26.4113826751709, |
| "learning_rate": 6.790808240887481e-06, |
| "loss": 2.5786, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.35665914221218964, |
| "grad_norm": 21.726591110229492, |
| "learning_rate": 6.782884310618067e-06, |
| "loss": 2.4438, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.35741158765989467, |
| "grad_norm": 28.190261840820312, |
| "learning_rate": 6.774960380348653e-06, |
| "loss": 3.1172, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.3581640331075997, |
| "grad_norm": 16.553571701049805, |
| "learning_rate": 6.7670364500792396e-06, |
| "loss": 2.7676, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.35891647855530473, |
| "grad_norm": 31.165143966674805, |
| "learning_rate": 6.759112519809827e-06, |
| "loss": 2.6465, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.35966892400300976, |
| "grad_norm": 33.22994613647461, |
| "learning_rate": 6.751188589540413e-06, |
| "loss": 2.7314, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.36042136945071485, |
| "grad_norm": 22.18958854675293, |
| "learning_rate": 6.743264659270999e-06, |
| "loss": 2.7051, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.3611738148984199, |
| "grad_norm": 35.11039733886719, |
| "learning_rate": 6.735340729001585e-06, |
| "loss": 2.9121, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3619262603461249, |
| "grad_norm": 52.86737060546875, |
| "learning_rate": 6.727416798732171e-06, |
| "loss": 3.2871, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.36267870579382994, |
| "grad_norm": 31.692039489746094, |
| "learning_rate": 6.719492868462758e-06, |
| "loss": 2.6289, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.36343115124153497, |
| "grad_norm": 28.485288619995117, |
| "learning_rate": 6.711568938193345e-06, |
| "loss": 2.6816, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.36418359668924005, |
| "grad_norm": 45.19233703613281, |
| "learning_rate": 6.703645007923932e-06, |
| "loss": 2.7295, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.3649360421369451, |
| "grad_norm": 21.647083282470703, |
| "learning_rate": 6.695721077654517e-06, |
| "loss": 2.9443, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.3656884875846501, |
| "grad_norm": 45.4495735168457, |
| "learning_rate": 6.687797147385103e-06, |
| "loss": 3.082, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.36644093303235514, |
| "grad_norm": 28.85840606689453, |
| "learning_rate": 6.67987321711569e-06, |
| "loss": 2.8994, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.3671933784800602, |
| "grad_norm": 34.27227020263672, |
| "learning_rate": 6.671949286846276e-06, |
| "loss": 2.832, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.36794582392776526, |
| "grad_norm": 63.276527404785156, |
| "learning_rate": 6.6640253565768635e-06, |
| "loss": 3.7012, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.3686982693754703, |
| "grad_norm": 56.69678497314453, |
| "learning_rate": 6.656101426307449e-06, |
| "loss": 3.2578, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3694507148231753, |
| "grad_norm": 24.20551109313965, |
| "learning_rate": 6.648177496038035e-06, |
| "loss": 2.3926, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.37020316027088035, |
| "grad_norm": 22.95747947692871, |
| "learning_rate": 6.640253565768622e-06, |
| "loss": 2.4639, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.3709556057185854, |
| "grad_norm": 69.18743896484375, |
| "learning_rate": 6.632329635499208e-06, |
| "loss": 2.7012, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.37170805116629047, |
| "grad_norm": 70.50599670410156, |
| "learning_rate": 6.624405705229795e-06, |
| "loss": 3.1016, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.3724604966139955, |
| "grad_norm": 49.86539077758789, |
| "learning_rate": 6.616481774960381e-06, |
| "loss": 2.7959, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.3732129420617005, |
| "grad_norm": 24.0435848236084, |
| "learning_rate": 6.608557844690967e-06, |
| "loss": 2.502, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.37396538750940556, |
| "grad_norm": 21.732086181640625, |
| "learning_rate": 6.600633914421554e-06, |
| "loss": 3.2559, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.3747178329571106, |
| "grad_norm": 36.107078552246094, |
| "learning_rate": 6.59270998415214e-06, |
| "loss": 2.8994, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.37547027840481567, |
| "grad_norm": 56.2667236328125, |
| "learning_rate": 6.584786053882727e-06, |
| "loss": 2.8262, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.3762227238525207, |
| "grad_norm": 39.14205551147461, |
| "learning_rate": 6.5768621236133124e-06, |
| "loss": 2.4551, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.37697516930022573, |
| "grad_norm": 19.946632385253906, |
| "learning_rate": 6.568938193343899e-06, |
| "loss": 2.6494, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.37772761474793076, |
| "grad_norm": 22.237207412719727, |
| "learning_rate": 6.561014263074486e-06, |
| "loss": 2.1465, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.3784800601956358, |
| "grad_norm": 32.88526916503906, |
| "learning_rate": 6.553090332805072e-06, |
| "loss": 2.4648, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.3792325056433409, |
| "grad_norm": 17.94049644470215, |
| "learning_rate": 6.545166402535659e-06, |
| "loss": 2.2949, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.3799849510910459, |
| "grad_norm": 53.39393615722656, |
| "learning_rate": 6.537242472266244e-06, |
| "loss": 3.2441, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.38073739653875094, |
| "grad_norm": 28.436603546142578, |
| "learning_rate": 6.5293185419968304e-06, |
| "loss": 2.377, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.38148984198645597, |
| "grad_norm": 37.433231353759766, |
| "learning_rate": 6.5213946117274175e-06, |
| "loss": 2.5381, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.382242287434161, |
| "grad_norm": 25.794477462768555, |
| "learning_rate": 6.513470681458004e-06, |
| "loss": 2.3105, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.3829947328818661, |
| "grad_norm": 49.73271560668945, |
| "learning_rate": 6.505546751188591e-06, |
| "loss": 3.2764, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.3837471783295711, |
| "grad_norm": 41.859580993652344, |
| "learning_rate": 6.497622820919176e-06, |
| "loss": 2.9141, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.38449962377727614, |
| "grad_norm": 47.98792266845703, |
| "learning_rate": 6.489698890649762e-06, |
| "loss": 2.7812, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.3852520692249812, |
| "grad_norm": 23.1630802154541, |
| "learning_rate": 6.481774960380349e-06, |
| "loss": 2.377, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.3860045146726862, |
| "grad_norm": 41.17527770996094, |
| "learning_rate": 6.4738510301109355e-06, |
| "loss": 2.959, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.3867569601203913, |
| "grad_norm": 35.60078048706055, |
| "learning_rate": 6.4659270998415225e-06, |
| "loss": 2.6143, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.3875094055680963, |
| "grad_norm": 26.28261375427246, |
| "learning_rate": 6.458003169572108e-06, |
| "loss": 2.8418, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.38826185101580135, |
| "grad_norm": 23.71259307861328, |
| "learning_rate": 6.450079239302695e-06, |
| "loss": 2.8457, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.3890142964635064, |
| "grad_norm": 26.641910552978516, |
| "learning_rate": 6.442155309033281e-06, |
| "loss": 2.6387, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.3897667419112114, |
| "grad_norm": 49.22462844848633, |
| "learning_rate": 6.434231378763868e-06, |
| "loss": 2.8408, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.3905191873589165, |
| "grad_norm": 27.4183406829834, |
| "learning_rate": 6.426307448494454e-06, |
| "loss": 2.168, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.3912716328066215, |
| "grad_norm": 21.35692596435547, |
| "learning_rate": 6.41838351822504e-06, |
| "loss": 2.6094, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.39202407825432656, |
| "grad_norm": 23.8715763092041, |
| "learning_rate": 6.410459587955627e-06, |
| "loss": 2.5928, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.3927765237020316, |
| "grad_norm": 76.4928970336914, |
| "learning_rate": 6.402535657686213e-06, |
| "loss": 3.166, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.3935289691497366, |
| "grad_norm": 44.17985916137695, |
| "learning_rate": 6.3946117274168e-06, |
| "loss": 2.8154, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.3942814145974417, |
| "grad_norm": 21.460453033447266, |
| "learning_rate": 6.386687797147385e-06, |
| "loss": 2.8242, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.39503386004514673, |
| "grad_norm": 28.290786743164062, |
| "learning_rate": 6.3787638668779715e-06, |
| "loss": 2.4619, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.39578630549285176, |
| "grad_norm": 21.564462661743164, |
| "learning_rate": 6.3708399366085585e-06, |
| "loss": 2.665, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.3965387509405568, |
| "grad_norm": 37.88861083984375, |
| "learning_rate": 6.362916006339145e-06, |
| "loss": 2.8418, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.3972911963882618, |
| "grad_norm": 24.181049346923828, |
| "learning_rate": 6.354992076069732e-06, |
| "loss": 2.3916, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.3980436418359669, |
| "grad_norm": 27.649242401123047, |
| "learning_rate": 6.347068145800317e-06, |
| "loss": 2.7217, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.39879608728367194, |
| "grad_norm": 26.146526336669922, |
| "learning_rate": 6.339144215530903e-06, |
| "loss": 3.3066, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.39954853273137697, |
| "grad_norm": 24.51007843017578, |
| "learning_rate": 6.33122028526149e-06, |
| "loss": 2.6338, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.400300978179082, |
| "grad_norm": 44.78768539428711, |
| "learning_rate": 6.3232963549920765e-06, |
| "loss": 2.9102, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.40105342362678703, |
| "grad_norm": 31.947906494140625, |
| "learning_rate": 6.3153724247226636e-06, |
| "loss": 2.7119, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.4018058690744921, |
| "grad_norm": 20.330984115600586, |
| "learning_rate": 6.307448494453249e-06, |
| "loss": 2.332, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.40255831452219715, |
| "grad_norm": 18.617891311645508, |
| "learning_rate": 6.299524564183835e-06, |
| "loss": 2.416, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.4033107599699022, |
| "grad_norm": 31.218690872192383, |
| "learning_rate": 6.291600633914422e-06, |
| "loss": 2.6289, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.4040632054176072, |
| "grad_norm": 40.97818374633789, |
| "learning_rate": 6.283676703645008e-06, |
| "loss": 2.3281, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.40481565086531224, |
| "grad_norm": 44.885047912597656, |
| "learning_rate": 6.275752773375595e-06, |
| "loss": 3.1621, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.4055680963130173, |
| "grad_norm": 28.18929100036621, |
| "learning_rate": 6.267828843106181e-06, |
| "loss": 2.3223, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.40632054176072235, |
| "grad_norm": 24.03481674194336, |
| "learning_rate": 6.259904912836767e-06, |
| "loss": 2.5801, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.4070729872084274, |
| "grad_norm": 21.83283805847168, |
| "learning_rate": 6.251980982567354e-06, |
| "loss": 2.6436, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.4078254326561324, |
| "grad_norm": 19.02834129333496, |
| "learning_rate": 6.24405705229794e-06, |
| "loss": 2.5078, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.40857787810383744, |
| "grad_norm": 27.406829833984375, |
| "learning_rate": 6.236133122028527e-06, |
| "loss": 2.6074, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.40933032355154253, |
| "grad_norm": 51.72572708129883, |
| "learning_rate": 6.2282091917591125e-06, |
| "loss": 2.7842, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.41008276899924756, |
| "grad_norm": 51.11612319946289, |
| "learning_rate": 6.220285261489699e-06, |
| "loss": 2.6377, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.4108352144469526, |
| "grad_norm": 27.588716506958008, |
| "learning_rate": 6.212361331220286e-06, |
| "loss": 2.5391, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.4115876598946576, |
| "grad_norm": 39.39955139160156, |
| "learning_rate": 6.204437400950872e-06, |
| "loss": 3.208, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.4123401053423627, |
| "grad_norm": 15.496928215026855, |
| "learning_rate": 6.196513470681459e-06, |
| "loss": 2.146, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.41309255079006774, |
| "grad_norm": 18.776878356933594, |
| "learning_rate": 6.188589540412044e-06, |
| "loss": 2.5137, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.41384499623777277, |
| "grad_norm": 29.392667770385742, |
| "learning_rate": 6.180665610142631e-06, |
| "loss": 2.5322, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.4145974416854778, |
| "grad_norm": 21.60759925842285, |
| "learning_rate": 6.172741679873218e-06, |
| "loss": 2.6914, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.4153498871331828, |
| "grad_norm": 27.371747970581055, |
| "learning_rate": 6.164817749603804e-06, |
| "loss": 2.6299, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.4161023325808879, |
| "grad_norm": 18.83663558959961, |
| "learning_rate": 6.156893819334391e-06, |
| "loss": 2.1299, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.41685477802859294, |
| "grad_norm": 20.581167221069336, |
| "learning_rate": 6.148969889064976e-06, |
| "loss": 2.3896, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.417607223476298, |
| "grad_norm": 28.915456771850586, |
| "learning_rate": 6.141045958795563e-06, |
| "loss": 2.8584, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.418359668924003, |
| "grad_norm": 27.82697105407715, |
| "learning_rate": 6.133122028526149e-06, |
| "loss": 2.2695, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.41911211437170803, |
| "grad_norm": 35.71852111816406, |
| "learning_rate": 6.1251980982567364e-06, |
| "loss": 2.6367, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.4198645598194131, |
| "grad_norm": 30.599428176879883, |
| "learning_rate": 6.117274167987323e-06, |
| "loss": 3.1572, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.42061700526711815, |
| "grad_norm": 33.88134002685547, |
| "learning_rate": 6.109350237717908e-06, |
| "loss": 2.3701, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.4213694507148232, |
| "grad_norm": 35.95499038696289, |
| "learning_rate": 6.101426307448495e-06, |
| "loss": 2.7178, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.4221218961625282, |
| "grad_norm": 25.86326026916504, |
| "learning_rate": 6.093502377179081e-06, |
| "loss": 2.1943, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.42287434161023324, |
| "grad_norm": 51.53483200073242, |
| "learning_rate": 6.085578446909668e-06, |
| "loss": 3.207, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.4236267870579383, |
| "grad_norm": 27.713850021362305, |
| "learning_rate": 6.0776545166402544e-06, |
| "loss": 2.7949, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.42437923250564336, |
| "grad_norm": 31.49678611755371, |
| "learning_rate": 6.06973058637084e-06, |
| "loss": 2.6533, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.4251316779533484, |
| "grad_norm": 32.17995071411133, |
| "learning_rate": 6.061806656101427e-06, |
| "loss": 2.8281, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.4258841234010534, |
| "grad_norm": 27.868188858032227, |
| "learning_rate": 6.053882725832013e-06, |
| "loss": 2.1934, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.42663656884875845, |
| "grad_norm": 20.03648567199707, |
| "learning_rate": 6.0459587955626e-06, |
| "loss": 2.6455, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.42738901429646353, |
| "grad_norm": 32.74396896362305, |
| "learning_rate": 6.038034865293186e-06, |
| "loss": 2.5557, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.42814145974416856, |
| "grad_norm": 21.329883575439453, |
| "learning_rate": 6.030110935023772e-06, |
| "loss": 2.6055, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.4288939051918736, |
| "grad_norm": 28.882789611816406, |
| "learning_rate": 6.022187004754359e-06, |
| "loss": 2.8438, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4296463506395786, |
| "grad_norm": 31.306201934814453, |
| "learning_rate": 6.014263074484945e-06, |
| "loss": 3.2129, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.43039879608728365, |
| "grad_norm": 45.59975814819336, |
| "learning_rate": 6.006339144215532e-06, |
| "loss": 2.5244, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.43115124153498874, |
| "grad_norm": 34.31132507324219, |
| "learning_rate": 5.998415213946118e-06, |
| "loss": 2.8154, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.43190368698269377, |
| "grad_norm": 34.59330749511719, |
| "learning_rate": 5.990491283676703e-06, |
| "loss": 2.6465, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.4326561324303988, |
| "grad_norm": 27.188886642456055, |
| "learning_rate": 5.9825673534072905e-06, |
| "loss": 2.9131, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.43340857787810383, |
| "grad_norm": 25.449323654174805, |
| "learning_rate": 5.974643423137877e-06, |
| "loss": 2.7393, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.43416102332580886, |
| "grad_norm": 21.703617095947266, |
| "learning_rate": 5.966719492868464e-06, |
| "loss": 2.7178, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.43491346877351394, |
| "grad_norm": 16.706140518188477, |
| "learning_rate": 5.95879556259905e-06, |
| "loss": 2.0869, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.435665914221219, |
| "grad_norm": 16.854990005493164, |
| "learning_rate": 5.950871632329635e-06, |
| "loss": 2.4287, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.436418359668924, |
| "grad_norm": 36.00377655029297, |
| "learning_rate": 5.942947702060222e-06, |
| "loss": 2.8838, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.43717080511662904, |
| "grad_norm": 44.43525695800781, |
| "learning_rate": 5.9350237717908085e-06, |
| "loss": 2.4482, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.43792325056433407, |
| "grad_norm": 24.75007438659668, |
| "learning_rate": 5.9270998415213955e-06, |
| "loss": 2.5791, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.43867569601203915, |
| "grad_norm": 29.451013565063477, |
| "learning_rate": 5.919175911251982e-06, |
| "loss": 2.5898, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.4394281414597442, |
| "grad_norm": 26.943174362182617, |
| "learning_rate": 5.911251980982568e-06, |
| "loss": 2.542, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.4401805869074492, |
| "grad_norm": 23.72393226623535, |
| "learning_rate": 5.903328050713154e-06, |
| "loss": 2.2354, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.44093303235515424, |
| "grad_norm": 31.409198760986328, |
| "learning_rate": 5.89540412044374e-06, |
| "loss": 2.5586, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.44168547780285927, |
| "grad_norm": 44.14702224731445, |
| "learning_rate": 5.887480190174327e-06, |
| "loss": 2.4023, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.44243792325056436, |
| "grad_norm": 33.159690856933594, |
| "learning_rate": 5.8795562599049135e-06, |
| "loss": 2.4902, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.4431903686982694, |
| "grad_norm": 33.96577072143555, |
| "learning_rate": 5.8716323296355e-06, |
| "loss": 2.9766, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.4439428141459744, |
| "grad_norm": 26.576927185058594, |
| "learning_rate": 5.863708399366086e-06, |
| "loss": 2.541, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.44469525959367945, |
| "grad_norm": 20.750225067138672, |
| "learning_rate": 5.855784469096673e-06, |
| "loss": 2.2998, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.4454477050413845, |
| "grad_norm": 32.86729431152344, |
| "learning_rate": 5.847860538827259e-06, |
| "loss": 2.498, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.44620015048908956, |
| "grad_norm": 26.496009826660156, |
| "learning_rate": 5.839936608557845e-06, |
| "loss": 2.1748, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.4469525959367946, |
| "grad_norm": 28.07709312438965, |
| "learning_rate": 5.8320126782884315e-06, |
| "loss": 2.8682, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.4477050413844996, |
| "grad_norm": 21.558242797851562, |
| "learning_rate": 5.824088748019018e-06, |
| "loss": 2.6094, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.44845748683220465, |
| "grad_norm": 46.38016128540039, |
| "learning_rate": 5.816164817749605e-06, |
| "loss": 2.9346, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.4492099322799097, |
| "grad_norm": 61.013710021972656, |
| "learning_rate": 5.808240887480191e-06, |
| "loss": 2.7021, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.44996237772761477, |
| "grad_norm": 46.63782501220703, |
| "learning_rate": 5.800316957210776e-06, |
| "loss": 3.0039, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.4507148231753198, |
| "grad_norm": 29.919158935546875, |
| "learning_rate": 5.792393026941363e-06, |
| "loss": 2.8818, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.45146726862302483, |
| "grad_norm": 28.168922424316406, |
| "learning_rate": 5.7844690966719495e-06, |
| "loss": 3.1172, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.45221971407072986, |
| "grad_norm": 39.816036224365234, |
| "learning_rate": 5.7765451664025366e-06, |
| "loss": 2.4238, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.4529721595184349, |
| "grad_norm": 21.569623947143555, |
| "learning_rate": 5.768621236133123e-06, |
| "loss": 2.3555, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.45372460496614, |
| "grad_norm": 19.12834358215332, |
| "learning_rate": 5.760697305863708e-06, |
| "loss": 2.7578, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.454477050413845, |
| "grad_norm": 35.17719268798828, |
| "learning_rate": 5.752773375594295e-06, |
| "loss": 2.8389, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.45522949586155004, |
| "grad_norm": 21.667118072509766, |
| "learning_rate": 5.744849445324881e-06, |
| "loss": 2.6777, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.45598194130925507, |
| "grad_norm": 43.21159362792969, |
| "learning_rate": 5.736925515055468e-06, |
| "loss": 2.5498, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.4567343867569601, |
| "grad_norm": 27.8854923248291, |
| "learning_rate": 5.7290015847860546e-06, |
| "loss": 2.793, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.4574868322046652, |
| "grad_norm": 24.826038360595703, |
| "learning_rate": 5.72107765451664e-06, |
| "loss": 2.4551, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.4582392776523702, |
| "grad_norm": 39.36992263793945, |
| "learning_rate": 5.713153724247227e-06, |
| "loss": 2.7695, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.45899172310007524, |
| "grad_norm": 23.196136474609375, |
| "learning_rate": 5.705229793977813e-06, |
| "loss": 2.9688, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.4597441685477803, |
| "grad_norm": 46.75358581542969, |
| "learning_rate": 5.6973058637084e-06, |
| "loss": 2.4863, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.4604966139954853, |
| "grad_norm": 26.586414337158203, |
| "learning_rate": 5.689381933438986e-06, |
| "loss": 2.6416, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.4612490594431904, |
| "grad_norm": 21.940500259399414, |
| "learning_rate": 5.681458003169572e-06, |
| "loss": 2.6592, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.4620015048908954, |
| "grad_norm": 20.193708419799805, |
| "learning_rate": 5.673534072900159e-06, |
| "loss": 3.4102, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.46275395033860045, |
| "grad_norm": 20.76471519470215, |
| "learning_rate": 5.665610142630745e-06, |
| "loss": 2.6309, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.4635063957863055, |
| "grad_norm": 38.27462387084961, |
| "learning_rate": 5.657686212361332e-06, |
| "loss": 2.4277, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.4642588412340105, |
| "grad_norm": 14.277875900268555, |
| "learning_rate": 5.649762282091918e-06, |
| "loss": 2.7598, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.4650112866817156, |
| "grad_norm": 23.343847274780273, |
| "learning_rate": 5.6418383518225035e-06, |
| "loss": 2.6494, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.4657637321294206, |
| "grad_norm": 21.005477905273438, |
| "learning_rate": 5.6339144215530906e-06, |
| "loss": 2.8164, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.46651617757712566, |
| "grad_norm": 43.00020217895508, |
| "learning_rate": 5.625990491283677e-06, |
| "loss": 2.5244, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.4672686230248307, |
| "grad_norm": 35.95077896118164, |
| "learning_rate": 5.618066561014264e-06, |
| "loss": 2.3779, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.4680210684725357, |
| "grad_norm": 38.55207443237305, |
| "learning_rate": 5.61014263074485e-06, |
| "loss": 2.9541, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.4687735139202408, |
| "grad_norm": 28.089881896972656, |
| "learning_rate": 5.602218700475436e-06, |
| "loss": 2.4502, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.46952595936794583, |
| "grad_norm": 22.415802001953125, |
| "learning_rate": 5.594294770206022e-06, |
| "loss": 2.5527, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.47027840481565086, |
| "grad_norm": 20.582796096801758, |
| "learning_rate": 5.586370839936609e-06, |
| "loss": 2.707, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.4710308502633559, |
| "grad_norm": 21.10422134399414, |
| "learning_rate": 5.578446909667196e-06, |
| "loss": 2.6953, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.4717832957110609, |
| "grad_norm": 41.894432067871094, |
| "learning_rate": 5.570522979397782e-06, |
| "loss": 2.8154, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.472535741158766, |
| "grad_norm": 24.479263305664062, |
| "learning_rate": 5.562599049128368e-06, |
| "loss": 2.292, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.47328818660647104, |
| "grad_norm": 16.830801010131836, |
| "learning_rate": 5.554675118858954e-06, |
| "loss": 2.0449, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.47404063205417607, |
| "grad_norm": 17.007564544677734, |
| "learning_rate": 5.546751188589541e-06, |
| "loss": 1.9434, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4747930775018811, |
| "grad_norm": 16.903120040893555, |
| "learning_rate": 5.5388272583201274e-06, |
| "loss": 1.9873, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.47554552294958613, |
| "grad_norm": 22.137493133544922, |
| "learning_rate": 5.5309033280507145e-06, |
| "loss": 2.7607, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.4762979683972912, |
| "grad_norm": 21.33124542236328, |
| "learning_rate": 5.5229793977813e-06, |
| "loss": 2.1914, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.47705041384499625, |
| "grad_norm": 55.744205474853516, |
| "learning_rate": 5.515055467511886e-06, |
| "loss": 3.4844, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.4778028592927013, |
| "grad_norm": 41.97389602661133, |
| "learning_rate": 5.507131537242473e-06, |
| "loss": 2.457, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.4785553047404063, |
| "grad_norm": 28.83306884765625, |
| "learning_rate": 5.499207606973059e-06, |
| "loss": 2.375, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.47930775018811134, |
| "grad_norm": 31.097944259643555, |
| "learning_rate": 5.491283676703646e-06, |
| "loss": 3.2861, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.4800601956358164, |
| "grad_norm": 20.576139450073242, |
| "learning_rate": 5.483359746434232e-06, |
| "loss": 2.5293, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.48081264108352145, |
| "grad_norm": 21.134475708007812, |
| "learning_rate": 5.475435816164818e-06, |
| "loss": 2.4531, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.4815650865312265, |
| "grad_norm": 29.152189254760742, |
| "learning_rate": 5.467511885895405e-06, |
| "loss": 2.6094, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4823175319789315, |
| "grad_norm": 19.158748626708984, |
| "learning_rate": 5.459587955625991e-06, |
| "loss": 2.5801, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.48306997742663654, |
| "grad_norm": 50.753875732421875, |
| "learning_rate": 5.451664025356578e-06, |
| "loss": 2.7539, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.48382242287434163, |
| "grad_norm": 44.789730072021484, |
| "learning_rate": 5.4437400950871634e-06, |
| "loss": 2.6309, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.48457486832204666, |
| "grad_norm": 14.936765670776367, |
| "learning_rate": 5.43581616481775e-06, |
| "loss": 2.7637, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.4853273137697517, |
| "grad_norm": 19.86391830444336, |
| "learning_rate": 5.427892234548337e-06, |
| "loss": 2.5547, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.4860797592174567, |
| "grad_norm": 19.42682456970215, |
| "learning_rate": 5.419968304278923e-06, |
| "loss": 2.1025, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.48683220466516175, |
| "grad_norm": 51.899208068847656, |
| "learning_rate": 5.41204437400951e-06, |
| "loss": 2.3691, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.48758465011286684, |
| "grad_norm": 40.9661750793457, |
| "learning_rate": 5.404120443740095e-06, |
| "loss": 2.5078, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.48833709556057187, |
| "grad_norm": 22.135168075561523, |
| "learning_rate": 5.3961965134706814e-06, |
| "loss": 2.019, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.4890895410082769, |
| "grad_norm": 27.344635009765625, |
| "learning_rate": 5.3882725832012685e-06, |
| "loss": 2.835, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.4898419864559819, |
| "grad_norm": 42.776100158691406, |
| "learning_rate": 5.380348652931855e-06, |
| "loss": 2.9199, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.49059443190368696, |
| "grad_norm": 34.481712341308594, |
| "learning_rate": 5.372424722662442e-06, |
| "loss": 3.0693, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.49134687735139204, |
| "grad_norm": 57.45832061767578, |
| "learning_rate": 5.364500792393027e-06, |
| "loss": 2.6128, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.49209932279909707, |
| "grad_norm": 37.97975540161133, |
| "learning_rate": 5.356576862123613e-06, |
| "loss": 2.4619, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.4928517682468021, |
| "grad_norm": 34.988643646240234, |
| "learning_rate": 5.3486529318542e-06, |
| "loss": 2.1592, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.49360421369450713, |
| "grad_norm": 21.3216552734375, |
| "learning_rate": 5.3407290015847865e-06, |
| "loss": 2.5303, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.49435665914221216, |
| "grad_norm": 21.58885955810547, |
| "learning_rate": 5.3328050713153735e-06, |
| "loss": 2.0869, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.49510910458991725, |
| "grad_norm": 41.567867279052734, |
| "learning_rate": 5.324881141045959e-06, |
| "loss": 2.9033, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.4958615500376223, |
| "grad_norm": 52.82680892944336, |
| "learning_rate": 5.316957210776545e-06, |
| "loss": 2.5273, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.4966139954853273, |
| "grad_norm": 35.885414123535156, |
| "learning_rate": 5.309033280507132e-06, |
| "loss": 2.915, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.49736644093303234, |
| "grad_norm": 22.456951141357422, |
| "learning_rate": 5.301109350237718e-06, |
| "loss": 2.0615, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.49811888638073737, |
| "grad_norm": 18.177186965942383, |
| "learning_rate": 5.293185419968305e-06, |
| "loss": 2.1211, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.49887133182844245, |
| "grad_norm": 41.87432098388672, |
| "learning_rate": 5.285261489698891e-06, |
| "loss": 2.4551, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.4996237772761475, |
| "grad_norm": 33.8329963684082, |
| "learning_rate": 5.277337559429478e-06, |
| "loss": 2.9609, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.5003762227238525, |
| "grad_norm": 32.781150817871094, |
| "learning_rate": 5.269413629160064e-06, |
| "loss": 2.5273, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.5011286681715575, |
| "grad_norm": 19.160852432250977, |
| "learning_rate": 5.26148969889065e-06, |
| "loss": 2.7012, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.5018811136192626, |
| "grad_norm": 23.525550842285156, |
| "learning_rate": 5.253565768621236e-06, |
| "loss": 2.6514, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.5026335590669676, |
| "grad_norm": 23.408510208129883, |
| "learning_rate": 5.2456418383518225e-06, |
| "loss": 2.8096, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.5033860045146726, |
| "grad_norm": 34.85614776611328, |
| "learning_rate": 5.2377179080824095e-06, |
| "loss": 2.1074, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.5041384499623778, |
| "grad_norm": 22.156509399414062, |
| "learning_rate": 5.229793977812996e-06, |
| "loss": 2.4199, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5048908954100828, |
| "grad_norm": 24.354738235473633, |
| "learning_rate": 5.221870047543583e-06, |
| "loss": 2.5723, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.5056433408577878, |
| "grad_norm": 18.33073616027832, |
| "learning_rate": 5.213946117274168e-06, |
| "loss": 2.585, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.5063957863054929, |
| "grad_norm": 34.77322769165039, |
| "learning_rate": 5.206022187004754e-06, |
| "loss": 2.8555, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.5071482317531979, |
| "grad_norm": 15.46507453918457, |
| "learning_rate": 5.198098256735341e-06, |
| "loss": 1.9648, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.5079006772009029, |
| "grad_norm": 20.744842529296875, |
| "learning_rate": 5.1901743264659275e-06, |
| "loss": 2.2227, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.508653122648608, |
| "grad_norm": 26.760475158691406, |
| "learning_rate": 5.182250396196515e-06, |
| "loss": 2.6406, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.509405568096313, |
| "grad_norm": 47.131629943847656, |
| "learning_rate": 5.1743264659271e-06, |
| "loss": 2.5049, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.510158013544018, |
| "grad_norm": 40.88092041015625, |
| "learning_rate": 5.166402535657686e-06, |
| "loss": 2.7178, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.510910458991723, |
| "grad_norm": 44.75162887573242, |
| "learning_rate": 5.158478605388273e-06, |
| "loss": 2.5703, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.5116629044394282, |
| "grad_norm": 24.004650115966797, |
| "learning_rate": 5.150554675118859e-06, |
| "loss": 2.5869, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5124153498871332, |
| "grad_norm": 29.9807071685791, |
| "learning_rate": 5.142630744849446e-06, |
| "loss": 2.7354, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.5131677953348383, |
| "grad_norm": 26.726299285888672, |
| "learning_rate": 5.134706814580032e-06, |
| "loss": 2.4414, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.5139202407825433, |
| "grad_norm": 23.798734664916992, |
| "learning_rate": 5.126782884310618e-06, |
| "loss": 2.8223, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.5146726862302483, |
| "grad_norm": 45.92054748535156, |
| "learning_rate": 5.118858954041205e-06, |
| "loss": 2.998, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.5154251316779533, |
| "grad_norm": 40.293460845947266, |
| "learning_rate": 5.110935023771791e-06, |
| "loss": 2.1846, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.5161775771256584, |
| "grad_norm": 26.087142944335938, |
| "learning_rate": 5.103011093502378e-06, |
| "loss": 2.582, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.5169300225733634, |
| "grad_norm": 33.69111251831055, |
| "learning_rate": 5.0950871632329636e-06, |
| "loss": 2.2061, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.5176824680210684, |
| "grad_norm": 20.17823600769043, |
| "learning_rate": 5.08716323296355e-06, |
| "loss": 2.2803, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.5184349134687735, |
| "grad_norm": 26.224407196044922, |
| "learning_rate": 5.079239302694137e-06, |
| "loss": 2.6611, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.5191873589164786, |
| "grad_norm": 21.59946632385254, |
| "learning_rate": 5.071315372424723e-06, |
| "loss": 2.8125, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.5199398043641836, |
| "grad_norm": 16.415241241455078, |
| "learning_rate": 5.06339144215531e-06, |
| "loss": 2.0098, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.5206922498118887, |
| "grad_norm": 69.1198501586914, |
| "learning_rate": 5.055467511885895e-06, |
| "loss": 2.7305, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.5214446952595937, |
| "grad_norm": 52.50381851196289, |
| "learning_rate": 5.0475435816164816e-06, |
| "loss": 2.7988, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.5221971407072987, |
| "grad_norm": 43.8442268371582, |
| "learning_rate": 5.039619651347069e-06, |
| "loss": 2.7412, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.5229495861550038, |
| "grad_norm": 23.745641708374023, |
| "learning_rate": 5.031695721077655e-06, |
| "loss": 2.2217, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.5237020316027088, |
| "grad_norm": 21.59581184387207, |
| "learning_rate": 5.023771790808242e-06, |
| "loss": 2.2441, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.5244544770504138, |
| "grad_norm": 41.607452392578125, |
| "learning_rate": 5.015847860538827e-06, |
| "loss": 2.2539, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.5252069224981188, |
| "grad_norm": 44.614837646484375, |
| "learning_rate": 5.007923930269414e-06, |
| "loss": 2.8076, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.5259593679458239, |
| "grad_norm": 31.962142944335938, |
| "learning_rate": 5e-06, |
| "loss": 2.1489, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.526711813393529, |
| "grad_norm": 43.057865142822266, |
| "learning_rate": 4.992076069730587e-06, |
| "loss": 3.002, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.527464258841234, |
| "grad_norm": 31.688989639282227, |
| "learning_rate": 4.984152139461173e-06, |
| "loss": 2.0718, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.5282167042889391, |
| "grad_norm": 35.44694900512695, |
| "learning_rate": 4.97622820919176e-06, |
| "loss": 2.2939, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.5289691497366441, |
| "grad_norm": 54.46326446533203, |
| "learning_rate": 4.968304278922346e-06, |
| "loss": 2.6084, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.5297215951843491, |
| "grad_norm": 22.62406349182129, |
| "learning_rate": 4.960380348652932e-06, |
| "loss": 2.6279, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.5304740406320542, |
| "grad_norm": 37.78304672241211, |
| "learning_rate": 4.952456418383519e-06, |
| "loss": 2.7197, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.5312264860797592, |
| "grad_norm": 29.49541473388672, |
| "learning_rate": 4.944532488114105e-06, |
| "loss": 2.5576, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.5319789315274642, |
| "grad_norm": 19.669097900390625, |
| "learning_rate": 4.936608557844692e-06, |
| "loss": 2.8604, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.5327313769751693, |
| "grad_norm": 38.62272262573242, |
| "learning_rate": 4.928684627575278e-06, |
| "loss": 2.4404, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.5334838224228743, |
| "grad_norm": 21.684125900268555, |
| "learning_rate": 4.920760697305864e-06, |
| "loss": 2.8936, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.5342362678705794, |
| "grad_norm": 17.511150360107422, |
| "learning_rate": 4.912836767036451e-06, |
| "loss": 2.5947, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5349887133182845, |
| "grad_norm": 33.95096206665039, |
| "learning_rate": 4.904912836767036e-06, |
| "loss": 2.5986, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.5357411587659895, |
| "grad_norm": 16.671566009521484, |
| "learning_rate": 4.8969889064976235e-06, |
| "loss": 2.8828, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.5364936042136945, |
| "grad_norm": 21.556991577148438, |
| "learning_rate": 4.88906497622821e-06, |
| "loss": 2.5596, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.5372460496613995, |
| "grad_norm": 19.081218719482422, |
| "learning_rate": 4.881141045958796e-06, |
| "loss": 2.6641, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.5379984951091046, |
| "grad_norm": 19.19892120361328, |
| "learning_rate": 4.873217115689383e-06, |
| "loss": 2.957, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.5387509405568096, |
| "grad_norm": 20.48495101928711, |
| "learning_rate": 4.865293185419968e-06, |
| "loss": 2.915, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.5395033860045146, |
| "grad_norm": 16.29701805114746, |
| "learning_rate": 4.857369255150555e-06, |
| "loss": 2.5693, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.5402558314522197, |
| "grad_norm": 20.534658432006836, |
| "learning_rate": 4.8494453248811415e-06, |
| "loss": 2.3193, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.5410082768999247, |
| "grad_norm": 20.344722747802734, |
| "learning_rate": 4.841521394611728e-06, |
| "loss": 2.7607, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.5417607223476298, |
| "grad_norm": 50.48695755004883, |
| "learning_rate": 4.833597464342314e-06, |
| "loss": 2.8174, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5425131677953349, |
| "grad_norm": 21.673259735107422, |
| "learning_rate": 4.825673534072901e-06, |
| "loss": 2.1982, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.5432656132430399, |
| "grad_norm": 20.62663459777832, |
| "learning_rate": 4.817749603803487e-06, |
| "loss": 2.2461, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.5440180586907449, |
| "grad_norm": 29.592178344726562, |
| "learning_rate": 4.809825673534073e-06, |
| "loss": 3.083, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.54477050413845, |
| "grad_norm": 28.092294692993164, |
| "learning_rate": 4.8019017432646595e-06, |
| "loss": 2.4023, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.545522949586155, |
| "grad_norm": 26.026023864746094, |
| "learning_rate": 4.793977812995246e-06, |
| "loss": 2.79, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.54627539503386, |
| "grad_norm": 34.060489654541016, |
| "learning_rate": 4.786053882725833e-06, |
| "loss": 2.8916, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.547027840481565, |
| "grad_norm": 23.83104705810547, |
| "learning_rate": 4.778129952456419e-06, |
| "loss": 2.4453, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.5477802859292701, |
| "grad_norm": 27.824453353881836, |
| "learning_rate": 4.770206022187005e-06, |
| "loss": 3.0391, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.5485327313769752, |
| "grad_norm": 46.81410598754883, |
| "learning_rate": 4.762282091917591e-06, |
| "loss": 2.4746, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.5492851768246803, |
| "grad_norm": 30.927135467529297, |
| "learning_rate": 4.7543581616481775e-06, |
| "loss": 2.6143, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5500376222723853, |
| "grad_norm": 23.535202026367188, |
| "learning_rate": 4.7464342313787645e-06, |
| "loss": 2.3213, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.5507900677200903, |
| "grad_norm": 21.322975158691406, |
| "learning_rate": 4.738510301109351e-06, |
| "loss": 2.4536, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.5515425131677953, |
| "grad_norm": 22.54486656188965, |
| "learning_rate": 4.730586370839937e-06, |
| "loss": 2.623, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.5522949586155004, |
| "grad_norm": 26.132186889648438, |
| "learning_rate": 4.722662440570523e-06, |
| "loss": 1.9863, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.5530474040632054, |
| "grad_norm": 36.999229431152344, |
| "learning_rate": 4.714738510301109e-06, |
| "loss": 2.8018, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.5537998495109104, |
| "grad_norm": 55.02450180053711, |
| "learning_rate": 4.706814580031696e-06, |
| "loss": 2.1533, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.5545522949586155, |
| "grad_norm": 27.328872680664062, |
| "learning_rate": 4.6988906497622825e-06, |
| "loss": 2.6514, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.5553047404063205, |
| "grad_norm": 29.182422637939453, |
| "learning_rate": 4.690966719492869e-06, |
| "loss": 2.9434, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.5560571858540256, |
| "grad_norm": 23.462890625, |
| "learning_rate": 4.683042789223456e-06, |
| "loss": 2.5322, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.5568096313017307, |
| "grad_norm": 25.650156021118164, |
| "learning_rate": 4.675118858954041e-06, |
| "loss": 3.1553, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5575620767494357, |
| "grad_norm": 36.816558837890625, |
| "learning_rate": 4.667194928684628e-06, |
| "loss": 2.9004, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.5583145221971407, |
| "grad_norm": 40.148284912109375, |
| "learning_rate": 4.659270998415214e-06, |
| "loss": 2.457, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.5590669676448458, |
| "grad_norm": 20.173446655273438, |
| "learning_rate": 4.6513470681458005e-06, |
| "loss": 2.6436, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.5598194130925508, |
| "grad_norm": 26.01722526550293, |
| "learning_rate": 4.6434231378763876e-06, |
| "loss": 2.5381, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.5605718585402558, |
| "grad_norm": 16.02857208251953, |
| "learning_rate": 4.635499207606973e-06, |
| "loss": 2.0049, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.5613243039879608, |
| "grad_norm": 17.349332809448242, |
| "learning_rate": 4.62757527733756e-06, |
| "loss": 2.4648, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.5620767494356659, |
| "grad_norm": 39.4999885559082, |
| "learning_rate": 4.619651347068146e-06, |
| "loss": 2.707, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.5628291948833709, |
| "grad_norm": 38.11347198486328, |
| "learning_rate": 4.611727416798732e-06, |
| "loss": 2.666, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.563581640331076, |
| "grad_norm": 34.45002365112305, |
| "learning_rate": 4.603803486529319e-06, |
| "loss": 2.8965, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.5643340857787811, |
| "grad_norm": 24.278921127319336, |
| "learning_rate": 4.595879556259905e-06, |
| "loss": 2.9795, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.5650865312264861, |
| "grad_norm": 36.90974426269531, |
| "learning_rate": 4.587955625990492e-06, |
| "loss": 2.7666, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.5658389766741911, |
| "grad_norm": 21.165529251098633, |
| "learning_rate": 4.580031695721078e-06, |
| "loss": 2.478, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.5665914221218962, |
| "grad_norm": 23.80830955505371, |
| "learning_rate": 4.572107765451664e-06, |
| "loss": 2.4609, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.5673438675696012, |
| "grad_norm": 30.803369522094727, |
| "learning_rate": 4.564183835182251e-06, |
| "loss": 2.8076, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.5680963130173062, |
| "grad_norm": 22.102771759033203, |
| "learning_rate": 4.556259904912837e-06, |
| "loss": 2.5107, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.5688487584650113, |
| "grad_norm": 16.512863159179688, |
| "learning_rate": 4.5483359746434236e-06, |
| "loss": 2.1992, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.5696012039127163, |
| "grad_norm": 43.92239761352539, |
| "learning_rate": 4.54041204437401e-06, |
| "loss": 3.0449, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.5703536493604213, |
| "grad_norm": 31.005207061767578, |
| "learning_rate": 4.532488114104596e-06, |
| "loss": 2.6104, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.5711060948081265, |
| "grad_norm": 25.206518173217773, |
| "learning_rate": 4.524564183835183e-06, |
| "loss": 2.3311, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.5718585402558315, |
| "grad_norm": 25.389394760131836, |
| "learning_rate": 4.516640253565769e-06, |
| "loss": 1.9697, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5726109857035365, |
| "grad_norm": 28.341934204101562, |
| "learning_rate": 4.508716323296355e-06, |
| "loss": 2.7266, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.5733634311512416, |
| "grad_norm": 55.47808074951172, |
| "learning_rate": 4.500792393026942e-06, |
| "loss": 2.7305, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.5741158765989466, |
| "grad_norm": 28.395244598388672, |
| "learning_rate": 4.492868462757528e-06, |
| "loss": 1.937, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.5748683220466516, |
| "grad_norm": 15.90774154663086, |
| "learning_rate": 4.484944532488115e-06, |
| "loss": 2.2666, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.5756207674943566, |
| "grad_norm": 23.520118713378906, |
| "learning_rate": 4.477020602218701e-06, |
| "loss": 2.3945, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.5763732129420617, |
| "grad_norm": 24.288490295410156, |
| "learning_rate": 4.469096671949287e-06, |
| "loss": 2.8779, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.5771256583897667, |
| "grad_norm": 20.855445861816406, |
| "learning_rate": 4.461172741679873e-06, |
| "loss": 2.1201, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.5778781038374717, |
| "grad_norm": 23.80067253112793, |
| "learning_rate": 4.45324881141046e-06, |
| "loss": 2.3896, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.5786305492851769, |
| "grad_norm": 27.069873809814453, |
| "learning_rate": 4.445324881141047e-06, |
| "loss": 3.1094, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.5793829947328819, |
| "grad_norm": 33.97768783569336, |
| "learning_rate": 4.437400950871633e-06, |
| "loss": 2.501, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5801354401805869, |
| "grad_norm": 24.86118507385254, |
| "learning_rate": 4.429477020602219e-06, |
| "loss": 2.8809, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.580887885628292, |
| "grad_norm": 30.0194091796875, |
| "learning_rate": 4.421553090332805e-06, |
| "loss": 2.0205, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.581640331075997, |
| "grad_norm": 18.77094841003418, |
| "learning_rate": 4.413629160063391e-06, |
| "loss": 2.5732, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.582392776523702, |
| "grad_norm": 28.159244537353516, |
| "learning_rate": 4.4057052297939784e-06, |
| "loss": 2.7314, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.5831452219714071, |
| "grad_norm": 30.579219818115234, |
| "learning_rate": 4.397781299524565e-06, |
| "loss": 2.0508, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.5838976674191121, |
| "grad_norm": 24.77485466003418, |
| "learning_rate": 4.389857369255151e-06, |
| "loss": 2.2637, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.5846501128668171, |
| "grad_norm": 17.51915740966797, |
| "learning_rate": 4.381933438985737e-06, |
| "loss": 2.1367, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.5854025583145221, |
| "grad_norm": 18.201080322265625, |
| "learning_rate": 4.374009508716324e-06, |
| "loss": 2.3232, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.5861550037622273, |
| "grad_norm": 19.63605499267578, |
| "learning_rate": 4.36608557844691e-06, |
| "loss": 2.875, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.5869074492099323, |
| "grad_norm": 27.237627029418945, |
| "learning_rate": 4.3581616481774964e-06, |
| "loss": 2.6475, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5876598946576373, |
| "grad_norm": 17.824848175048828, |
| "learning_rate": 4.350237717908083e-06, |
| "loss": 1.8301, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.5884123401053424, |
| "grad_norm": 21.104175567626953, |
| "learning_rate": 4.342313787638669e-06, |
| "loss": 1.7656, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.5891647855530474, |
| "grad_norm": 23.795610427856445, |
| "learning_rate": 4.334389857369256e-06, |
| "loss": 3.1006, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.5899172310007524, |
| "grad_norm": 23.139272689819336, |
| "learning_rate": 4.326465927099842e-06, |
| "loss": 1.9956, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.5906696764484575, |
| "grad_norm": 25.720857620239258, |
| "learning_rate": 4.318541996830428e-06, |
| "loss": 1.9121, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.5914221218961625, |
| "grad_norm": 21.973257064819336, |
| "learning_rate": 4.3106180665610144e-06, |
| "loss": 2.7388, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.5921745673438675, |
| "grad_norm": 30.504762649536133, |
| "learning_rate": 4.302694136291601e-06, |
| "loss": 2.4004, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.5929270127915726, |
| "grad_norm": 21.440141677856445, |
| "learning_rate": 4.294770206022188e-06, |
| "loss": 2.5205, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.5936794582392777, |
| "grad_norm": 19.103830337524414, |
| "learning_rate": 4.286846275752774e-06, |
| "loss": 2.1895, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.5944319036869827, |
| "grad_norm": 25.186803817749023, |
| "learning_rate": 4.27892234548336e-06, |
| "loss": 2.9531, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.5951843491346878, |
| "grad_norm": 23.751972198486328, |
| "learning_rate": 4.270998415213946e-06, |
| "loss": 2.5225, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.5959367945823928, |
| "grad_norm": 30.363698959350586, |
| "learning_rate": 4.2630744849445325e-06, |
| "loss": 3.0273, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.5966892400300978, |
| "grad_norm": 28.2047061920166, |
| "learning_rate": 4.2551505546751195e-06, |
| "loss": 2.4619, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.5974416854778029, |
| "grad_norm": 18.83759880065918, |
| "learning_rate": 4.247226624405706e-06, |
| "loss": 2.3926, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.5981941309255079, |
| "grad_norm": 16.413726806640625, |
| "learning_rate": 4.239302694136292e-06, |
| "loss": 2.1045, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.5989465763732129, |
| "grad_norm": 24.916812896728516, |
| "learning_rate": 4.231378763866879e-06, |
| "loss": 2.0381, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.5996990218209179, |
| "grad_norm": 22.680103302001953, |
| "learning_rate": 4.223454833597464e-06, |
| "loss": 2.5254, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.600451467268623, |
| "grad_norm": 17.02437400817871, |
| "learning_rate": 4.215530903328051e-06, |
| "loss": 2.4658, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.6012039127163281, |
| "grad_norm": 40.846778869628906, |
| "learning_rate": 4.2076069730586375e-06, |
| "loss": 2.2783, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.6019563581640331, |
| "grad_norm": 32.31572723388672, |
| "learning_rate": 4.199683042789224e-06, |
| "loss": 2.0996, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6027088036117382, |
| "grad_norm": 28.528690338134766, |
| "learning_rate": 4.191759112519811e-06, |
| "loss": 2.1387, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.6034612490594432, |
| "grad_norm": 19.70213508605957, |
| "learning_rate": 4.183835182250396e-06, |
| "loss": 2.2217, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.6042136945071482, |
| "grad_norm": 33.513206481933594, |
| "learning_rate": 4.175911251980983e-06, |
| "loss": 2.9395, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.6049661399548533, |
| "grad_norm": 25.72749900817871, |
| "learning_rate": 4.167987321711569e-06, |
| "loss": 2.7637, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.6057185854025583, |
| "grad_norm": 51.15900421142578, |
| "learning_rate": 4.1600633914421555e-06, |
| "loss": 2.708, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.6064710308502633, |
| "grad_norm": 36.24783706665039, |
| "learning_rate": 4.1521394611727425e-06, |
| "loss": 2.4258, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.6072234762979684, |
| "grad_norm": 22.41239356994629, |
| "learning_rate": 4.144215530903328e-06, |
| "loss": 2.4014, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.6079759217456734, |
| "grad_norm": 27.632205963134766, |
| "learning_rate": 4.136291600633915e-06, |
| "loss": 2.1709, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.6087283671933785, |
| "grad_norm": 24.77882194519043, |
| "learning_rate": 4.128367670364501e-06, |
| "loss": 2.5947, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.6094808126410836, |
| "grad_norm": 18.057119369506836, |
| "learning_rate": 4.120443740095087e-06, |
| "loss": 2.4512, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.6102332580887886, |
| "grad_norm": 23.234073638916016, |
| "learning_rate": 4.112519809825674e-06, |
| "loss": 2.5381, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.6109857035364936, |
| "grad_norm": 31.53257179260254, |
| "learning_rate": 4.1045958795562605e-06, |
| "loss": 2.2202, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.6117381489841986, |
| "grad_norm": 23.6964168548584, |
| "learning_rate": 4.096671949286847e-06, |
| "loss": 2.2139, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.6124905944319037, |
| "grad_norm": 31.621091842651367, |
| "learning_rate": 4.088748019017433e-06, |
| "loss": 2.8975, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.6132430398796087, |
| "grad_norm": 28.83985137939453, |
| "learning_rate": 4.080824088748019e-06, |
| "loss": 2.8174, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.6139954853273137, |
| "grad_norm": 19.182252883911133, |
| "learning_rate": 4.072900158478606e-06, |
| "loss": 2.4541, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.6147479307750188, |
| "grad_norm": 16.74087142944336, |
| "learning_rate": 4.064976228209192e-06, |
| "loss": 2.0703, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.6155003762227238, |
| "grad_norm": 22.0015869140625, |
| "learning_rate": 4.0570522979397786e-06, |
| "loss": 2.2656, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.6162528216704289, |
| "grad_norm": 40.94550323486328, |
| "learning_rate": 4.049128367670365e-06, |
| "loss": 2.499, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.617005267118134, |
| "grad_norm": 18.632471084594727, |
| "learning_rate": 4.041204437400951e-06, |
| "loss": 2.1719, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.617757712565839, |
| "grad_norm": 30.165760040283203, |
| "learning_rate": 4.033280507131538e-06, |
| "loss": 2.5557, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.618510158013544, |
| "grad_norm": 25.652528762817383, |
| "learning_rate": 4.025356576862124e-06, |
| "loss": 2.7559, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.6192626034612491, |
| "grad_norm": 27.220035552978516, |
| "learning_rate": 4.01743264659271e-06, |
| "loss": 2.9453, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.6200150489089541, |
| "grad_norm": 22.394641876220703, |
| "learning_rate": 4.0095087163232966e-06, |
| "loss": 2.2749, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.6207674943566591, |
| "grad_norm": 22.86411476135254, |
| "learning_rate": 4.001584786053883e-06, |
| "loss": 2.6201, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.6215199398043642, |
| "grad_norm": 20.468843460083008, |
| "learning_rate": 3.993660855784469e-06, |
| "loss": 1.8496, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.6222723852520692, |
| "grad_norm": 21.8277530670166, |
| "learning_rate": 3.985736925515056e-06, |
| "loss": 1.79, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.6230248306997742, |
| "grad_norm": 20.242324829101562, |
| "learning_rate": 3.977812995245642e-06, |
| "loss": 2.3403, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.6237772761474794, |
| "grad_norm": 25.844030380249023, |
| "learning_rate": 3.969889064976228e-06, |
| "loss": 2.2485, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.6245297215951844, |
| "grad_norm": 42.171226501464844, |
| "learning_rate": 3.961965134706815e-06, |
| "loss": 2.3018, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6252821670428894, |
| "grad_norm": 37.05338668823242, |
| "learning_rate": 3.954041204437401e-06, |
| "loss": 2.584, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.6260346124905944, |
| "grad_norm": 17.200483322143555, |
| "learning_rate": 3.946117274167988e-06, |
| "loss": 2.2383, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.6267870579382995, |
| "grad_norm": 39.11799240112305, |
| "learning_rate": 3.938193343898574e-06, |
| "loss": 2.7314, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.6275395033860045, |
| "grad_norm": 25.06464958190918, |
| "learning_rate": 3.93026941362916e-06, |
| "loss": 2.7432, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.6282919488337095, |
| "grad_norm": 16.09369468688965, |
| "learning_rate": 3.922345483359747e-06, |
| "loss": 1.9019, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.6290443942814146, |
| "grad_norm": 38.14901351928711, |
| "learning_rate": 3.9144215530903326e-06, |
| "loss": 2.4102, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.6297968397291196, |
| "grad_norm": 18.259464263916016, |
| "learning_rate": 3.90649762282092e-06, |
| "loss": 2.2725, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.6305492851768246, |
| "grad_norm": 48.563934326171875, |
| "learning_rate": 3.898573692551506e-06, |
| "loss": 2.4902, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.6313017306245298, |
| "grad_norm": 35.83158874511719, |
| "learning_rate": 3.890649762282092e-06, |
| "loss": 2.2686, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.6320541760722348, |
| "grad_norm": 25.336288452148438, |
| "learning_rate": 3.882725832012679e-06, |
| "loss": 2.1768, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6328066215199398, |
| "grad_norm": 29.03192901611328, |
| "learning_rate": 3.874801901743264e-06, |
| "loss": 2.5166, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.6335590669676449, |
| "grad_norm": 32.02183151245117, |
| "learning_rate": 3.866877971473851e-06, |
| "loss": 2.5801, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.6343115124153499, |
| "grad_norm": 40.6337776184082, |
| "learning_rate": 3.858954041204438e-06, |
| "loss": 2.1455, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.6350639578630549, |
| "grad_norm": 40.04218292236328, |
| "learning_rate": 3.851030110935024e-06, |
| "loss": 2.0576, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.63581640331076, |
| "grad_norm": 21.415939331054688, |
| "learning_rate": 3.843106180665611e-06, |
| "loss": 2.1914, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.636568848758465, |
| "grad_norm": 33.20722579956055, |
| "learning_rate": 3.835182250396197e-06, |
| "loss": 2.9111, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.63732129420617, |
| "grad_norm": 47.37984085083008, |
| "learning_rate": 3.827258320126783e-06, |
| "loss": 2.5576, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.6380737396538751, |
| "grad_norm": 51.218971252441406, |
| "learning_rate": 3.8193343898573694e-06, |
| "loss": 3.1221, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.6388261851015802, |
| "grad_norm": 22.271236419677734, |
| "learning_rate": 3.811410459587956e-06, |
| "loss": 2.5859, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.6395786305492852, |
| "grad_norm": 20.053430557250977, |
| "learning_rate": 3.8034865293185427e-06, |
| "loss": 2.4189, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6403310759969902, |
| "grad_norm": 25.01032257080078, |
| "learning_rate": 3.7955625990491284e-06, |
| "loss": 3.1533, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.6410835214446953, |
| "grad_norm": 18.8804988861084, |
| "learning_rate": 3.787638668779715e-06, |
| "loss": 2.3438, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.6418359668924003, |
| "grad_norm": 20.691516876220703, |
| "learning_rate": 3.7797147385103017e-06, |
| "loss": 2.4365, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.6425884123401053, |
| "grad_norm": 24.400440216064453, |
| "learning_rate": 3.771790808240888e-06, |
| "loss": 2.2988, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.6433408577878104, |
| "grad_norm": 23.2838077545166, |
| "learning_rate": 3.7638668779714745e-06, |
| "loss": 2.0371, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.6440933032355154, |
| "grad_norm": 28.432334899902344, |
| "learning_rate": 3.7559429477020602e-06, |
| "loss": 1.9199, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.6448457486832204, |
| "grad_norm": 18.364774703979492, |
| "learning_rate": 3.748019017432647e-06, |
| "loss": 2.3975, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.6455981941309256, |
| "grad_norm": 26.896156311035156, |
| "learning_rate": 3.740095087163233e-06, |
| "loss": 2.3828, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.6463506395786306, |
| "grad_norm": 20.20989227294922, |
| "learning_rate": 3.7321711568938197e-06, |
| "loss": 2.9043, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.6471030850263356, |
| "grad_norm": 51.84599685668945, |
| "learning_rate": 3.7242472266244063e-06, |
| "loss": 2.377, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.6478555304740407, |
| "grad_norm": 26.037067413330078, |
| "learning_rate": 3.716323296354992e-06, |
| "loss": 2.5391, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.6486079759217457, |
| "grad_norm": 36.53365707397461, |
| "learning_rate": 3.7083993660855787e-06, |
| "loss": 2.3242, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.6493604213694507, |
| "grad_norm": 27.17704963684082, |
| "learning_rate": 3.700475435816165e-06, |
| "loss": 2.3184, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.6501128668171557, |
| "grad_norm": 46.70039367675781, |
| "learning_rate": 3.6925515055467515e-06, |
| "loss": 2.2803, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.6508653122648608, |
| "grad_norm": 20.36431121826172, |
| "learning_rate": 3.684627575277338e-06, |
| "loss": 2.4951, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.6516177577125658, |
| "grad_norm": 35.15061950683594, |
| "learning_rate": 3.6767036450079243e-06, |
| "loss": 2.1846, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.6523702031602708, |
| "grad_norm": 25.147287368774414, |
| "learning_rate": 3.6687797147385105e-06, |
| "loss": 2.3579, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.653122648607976, |
| "grad_norm": 23.692909240722656, |
| "learning_rate": 3.6608557844690967e-06, |
| "loss": 2.8662, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.653875094055681, |
| "grad_norm": 28.69312858581543, |
| "learning_rate": 3.6529318541996833e-06, |
| "loss": 2.2891, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.654627539503386, |
| "grad_norm": 18.955467224121094, |
| "learning_rate": 3.64500792393027e-06, |
| "loss": 2.5234, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6553799849510911, |
| "grad_norm": 16.43120574951172, |
| "learning_rate": 3.637083993660856e-06, |
| "loss": 1.9131, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.6561324303987961, |
| "grad_norm": 25.750568389892578, |
| "learning_rate": 3.6291600633914427e-06, |
| "loss": 2.6816, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.6568848758465011, |
| "grad_norm": 23.512271881103516, |
| "learning_rate": 3.6212361331220285e-06, |
| "loss": 2.207, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.6576373212942062, |
| "grad_norm": 25.66358757019043, |
| "learning_rate": 3.613312202852615e-06, |
| "loss": 2.1592, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.6583897667419112, |
| "grad_norm": 18.43921661376953, |
| "learning_rate": 3.6053882725832017e-06, |
| "loss": 1.6904, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.6591422121896162, |
| "grad_norm": 22.820770263671875, |
| "learning_rate": 3.597464342313788e-06, |
| "loss": 2.502, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.6598946576373212, |
| "grad_norm": 27.273193359375, |
| "learning_rate": 3.5895404120443745e-06, |
| "loss": 2.0859, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.6606471030850264, |
| "grad_norm": 25.108341217041016, |
| "learning_rate": 3.5816164817749603e-06, |
| "loss": 2.7012, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.6613995485327314, |
| "grad_norm": 31.935596466064453, |
| "learning_rate": 3.573692551505547e-06, |
| "loss": 2.5947, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.6621519939804364, |
| "grad_norm": 29.96102523803711, |
| "learning_rate": 3.5657686212361335e-06, |
| "loss": 2.4814, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6629044394281415, |
| "grad_norm": 30.193965911865234, |
| "learning_rate": 3.5578446909667197e-06, |
| "loss": 2.2949, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.6636568848758465, |
| "grad_norm": 21.395172119140625, |
| "learning_rate": 3.5499207606973063e-06, |
| "loss": 2.4214, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.6644093303235515, |
| "grad_norm": 39.11041259765625, |
| "learning_rate": 3.5419968304278925e-06, |
| "loss": 2.4277, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.6651617757712566, |
| "grad_norm": 39.84598922729492, |
| "learning_rate": 3.5340729001584787e-06, |
| "loss": 2.5557, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.6659142212189616, |
| "grad_norm": 22.32029914855957, |
| "learning_rate": 3.5261489698890653e-06, |
| "loss": 2.6318, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 47.957611083984375, |
| "learning_rate": 3.5182250396196515e-06, |
| "loss": 2.3809, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.6674191121143717, |
| "grad_norm": 18.636598587036133, |
| "learning_rate": 3.510301109350238e-06, |
| "loss": 2.2236, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.6681715575620768, |
| "grad_norm": 26.87550163269043, |
| "learning_rate": 3.5023771790808243e-06, |
| "loss": 1.9717, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.6689240030097818, |
| "grad_norm": 29.201322555541992, |
| "learning_rate": 3.494453248811411e-06, |
| "loss": 2.0947, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.6696764484574869, |
| "grad_norm": 24.85993194580078, |
| "learning_rate": 3.4865293185419976e-06, |
| "loss": 2.4033, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6704288939051919, |
| "grad_norm": 20.475067138671875, |
| "learning_rate": 3.4786053882725833e-06, |
| "loss": 2.0635, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.6711813393528969, |
| "grad_norm": 27.593063354492188, |
| "learning_rate": 3.47068145800317e-06, |
| "loss": 2.4746, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.671933784800602, |
| "grad_norm": 19.604068756103516, |
| "learning_rate": 3.462757527733756e-06, |
| "loss": 2.3154, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.672686230248307, |
| "grad_norm": 37.860382080078125, |
| "learning_rate": 3.4548335974643428e-06, |
| "loss": 2.1055, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.673438675696012, |
| "grad_norm": 20.184446334838867, |
| "learning_rate": 3.4469096671949285e-06, |
| "loss": 2.1973, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.674191121143717, |
| "grad_norm": 27.288856506347656, |
| "learning_rate": 3.438985736925515e-06, |
| "loss": 2.4014, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.6749435665914221, |
| "grad_norm": 26.36166000366211, |
| "learning_rate": 3.4310618066561018e-06, |
| "loss": 2.4404, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.6756960120391272, |
| "grad_norm": 22.578481674194336, |
| "learning_rate": 3.423137876386688e-06, |
| "loss": 2.3037, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.6764484574868322, |
| "grad_norm": 22.866018295288086, |
| "learning_rate": 3.4152139461172746e-06, |
| "loss": 2.585, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.6772009029345373, |
| "grad_norm": 23.510807037353516, |
| "learning_rate": 3.4072900158478608e-06, |
| "loss": 1.9209, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6779533483822423, |
| "grad_norm": 41.13738250732422, |
| "learning_rate": 3.399366085578447e-06, |
| "loss": 2.1924, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.6787057938299473, |
| "grad_norm": 22.456920623779297, |
| "learning_rate": 3.3914421553090336e-06, |
| "loss": 2.7236, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.6794582392776524, |
| "grad_norm": 42.51992416381836, |
| "learning_rate": 3.3835182250396198e-06, |
| "loss": 2.707, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.6802106847253574, |
| "grad_norm": 34.998878479003906, |
| "learning_rate": 3.3755942947702064e-06, |
| "loss": 2.7812, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.6809631301730624, |
| "grad_norm": 21.747095108032227, |
| "learning_rate": 3.3676703645007926e-06, |
| "loss": 2.3213, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.6817155756207675, |
| "grad_norm": 18.266437530517578, |
| "learning_rate": 3.359746434231379e-06, |
| "loss": 1.957, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.6824680210684725, |
| "grad_norm": 44.764652252197266, |
| "learning_rate": 3.351822503961966e-06, |
| "loss": 3.2598, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.6832204665161776, |
| "grad_norm": 21.49046516418457, |
| "learning_rate": 3.3438985736925516e-06, |
| "loss": 2.2383, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.6839729119638827, |
| "grad_norm": 20.83407211303711, |
| "learning_rate": 3.335974643423138e-06, |
| "loss": 2.4727, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.6847253574115877, |
| "grad_norm": 52.79618453979492, |
| "learning_rate": 3.3280507131537244e-06, |
| "loss": 2.8594, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6854778028592927, |
| "grad_norm": 33.63954162597656, |
| "learning_rate": 3.320126782884311e-06, |
| "loss": 2.2773, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.6862302483069977, |
| "grad_norm": 36.19979476928711, |
| "learning_rate": 3.3122028526148976e-06, |
| "loss": 2.6445, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.6869826937547028, |
| "grad_norm": 21.096250534057617, |
| "learning_rate": 3.3042789223454834e-06, |
| "loss": 1.8545, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.6877351392024078, |
| "grad_norm": 22.03937530517578, |
| "learning_rate": 3.29635499207607e-06, |
| "loss": 2.6279, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.6884875846501128, |
| "grad_norm": 31.599443435668945, |
| "learning_rate": 3.2884310618066562e-06, |
| "loss": 2.6084, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.6892400300978179, |
| "grad_norm": 50.962554931640625, |
| "learning_rate": 3.280507131537243e-06, |
| "loss": 3.5176, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.6899924755455229, |
| "grad_norm": 35.13251876831055, |
| "learning_rate": 3.2725832012678294e-06, |
| "loss": 2.2129, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.690744920993228, |
| "grad_norm": 21.519636154174805, |
| "learning_rate": 3.2646592709984152e-06, |
| "loss": 2.6162, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.6914973664409331, |
| "grad_norm": 22.46910285949707, |
| "learning_rate": 3.256735340729002e-06, |
| "loss": 2.4355, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.6922498118886381, |
| "grad_norm": 24.34375, |
| "learning_rate": 3.248811410459588e-06, |
| "loss": 2.3936, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.6930022573363431, |
| "grad_norm": 19.537216186523438, |
| "learning_rate": 3.2408874801901746e-06, |
| "loss": 2.2109, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.6937547027840482, |
| "grad_norm": 25.251739501953125, |
| "learning_rate": 3.2329635499207613e-06, |
| "loss": 2.5303, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.6945071482317532, |
| "grad_norm": 19.39518928527832, |
| "learning_rate": 3.2250396196513475e-06, |
| "loss": 2.082, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.6952595936794582, |
| "grad_norm": 23.059263229370117, |
| "learning_rate": 3.217115689381934e-06, |
| "loss": 2.6348, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.6960120391271633, |
| "grad_norm": 24.483816146850586, |
| "learning_rate": 3.20919175911252e-06, |
| "loss": 2.0283, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.6967644845748683, |
| "grad_norm": 27.044761657714844, |
| "learning_rate": 3.2012678288431065e-06, |
| "loss": 1.9946, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.6975169300225733, |
| "grad_norm": 24.321277618408203, |
| "learning_rate": 3.1933438985736926e-06, |
| "loss": 2.2832, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.6982693754702785, |
| "grad_norm": 24.639503479003906, |
| "learning_rate": 3.1854199683042793e-06, |
| "loss": 2.165, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.6990218209179835, |
| "grad_norm": 21.22592544555664, |
| "learning_rate": 3.177496038034866e-06, |
| "loss": 2.1494, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.6997742663656885, |
| "grad_norm": 22.190229415893555, |
| "learning_rate": 3.1695721077654516e-06, |
| "loss": 2.1587, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7005267118133935, |
| "grad_norm": 20.478050231933594, |
| "learning_rate": 3.1616481774960383e-06, |
| "loss": 1.9883, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.7012791572610986, |
| "grad_norm": 29.108064651489258, |
| "learning_rate": 3.1537242472266245e-06, |
| "loss": 2.8047, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.7020316027088036, |
| "grad_norm": 21.049943923950195, |
| "learning_rate": 3.145800316957211e-06, |
| "loss": 2.1621, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.7027840481565086, |
| "grad_norm": 35.474388122558594, |
| "learning_rate": 3.1378763866877977e-06, |
| "loss": 2.6064, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.7035364936042137, |
| "grad_norm": 22.91497039794922, |
| "learning_rate": 3.1299524564183835e-06, |
| "loss": 2.1855, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.7042889390519187, |
| "grad_norm": 24.449195861816406, |
| "learning_rate": 3.12202852614897e-06, |
| "loss": 2.0146, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.7050413844996237, |
| "grad_norm": 20.208375930786133, |
| "learning_rate": 3.1141045958795563e-06, |
| "loss": 1.6709, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.7057938299473289, |
| "grad_norm": 37.04838943481445, |
| "learning_rate": 3.106180665610143e-06, |
| "loss": 2.0508, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.7065462753950339, |
| "grad_norm": 26.501577377319336, |
| "learning_rate": 3.0982567353407295e-06, |
| "loss": 2.418, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.7072987208427389, |
| "grad_norm": 27.098785400390625, |
| "learning_rate": 3.0903328050713157e-06, |
| "loss": 2.5312, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.708051166290444, |
| "grad_norm": 33.405418395996094, |
| "learning_rate": 3.082408874801902e-06, |
| "loss": 2.3438, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.708803611738149, |
| "grad_norm": 28.807294845581055, |
| "learning_rate": 3.074484944532488e-06, |
| "loss": 2.6094, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.709556057185854, |
| "grad_norm": 22.320127487182617, |
| "learning_rate": 3.0665610142630747e-06, |
| "loss": 1.645, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.710308502633559, |
| "grad_norm": 35.30049514770508, |
| "learning_rate": 3.0586370839936613e-06, |
| "loss": 2.5449, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.7110609480812641, |
| "grad_norm": 26.332334518432617, |
| "learning_rate": 3.0507131537242475e-06, |
| "loss": 2.6299, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.7118133935289691, |
| "grad_norm": 21.510595321655273, |
| "learning_rate": 3.042789223454834e-06, |
| "loss": 2.1787, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.7125658389766741, |
| "grad_norm": 30.63580322265625, |
| "learning_rate": 3.03486529318542e-06, |
| "loss": 2.3877, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.7133182844243793, |
| "grad_norm": 24.134626388549805, |
| "learning_rate": 3.0269413629160065e-06, |
| "loss": 3.1738, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.7140707298720843, |
| "grad_norm": 25.873851776123047, |
| "learning_rate": 3.019017432646593e-06, |
| "loss": 2.1338, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.7148231753197893, |
| "grad_norm": 20.822885513305664, |
| "learning_rate": 3.0110935023771793e-06, |
| "loss": 2.5898, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.7155756207674944, |
| "grad_norm": 21.63072395324707, |
| "learning_rate": 3.003169572107766e-06, |
| "loss": 2.1758, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.7163280662151994, |
| "grad_norm": 21.866065979003906, |
| "learning_rate": 2.9952456418383517e-06, |
| "loss": 2.1553, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.7170805116629044, |
| "grad_norm": 16.720848083496094, |
| "learning_rate": 2.9873217115689383e-06, |
| "loss": 2.7178, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.7178329571106095, |
| "grad_norm": 17.399744033813477, |
| "learning_rate": 2.979397781299525e-06, |
| "loss": 1.9292, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.7185854025583145, |
| "grad_norm": 18.03335952758789, |
| "learning_rate": 2.971473851030111e-06, |
| "loss": 2.499, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.7193378480060195, |
| "grad_norm": 18.41415023803711, |
| "learning_rate": 2.9635499207606977e-06, |
| "loss": 1.9775, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.7200902934537246, |
| "grad_norm": 21.423734664916992, |
| "learning_rate": 2.955625990491284e-06, |
| "loss": 2.2734, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.7208427389014297, |
| "grad_norm": 20.504459381103516, |
| "learning_rate": 2.94770206022187e-06, |
| "loss": 2.478, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.7215951843491347, |
| "grad_norm": 26.129322052001953, |
| "learning_rate": 2.9397781299524568e-06, |
| "loss": 2.8418, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.7223476297968398, |
| "grad_norm": 23.1035213470459, |
| "learning_rate": 2.931854199683043e-06, |
| "loss": 2.1973, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.7231000752445448, |
| "grad_norm": 35.233028411865234, |
| "learning_rate": 2.9239302694136296e-06, |
| "loss": 2.3066, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.7238525206922498, |
| "grad_norm": 30.950870513916016, |
| "learning_rate": 2.9160063391442158e-06, |
| "loss": 2.5923, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.7246049661399548, |
| "grad_norm": 20.875741958618164, |
| "learning_rate": 2.9080824088748024e-06, |
| "loss": 2.1748, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.7253574115876599, |
| "grad_norm": 33.3520622253418, |
| "learning_rate": 2.900158478605388e-06, |
| "loss": 2.707, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.7261098570353649, |
| "grad_norm": 17.897817611694336, |
| "learning_rate": 2.8922345483359748e-06, |
| "loss": 1.9912, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.7268623024830699, |
| "grad_norm": 23.978641510009766, |
| "learning_rate": 2.8843106180665614e-06, |
| "loss": 2.3223, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.7276147479307751, |
| "grad_norm": 39.2357177734375, |
| "learning_rate": 2.8763866877971476e-06, |
| "loss": 3.0039, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.7283671933784801, |
| "grad_norm": 21.9942569732666, |
| "learning_rate": 2.868462757527734e-06, |
| "loss": 2.4873, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.7291196388261851, |
| "grad_norm": 35.64141845703125, |
| "learning_rate": 2.86053882725832e-06, |
| "loss": 2.7051, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.7298720842738902, |
| "grad_norm": 45.55552673339844, |
| "learning_rate": 2.8526148969889066e-06, |
| "loss": 2.7578, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.7306245297215952, |
| "grad_norm": 19.321287155151367, |
| "learning_rate": 2.844690966719493e-06, |
| "loss": 2.1943, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.7313769751693002, |
| "grad_norm": 24.38201332092285, |
| "learning_rate": 2.8367670364500794e-06, |
| "loss": 2.4873, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.7321294206170053, |
| "grad_norm": 22.028440475463867, |
| "learning_rate": 2.828843106180666e-06, |
| "loss": 2.5156, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.7328818660647103, |
| "grad_norm": 26.298290252685547, |
| "learning_rate": 2.8209191759112518e-06, |
| "loss": 2.7314, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.7336343115124153, |
| "grad_norm": 30.888376235961914, |
| "learning_rate": 2.8129952456418384e-06, |
| "loss": 2.71, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.7343867569601203, |
| "grad_norm": 23.976533889770508, |
| "learning_rate": 2.805071315372425e-06, |
| "loss": 2.3789, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.7351392024078255, |
| "grad_norm": 31.292388916015625, |
| "learning_rate": 2.797147385103011e-06, |
| "loss": 2.5811, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.7358916478555305, |
| "grad_norm": 19.439210891723633, |
| "learning_rate": 2.789223454833598e-06, |
| "loss": 2.6377, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.7366440933032355, |
| "grad_norm": 19.47076988220215, |
| "learning_rate": 2.781299524564184e-06, |
| "loss": 2.5439, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.7373965387509406, |
| "grad_norm": 20.66202735900879, |
| "learning_rate": 2.7733755942947706e-06, |
| "loss": 2.0352, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.7381489841986456, |
| "grad_norm": 24.99919891357422, |
| "learning_rate": 2.7654516640253572e-06, |
| "loss": 2.1938, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.7389014296463506, |
| "grad_norm": 32.048587799072266, |
| "learning_rate": 2.757527733755943e-06, |
| "loss": 2.3311, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.7396538750940557, |
| "grad_norm": 24.181419372558594, |
| "learning_rate": 2.7496038034865296e-06, |
| "loss": 2.415, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.7404063205417607, |
| "grad_norm": 21.071016311645508, |
| "learning_rate": 2.741679873217116e-06, |
| "loss": 2.7002, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.7411587659894657, |
| "grad_norm": 23.08110237121582, |
| "learning_rate": 2.7337559429477024e-06, |
| "loss": 2.6738, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.7419112114371708, |
| "grad_norm": 19.222745895385742, |
| "learning_rate": 2.725832012678289e-06, |
| "loss": 2.168, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.7426636568848759, |
| "grad_norm": 36.517940521240234, |
| "learning_rate": 2.717908082408875e-06, |
| "loss": 1.9395, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.7434161023325809, |
| "grad_norm": 21.429569244384766, |
| "learning_rate": 2.7099841521394614e-06, |
| "loss": 2.1455, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.744168547780286, |
| "grad_norm": 25.69976043701172, |
| "learning_rate": 2.7020602218700476e-06, |
| "loss": 2.5479, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.744920993227991, |
| "grad_norm": 23.321632385253906, |
| "learning_rate": 2.6941362916006342e-06, |
| "loss": 2.5186, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.745673438675696, |
| "grad_norm": 18.7263126373291, |
| "learning_rate": 2.686212361331221e-06, |
| "loss": 2.4336, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.746425884123401, |
| "grad_norm": 23.556350708007812, |
| "learning_rate": 2.6782884310618066e-06, |
| "loss": 2.4292, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.7471783295711061, |
| "grad_norm": 22.186206817626953, |
| "learning_rate": 2.6703645007923932e-06, |
| "loss": 2.4961, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.7479307750188111, |
| "grad_norm": 23.775405883789062, |
| "learning_rate": 2.6624405705229794e-06, |
| "loss": 2.4878, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.7486832204665161, |
| "grad_norm": 33.59183883666992, |
| "learning_rate": 2.654516640253566e-06, |
| "loss": 2.7451, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.7494356659142212, |
| "grad_norm": 18.139806747436523, |
| "learning_rate": 2.6465927099841527e-06, |
| "loss": 2.0146, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.7501881113619263, |
| "grad_norm": 17.442407608032227, |
| "learning_rate": 2.638668779714739e-06, |
| "loss": 1.8535, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.7509405568096313, |
| "grad_norm": 30.57038116455078, |
| "learning_rate": 2.630744849445325e-06, |
| "loss": 2.3447, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.7516930022573364, |
| "grad_norm": 18.132648468017578, |
| "learning_rate": 2.6228209191759112e-06, |
| "loss": 2.1621, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.7524454477050414, |
| "grad_norm": 27.932897567749023, |
| "learning_rate": 2.614896988906498e-06, |
| "loss": 2.4824, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7531978931527464, |
| "grad_norm": 33.71088790893555, |
| "learning_rate": 2.606973058637084e-06, |
| "loss": 2.457, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.7539503386004515, |
| "grad_norm": 20.68466567993164, |
| "learning_rate": 2.5990491283676707e-06, |
| "loss": 2.0396, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.7547027840481565, |
| "grad_norm": 21.711130142211914, |
| "learning_rate": 2.5911251980982573e-06, |
| "loss": 2.6387, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.7554552294958615, |
| "grad_norm": 28.704111099243164, |
| "learning_rate": 2.583201267828843e-06, |
| "loss": 2.3721, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.7562076749435666, |
| "grad_norm": 33.77923583984375, |
| "learning_rate": 2.5752773375594297e-06, |
| "loss": 2.7725, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.7569601203912716, |
| "grad_norm": 18.9777774810791, |
| "learning_rate": 2.567353407290016e-06, |
| "loss": 2.3325, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.7577125658389767, |
| "grad_norm": 34.291168212890625, |
| "learning_rate": 2.5594294770206025e-06, |
| "loss": 2.2109, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.7584650112866818, |
| "grad_norm": 30.99751853942871, |
| "learning_rate": 2.551505546751189e-06, |
| "loss": 2.2041, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.7592174567343868, |
| "grad_norm": 28.4600830078125, |
| "learning_rate": 2.543581616481775e-06, |
| "loss": 3.1016, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.7599699021820918, |
| "grad_norm": 32.46207046508789, |
| "learning_rate": 2.5356576862123615e-06, |
| "loss": 2.2295, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7607223476297968, |
| "grad_norm": 30.938323974609375, |
| "learning_rate": 2.5277337559429477e-06, |
| "loss": 2.6611, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.7614747930775019, |
| "grad_norm": 33.81102752685547, |
| "learning_rate": 2.5198098256735343e-06, |
| "loss": 1.8555, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.7622272385252069, |
| "grad_norm": 33.322418212890625, |
| "learning_rate": 2.511885895404121e-06, |
| "loss": 2.1885, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.7629796839729119, |
| "grad_norm": 26.505693435668945, |
| "learning_rate": 2.503961965134707e-06, |
| "loss": 2.9521, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.763732129420617, |
| "grad_norm": 25.791608810424805, |
| "learning_rate": 2.4960380348652933e-06, |
| "loss": 2.3506, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.764484574868322, |
| "grad_norm": 34.77909851074219, |
| "learning_rate": 2.48811410459588e-06, |
| "loss": 2.5376, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.7652370203160271, |
| "grad_norm": 22.43770408630371, |
| "learning_rate": 2.480190174326466e-06, |
| "loss": 1.6523, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.7659894657637322, |
| "grad_norm": 48.59208297729492, |
| "learning_rate": 2.4722662440570523e-06, |
| "loss": 2.3481, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.7667419112114372, |
| "grad_norm": 19.5473575592041, |
| "learning_rate": 2.464342313787639e-06, |
| "loss": 2.248, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.7674943566591422, |
| "grad_norm": 17.761960983276367, |
| "learning_rate": 2.4564183835182255e-06, |
| "loss": 1.9951, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7682468021068473, |
| "grad_norm": 39.91436004638672, |
| "learning_rate": 2.4484944532488117e-06, |
| "loss": 2.8223, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.7689992475545523, |
| "grad_norm": 16.507017135620117, |
| "learning_rate": 2.440570522979398e-06, |
| "loss": 2.2949, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.7697516930022573, |
| "grad_norm": 19.836973190307617, |
| "learning_rate": 2.432646592709984e-06, |
| "loss": 2.1084, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.7705041384499624, |
| "grad_norm": 24.554914474487305, |
| "learning_rate": 2.4247226624405707e-06, |
| "loss": 2.0625, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.7712565838976674, |
| "grad_norm": 27.172744750976562, |
| "learning_rate": 2.416798732171157e-06, |
| "loss": 2.2188, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.7720090293453724, |
| "grad_norm": 29.99898910522461, |
| "learning_rate": 2.4088748019017435e-06, |
| "loss": 2.3184, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.7727614747930776, |
| "grad_norm": 29.691516876220703, |
| "learning_rate": 2.4009508716323297e-06, |
| "loss": 2.2871, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.7735139202407826, |
| "grad_norm": 25.524301528930664, |
| "learning_rate": 2.3930269413629164e-06, |
| "loss": 2.543, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.7742663656884876, |
| "grad_norm": 23.30611801147461, |
| "learning_rate": 2.3851030110935025e-06, |
| "loss": 2.9375, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.7750188111361926, |
| "grad_norm": 18.776622772216797, |
| "learning_rate": 2.3771790808240887e-06, |
| "loss": 1.8032, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7757712565838977, |
| "grad_norm": 24.228599548339844, |
| "learning_rate": 2.3692551505546754e-06, |
| "loss": 2.1113, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.7765237020316027, |
| "grad_norm": 37.123287200927734, |
| "learning_rate": 2.3613312202852615e-06, |
| "loss": 2.1606, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.7772761474793077, |
| "grad_norm": 16.47333335876465, |
| "learning_rate": 2.353407290015848e-06, |
| "loss": 1.8906, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.7780285929270128, |
| "grad_norm": 17.088388442993164, |
| "learning_rate": 2.3454833597464344e-06, |
| "loss": 2.2246, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.7787810383747178, |
| "grad_norm": 22.337467193603516, |
| "learning_rate": 2.3375594294770205e-06, |
| "loss": 2.2988, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.7795334838224228, |
| "grad_norm": 20.124755859375, |
| "learning_rate": 2.329635499207607e-06, |
| "loss": 2.3594, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.780285929270128, |
| "grad_norm": 23.33896255493164, |
| "learning_rate": 2.3217115689381938e-06, |
| "loss": 2.1382, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.781038374717833, |
| "grad_norm": 23.720806121826172, |
| "learning_rate": 2.31378763866878e-06, |
| "loss": 2.0571, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.781790820165538, |
| "grad_norm": 17.930727005004883, |
| "learning_rate": 2.305863708399366e-06, |
| "loss": 2.0557, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.782543265613243, |
| "grad_norm": 27.225309371948242, |
| "learning_rate": 2.2979397781299524e-06, |
| "loss": 2.1133, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.7832957110609481, |
| "grad_norm": 21.77535057067871, |
| "learning_rate": 2.290015847860539e-06, |
| "loss": 2.0059, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.7840481565086531, |
| "grad_norm": 31.862289428710938, |
| "learning_rate": 2.2820919175911256e-06, |
| "loss": 2.6641, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.7848006019563581, |
| "grad_norm": 21.0166072845459, |
| "learning_rate": 2.2741679873217118e-06, |
| "loss": 2.4512, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.7855530474040632, |
| "grad_norm": 18.94448471069336, |
| "learning_rate": 2.266244057052298e-06, |
| "loss": 2.0381, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.7863054928517682, |
| "grad_norm": 22.67876625061035, |
| "learning_rate": 2.2583201267828846e-06, |
| "loss": 2.6689, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.7870579382994732, |
| "grad_norm": 18.799575805664062, |
| "learning_rate": 2.250396196513471e-06, |
| "loss": 2.0874, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.7878103837471784, |
| "grad_norm": 21.82719612121582, |
| "learning_rate": 2.2424722662440574e-06, |
| "loss": 2.4414, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.7885628291948834, |
| "grad_norm": 26.506649017333984, |
| "learning_rate": 2.2345483359746436e-06, |
| "loss": 2.5059, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.7893152746425884, |
| "grad_norm": 19.323801040649414, |
| "learning_rate": 2.22662440570523e-06, |
| "loss": 2.3105, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.7900677200902935, |
| "grad_norm": 37.666603088378906, |
| "learning_rate": 2.2187004754358164e-06, |
| "loss": 1.9395, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.7908201655379985, |
| "grad_norm": 19.168920516967773, |
| "learning_rate": 2.2107765451664026e-06, |
| "loss": 2.2466, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.7915726109857035, |
| "grad_norm": 38.56500244140625, |
| "learning_rate": 2.2028526148969892e-06, |
| "loss": 2.4648, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.7923250564334086, |
| "grad_norm": 23.99535369873047, |
| "learning_rate": 2.1949286846275754e-06, |
| "loss": 2.876, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.7930775018811136, |
| "grad_norm": 40.79738998413086, |
| "learning_rate": 2.187004754358162e-06, |
| "loss": 2.6338, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.7938299473288186, |
| "grad_norm": 42.04122543334961, |
| "learning_rate": 2.1790808240887482e-06, |
| "loss": 2.3599, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.7945823927765236, |
| "grad_norm": 22.063844680786133, |
| "learning_rate": 2.1711568938193344e-06, |
| "loss": 2.4443, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.7953348382242288, |
| "grad_norm": 20.444236755371094, |
| "learning_rate": 2.163232963549921e-06, |
| "loss": 2.2334, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.7960872836719338, |
| "grad_norm": 31.267562866210938, |
| "learning_rate": 2.1553090332805072e-06, |
| "loss": 2.167, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.7968397291196389, |
| "grad_norm": 18.25832176208496, |
| "learning_rate": 2.147385103011094e-06, |
| "loss": 2.7217, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.7975921745673439, |
| "grad_norm": 21.88541030883789, |
| "learning_rate": 2.13946117274168e-06, |
| "loss": 2.3711, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.7983446200150489, |
| "grad_norm": 21.097015380859375, |
| "learning_rate": 2.1315372424722662e-06, |
| "loss": 2.1494, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.7990970654627539, |
| "grad_norm": 27.271303176879883, |
| "learning_rate": 2.123613312202853e-06, |
| "loss": 2.0898, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.799849510910459, |
| "grad_norm": 23.769197463989258, |
| "learning_rate": 2.1156893819334395e-06, |
| "loss": 2.5215, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.800601956358164, |
| "grad_norm": 27.01453399658203, |
| "learning_rate": 2.1077654516640257e-06, |
| "loss": 1.9517, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.801354401805869, |
| "grad_norm": 21.828453063964844, |
| "learning_rate": 2.099841521394612e-06, |
| "loss": 2.6025, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.8021068472535741, |
| "grad_norm": 21.228717803955078, |
| "learning_rate": 2.091917591125198e-06, |
| "loss": 2.0664, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.8028592927012792, |
| "grad_norm": 32.78303909301758, |
| "learning_rate": 2.0839936608557847e-06, |
| "loss": 2.1641, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.8036117381489842, |
| "grad_norm": 26.406593322753906, |
| "learning_rate": 2.0760697305863713e-06, |
| "loss": 2.2568, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.8043641835966893, |
| "grad_norm": 26.602384567260742, |
| "learning_rate": 2.0681458003169575e-06, |
| "loss": 2.9897, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.8051166290443943, |
| "grad_norm": 25.720447540283203, |
| "learning_rate": 2.0602218700475437e-06, |
| "loss": 2.3076, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.8058690744920993, |
| "grad_norm": 28.323110580444336, |
| "learning_rate": 2.0522979397781303e-06, |
| "loss": 2.4307, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.8066215199398044, |
| "grad_norm": 21.25274085998535, |
| "learning_rate": 2.0443740095087165e-06, |
| "loss": 2.4961, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.8073739653875094, |
| "grad_norm": 20.181909561157227, |
| "learning_rate": 2.036450079239303e-06, |
| "loss": 2.2793, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.8081264108352144, |
| "grad_norm": 19.529109954833984, |
| "learning_rate": 2.0285261489698893e-06, |
| "loss": 2.3984, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.8088788562829194, |
| "grad_norm": 27.370973587036133, |
| "learning_rate": 2.0206022187004755e-06, |
| "loss": 2.3125, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.8096313017306245, |
| "grad_norm": 22.67449378967285, |
| "learning_rate": 2.012678288431062e-06, |
| "loss": 2.1079, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.8103837471783296, |
| "grad_norm": 30.263660430908203, |
| "learning_rate": 2.0047543581616483e-06, |
| "loss": 2.3652, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.8111361926260346, |
| "grad_norm": 21.763072967529297, |
| "learning_rate": 1.9968304278922345e-06, |
| "loss": 2.3447, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.8118886380737397, |
| "grad_norm": 26.992006301879883, |
| "learning_rate": 1.988906497622821e-06, |
| "loss": 2.4863, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.8126410835214447, |
| "grad_norm": 22.929861068725586, |
| "learning_rate": 1.9809825673534077e-06, |
| "loss": 2.2822, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.8133935289691497, |
| "grad_norm": 22.8551082611084, |
| "learning_rate": 1.973058637083994e-06, |
| "loss": 2.377, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.8141459744168548, |
| "grad_norm": 26.721590042114258, |
| "learning_rate": 1.96513470681458e-06, |
| "loss": 2.7217, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.8148984198645598, |
| "grad_norm": 17.116928100585938, |
| "learning_rate": 1.9572107765451663e-06, |
| "loss": 1.8262, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.8156508653122648, |
| "grad_norm": 28.113954544067383, |
| "learning_rate": 1.949286846275753e-06, |
| "loss": 2.9307, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.8164033107599699, |
| "grad_norm": 15.134292602539062, |
| "learning_rate": 1.9413629160063395e-06, |
| "loss": 1.7354, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.8171557562076749, |
| "grad_norm": 25.39183807373047, |
| "learning_rate": 1.9334389857369257e-06, |
| "loss": 1.7539, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.81790820165538, |
| "grad_norm": 29.560977935791016, |
| "learning_rate": 1.925515055467512e-06, |
| "loss": 2.2822, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.8186606471030851, |
| "grad_norm": 33.22944259643555, |
| "learning_rate": 1.9175911251980985e-06, |
| "loss": 2.252, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.8194130925507901, |
| "grad_norm": 19.814882278442383, |
| "learning_rate": 1.9096671949286847e-06, |
| "loss": 2.2861, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.8201655379984951, |
| "grad_norm": 31.048839569091797, |
| "learning_rate": 1.9017432646592713e-06, |
| "loss": 2.1475, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.8209179834462002, |
| "grad_norm": 16.46122932434082, |
| "learning_rate": 1.8938193343898575e-06, |
| "loss": 2.1201, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.8216704288939052, |
| "grad_norm": 24.316068649291992, |
| "learning_rate": 1.885895404120444e-06, |
| "loss": 1.7188, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.8224228743416102, |
| "grad_norm": 22.87055778503418, |
| "learning_rate": 1.8779714738510301e-06, |
| "loss": 2.0498, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.8231753197893152, |
| "grad_norm": 21.308670043945312, |
| "learning_rate": 1.8700475435816165e-06, |
| "loss": 2.3086, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.8239277652370203, |
| "grad_norm": 24.183080673217773, |
| "learning_rate": 1.8621236133122031e-06, |
| "loss": 2.4404, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.8246802106847254, |
| "grad_norm": 26.43325424194336, |
| "learning_rate": 1.8541996830427893e-06, |
| "loss": 2.7451, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.8254326561324304, |
| "grad_norm": 20.95301628112793, |
| "learning_rate": 1.8462757527733757e-06, |
| "loss": 2.1162, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.8261851015801355, |
| "grad_norm": 22.1946964263916, |
| "learning_rate": 1.8383518225039621e-06, |
| "loss": 2.2295, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.8269375470278405, |
| "grad_norm": 23.528072357177734, |
| "learning_rate": 1.8304278922345483e-06, |
| "loss": 2.2217, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.8276899924755455, |
| "grad_norm": 26.633913040161133, |
| "learning_rate": 1.822503961965135e-06, |
| "loss": 2.5986, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.8284424379232506, |
| "grad_norm": 22.813688278198242, |
| "learning_rate": 1.8145800316957214e-06, |
| "loss": 2.4839, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.8291948833709556, |
| "grad_norm": 19.97185707092285, |
| "learning_rate": 1.8066561014263076e-06, |
| "loss": 2.1719, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.8299473288186606, |
| "grad_norm": 17.617671966552734, |
| "learning_rate": 1.798732171156894e-06, |
| "loss": 1.6826, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.8306997742663657, |
| "grad_norm": 25.46587371826172, |
| "learning_rate": 1.7908082408874801e-06, |
| "loss": 2.4238, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.8314522197140707, |
| "grad_norm": 27.127437591552734, |
| "learning_rate": 1.7828843106180668e-06, |
| "loss": 2.2109, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.8322046651617758, |
| "grad_norm": 39.776519775390625, |
| "learning_rate": 1.7749603803486532e-06, |
| "loss": 2.2764, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.8329571106094809, |
| "grad_norm": 22.824893951416016, |
| "learning_rate": 1.7670364500792394e-06, |
| "loss": 2.8037, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.8337095560571859, |
| "grad_norm": 28.222532272338867, |
| "learning_rate": 1.7591125198098258e-06, |
| "loss": 2.5269, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.8344620015048909, |
| "grad_norm": 17.80474281311035, |
| "learning_rate": 1.7511885895404122e-06, |
| "loss": 1.8408, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.835214446952596, |
| "grad_norm": 36.78727340698242, |
| "learning_rate": 1.7432646592709988e-06, |
| "loss": 2.3691, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.835966892400301, |
| "grad_norm": 18.49757957458496, |
| "learning_rate": 1.735340729001585e-06, |
| "loss": 2.2607, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.836719337848006, |
| "grad_norm": 22.9407958984375, |
| "learning_rate": 1.7274167987321714e-06, |
| "loss": 2.75, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.837471783295711, |
| "grad_norm": 19.55245018005371, |
| "learning_rate": 1.7194928684627576e-06, |
| "loss": 2.248, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.8382242287434161, |
| "grad_norm": 21.383373260498047, |
| "learning_rate": 1.711568938193344e-06, |
| "loss": 2.6201, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.8389766741911211, |
| "grad_norm": 19.142234802246094, |
| "learning_rate": 1.7036450079239304e-06, |
| "loss": 1.8564, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.8397291196388262, |
| "grad_norm": 17.51129913330078, |
| "learning_rate": 1.6957210776545168e-06, |
| "loss": 1.571, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.8404815650865313, |
| "grad_norm": 21.427541732788086, |
| "learning_rate": 1.6877971473851032e-06, |
| "loss": 2.2041, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.8412340105342363, |
| "grad_norm": 22.243871688842773, |
| "learning_rate": 1.6798732171156896e-06, |
| "loss": 2.1689, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.8419864559819413, |
| "grad_norm": 29.453140258789062, |
| "learning_rate": 1.6719492868462758e-06, |
| "loss": 2.4053, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.8427389014296464, |
| "grad_norm": 27.817018508911133, |
| "learning_rate": 1.6640253565768622e-06, |
| "loss": 2.54, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.8434913468773514, |
| "grad_norm": 20.197452545166016, |
| "learning_rate": 1.6561014263074488e-06, |
| "loss": 2.2637, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.8442437923250564, |
| "grad_norm": 22.869123458862305, |
| "learning_rate": 1.648177496038035e-06, |
| "loss": 2.3154, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.8449962377727614, |
| "grad_norm": 29.69762420654297, |
| "learning_rate": 1.6402535657686214e-06, |
| "loss": 2.209, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.8457486832204665, |
| "grad_norm": 21.819568634033203, |
| "learning_rate": 1.6323296354992076e-06, |
| "loss": 2.1567, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.8465011286681715, |
| "grad_norm": 20.180278778076172, |
| "learning_rate": 1.624405705229794e-06, |
| "loss": 2.3389, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.8472535741158767, |
| "grad_norm": 18.55019187927246, |
| "learning_rate": 1.6164817749603806e-06, |
| "loss": 1.8306, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.8480060195635817, |
| "grad_norm": 33.824039459228516, |
| "learning_rate": 1.608557844690967e-06, |
| "loss": 2.9336, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.8487584650112867, |
| "grad_norm": 28.131019592285156, |
| "learning_rate": 1.6006339144215532e-06, |
| "loss": 2.2119, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.8495109104589917, |
| "grad_norm": 29.770904541015625, |
| "learning_rate": 1.5927099841521396e-06, |
| "loss": 1.7197, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.8502633559066968, |
| "grad_norm": 21.487455368041992, |
| "learning_rate": 1.5847860538827258e-06, |
| "loss": 2.4395, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.8510158013544018, |
| "grad_norm": 36.66151809692383, |
| "learning_rate": 1.5768621236133122e-06, |
| "loss": 1.9824, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.8517682468021068, |
| "grad_norm": 38.3604850769043, |
| "learning_rate": 1.5689381933438988e-06, |
| "loss": 2.7559, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.8525206922498119, |
| "grad_norm": 30.512598037719727, |
| "learning_rate": 1.561014263074485e-06, |
| "loss": 2.1714, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.8532731376975169, |
| "grad_norm": 38.16960525512695, |
| "learning_rate": 1.5530903328050714e-06, |
| "loss": 2.3359, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.8540255831452219, |
| "grad_norm": 20.379392623901367, |
| "learning_rate": 1.5451664025356578e-06, |
| "loss": 2.3291, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.8547780285929271, |
| "grad_norm": 22.714763641357422, |
| "learning_rate": 1.537242472266244e-06, |
| "loss": 2.3301, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.8555304740406321, |
| "grad_norm": 18.340160369873047, |
| "learning_rate": 1.5293185419968307e-06, |
| "loss": 1.7939, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.8562829194883371, |
| "grad_norm": 51.088600158691406, |
| "learning_rate": 1.521394611727417e-06, |
| "loss": 2.3945, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.8570353649360422, |
| "grad_norm": 30.266088485717773, |
| "learning_rate": 1.5134706814580033e-06, |
| "loss": 2.0225, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.8577878103837472, |
| "grad_norm": 32.32817459106445, |
| "learning_rate": 1.5055467511885897e-06, |
| "loss": 2.5049, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.8585402558314522, |
| "grad_norm": 16.877460479736328, |
| "learning_rate": 1.4976228209191759e-06, |
| "loss": 1.998, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.8592927012791572, |
| "grad_norm": 27.261184692382812, |
| "learning_rate": 1.4896988906497625e-06, |
| "loss": 2.1865, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.8600451467268623, |
| "grad_norm": 20.137563705444336, |
| "learning_rate": 1.4817749603803489e-06, |
| "loss": 2.1787, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.8607975921745673, |
| "grad_norm": 18.921850204467773, |
| "learning_rate": 1.473851030110935e-06, |
| "loss": 1.6484, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.8615500376222723, |
| "grad_norm": 22.27260971069336, |
| "learning_rate": 1.4659270998415215e-06, |
| "loss": 2.6924, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.8623024830699775, |
| "grad_norm": 23.848426818847656, |
| "learning_rate": 1.4580031695721079e-06, |
| "loss": 2.5029, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.8630549285176825, |
| "grad_norm": 38.04443359375, |
| "learning_rate": 1.450079239302694e-06, |
| "loss": 1.7031, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.8638073739653875, |
| "grad_norm": 33.86138153076172, |
| "learning_rate": 1.4421553090332807e-06, |
| "loss": 2.2695, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.8645598194130926, |
| "grad_norm": 22.020042419433594, |
| "learning_rate": 1.434231378763867e-06, |
| "loss": 2.0498, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.8653122648607976, |
| "grad_norm": 25.139564514160156, |
| "learning_rate": 1.4263074484944533e-06, |
| "loss": 1.9087, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.8660647103085026, |
| "grad_norm": 21.85015106201172, |
| "learning_rate": 1.4183835182250397e-06, |
| "loss": 2.2334, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.8668171557562077, |
| "grad_norm": 29.274450302124023, |
| "learning_rate": 1.4104595879556259e-06, |
| "loss": 2.4326, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.8675696012039127, |
| "grad_norm": 27.56648063659668, |
| "learning_rate": 1.4025356576862125e-06, |
| "loss": 1.917, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.8683220466516177, |
| "grad_norm": 23.9324951171875, |
| "learning_rate": 1.394611727416799e-06, |
| "loss": 2.3818, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.8690744920993227, |
| "grad_norm": 25.811965942382812, |
| "learning_rate": 1.3866877971473853e-06, |
| "loss": 2.2402, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.8698269375470279, |
| "grad_norm": 20.791032791137695, |
| "learning_rate": 1.3787638668779715e-06, |
| "loss": 1.9756, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.8705793829947329, |
| "grad_norm": 24.097496032714844, |
| "learning_rate": 1.370839936608558e-06, |
| "loss": 2.2168, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.871331828442438, |
| "grad_norm": 22.489957809448242, |
| "learning_rate": 1.3629160063391445e-06, |
| "loss": 2.1572, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.872084273890143, |
| "grad_norm": 37.01300811767578, |
| "learning_rate": 1.3549920760697307e-06, |
| "loss": 2.4814, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.872836719337848, |
| "grad_norm": 18.329721450805664, |
| "learning_rate": 1.3470681458003171e-06, |
| "loss": 2.2793, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.873589164785553, |
| "grad_norm": 27.86050033569336, |
| "learning_rate": 1.3391442155309033e-06, |
| "loss": 2.1816, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.8743416102332581, |
| "grad_norm": 23.993194580078125, |
| "learning_rate": 1.3312202852614897e-06, |
| "loss": 2.7588, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.8750940556809631, |
| "grad_norm": 22.006431579589844, |
| "learning_rate": 1.3232963549920763e-06, |
| "loss": 1.7539, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.8758465011286681, |
| "grad_norm": 32.45913314819336, |
| "learning_rate": 1.3153724247226625e-06, |
| "loss": 2.4727, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.8765989465763732, |
| "grad_norm": 22.738069534301758, |
| "learning_rate": 1.307448494453249e-06, |
| "loss": 2.1963, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.8773513920240783, |
| "grad_norm": 19.741193771362305, |
| "learning_rate": 1.2995245641838353e-06, |
| "loss": 1.9995, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.8781038374717833, |
| "grad_norm": 22.3425235748291, |
| "learning_rate": 1.2916006339144215e-06, |
| "loss": 2.584, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.8788562829194884, |
| "grad_norm": 18.124391555786133, |
| "learning_rate": 1.283676703645008e-06, |
| "loss": 1.9648, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.8796087283671934, |
| "grad_norm": 20.027179718017578, |
| "learning_rate": 1.2757527733755946e-06, |
| "loss": 2.0439, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.8803611738148984, |
| "grad_norm": 20.139663696289062, |
| "learning_rate": 1.2678288431061807e-06, |
| "loss": 2.1653, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8811136192626035, |
| "grad_norm": 18.437063217163086, |
| "learning_rate": 1.2599049128367671e-06, |
| "loss": 2.1299, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.8818660647103085, |
| "grad_norm": 19.62114715576172, |
| "learning_rate": 1.2519809825673536e-06, |
| "loss": 1.9883, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.8826185101580135, |
| "grad_norm": 32.16853713989258, |
| "learning_rate": 1.24405705229794e-06, |
| "loss": 1.9561, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.8833709556057185, |
| "grad_norm": 31.71169090270996, |
| "learning_rate": 1.2361331220285262e-06, |
| "loss": 2.4297, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.8841234010534236, |
| "grad_norm": 27.213788986206055, |
| "learning_rate": 1.2282091917591128e-06, |
| "loss": 1.7173, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.8848758465011287, |
| "grad_norm": 17.10274314880371, |
| "learning_rate": 1.220285261489699e-06, |
| "loss": 2.0039, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.8856282919488337, |
| "grad_norm": 20.940834045410156, |
| "learning_rate": 1.2123613312202854e-06, |
| "loss": 2.0146, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.8863807373965388, |
| "grad_norm": 21.25338363647461, |
| "learning_rate": 1.2044374009508718e-06, |
| "loss": 2.4961, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.8871331828442438, |
| "grad_norm": 18.00691032409668, |
| "learning_rate": 1.1965134706814582e-06, |
| "loss": 1.9626, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.8878856282919488, |
| "grad_norm": 28.76074981689453, |
| "learning_rate": 1.1885895404120444e-06, |
| "loss": 2.0713, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.8886380737396539, |
| "grad_norm": 34.1113395690918, |
| "learning_rate": 1.1806656101426308e-06, |
| "loss": 1.8286, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.8893905191873589, |
| "grad_norm": 19.671710968017578, |
| "learning_rate": 1.1727416798732172e-06, |
| "loss": 2.1851, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.8901429646350639, |
| "grad_norm": 27.864065170288086, |
| "learning_rate": 1.1648177496038036e-06, |
| "loss": 2.6172, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.890895410082769, |
| "grad_norm": 23.696453094482422, |
| "learning_rate": 1.15689381933439e-06, |
| "loss": 2.0938, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.891647855530474, |
| "grad_norm": 25.530902862548828, |
| "learning_rate": 1.1489698890649762e-06, |
| "loss": 2.0488, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.8924003009781791, |
| "grad_norm": 18.939353942871094, |
| "learning_rate": 1.1410459587955628e-06, |
| "loss": 1.9146, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.8931527464258842, |
| "grad_norm": 21.1546630859375, |
| "learning_rate": 1.133122028526149e-06, |
| "loss": 1.918, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.8939051918735892, |
| "grad_norm": 16.869848251342773, |
| "learning_rate": 1.1251980982567354e-06, |
| "loss": 1.4795, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.8946576373212942, |
| "grad_norm": 23.08144760131836, |
| "learning_rate": 1.1172741679873218e-06, |
| "loss": 2.001, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.8954100827689992, |
| "grad_norm": 17.821651458740234, |
| "learning_rate": 1.1093502377179082e-06, |
| "loss": 1.7251, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.8961625282167043, |
| "grad_norm": 27.713359832763672, |
| "learning_rate": 1.1014263074484946e-06, |
| "loss": 2.6104, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.8969149736644093, |
| "grad_norm": 25.884157180786133, |
| "learning_rate": 1.093502377179081e-06, |
| "loss": 1.751, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.8976674191121143, |
| "grad_norm": 28.492116928100586, |
| "learning_rate": 1.0855784469096672e-06, |
| "loss": 2.0498, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.8984198645598194, |
| "grad_norm": 33.259117126464844, |
| "learning_rate": 1.0776545166402536e-06, |
| "loss": 2.5342, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.8991723100075244, |
| "grad_norm": 31.423341751098633, |
| "learning_rate": 1.06973058637084e-06, |
| "loss": 1.9385, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.8999247554552295, |
| "grad_norm": 21.758747100830078, |
| "learning_rate": 1.0618066561014264e-06, |
| "loss": 2.3877, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.9006772009029346, |
| "grad_norm": 32.80032730102539, |
| "learning_rate": 1.0538827258320128e-06, |
| "loss": 1.9854, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.9014296463506396, |
| "grad_norm": 24.677797317504883, |
| "learning_rate": 1.045958795562599e-06, |
| "loss": 2.1455, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.9021820917983446, |
| "grad_norm": 28.141752243041992, |
| "learning_rate": 1.0380348652931856e-06, |
| "loss": 2.4795, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.9029345372460497, |
| "grad_norm": 23.31185531616211, |
| "learning_rate": 1.0301109350237718e-06, |
| "loss": 2.3716, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.9036869826937547, |
| "grad_norm": 41.367584228515625, |
| "learning_rate": 1.0221870047543582e-06, |
| "loss": 2.3271, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.9044394281414597, |
| "grad_norm": 21.5883731842041, |
| "learning_rate": 1.0142630744849446e-06, |
| "loss": 2.2163, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.9051918735891648, |
| "grad_norm": 27.00819969177246, |
| "learning_rate": 1.006339144215531e-06, |
| "loss": 2.7285, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.9059443190368698, |
| "grad_norm": 18.80669593811035, |
| "learning_rate": 9.984152139461172e-07, |
| "loss": 1.9658, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.9066967644845748, |
| "grad_norm": 24.32224464416504, |
| "learning_rate": 9.904912836767039e-07, |
| "loss": 2.1133, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.90744920993228, |
| "grad_norm": 21.93855094909668, |
| "learning_rate": 9.8256735340729e-07, |
| "loss": 2.0918, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.908201655379985, |
| "grad_norm": 21.887161254882812, |
| "learning_rate": 9.746434231378764e-07, |
| "loss": 1.9634, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.90895410082769, |
| "grad_norm": 32.432167053222656, |
| "learning_rate": 9.667194928684629e-07, |
| "loss": 2.4023, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.909706546275395, |
| "grad_norm": 22.609710693359375, |
| "learning_rate": 9.587955625990493e-07, |
| "loss": 2.417, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.9104589917231001, |
| "grad_norm": 36.706790924072266, |
| "learning_rate": 9.508716323296357e-07, |
| "loss": 1.7422, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.9112114371708051, |
| "grad_norm": 46.86161804199219, |
| "learning_rate": 9.42947702060222e-07, |
| "loss": 2.5244, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.9119638826185101, |
| "grad_norm": 31.959516525268555, |
| "learning_rate": 9.350237717908083e-07, |
| "loss": 2.3408, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.9127163280662152, |
| "grad_norm": 21.65765380859375, |
| "learning_rate": 9.270998415213947e-07, |
| "loss": 1.7529, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.9134687735139202, |
| "grad_norm": 18.55181884765625, |
| "learning_rate": 9.191759112519811e-07, |
| "loss": 1.8828, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.9142212189616253, |
| "grad_norm": 21.481040954589844, |
| "learning_rate": 9.112519809825675e-07, |
| "loss": 2.3438, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.9149736644093304, |
| "grad_norm": 29.45285987854004, |
| "learning_rate": 9.033280507131538e-07, |
| "loss": 2.1631, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.9157261098570354, |
| "grad_norm": 29.11750030517578, |
| "learning_rate": 8.954041204437401e-07, |
| "loss": 2.2168, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.9164785553047404, |
| "grad_norm": 24.79958152770996, |
| "learning_rate": 8.874801901743266e-07, |
| "loss": 2.1729, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.9172310007524455, |
| "grad_norm": 23.481903076171875, |
| "learning_rate": 8.795562599049129e-07, |
| "loss": 1.9126, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.9179834462001505, |
| "grad_norm": 21.63692855834961, |
| "learning_rate": 8.716323296354994e-07, |
| "loss": 2.04, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.9187358916478555, |
| "grad_norm": 30.64655876159668, |
| "learning_rate": 8.637083993660857e-07, |
| "loss": 2.207, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.9194883370955605, |
| "grad_norm": 37.03800582885742, |
| "learning_rate": 8.55784469096672e-07, |
| "loss": 2.0947, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.9202407825432656, |
| "grad_norm": 29.37847328186035, |
| "learning_rate": 8.478605388272584e-07, |
| "loss": 1.6558, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.9209932279909706, |
| "grad_norm": 27.9404296875, |
| "learning_rate": 8.399366085578448e-07, |
| "loss": 2.4248, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.9217456734386757, |
| "grad_norm": 25.742717742919922, |
| "learning_rate": 8.320126782884311e-07, |
| "loss": 2.4385, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.9224981188863808, |
| "grad_norm": 17.55823516845703, |
| "learning_rate": 8.240887480190175e-07, |
| "loss": 2.1953, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.9232505643340858, |
| "grad_norm": 21.086668014526367, |
| "learning_rate": 8.161648177496038e-07, |
| "loss": 2.3545, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.9240030097817908, |
| "grad_norm": 18.935644149780273, |
| "learning_rate": 8.082408874801903e-07, |
| "loss": 1.7041, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.9247554552294959, |
| "grad_norm": 19.29863739013672, |
| "learning_rate": 8.003169572107766e-07, |
| "loss": 1.8579, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.9255079006772009, |
| "grad_norm": 28.539777755737305, |
| "learning_rate": 7.923930269413629e-07, |
| "loss": 2.9229, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.9262603461249059, |
| "grad_norm": 23.44830894470215, |
| "learning_rate": 7.844690966719494e-07, |
| "loss": 2.0674, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.927012791572611, |
| "grad_norm": 24.71790313720703, |
| "learning_rate": 7.765451664025357e-07, |
| "loss": 2.1211, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.927765237020316, |
| "grad_norm": 21.996749877929688, |
| "learning_rate": 7.68621236133122e-07, |
| "loss": 2.1787, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.928517682468021, |
| "grad_norm": 30.374027252197266, |
| "learning_rate": 7.606973058637085e-07, |
| "loss": 2.1143, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.9292701279157262, |
| "grad_norm": 27.452085494995117, |
| "learning_rate": 7.527733755942948e-07, |
| "loss": 2.0303, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.9300225733634312, |
| "grad_norm": 22.854434967041016, |
| "learning_rate": 7.448494453248812e-07, |
| "loss": 2.0107, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.9307750188111362, |
| "grad_norm": 41.897850036621094, |
| "learning_rate": 7.369255150554675e-07, |
| "loss": 1.9414, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.9315274642588413, |
| "grad_norm": 17.855621337890625, |
| "learning_rate": 7.290015847860539e-07, |
| "loss": 2.1172, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.9322799097065463, |
| "grad_norm": 23.190418243408203, |
| "learning_rate": 7.210776545166403e-07, |
| "loss": 2.3647, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.9330323551542513, |
| "grad_norm": 22.126853942871094, |
| "learning_rate": 7.131537242472266e-07, |
| "loss": 2.7139, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.9337848006019563, |
| "grad_norm": 23.941364288330078, |
| "learning_rate": 7.052297939778129e-07, |
| "loss": 2.2729, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.9345372460496614, |
| "grad_norm": 30.85197639465332, |
| "learning_rate": 6.973058637083995e-07, |
| "loss": 2.5273, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.9352896914973664, |
| "grad_norm": 42.83109664916992, |
| "learning_rate": 6.893819334389858e-07, |
| "loss": 2.3301, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.9360421369450714, |
| "grad_norm": 27.508867263793945, |
| "learning_rate": 6.814580031695723e-07, |
| "loss": 2.6396, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.9367945823927766, |
| "grad_norm": 26.918298721313477, |
| "learning_rate": 6.735340729001586e-07, |
| "loss": 2.4189, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.9375470278404816, |
| "grad_norm": 19.541494369506836, |
| "learning_rate": 6.656101426307449e-07, |
| "loss": 2.501, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.9382994732881866, |
| "grad_norm": 21.202077865600586, |
| "learning_rate": 6.576862123613313e-07, |
| "loss": 2.0938, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.9390519187358917, |
| "grad_norm": 20.202423095703125, |
| "learning_rate": 6.497622820919177e-07, |
| "loss": 2.1953, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.9398043641835967, |
| "grad_norm": 24.55869483947754, |
| "learning_rate": 6.41838351822504e-07, |
| "loss": 2.1191, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.9405568096313017, |
| "grad_norm": 35.99618911743164, |
| "learning_rate": 6.339144215530904e-07, |
| "loss": 2.4551, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.9413092550790068, |
| "grad_norm": 27.567293167114258, |
| "learning_rate": 6.259904912836768e-07, |
| "loss": 1.7285, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.9420617005267118, |
| "grad_norm": 24.59737205505371, |
| "learning_rate": 6.180665610142631e-07, |
| "loss": 2.9912, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.9428141459744168, |
| "grad_norm": 20.435455322265625, |
| "learning_rate": 6.101426307448495e-07, |
| "loss": 2.1045, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.9435665914221218, |
| "grad_norm": 25.387649536132812, |
| "learning_rate": 6.022187004754359e-07, |
| "loss": 1.8057, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.944319036869827, |
| "grad_norm": 19.371496200561523, |
| "learning_rate": 5.942947702060222e-07, |
| "loss": 2.0029, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.945071482317532, |
| "grad_norm": 19.404163360595703, |
| "learning_rate": 5.863708399366086e-07, |
| "loss": 1.791, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.945823927765237, |
| "grad_norm": 36.32655715942383, |
| "learning_rate": 5.78446909667195e-07, |
| "loss": 1.9404, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.9465763732129421, |
| "grad_norm": 26.90781021118164, |
| "learning_rate": 5.705229793977814e-07, |
| "loss": 2.1074, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.9473288186606471, |
| "grad_norm": 25.104326248168945, |
| "learning_rate": 5.625990491283677e-07, |
| "loss": 2.0557, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.9480812641083521, |
| "grad_norm": 20.98943519592285, |
| "learning_rate": 5.546751188589541e-07, |
| "loss": 1.9453, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.9488337095560572, |
| "grad_norm": 18.50478172302246, |
| "learning_rate": 5.467511885895405e-07, |
| "loss": 2.0991, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.9495861550037622, |
| "grad_norm": 32.75830078125, |
| "learning_rate": 5.388272583201268e-07, |
| "loss": 2.3687, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.9503386004514672, |
| "grad_norm": 21.3521785736084, |
| "learning_rate": 5.309033280507132e-07, |
| "loss": 2.4414, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.9510910458991723, |
| "grad_norm": 20.554296493530273, |
| "learning_rate": 5.229793977812995e-07, |
| "loss": 1.7754, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.9518434913468774, |
| "grad_norm": 22.69301414489746, |
| "learning_rate": 5.150554675118859e-07, |
| "loss": 2.3613, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.9525959367945824, |
| "grad_norm": 26.686033248901367, |
| "learning_rate": 5.071315372424723e-07, |
| "loss": 2.6074, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.9533483822422875, |
| "grad_norm": 19.40122413635254, |
| "learning_rate": 4.992076069730586e-07, |
| "loss": 2.0215, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.9541008276899925, |
| "grad_norm": 30.532285690307617, |
| "learning_rate": 4.91283676703645e-07, |
| "loss": 1.71, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.9548532731376975, |
| "grad_norm": 21.527864456176758, |
| "learning_rate": 4.833597464342314e-07, |
| "loss": 2.6123, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.9556057185854026, |
| "grad_norm": 23.632919311523438, |
| "learning_rate": 4.7543581616481783e-07, |
| "loss": 2.293, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.9563581640331076, |
| "grad_norm": 20.96917724609375, |
| "learning_rate": 4.6751188589540413e-07, |
| "loss": 2.4497, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.9571106094808126, |
| "grad_norm": 36.56161880493164, |
| "learning_rate": 4.5958795562599054e-07, |
| "loss": 2.1211, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.9578630549285176, |
| "grad_norm": 30.125322341918945, |
| "learning_rate": 4.516640253565769e-07, |
| "loss": 2.375, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.9586155003762227, |
| "grad_norm": 28.567535400390625, |
| "learning_rate": 4.437400950871633e-07, |
| "loss": 2.1699, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.9593679458239278, |
| "grad_norm": 26.635305404663086, |
| "learning_rate": 4.358161648177497e-07, |
| "loss": 2.0947, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.9601203912716328, |
| "grad_norm": 25.772676467895508, |
| "learning_rate": 4.27892234548336e-07, |
| "loss": 2.0137, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.9608728367193379, |
| "grad_norm": 22.599668502807617, |
| "learning_rate": 4.199683042789224e-07, |
| "loss": 2.7881, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.9616252821670429, |
| "grad_norm": 24.592472076416016, |
| "learning_rate": 4.1204437400950875e-07, |
| "loss": 2.1631, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.9623777276147479, |
| "grad_norm": 23.07600212097168, |
| "learning_rate": 4.0412044374009516e-07, |
| "loss": 2.127, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.963130173062453, |
| "grad_norm": 24.716087341308594, |
| "learning_rate": 3.9619651347068146e-07, |
| "loss": 1.8828, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.963882618510158, |
| "grad_norm": 21.794273376464844, |
| "learning_rate": 3.8827258320126786e-07, |
| "loss": 2.3472, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.964635063957863, |
| "grad_norm": 28.48988914489746, |
| "learning_rate": 3.8034865293185427e-07, |
| "loss": 2.3457, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.9653875094055681, |
| "grad_norm": 21.161699295043945, |
| "learning_rate": 3.724247226624406e-07, |
| "loss": 2.4102, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.9661399548532731, |
| "grad_norm": 26.318927764892578, |
| "learning_rate": 3.6450079239302697e-07, |
| "loss": 2.4238, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.9668924003009782, |
| "grad_norm": 33.44709014892578, |
| "learning_rate": 3.565768621236133e-07, |
| "loss": 2.5547, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.9676448457486833, |
| "grad_norm": 27.735422134399414, |
| "learning_rate": 3.486529318541997e-07, |
| "loss": 2.8076, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.9683972911963883, |
| "grad_norm": 20.180845260620117, |
| "learning_rate": 3.4072900158478613e-07, |
| "loss": 2.3311, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.9691497366440933, |
| "grad_norm": 26.456632614135742, |
| "learning_rate": 3.3280507131537243e-07, |
| "loss": 2.4238, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.9699021820917983, |
| "grad_norm": 24.67302131652832, |
| "learning_rate": 3.2488114104595883e-07, |
| "loss": 2.251, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.9706546275395034, |
| "grad_norm": 25.135757446289062, |
| "learning_rate": 3.169572107765452e-07, |
| "loss": 2.0205, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.9714070729872084, |
| "grad_norm": 46.5058708190918, |
| "learning_rate": 3.0903328050713154e-07, |
| "loss": 2.8018, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.9721595184349134, |
| "grad_norm": 29.164596557617188, |
| "learning_rate": 3.0110935023771794e-07, |
| "loss": 2.541, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.9729119638826185, |
| "grad_norm": 19.991748809814453, |
| "learning_rate": 2.931854199683043e-07, |
| "loss": 2.126, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.9736644093303235, |
| "grad_norm": 19.993240356445312, |
| "learning_rate": 2.852614896988907e-07, |
| "loss": 2.1357, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.9744168547780286, |
| "grad_norm": 30.28998565673828, |
| "learning_rate": 2.7733755942947705e-07, |
| "loss": 2.7969, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.9751693002257337, |
| "grad_norm": 32.665958404541016, |
| "learning_rate": 2.694136291600634e-07, |
| "loss": 2.0776, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.9759217456734387, |
| "grad_norm": 32.19696044921875, |
| "learning_rate": 2.6148969889064975e-07, |
| "loss": 2.1387, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.9766741911211437, |
| "grad_norm": 24.746389389038086, |
| "learning_rate": 2.5356576862123616e-07, |
| "loss": 1.9941, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.9774266365688488, |
| "grad_norm": 30.807029724121094, |
| "learning_rate": 2.456418383518225e-07, |
| "loss": 2.4258, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.9781790820165538, |
| "grad_norm": 19.208616256713867, |
| "learning_rate": 2.3771790808240892e-07, |
| "loss": 2.1309, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9789315274642588, |
| "grad_norm": 24.626890182495117, |
| "learning_rate": 2.2979397781299527e-07, |
| "loss": 2.1152, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.9796839729119639, |
| "grad_norm": 18.25760269165039, |
| "learning_rate": 2.2187004754358165e-07, |
| "loss": 2.1079, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.9804364183596689, |
| "grad_norm": 19.399744033813477, |
| "learning_rate": 2.13946117274168e-07, |
| "loss": 1.9756, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.9811888638073739, |
| "grad_norm": 32.49583053588867, |
| "learning_rate": 2.0602218700475438e-07, |
| "loss": 1.9805, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.981941309255079, |
| "grad_norm": 21.51534652709961, |
| "learning_rate": 1.9809825673534073e-07, |
| "loss": 1.9082, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.9826937547027841, |
| "grad_norm": 29.631881713867188, |
| "learning_rate": 1.9017432646592713e-07, |
| "loss": 2.188, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.9834462001504891, |
| "grad_norm": 23.09160804748535, |
| "learning_rate": 1.8225039619651348e-07, |
| "loss": 2.3164, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.9841986455981941, |
| "grad_norm": 22.58462905883789, |
| "learning_rate": 1.7432646592709986e-07, |
| "loss": 2.4209, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.9849510910458992, |
| "grad_norm": 35.65068435668945, |
| "learning_rate": 1.6640253565768621e-07, |
| "loss": 2.5005, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.9857035364936042, |
| "grad_norm": 19.236482620239258, |
| "learning_rate": 1.584786053882726e-07, |
| "loss": 1.9756, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.9864559819413092, |
| "grad_norm": 22.260229110717773, |
| "learning_rate": 1.5055467511885897e-07, |
| "loss": 2.3398, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.9872084273890143, |
| "grad_norm": 23.633285522460938, |
| "learning_rate": 1.4263074484944535e-07, |
| "loss": 2.1797, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.9879608728367193, |
| "grad_norm": 25.102895736694336, |
| "learning_rate": 1.347068145800317e-07, |
| "loss": 2.6221, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.9887133182844243, |
| "grad_norm": 22.20816993713379, |
| "learning_rate": 1.2678288431061808e-07, |
| "loss": 2.418, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.9894657637321295, |
| "grad_norm": 19.845365524291992, |
| "learning_rate": 1.1885895404120446e-07, |
| "loss": 1.8213, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.9902182091798345, |
| "grad_norm": 31.5147647857666, |
| "learning_rate": 1.1093502377179082e-07, |
| "loss": 2.2051, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.9909706546275395, |
| "grad_norm": 21.427370071411133, |
| "learning_rate": 1.0301109350237719e-07, |
| "loss": 2.3633, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.9917231000752446, |
| "grad_norm": 21.501970291137695, |
| "learning_rate": 9.508716323296357e-08, |
| "loss": 2.2607, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.9924755455229496, |
| "grad_norm": 24.47955322265625, |
| "learning_rate": 8.716323296354993e-08, |
| "loss": 1.9883, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.9932279909706546, |
| "grad_norm": 33.163780212402344, |
| "learning_rate": 7.92393026941363e-08, |
| "loss": 1.9531, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.9939804364183596, |
| "grad_norm": 25.771236419677734, |
| "learning_rate": 7.131537242472267e-08, |
| "loss": 2.1113, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.9947328818660647, |
| "grad_norm": 30.363033294677734, |
| "learning_rate": 6.339144215530904e-08, |
| "loss": 2.3027, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.9954853273137697, |
| "grad_norm": 22.448755264282227, |
| "learning_rate": 5.546751188589541e-08, |
| "loss": 2.293, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.9962377727614747, |
| "grad_norm": 23.290424346923828, |
| "learning_rate": 4.754358161648178e-08, |
| "loss": 2.6187, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.9969902182091799, |
| "grad_norm": 24.690128326416016, |
| "learning_rate": 3.961965134706815e-08, |
| "loss": 2.208, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.9977426636568849, |
| "grad_norm": 22.215335845947266, |
| "learning_rate": 3.169572107765452e-08, |
| "loss": 2.2271, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.9984951091045899, |
| "grad_norm": 37.365562438964844, |
| "learning_rate": 2.377179080824089e-08, |
| "loss": 2.3867, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.999247554552295, |
| "grad_norm": 21.778207778930664, |
| "learning_rate": 1.584786053882726e-08, |
| "loss": 2.248, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 24.791046142578125, |
| "learning_rate": 7.92393026941363e-09, |
| "loss": 2.4365, |
| "step": 1329 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 1329, |
| "total_flos": 2.392704719865774e+18, |
| "train_loss": 2.812455360292043, |
| "train_runtime": 1486.3359, |
| "train_samples_per_second": 228.75, |
| "train_steps_per_second": 0.894 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 1329, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.392704719865774e+18, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|